blob: 47bf2803edd6297b9513b3a2ebaaa56f51f9f29e [file] [log] [blame]
Daniel Veillardbe70ff71999-07-05 16:50:46 +00001/*
2 * testHTML.c : a small tester program for HTML input.
3 *
4 * See Copyright for the status of this software.
5 *
6 * Daniel.Veillard@w3.org
7 */
8
Bjorn Reese70a9da52001-04-21 16:57:29 +00009#include "libxml.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000010
Daniel Veillard361d8452000-04-03 19:48:13 +000011#ifdef LIBXML_HTML_ENABLED
12
Bjorn Reese70a9da52001-04-21 16:57:29 +000013#ifdef WIN32
14#undef LIBXML_DLL_IMPORT
15#endif
16
Daniel Veillard7f7d1111999-09-22 09:46:25 +000017#include <string.h>
Daniel Veillard7c1206f1999-10-14 09:10:25 +000018#include <stdarg.h>
19
Daniel Veillard7f7d1111999-09-22 09:46:25 +000020
21#ifdef HAVE_SYS_TYPES_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000022#include <sys/types.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000023#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000024#ifdef HAVE_SYS_STAT_H
25#include <sys/stat.h>
26#endif
27#ifdef HAVE_FCNTL_H
28#include <fcntl.h>
29#endif
30#ifdef HAVE_UNISTD_H
31#include <unistd.h>
32#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000033#ifdef HAVE_STDLIB_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000034#include <stdlib.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000035#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000036
Daniel Veillard361d8452000-04-03 19:48:13 +000037#include <libxml/xmlmemory.h>
38#include <libxml/HTMLparser.h>
39#include <libxml/HTMLtree.h>
40#include <libxml/debugXML.h>
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +000041#include <libxml/xmlerror.h>
Daniel Veillardbe70ff71999-07-05 16:50:46 +000042
/*
 * Command-line switches. Each counter is bumped once per occurrence of
 * the matching option on the command line.
 */
#ifdef LIBXML_DEBUG_ENABLED
static int debug = 0;		/* dump the tree with xmlDebugDumpDocument */
#endif
static int copy = 0;		/* round-trip the tree through xmlCopyDoc */
static int sax = 0;		/* run the SAX callbacks instead of building a tree */
static int repeat = 0;		/* parse each file 100 * repeat times */
static int noout = 0;		/* suppress the output of the result */
static int push = 0;		/* use the push parser interface */
static char *encoding = NULL;	/* output encoding requested with --encode */
Daniel Veillardbe70ff71999-07-05 16:50:46 +000052
Daniel Veillard7c1206f1999-10-14 09:10:25 +000053xmlSAXHandler emptySAXHandlerStruct = {
54 NULL, /* internalSubset */
55 NULL, /* isStandalone */
56 NULL, /* hasInternalSubset */
57 NULL, /* hasExternalSubset */
58 NULL, /* resolveEntity */
59 NULL, /* getEntity */
60 NULL, /* entityDecl */
61 NULL, /* notationDecl */
62 NULL, /* attributeDecl */
63 NULL, /* elementDecl */
64 NULL, /* unparsedEntityDecl */
65 NULL, /* setDocumentLocator */
66 NULL, /* startDocument */
67 NULL, /* endDocument */
68 NULL, /* startElement */
69 NULL, /* endElement */
70 NULL, /* reference */
71 NULL, /* characters */
72 NULL, /* ignorableWhitespace */
73 NULL, /* processingInstruction */
74 NULL, /* comment */
75 NULL, /* xmlParserWarning */
76 NULL, /* xmlParserError */
77 NULL, /* xmlParserError */
78 NULL, /* getParameterEntity */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000079 NULL, /* cdataBlock */
80 NULL /* externalSubset */
Daniel Veillard7c1206f1999-10-14 09:10:25 +000081};
82
83xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
84extern xmlSAXHandlerPtr debugSAXHandler;
85
86/************************************************************************
87 * *
88 * Debug Handlers *
89 * *
90 ************************************************************************/
91
92/**
93 * isStandaloneDebug:
94 * @ctxt: An XML parser context
95 *
96 * Is this document tagged standalone ?
97 *
98 * Returns 1 if true
99 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000100static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000101isStandaloneDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000102{
103 fprintf(stdout, "SAX.isStandalone()\n");
104 return(0);
105}
106
107/**
108 * hasInternalSubsetDebug:
109 * @ctxt: An XML parser context
110 *
111 * Does this document has an internal subset
112 *
113 * Returns 1 if true
114 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000115static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000116hasInternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000117{
118 fprintf(stdout, "SAX.hasInternalSubset()\n");
119 return(0);
120}
121
122/**
123 * hasExternalSubsetDebug:
124 * @ctxt: An XML parser context
125 *
126 * Does this document has an external subset
127 *
128 * Returns 1 if true
129 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000130static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000131hasExternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000132{
133 fprintf(stdout, "SAX.hasExternalSubset()\n");
134 return(0);
135}
136
137/**
138 * hasInternalSubsetDebug:
139 * @ctxt: An XML parser context
140 *
141 * Does this document has an internal subset
142 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000143static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000144internalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000145 const xmlChar *ExternalID, const xmlChar *SystemID)
146{
Daniel Veillard808a3f12000-08-17 13:50:51 +0000147 fprintf(stdout, "SAX.internalSubset(%s,", name);
148 if (ExternalID == NULL)
149 fprintf(stdout, " ,");
150 else
151 fprintf(stdout, " %s,", ExternalID);
152 if (SystemID == NULL)
153 fprintf(stdout, " )\n");
154 else
155 fprintf(stdout, " %s)\n", SystemID);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000156}
157
158/**
159 * resolveEntityDebug:
160 * @ctxt: An XML parser context
161 * @publicId: The public ID of the entity
162 * @systemId: The system ID of the entity
163 *
164 * Special entity resolver, better left to the parser, it has
165 * more context than the application layer.
166 * The default behaviour is to NOT resolve the entities, in that case
167 * the ENTITY_REF nodes are built in the structure (and the parameter
168 * values).
169 *
170 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
171 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000172static xmlParserInputPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000173resolveEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *publicId, const xmlChar *systemId)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000174{
175 /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
176
177
178 fprintf(stdout, "SAX.resolveEntity(");
179 if (publicId != NULL)
180 fprintf(stdout, "%s", (char *)publicId);
181 else
182 fprintf(stdout, " ");
183 if (systemId != NULL)
184 fprintf(stdout, ", %s)\n", (char *)systemId);
185 else
186 fprintf(stdout, ", )\n");
187/*********
188 if (systemId != NULL) {
189 return(xmlNewInputFromFile(ctxt, (char *) systemId));
190 }
191 *********/
192 return(NULL);
193}
194
195/**
196 * getEntityDebug:
197 * @ctxt: An XML parser context
198 * @name: The entity name
199 *
200 * Get an entity by name
201 *
202 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
203 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000204static xmlEntityPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000205getEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000206{
207 fprintf(stdout, "SAX.getEntity(%s)\n", name);
208 return(NULL);
209}
210
211/**
212 * getParameterEntityDebug:
213 * @ctxt: An XML parser context
214 * @name: The entity name
215 *
216 * Get a parameter entity by name
217 *
218 * Returns the xmlParserInputPtr
219 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000220static xmlEntityPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000221getParameterEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000222{
223 fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
224 return(NULL);
225}
226
227
228/**
229 * entityDeclDebug:
230 * @ctxt: An XML parser context
231 * @name: the entity name
232 * @type: the entity type
233 * @publicId: The public ID of the entity
234 * @systemId: The system ID of the entity
235 * @content: the entity value (without processing).
236 *
237 * An entity definition has been parsed
238 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000239static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000240entityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000241 const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
242{
243 fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
244 name, type, publicId, systemId, content);
245}
246
247/**
248 * attributeDeclDebug:
249 * @ctxt: An XML parser context
250 * @name: the attribute name
251 * @type: the attribute type
252 *
253 * An attribute definition has been parsed
254 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000255static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000256attributeDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *elem, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000257 int type, int def, const xmlChar *defaultValue,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000258 xmlEnumerationPtr tree ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000259{
260 fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
261 elem, name, type, def, defaultValue);
262}
263
264/**
265 * elementDeclDebug:
266 * @ctxt: An XML parser context
267 * @name: the element name
268 * @type: the element type
269 * @content: the element value (without processing).
270 *
271 * An element definition has been parsed
272 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000273static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000274elementDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
275 xmlElementContentPtr content ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000276{
277 fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
278 name, type);
279}
280
281/**
282 * notationDeclDebug:
283 * @ctxt: An XML parser context
284 * @name: The name of the notation
285 * @publicId: The public ID of the entity
286 * @systemId: The system ID of the entity
287 *
288 * What to do when a notation declaration has been parsed.
289 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000290static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000291notationDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000292 const xmlChar *publicId, const xmlChar *systemId)
293{
294 fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
295 (char *) name, (char *) publicId, (char *) systemId);
296}
297
298/**
299 * unparsedEntityDeclDebug:
300 * @ctxt: An XML parser context
301 * @name: The name of the entity
302 * @publicId: The public ID of the entity
303 * @systemId: The system ID of the entity
304 * @notationName: the name of the notation
305 *
306 * What to do when an unparsed entity declaration is parsed
307 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000308static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000309unparsedEntityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000310 const xmlChar *publicId, const xmlChar *systemId,
311 const xmlChar *notationName)
312{
313 fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
314 (char *) name, (char *) publicId, (char *) systemId,
315 (char *) notationName);
316}
317
318/**
319 * setDocumentLocatorDebug:
320 * @ctxt: An XML parser context
321 * @loc: A SAX Locator
322 *
323 * Receive the document locator at startup, actually xmlDefaultSAXLocator
324 * Everything is available on the context, so this is useless in our case.
325 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000326static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000327setDocumentLocatorDebug(void *ctx ATTRIBUTE_UNUSED, xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000328{
329 fprintf(stdout, "SAX.setDocumentLocator()\n");
330}
331
332/**
333 * startDocumentDebug:
334 * @ctxt: An XML parser context
335 *
336 * called when the document start being processed.
337 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000338static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000339startDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000340{
341 fprintf(stdout, "SAX.startDocument()\n");
342}
343
344/**
345 * endDocumentDebug:
346 * @ctxt: An XML parser context
347 *
348 * called when the document end has been detected.
349 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000350static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000351endDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000352{
353 fprintf(stdout, "SAX.endDocument()\n");
354}
355
356/**
357 * startElementDebug:
358 * @ctxt: An XML parser context
359 * @name: The element name
360 *
361 * called when an opening tag has been processed.
362 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000363static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000364startElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, const xmlChar **atts)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000365{
366 int i;
367
368 fprintf(stdout, "SAX.startElement(%s", (char *) name);
369 if (atts != NULL) {
370 for (i = 0;(atts[i] != NULL);i++) {
Daniel Veillard808a3f12000-08-17 13:50:51 +0000371 fprintf(stdout, ", %s", atts[i++]);
Daniel Veillarde010c172000-08-28 10:04:51 +0000372 if (atts[i] != NULL) {
373 unsigned char output[40];
374 const unsigned char *att = atts[i];
375 int outlen, attlen;
376 fprintf(stdout, "='");
377 while ((attlen = strlen((char*)att)) > 0) {
378 outlen = sizeof output - 1;
379 htmlEncodeEntities(output, &outlen, att, &attlen, '\'');
380 fprintf(stdout, "%.*s", outlen, output);
381 att += attlen;
382 }
383 fprintf(stdout, "'");
384 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000385 }
386 }
387 fprintf(stdout, ")\n");
388}
389
390/**
391 * endElementDebug:
392 * @ctxt: An XML parser context
393 * @name: The element name
394 *
395 * called when the end of an element has been detected.
396 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000397static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000398endElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000399{
400 fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
401}
402
403/**
404 * charactersDebug:
405 * @ctxt: An XML parser context
406 * @ch: a xmlChar string
407 * @len: the number of xmlChar
408 *
409 * receiving some chars from the parser.
410 * Question: how much at a time ???
411 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000412static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000413charactersDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000414{
Daniel Veillarde010c172000-08-28 10:04:51 +0000415 unsigned char output[40];
Daniel Veillard4948eb42000-08-29 09:41:15 +0000416 int inlen = len, outlen = 30;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000417
Daniel Veillard4948eb42000-08-29 09:41:15 +0000418 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
Daniel Veillarde010c172000-08-28 10:04:51 +0000419 output[outlen] = 0;
Daniel Veillard87b95392000-08-12 21:12:04 +0000420
421 fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000422}
423
424/**
Daniel Veillard7eda8452000-10-14 23:38:43 +0000425 * cdataDebug:
426 * @ctxt: An XML parser context
427 * @ch: a xmlChar string
428 * @len: the number of xmlChar
429 *
430 * receiving some cdata chars from the parser.
431 * Question: how much at a time ???
432 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000433static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000434cdataDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7eda8452000-10-14 23:38:43 +0000435{
436 unsigned char output[40];
437 int inlen = len, outlen = 30;
438
439 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
440 output[outlen] = 0;
441
442 fprintf(stdout, "SAX.cdata(%s, %d)\n", output, len);
443}
444
445/**
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000446 * referenceDebug:
447 * @ctxt: An XML parser context
448 * @name: The entity name
449 *
450 * called when an entity reference is detected.
451 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000452static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000453referenceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000454{
455 fprintf(stdout, "SAX.reference(%s)\n", name);
456}
457
458/**
459 * ignorableWhitespaceDebug:
460 * @ctxt: An XML parser context
461 * @ch: a xmlChar string
462 * @start: the first char in the string
463 * @len: the number of xmlChar
464 *
465 * receiving some ignorable whitespaces from the parser.
466 * Question: how much at a time ???
467 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000468static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000469ignorableWhitespaceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000470{
Daniel Veillard87b95392000-08-12 21:12:04 +0000471 char output[40];
472 int i;
473
474 for (i = 0;(i<len) && (i < 30);i++)
475 output[i] = ch[i];
476 output[i] = 0;
477
478 fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000479}
480
481/**
482 * processingInstructionDebug:
483 * @ctxt: An XML parser context
484 * @target: the target name
485 * @data: the PI data's
486 * @len: the number of xmlChar
487 *
488 * A processing instruction has been parsed.
489 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000490static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000491processingInstructionDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *target,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000492 const xmlChar *data)
493{
494 fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
495 (char *) target, (char *) data);
496}
497
498/**
499 * commentDebug:
500 * @ctxt: An XML parser context
501 * @value: the comment content
502 *
503 * A comment has been parsed.
504 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000505static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000506commentDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000507{
508 fprintf(stdout, "SAX.comment(%s)\n", value);
509}
510
511/**
512 * warningDebug:
513 * @ctxt: An XML parser context
514 * @msg: the message to display/transmit
515 * @...: extra parameters for the message display
516 *
517 * Display and format a warning messages, gives file, line, position and
518 * extra parameters.
519 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000520static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000521warningDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000522{
523 va_list args;
524
525 va_start(args, msg);
526 fprintf(stdout, "SAX.warning: ");
527 vfprintf(stdout, msg, args);
528 va_end(args);
529}
530
531/**
532 * errorDebug:
533 * @ctxt: An XML parser context
534 * @msg: the message to display/transmit
535 * @...: extra parameters for the message display
536 *
537 * Display and format a error messages, gives file, line, position and
538 * extra parameters.
539 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000540static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000541errorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000542{
543 va_list args;
544
545 va_start(args, msg);
546 fprintf(stdout, "SAX.error: ");
547 vfprintf(stdout, msg, args);
548 va_end(args);
549}
550
551/**
552 * fatalErrorDebug:
553 * @ctxt: An XML parser context
554 * @msg: the message to display/transmit
555 * @...: extra parameters for the message display
556 *
557 * Display and format a fatalError messages, gives file, line, position and
558 * extra parameters.
559 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000560static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000561fatalErrorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000562{
563 va_list args;
564
565 va_start(args, msg);
566 fprintf(stdout, "SAX.fatalError: ");
567 vfprintf(stdout, msg, args);
568 va_end(args);
569}
570
571xmlSAXHandler debugSAXHandlerStruct = {
572 internalSubsetDebug,
573 isStandaloneDebug,
574 hasInternalSubsetDebug,
575 hasExternalSubsetDebug,
576 resolveEntityDebug,
577 getEntityDebug,
578 entityDeclDebug,
579 notationDeclDebug,
580 attributeDeclDebug,
581 elementDeclDebug,
582 unparsedEntityDeclDebug,
583 setDocumentLocatorDebug,
584 startDocumentDebug,
585 endDocumentDebug,
586 startElementDebug,
587 endElementDebug,
588 referenceDebug,
589 charactersDebug,
590 ignorableWhitespaceDebug,
591 processingInstructionDebug,
592 commentDebug,
593 warningDebug,
594 errorDebug,
595 fatalErrorDebug,
596 getParameterEntityDebug,
Daniel Veillard7eda8452000-10-14 23:38:43 +0000597 cdataDebug,
598 NULL
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000599};
600
601xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000602/************************************************************************
603 * *
604 * Debug *
605 * *
606 ************************************************************************/
607
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000608static void
609parseSAXFile(char *filename) {
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000610 htmlDocPtr doc = NULL;
611
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000612 /*
613 * Empty callbacks for checking
614 */
Daniel Veillard87b95392000-08-12 21:12:04 +0000615 if (push) {
616 FILE *f;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000617
Daniel Veillard87b95392000-08-12 21:12:04 +0000618 f = fopen(filename, "r");
619 if (f != NULL) {
620 int res, size = 3;
621 char chars[4096];
622 htmlParserCtxtPtr ctxt;
623
624 /* if (repeat) */
625 size = 4096;
626 res = fread(chars, 1, 4, f);
627 if (res > 0) {
628 ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL,
629 chars, res, filename, 0);
630 while ((res = fread(chars, 1, size, f)) > 0) {
631 htmlParseChunk(ctxt, chars, res, 0);
632 }
633 htmlParseChunk(ctxt, chars, 0, 1);
634 doc = ctxt->myDoc;
635 htmlFreeParserCtxt(ctxt);
636 }
637 if (doc != NULL) {
638 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
639 xmlFreeDoc(doc);
640 }
641 fclose(f);
642 }
643 if (!noout) {
644 f = fopen(filename, "r");
645 if (f != NULL) {
646 int res, size = 3;
647 char chars[4096];
648 htmlParserCtxtPtr ctxt;
649
650 /* if (repeat) */
651 size = 4096;
652 res = fread(chars, 1, 4, f);
653 if (res > 0) {
654 ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL,
655 chars, res, filename, 0);
656 while ((res = fread(chars, 1, size, f)) > 0) {
657 htmlParseChunk(ctxt, chars, res, 0);
658 }
659 htmlParseChunk(ctxt, chars, 0, 1);
660 doc = ctxt->myDoc;
661 htmlFreeParserCtxt(ctxt);
662 }
663 if (doc != NULL) {
664 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
665 xmlFreeDoc(doc);
666 }
667 fclose(f);
668 }
669 }
670 } else {
671 doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000672 if (doc != NULL) {
673 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
674 xmlFreeDoc(doc);
675 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000676
677 if (!noout) {
678 /*
679 * Debug callback
680 */
681 doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
682 if (doc != NULL) {
683 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
684 xmlFreeDoc(doc);
685 }
686 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000687 }
688}
689
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000690static void
691parseAndPrintFile(char *filename) {
Daniel Veillard2eac5032000-01-09 21:08:56 +0000692 htmlDocPtr doc = NULL, tmp;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000693
694 /*
695 * build an HTML tree from a string;
696 */
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000697 if (push) {
698 FILE *f;
699
700 f = fopen(filename, "r");
701 if (f != NULL) {
702 int res, size = 3;
Daniel Veillard87b95392000-08-12 21:12:04 +0000703 char chars[4096];
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000704 htmlParserCtxtPtr ctxt;
705
Daniel Veillard87b95392000-08-12 21:12:04 +0000706 /* if (repeat) */
707 size = 4096;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000708 res = fread(chars, 1, 4, f);
709 if (res > 0) {
710 ctxt = htmlCreatePushParserCtxt(NULL, NULL,
711 chars, res, filename, 0);
712 while ((res = fread(chars, 1, size, f)) > 0) {
713 htmlParseChunk(ctxt, chars, res, 0);
714 }
715 htmlParseChunk(ctxt, chars, 0, 1);
716 doc = ctxt->myDoc;
717 htmlFreeParserCtxt(ctxt);
718 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000719 fclose(f);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000720 }
721 } else {
722 doc = htmlParseFile(filename, NULL);
723 }
724 if (doc == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000725 xmlGenericError(xmlGenericErrorContext,
726 "Could not parse %s\n", filename);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000727 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000728
729 /*
730 * test intermediate copy if needed.
731 */
732 if (copy) {
733 tmp = doc;
734 doc = xmlCopyDoc(doc, 1);
735 xmlFreeDoc(tmp);
736 }
737
738 /*
739 * print it.
740 */
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000741 if (!noout) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000742#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000743 if (!debug) {
744 if (encoding)
745 htmlSaveFileEnc("-", doc, encoding);
746 else
747 htmlDocDump(stdout, doc);
748 } else
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000749 xmlDebugDumpDocument(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000750#else
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000751 if (encoding)
752 htmlSaveFileEnc("-", doc, encoding);
753 else
754 htmlDocDump(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000755#endif
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000756 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000757
758 /*
759 * free it.
760 */
761 xmlFreeDoc(doc);
762}
763
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000764int main(int argc, char **argv) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000765 int i, count;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000766 int files = 0;
767
768 for (i = 1; i < argc ; i++) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000769#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000770 if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
771 debug++;
Daniel Veillard361d8452000-04-03 19:48:13 +0000772 else
773#endif
774 if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000775 copy++;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000776 else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
777 push++;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000778 else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
779 sax++;
780 else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
781 noout++;
782 else if ((!strcmp(argv[i], "-repeat")) ||
783 (!strcmp(argv[i], "--repeat")))
784 repeat++;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000785 else if ((!strcmp(argv[i], "-encode")) ||
786 (!strcmp(argv[i], "--encode"))) {
787 i++;
788 encoding = argv[i];
789 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000790 }
791 for (i = 1; i < argc ; i++) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000792 if ((!strcmp(argv[i], "-encode")) ||
793 (!strcmp(argv[i], "--encode"))) {
794 i++;
795 continue;
796 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000797 if (argv[i][0] != '-') {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000798 if (repeat) {
799 for (count = 0;count < 100 * repeat;count++) {
800 if (sax)
801 parseSAXFile(argv[i]);
802 else
803 parseAndPrintFile(argv[i]);
804 }
805 } else {
806 if (sax)
807 parseSAXFile(argv[i]);
808 else
809 parseAndPrintFile(argv[i]);
810 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000811 files ++;
812 }
813 }
814 if (files == 0) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000815 printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000816 argv[0]);
817 printf("\tParse the HTML files and output the result of the parsing\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000818#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000819 printf("\t--debug : dump a debug tree of the in-memory document\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000820#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000821 printf("\t--copy : used to test the internal copy implementation\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000822 printf("\t--sax : debug the sequence of SAX callbacks\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000823 printf("\t--repeat : parse the file 100 times, for timing\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000824 printf("\t--noout : do not print the result\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000825 printf("\t--push : use the push mode parser\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000826 printf("\t--encode encoding : output in the given encoding\n");
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000827 }
Daniel Veillardf5c2c871999-12-01 09:51:45 +0000828 xmlCleanupParser();
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000829 xmlMemoryDump();
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000830
831 return(0);
832}
Daniel Veillard361d8452000-04-03 19:48:13 +0000833#else /* !LIBXML_HTML_ENABLED */
834#include <stdio.h>
/*
 * Fallback entry point used when HTML support is not compiled in:
 * prints a notice naming the program and exits successfully.
 */
int main(int argc, char **argv) {
    (void) argc;	/* only the program name is needed */
    fprintf(stdout, "%s : HTML support not compiled in\n", argv[0]);
    return 0;
}
839#endif