blob: 6ec56a2c67420041b34da361ba68a25a0b2ad5c1 [file] [log] [blame]
/*
 * testHTML.c : a small tester program for HTML input.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
8
Bjorn Reese70a9da52001-04-21 16:57:29 +00009#include "libxml.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000010
Daniel Veillard361d8452000-04-03 19:48:13 +000011#ifdef LIBXML_HTML_ENABLED
12
Daniel Veillard7f7d1111999-09-22 09:46:25 +000013#include <string.h>
Daniel Veillard7c1206f1999-10-14 09:10:25 +000014#include <stdarg.h>
15
Daniel Veillard7f7d1111999-09-22 09:46:25 +000016
17#ifdef HAVE_SYS_TYPES_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000018#include <sys/types.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000019#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000020#ifdef HAVE_SYS_STAT_H
21#include <sys/stat.h>
22#endif
23#ifdef HAVE_FCNTL_H
24#include <fcntl.h>
25#endif
26#ifdef HAVE_UNISTD_H
27#include <unistd.h>
28#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000029#ifdef HAVE_STDLIB_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000030#include <stdlib.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000031#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000032
Daniel Veillard361d8452000-04-03 19:48:13 +000033#include <libxml/xmlmemory.h>
34#include <libxml/HTMLparser.h>
35#include <libxml/HTMLtree.h>
36#include <libxml/debugXML.h>
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +000037#include <libxml/xmlerror.h>
Daniel Veillardbe70ff71999-07-05 16:50:46 +000038
/*
 * Command-line switches. main() bumps each counter every time the matching
 * option appears on the command line; the parsing routines then read them.
 */
#ifdef LIBXML_DEBUG_ENABLED
static int debug = 0;		/* --debug  : dump the tree with xmlDebugDumpDocument() */
#endif
static int copy = 0;		/* --copy   : run the tree through xmlCopyDoc() first */
static int sax = 0;		/* --sax    : use parseSAXFile() instead of building a tree */
static int repeat = 0;		/* --repeat : parse each file 100 * repeat times */
static int noout = 0;		/* --noout  : suppress the dump / the traced SAX pass */
static int push = 0;		/* --push   : feed the file through the push parser */
static char *encoding = NULL;	/* --encode : output encoding, NULL when not requested */
Daniel Veillardbe70ff71999-07-05 16:50:46 +000048
Daniel Veillard7c1206f1999-10-14 09:10:25 +000049xmlSAXHandler emptySAXHandlerStruct = {
50 NULL, /* internalSubset */
51 NULL, /* isStandalone */
52 NULL, /* hasInternalSubset */
53 NULL, /* hasExternalSubset */
54 NULL, /* resolveEntity */
55 NULL, /* getEntity */
56 NULL, /* entityDecl */
57 NULL, /* notationDecl */
58 NULL, /* attributeDecl */
59 NULL, /* elementDecl */
60 NULL, /* unparsedEntityDecl */
61 NULL, /* setDocumentLocator */
62 NULL, /* startDocument */
63 NULL, /* endDocument */
64 NULL, /* startElement */
65 NULL, /* endElement */
66 NULL, /* reference */
67 NULL, /* characters */
68 NULL, /* ignorableWhitespace */
69 NULL, /* processingInstruction */
70 NULL, /* comment */
71 NULL, /* xmlParserWarning */
72 NULL, /* xmlParserError */
73 NULL, /* xmlParserError */
74 NULL, /* getParameterEntity */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000075 NULL, /* cdataBlock */
Daniel Veillardd0463562001-10-13 09:15:48 +000076 NULL, /* externalSubset */
77 1
Daniel Veillard7c1206f1999-10-14 09:10:25 +000078};
79
80xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
81extern xmlSAXHandlerPtr debugSAXHandler;
82
/************************************************************************
 *									*
 *				Debug Handlers				*
 *									*
 ************************************************************************/
88
89/**
90 * isStandaloneDebug:
91 * @ctxt: An XML parser context
92 *
93 * Is this document tagged standalone ?
94 *
95 * Returns 1 if true
96 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000097static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +000098isStandaloneDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +000099{
100 fprintf(stdout, "SAX.isStandalone()\n");
101 return(0);
102}
103
104/**
105 * hasInternalSubsetDebug:
106 * @ctxt: An XML parser context
107 *
108 * Does this document has an internal subset
109 *
110 * Returns 1 if true
111 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000112static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000113hasInternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000114{
115 fprintf(stdout, "SAX.hasInternalSubset()\n");
116 return(0);
117}
118
119/**
120 * hasExternalSubsetDebug:
121 * @ctxt: An XML parser context
122 *
123 * Does this document has an external subset
124 *
125 * Returns 1 if true
126 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000127static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000128hasExternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000129{
130 fprintf(stdout, "SAX.hasExternalSubset()\n");
131 return(0);
132}
133
134/**
135 * hasInternalSubsetDebug:
136 * @ctxt: An XML parser context
137 *
138 * Does this document has an internal subset
139 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000140static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000141internalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000142 const xmlChar *ExternalID, const xmlChar *SystemID)
143{
Daniel Veillard808a3f12000-08-17 13:50:51 +0000144 fprintf(stdout, "SAX.internalSubset(%s,", name);
145 if (ExternalID == NULL)
146 fprintf(stdout, " ,");
147 else
148 fprintf(stdout, " %s,", ExternalID);
149 if (SystemID == NULL)
150 fprintf(stdout, " )\n");
151 else
152 fprintf(stdout, " %s)\n", SystemID);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000153}
154
155/**
156 * resolveEntityDebug:
157 * @ctxt: An XML parser context
158 * @publicId: The public ID of the entity
159 * @systemId: The system ID of the entity
160 *
161 * Special entity resolver, better left to the parser, it has
162 * more context than the application layer.
163 * The default behaviour is to NOT resolve the entities, in that case
164 * the ENTITY_REF nodes are built in the structure (and the parameter
165 * values).
166 *
167 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
168 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000169static xmlParserInputPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000170resolveEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *publicId, const xmlChar *systemId)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000171{
172 /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
173
174
175 fprintf(stdout, "SAX.resolveEntity(");
176 if (publicId != NULL)
177 fprintf(stdout, "%s", (char *)publicId);
178 else
179 fprintf(stdout, " ");
180 if (systemId != NULL)
181 fprintf(stdout, ", %s)\n", (char *)systemId);
182 else
183 fprintf(stdout, ", )\n");
184/*********
185 if (systemId != NULL) {
186 return(xmlNewInputFromFile(ctxt, (char *) systemId));
187 }
188 *********/
189 return(NULL);
190}
191
192/**
193 * getEntityDebug:
194 * @ctxt: An XML parser context
195 * @name: The entity name
196 *
197 * Get an entity by name
198 *
199 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
200 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000201static xmlEntityPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000202getEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000203{
204 fprintf(stdout, "SAX.getEntity(%s)\n", name);
205 return(NULL);
206}
207
208/**
209 * getParameterEntityDebug:
210 * @ctxt: An XML parser context
211 * @name: The entity name
212 *
213 * Get a parameter entity by name
214 *
215 * Returns the xmlParserInputPtr
216 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000217static xmlEntityPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000218getParameterEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000219{
220 fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
221 return(NULL);
222}
223
224
225/**
226 * entityDeclDebug:
227 * @ctxt: An XML parser context
228 * @name: the entity name
229 * @type: the entity type
230 * @publicId: The public ID of the entity
231 * @systemId: The system ID of the entity
232 * @content: the entity value (without processing).
233 *
234 * An entity definition has been parsed
235 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000236static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000237entityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000238 const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
239{
240 fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
241 name, type, publicId, systemId, content);
242}
243
244/**
245 * attributeDeclDebug:
246 * @ctxt: An XML parser context
247 * @name: the attribute name
248 * @type: the attribute type
249 *
250 * An attribute definition has been parsed
251 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000252static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000253attributeDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *elem, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000254 int type, int def, const xmlChar *defaultValue,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000255 xmlEnumerationPtr tree ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000256{
257 fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
258 elem, name, type, def, defaultValue);
259}
260
261/**
262 * elementDeclDebug:
263 * @ctxt: An XML parser context
264 * @name: the element name
265 * @type: the element type
266 * @content: the element value (without processing).
267 *
268 * An element definition has been parsed
269 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000270static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000271elementDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
272 xmlElementContentPtr content ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000273{
274 fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
275 name, type);
276}
277
278/**
279 * notationDeclDebug:
280 * @ctxt: An XML parser context
281 * @name: The name of the notation
282 * @publicId: The public ID of the entity
283 * @systemId: The system ID of the entity
284 *
285 * What to do when a notation declaration has been parsed.
286 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000287static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000288notationDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000289 const xmlChar *publicId, const xmlChar *systemId)
290{
291 fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
292 (char *) name, (char *) publicId, (char *) systemId);
293}
294
295/**
296 * unparsedEntityDeclDebug:
297 * @ctxt: An XML parser context
298 * @name: The name of the entity
299 * @publicId: The public ID of the entity
300 * @systemId: The system ID of the entity
301 * @notationName: the name of the notation
302 *
303 * What to do when an unparsed entity declaration is parsed
304 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000305static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000306unparsedEntityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000307 const xmlChar *publicId, const xmlChar *systemId,
308 const xmlChar *notationName)
309{
310 fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
311 (char *) name, (char *) publicId, (char *) systemId,
312 (char *) notationName);
313}
314
315/**
316 * setDocumentLocatorDebug:
317 * @ctxt: An XML parser context
318 * @loc: A SAX Locator
319 *
320 * Receive the document locator at startup, actually xmlDefaultSAXLocator
321 * Everything is available on the context, so this is useless in our case.
322 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000323static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000324setDocumentLocatorDebug(void *ctx ATTRIBUTE_UNUSED, xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000325{
326 fprintf(stdout, "SAX.setDocumentLocator()\n");
327}
328
329/**
330 * startDocumentDebug:
331 * @ctxt: An XML parser context
332 *
333 * called when the document start being processed.
334 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000335static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000336startDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000337{
338 fprintf(stdout, "SAX.startDocument()\n");
339}
340
341/**
342 * endDocumentDebug:
343 * @ctxt: An XML parser context
344 *
345 * called when the document end has been detected.
346 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000347static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000348endDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000349{
350 fprintf(stdout, "SAX.endDocument()\n");
351}
352
353/**
354 * startElementDebug:
355 * @ctxt: An XML parser context
356 * @name: The element name
357 *
358 * called when an opening tag has been processed.
359 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000360static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000361startElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, const xmlChar **atts)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000362{
363 int i;
364
365 fprintf(stdout, "SAX.startElement(%s", (char *) name);
366 if (atts != NULL) {
367 for (i = 0;(atts[i] != NULL);i++) {
Daniel Veillard808a3f12000-08-17 13:50:51 +0000368 fprintf(stdout, ", %s", atts[i++]);
Daniel Veillarde010c172000-08-28 10:04:51 +0000369 if (atts[i] != NULL) {
370 unsigned char output[40];
371 const unsigned char *att = atts[i];
372 int outlen, attlen;
373 fprintf(stdout, "='");
374 while ((attlen = strlen((char*)att)) > 0) {
375 outlen = sizeof output - 1;
376 htmlEncodeEntities(output, &outlen, att, &attlen, '\'');
377 fprintf(stdout, "%.*s", outlen, output);
378 att += attlen;
379 }
380 fprintf(stdout, "'");
381 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000382 }
383 }
384 fprintf(stdout, ")\n");
385}
386
387/**
388 * endElementDebug:
389 * @ctxt: An XML parser context
390 * @name: The element name
391 *
392 * called when the end of an element has been detected.
393 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000394static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000395endElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000396{
397 fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
398}
399
400/**
401 * charactersDebug:
402 * @ctxt: An XML parser context
403 * @ch: a xmlChar string
404 * @len: the number of xmlChar
405 *
406 * receiving some chars from the parser.
407 * Question: how much at a time ???
408 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000409static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000410charactersDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000411{
Daniel Veillarde010c172000-08-28 10:04:51 +0000412 unsigned char output[40];
Daniel Veillard4948eb42000-08-29 09:41:15 +0000413 int inlen = len, outlen = 30;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000414
Daniel Veillard4948eb42000-08-29 09:41:15 +0000415 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
Daniel Veillarde010c172000-08-28 10:04:51 +0000416 output[outlen] = 0;
Daniel Veillard87b95392000-08-12 21:12:04 +0000417
418 fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000419}
420
421/**
Daniel Veillard7eda8452000-10-14 23:38:43 +0000422 * cdataDebug:
423 * @ctxt: An XML parser context
424 * @ch: a xmlChar string
425 * @len: the number of xmlChar
426 *
427 * receiving some cdata chars from the parser.
428 * Question: how much at a time ???
429 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000430static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000431cdataDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7eda8452000-10-14 23:38:43 +0000432{
433 unsigned char output[40];
434 int inlen = len, outlen = 30;
435
436 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
437 output[outlen] = 0;
438
439 fprintf(stdout, "SAX.cdata(%s, %d)\n", output, len);
440}
441
442/**
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000443 * referenceDebug:
444 * @ctxt: An XML parser context
445 * @name: The entity name
446 *
447 * called when an entity reference is detected.
448 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000449static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000450referenceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000451{
452 fprintf(stdout, "SAX.reference(%s)\n", name);
453}
454
455/**
456 * ignorableWhitespaceDebug:
457 * @ctxt: An XML parser context
458 * @ch: a xmlChar string
459 * @start: the first char in the string
460 * @len: the number of xmlChar
461 *
462 * receiving some ignorable whitespaces from the parser.
463 * Question: how much at a time ???
464 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000465static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000466ignorableWhitespaceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000467{
Daniel Veillard87b95392000-08-12 21:12:04 +0000468 char output[40];
469 int i;
470
471 for (i = 0;(i<len) && (i < 30);i++)
472 output[i] = ch[i];
473 output[i] = 0;
474
475 fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000476}
477
478/**
479 * processingInstructionDebug:
480 * @ctxt: An XML parser context
481 * @target: the target name
482 * @data: the PI data's
483 * @len: the number of xmlChar
484 *
485 * A processing instruction has been parsed.
486 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000487static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000488processingInstructionDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *target,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000489 const xmlChar *data)
490{
491 fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
492 (char *) target, (char *) data);
493}
494
495/**
496 * commentDebug:
497 * @ctxt: An XML parser context
498 * @value: the comment content
499 *
500 * A comment has been parsed.
501 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000502static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000503commentDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000504{
505 fprintf(stdout, "SAX.comment(%s)\n", value);
506}
507
508/**
509 * warningDebug:
510 * @ctxt: An XML parser context
511 * @msg: the message to display/transmit
512 * @...: extra parameters for the message display
513 *
514 * Display and format a warning messages, gives file, line, position and
515 * extra parameters.
516 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000517static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000518warningDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000519{
520 va_list args;
521
522 va_start(args, msg);
523 fprintf(stdout, "SAX.warning: ");
524 vfprintf(stdout, msg, args);
525 va_end(args);
526}
527
528/**
529 * errorDebug:
530 * @ctxt: An XML parser context
531 * @msg: the message to display/transmit
532 * @...: extra parameters for the message display
533 *
534 * Display and format a error messages, gives file, line, position and
535 * extra parameters.
536 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000537static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000538errorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000539{
540 va_list args;
541
542 va_start(args, msg);
543 fprintf(stdout, "SAX.error: ");
544 vfprintf(stdout, msg, args);
545 va_end(args);
546}
547
548/**
549 * fatalErrorDebug:
550 * @ctxt: An XML parser context
551 * @msg: the message to display/transmit
552 * @...: extra parameters for the message display
553 *
554 * Display and format a fatalError messages, gives file, line, position and
555 * extra parameters.
556 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000557static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000558fatalErrorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000559{
560 va_list args;
561
562 va_start(args, msg);
563 fprintf(stdout, "SAX.fatalError: ");
564 vfprintf(stdout, msg, args);
565 va_end(args);
566}
567
568xmlSAXHandler debugSAXHandlerStruct = {
569 internalSubsetDebug,
570 isStandaloneDebug,
571 hasInternalSubsetDebug,
572 hasExternalSubsetDebug,
573 resolveEntityDebug,
574 getEntityDebug,
575 entityDeclDebug,
576 notationDeclDebug,
577 attributeDeclDebug,
578 elementDeclDebug,
579 unparsedEntityDeclDebug,
580 setDocumentLocatorDebug,
581 startDocumentDebug,
582 endDocumentDebug,
583 startElementDebug,
584 endElementDebug,
585 referenceDebug,
586 charactersDebug,
587 ignorableWhitespaceDebug,
588 processingInstructionDebug,
589 commentDebug,
590 warningDebug,
591 errorDebug,
592 fatalErrorDebug,
593 getParameterEntityDebug,
Daniel Veillard7eda8452000-10-14 23:38:43 +0000594 cdataDebug,
Daniel Veillardd0463562001-10-13 09:15:48 +0000595 NULL,
596 1
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000597};
598
599xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
/************************************************************************
 *									*
 *				Debug					*
 *									*
 ************************************************************************/
605
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000606static void
607parseSAXFile(char *filename) {
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000608 htmlDocPtr doc = NULL;
609
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000610 /*
611 * Empty callbacks for checking
612 */
Daniel Veillard87b95392000-08-12 21:12:04 +0000613 if (push) {
614 FILE *f;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000615
Daniel Veillard87b95392000-08-12 21:12:04 +0000616 f = fopen(filename, "r");
617 if (f != NULL) {
618 int res, size = 3;
619 char chars[4096];
620 htmlParserCtxtPtr ctxt;
621
622 /* if (repeat) */
623 size = 4096;
624 res = fread(chars, 1, 4, f);
625 if (res > 0) {
626 ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL,
627 chars, res, filename, 0);
628 while ((res = fread(chars, 1, size, f)) > 0) {
629 htmlParseChunk(ctxt, chars, res, 0);
630 }
631 htmlParseChunk(ctxt, chars, 0, 1);
632 doc = ctxt->myDoc;
633 htmlFreeParserCtxt(ctxt);
634 }
635 if (doc != NULL) {
636 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
637 xmlFreeDoc(doc);
638 }
639 fclose(f);
640 }
641 if (!noout) {
642 f = fopen(filename, "r");
643 if (f != NULL) {
644 int res, size = 3;
645 char chars[4096];
646 htmlParserCtxtPtr ctxt;
647
648 /* if (repeat) */
649 size = 4096;
650 res = fread(chars, 1, 4, f);
651 if (res > 0) {
652 ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL,
653 chars, res, filename, 0);
654 while ((res = fread(chars, 1, size, f)) > 0) {
655 htmlParseChunk(ctxt, chars, res, 0);
656 }
657 htmlParseChunk(ctxt, chars, 0, 1);
658 doc = ctxt->myDoc;
659 htmlFreeParserCtxt(ctxt);
660 }
661 if (doc != NULL) {
662 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
663 xmlFreeDoc(doc);
664 }
665 fclose(f);
666 }
667 }
668 } else {
669 doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000670 if (doc != NULL) {
671 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
672 xmlFreeDoc(doc);
673 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000674
675 if (!noout) {
676 /*
677 * Debug callback
678 */
679 doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
680 if (doc != NULL) {
681 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
682 xmlFreeDoc(doc);
683 }
684 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000685 }
686}
687
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000688static void
689parseAndPrintFile(char *filename) {
Daniel Veillard2eac5032000-01-09 21:08:56 +0000690 htmlDocPtr doc = NULL, tmp;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000691
692 /*
693 * build an HTML tree from a string;
694 */
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000695 if (push) {
696 FILE *f;
697
698 f = fopen(filename, "r");
699 if (f != NULL) {
700 int res, size = 3;
Daniel Veillard87b95392000-08-12 21:12:04 +0000701 char chars[4096];
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000702 htmlParserCtxtPtr ctxt;
703
Daniel Veillard87b95392000-08-12 21:12:04 +0000704 /* if (repeat) */
705 size = 4096;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000706 res = fread(chars, 1, 4, f);
707 if (res > 0) {
708 ctxt = htmlCreatePushParserCtxt(NULL, NULL,
709 chars, res, filename, 0);
710 while ((res = fread(chars, 1, size, f)) > 0) {
711 htmlParseChunk(ctxt, chars, res, 0);
712 }
713 htmlParseChunk(ctxt, chars, 0, 1);
714 doc = ctxt->myDoc;
715 htmlFreeParserCtxt(ctxt);
716 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000717 fclose(f);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000718 }
719 } else {
720 doc = htmlParseFile(filename, NULL);
721 }
722 if (doc == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000723 xmlGenericError(xmlGenericErrorContext,
724 "Could not parse %s\n", filename);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000725 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000726
727 /*
728 * test intermediate copy if needed.
729 */
730 if (copy) {
731 tmp = doc;
732 doc = xmlCopyDoc(doc, 1);
733 xmlFreeDoc(tmp);
734 }
735
736 /*
737 * print it.
738 */
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000739 if (!noout) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000740#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000741 if (!debug) {
742 if (encoding)
743 htmlSaveFileEnc("-", doc, encoding);
744 else
745 htmlDocDump(stdout, doc);
746 } else
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000747 xmlDebugDumpDocument(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000748#else
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000749 if (encoding)
750 htmlSaveFileEnc("-", doc, encoding);
751 else
752 htmlDocDump(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000753#endif
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000754 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000755
756 /*
757 * free it.
758 */
759 xmlFreeDoc(doc);
760}
761
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000762int main(int argc, char **argv) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000763 int i, count;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000764 int files = 0;
765
766 for (i = 1; i < argc ; i++) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000767#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000768 if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
769 debug++;
Daniel Veillard361d8452000-04-03 19:48:13 +0000770 else
771#endif
772 if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000773 copy++;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000774 else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
775 push++;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000776 else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
777 sax++;
778 else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
779 noout++;
780 else if ((!strcmp(argv[i], "-repeat")) ||
781 (!strcmp(argv[i], "--repeat")))
782 repeat++;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000783 else if ((!strcmp(argv[i], "-encode")) ||
784 (!strcmp(argv[i], "--encode"))) {
785 i++;
786 encoding = argv[i];
787 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000788 }
789 for (i = 1; i < argc ; i++) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000790 if ((!strcmp(argv[i], "-encode")) ||
791 (!strcmp(argv[i], "--encode"))) {
792 i++;
793 continue;
794 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000795 if (argv[i][0] != '-') {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000796 if (repeat) {
797 for (count = 0;count < 100 * repeat;count++) {
798 if (sax)
799 parseSAXFile(argv[i]);
800 else
801 parseAndPrintFile(argv[i]);
802 }
803 } else {
804 if (sax)
805 parseSAXFile(argv[i]);
806 else
807 parseAndPrintFile(argv[i]);
808 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000809 files ++;
810 }
811 }
812 if (files == 0) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000813 printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000814 argv[0]);
815 printf("\tParse the HTML files and output the result of the parsing\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000816#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000817 printf("\t--debug : dump a debug tree of the in-memory document\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000818#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000819 printf("\t--copy : used to test the internal copy implementation\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000820 printf("\t--sax : debug the sequence of SAX callbacks\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000821 printf("\t--repeat : parse the file 100 times, for timing\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000822 printf("\t--noout : do not print the result\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000823 printf("\t--push : use the push mode parser\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000824 printf("\t--encode encoding : output in the given encoding\n");
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000825 }
Daniel Veillardf5c2c871999-12-01 09:51:45 +0000826 xmlCleanupParser();
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000827 xmlMemoryDump();
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000828
829 return(0);
830}
Daniel Veillard361d8452000-04-03 19:48:13 +0000831#else /* !LIBXML_HTML_ENABLED */
832#include <stdio.h>
/*
 * Fallback entry point used when libxml was built without HTML support:
 * prints a one-line notice naming the program and exits with status 0.
 */
int main(int argc, char **argv) {
    (void) argc;	/* only argv[0] is needed */
    fprintf(stdout, "%s : HTML support not compiled in\n", argv[0]);
    return 0;
}
837#endif