blob: 9b42778c80d580b9515819b0998bb7d0cffffc37 [file] [log] [blame]
Daniel Veillardbe70ff71999-07-05 16:50:46 +00001/*
2 * testHTML.c : a small tester program for HTML input.
3 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00006 * daniel@veillard.com
Daniel Veillardbe70ff71999-07-05 16:50:46 +00007 */
8
Bjorn Reese70a9da52001-04-21 16:57:29 +00009#include "libxml.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000010
Daniel Veillard361d8452000-04-03 19:48:13 +000011#ifdef LIBXML_HTML_ENABLED
12
Daniel Veillard7f7d1111999-09-22 09:46:25 +000013#include <string.h>
Daniel Veillard7c1206f1999-10-14 09:10:25 +000014#include <stdarg.h>
15
Daniel Veillard7f7d1111999-09-22 09:46:25 +000016
17#ifdef HAVE_SYS_TYPES_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000018#include <sys/types.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000019#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000020#ifdef HAVE_SYS_STAT_H
21#include <sys/stat.h>
22#endif
23#ifdef HAVE_FCNTL_H
24#include <fcntl.h>
25#endif
26#ifdef HAVE_UNISTD_H
27#include <unistd.h>
28#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000029#ifdef HAVE_STDLIB_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000030#include <stdlib.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000031#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000032
Daniel Veillard361d8452000-04-03 19:48:13 +000033#include <libxml/xmlmemory.h>
34#include <libxml/HTMLparser.h>
35#include <libxml/HTMLtree.h>
36#include <libxml/debugXML.h>
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +000037#include <libxml/xmlerror.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000038#include <libxml/globals.h>
Daniel Veillardbe70ff71999-07-05 16:50:46 +000039
/*
 * Command-line state. Each counter is bumped by the matching option in
 * main(); a non-zero value means the option was given.
 */
#ifdef LIBXML_DEBUG_ENABLED
static int debug = 0;           /* --debug : dump with xmlDebugDumpDocument */
#endif
static int copy = 0;            /* --copy  : round-trip through xmlCopyDoc */
static int sax = 0;             /* --sax   : run the SAX callback tester */
static int repeat = 0;          /* --repeat: parse each file 100 * repeat times */
static int noout = 0;           /* --noout : suppress printed results */
#ifdef LIBXML_PUSH_ENABLED
static int push = 0;            /* --push  : use the push parser interface */
#endif /* LIBXML_PUSH_ENABLED */
static char *encoding = NULL;   /* --encode: output encoding for htmlSaveFileEnc */
static int options = 0;         /* parser options handed to htmlReadFile */
Daniel Veillardbe70ff71999-07-05 16:50:46 +000052
Daniel Veillard24505b02005-07-28 23:49:35 +000053static xmlSAXHandler emptySAXHandlerStruct = {
Daniel Veillard7c1206f1999-10-14 09:10:25 +000054 NULL, /* internalSubset */
55 NULL, /* isStandalone */
56 NULL, /* hasInternalSubset */
57 NULL, /* hasExternalSubset */
58 NULL, /* resolveEntity */
59 NULL, /* getEntity */
60 NULL, /* entityDecl */
61 NULL, /* notationDecl */
62 NULL, /* attributeDecl */
63 NULL, /* elementDecl */
64 NULL, /* unparsedEntityDecl */
65 NULL, /* setDocumentLocator */
66 NULL, /* startDocument */
67 NULL, /* endDocument */
68 NULL, /* startElement */
69 NULL, /* endElement */
70 NULL, /* reference */
71 NULL, /* characters */
72 NULL, /* ignorableWhitespace */
73 NULL, /* processingInstruction */
74 NULL, /* comment */
75 NULL, /* xmlParserWarning */
76 NULL, /* xmlParserError */
77 NULL, /* xmlParserError */
78 NULL, /* getParameterEntity */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000079 NULL, /* cdataBlock */
Daniel Veillardd0463562001-10-13 09:15:48 +000080 NULL, /* externalSubset */
William M. Brack871611b2003-10-18 04:53:14 +000081 1, /* initialized */
82 NULL, /* private */
83 NULL, /* startElementNsSAX2Func */
84 NULL, /* endElementNsSAX2Func */
85 NULL /* xmlStructuredErrorFunc */
Daniel Veillard7c1206f1999-10-14 09:10:25 +000086};
87
Daniel Veillard24505b02005-07-28 23:49:35 +000088static xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
Daniel Veillard7c1206f1999-10-14 09:10:25 +000089extern xmlSAXHandlerPtr debugSAXHandler;
90
91/************************************************************************
92 * *
93 * Debug Handlers *
94 * *
95 ************************************************************************/
96
97/**
98 * isStandaloneDebug:
99 * @ctxt: An XML parser context
100 *
101 * Is this document tagged standalone ?
102 *
103 * Returns 1 if true
104 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000105static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000106isStandaloneDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000107{
108 fprintf(stdout, "SAX.isStandalone()\n");
109 return(0);
110}
111
112/**
113 * hasInternalSubsetDebug:
114 * @ctxt: An XML parser context
115 *
116 * Does this document has an internal subset
117 *
118 * Returns 1 if true
119 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000120static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000121hasInternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000122{
123 fprintf(stdout, "SAX.hasInternalSubset()\n");
124 return(0);
125}
126
127/**
128 * hasExternalSubsetDebug:
129 * @ctxt: An XML parser context
130 *
131 * Does this document has an external subset
132 *
133 * Returns 1 if true
134 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000135static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000136hasExternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000137{
138 fprintf(stdout, "SAX.hasExternalSubset()\n");
139 return(0);
140}
141
142/**
143 * hasInternalSubsetDebug:
144 * @ctxt: An XML parser context
145 *
146 * Does this document has an internal subset
147 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000148static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000149internalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000150 const xmlChar *ExternalID, const xmlChar *SystemID)
151{
Daniel Veillard808a3f12000-08-17 13:50:51 +0000152 fprintf(stdout, "SAX.internalSubset(%s,", name);
153 if (ExternalID == NULL)
154 fprintf(stdout, " ,");
155 else
156 fprintf(stdout, " %s,", ExternalID);
157 if (SystemID == NULL)
158 fprintf(stdout, " )\n");
159 else
160 fprintf(stdout, " %s)\n", SystemID);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000161}
162
163/**
164 * resolveEntityDebug:
165 * @ctxt: An XML parser context
166 * @publicId: The public ID of the entity
167 * @systemId: The system ID of the entity
168 *
169 * Special entity resolver, better left to the parser, it has
170 * more context than the application layer.
171 * The default behaviour is to NOT resolve the entities, in that case
172 * the ENTITY_REF nodes are built in the structure (and the parameter
173 * values).
174 *
175 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
176 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000177static xmlParserInputPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000178resolveEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *publicId, const xmlChar *systemId)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000179{
180 /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
181
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800182
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000183 fprintf(stdout, "SAX.resolveEntity(");
184 if (publicId != NULL)
185 fprintf(stdout, "%s", (char *)publicId);
186 else
187 fprintf(stdout, " ");
188 if (systemId != NULL)
189 fprintf(stdout, ", %s)\n", (char *)systemId);
190 else
191 fprintf(stdout, ", )\n");
192/*********
193 if (systemId != NULL) {
194 return(xmlNewInputFromFile(ctxt, (char *) systemId));
195 }
196 *********/
197 return(NULL);
198}
199
200/**
201 * getEntityDebug:
202 * @ctxt: An XML parser context
203 * @name: The entity name
204 *
205 * Get an entity by name
206 *
207 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
208 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000209static xmlEntityPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000210getEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000211{
212 fprintf(stdout, "SAX.getEntity(%s)\n", name);
213 return(NULL);
214}
215
216/**
217 * getParameterEntityDebug:
218 * @ctxt: An XML parser context
219 * @name: The entity name
220 *
221 * Get a parameter entity by name
222 *
223 * Returns the xmlParserInputPtr
224 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000225static xmlEntityPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000226getParameterEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000227{
228 fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
229 return(NULL);
230}
231
232
233/**
234 * entityDeclDebug:
235 * @ctxt: An XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800236 * @name: the entity name
237 * @type: the entity type
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000238 * @publicId: The public ID of the entity
239 * @systemId: The system ID of the entity
240 * @content: the entity value (without processing).
241 *
242 * An entity definition has been parsed
243 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000244static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000245entityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000246 const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
247{
248 fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
249 name, type, publicId, systemId, content);
250}
251
252/**
253 * attributeDeclDebug:
254 * @ctxt: An XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800255 * @name: the attribute name
256 * @type: the attribute type
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000257 *
258 * An attribute definition has been parsed
259 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000260static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000261attributeDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *elem, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000262 int type, int def, const xmlChar *defaultValue,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000263 xmlEnumerationPtr tree ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000264{
265 fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
266 elem, name, type, def, defaultValue);
267}
268
269/**
270 * elementDeclDebug:
271 * @ctxt: An XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800272 * @name: the element name
273 * @type: the element type
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000274 * @content: the element value (without processing).
275 *
276 * An element definition has been parsed
277 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000278static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000279elementDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
280 xmlElementContentPtr content ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000281{
282 fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
283 name, type);
284}
285
286/**
287 * notationDeclDebug:
288 * @ctxt: An XML parser context
289 * @name: The name of the notation
290 * @publicId: The public ID of the entity
291 * @systemId: The system ID of the entity
292 *
293 * What to do when a notation declaration has been parsed.
294 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000295static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000296notationDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000297 const xmlChar *publicId, const xmlChar *systemId)
298{
299 fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
300 (char *) name, (char *) publicId, (char *) systemId);
301}
302
303/**
304 * unparsedEntityDeclDebug:
305 * @ctxt: An XML parser context
306 * @name: The name of the entity
307 * @publicId: The public ID of the entity
308 * @systemId: The system ID of the entity
309 * @notationName: the name of the notation
310 *
311 * What to do when an unparsed entity declaration is parsed
312 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000313static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000314unparsedEntityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000315 const xmlChar *publicId, const xmlChar *systemId,
316 const xmlChar *notationName)
317{
318 fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
319 (char *) name, (char *) publicId, (char *) systemId,
320 (char *) notationName);
321}
322
323/**
324 * setDocumentLocatorDebug:
325 * @ctxt: An XML parser context
326 * @loc: A SAX Locator
327 *
328 * Receive the document locator at startup, actually xmlDefaultSAXLocator
329 * Everything is available on the context, so this is useless in our case.
330 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000331static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000332setDocumentLocatorDebug(void *ctx ATTRIBUTE_UNUSED, xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000333{
334 fprintf(stdout, "SAX.setDocumentLocator()\n");
335}
336
337/**
338 * startDocumentDebug:
339 * @ctxt: An XML parser context
340 *
341 * called when the document start being processed.
342 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000343static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000344startDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000345{
346 fprintf(stdout, "SAX.startDocument()\n");
347}
348
349/**
350 * endDocumentDebug:
351 * @ctxt: An XML parser context
352 *
353 * called when the document end has been detected.
354 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000355static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000356endDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000357{
358 fprintf(stdout, "SAX.endDocument()\n");
359}
360
361/**
362 * startElementDebug:
363 * @ctxt: An XML parser context
364 * @name: The element name
365 *
366 * called when an opening tag has been processed.
367 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000368static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000369startElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, const xmlChar **atts)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000370{
371 int i;
372
373 fprintf(stdout, "SAX.startElement(%s", (char *) name);
374 if (atts != NULL) {
375 for (i = 0;(atts[i] != NULL);i++) {
Daniel Veillard808a3f12000-08-17 13:50:51 +0000376 fprintf(stdout, ", %s", atts[i++]);
Daniel Veillarde010c172000-08-28 10:04:51 +0000377 if (atts[i] != NULL) {
378 unsigned char output[40];
379 const unsigned char *att = atts[i];
380 int outlen, attlen;
381 fprintf(stdout, "='");
382 while ((attlen = strlen((char*)att)) > 0) {
383 outlen = sizeof output - 1;
384 htmlEncodeEntities(output, &outlen, att, &attlen, '\'');
Daniel Veillard5f704af2003-03-05 10:01:43 +0000385 output[outlen] = 0;
William M. Brackc1939562003-08-05 15:52:22 +0000386 fprintf(stdout, "%s", (char *) output);
Daniel Veillarde010c172000-08-28 10:04:51 +0000387 att += attlen;
388 }
389 fprintf(stdout, "'");
390 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000391 }
392 }
393 fprintf(stdout, ")\n");
394}
395
396/**
397 * endElementDebug:
398 * @ctxt: An XML parser context
399 * @name: The element name
400 *
401 * called when the end of an element has been detected.
402 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000403static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000404endElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000405{
406 fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
407}
408
409/**
410 * charactersDebug:
411 * @ctxt: An XML parser context
412 * @ch: a xmlChar string
413 * @len: the number of xmlChar
414 *
415 * receiving some chars from the parser.
416 * Question: how much at a time ???
417 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000418static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000419charactersDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000420{
Daniel Veillarde010c172000-08-28 10:04:51 +0000421 unsigned char output[40];
Daniel Veillard4948eb42000-08-29 09:41:15 +0000422 int inlen = len, outlen = 30;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000423
Daniel Veillard4948eb42000-08-29 09:41:15 +0000424 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
Daniel Veillarde010c172000-08-28 10:04:51 +0000425 output[outlen] = 0;
Daniel Veillard87b95392000-08-12 21:12:04 +0000426
427 fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000428}
429
430/**
Daniel Veillard7eda8452000-10-14 23:38:43 +0000431 * cdataDebug:
432 * @ctxt: An XML parser context
433 * @ch: a xmlChar string
434 * @len: the number of xmlChar
435 *
436 * receiving some cdata chars from the parser.
437 * Question: how much at a time ???
438 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000439static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000440cdataDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7eda8452000-10-14 23:38:43 +0000441{
442 unsigned char output[40];
443 int inlen = len, outlen = 30;
444
445 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
446 output[outlen] = 0;
447
448 fprintf(stdout, "SAX.cdata(%s, %d)\n", output, len);
449}
450
451/**
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000452 * referenceDebug:
453 * @ctxt: An XML parser context
454 * @name: The entity name
455 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800456 * called when an entity reference is detected.
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000457 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000458static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000459referenceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000460{
461 fprintf(stdout, "SAX.reference(%s)\n", name);
462}
463
464/**
465 * ignorableWhitespaceDebug:
466 * @ctxt: An XML parser context
467 * @ch: a xmlChar string
468 * @start: the first char in the string
469 * @len: the number of xmlChar
470 *
471 * receiving some ignorable whitespaces from the parser.
472 * Question: how much at a time ???
473 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000474static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000475ignorableWhitespaceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000476{
Daniel Veillard87b95392000-08-12 21:12:04 +0000477 char output[40];
478 int i;
479
480 for (i = 0;(i<len) && (i < 30);i++)
481 output[i] = ch[i];
482 output[i] = 0;
483
484 fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000485}
486
487/**
488 * processingInstructionDebug:
489 * @ctxt: An XML parser context
490 * @target: the target name
491 * @data: the PI data's
492 * @len: the number of xmlChar
493 *
494 * A processing instruction has been parsed.
495 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000496static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000497processingInstructionDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *target,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000498 const xmlChar *data)
499{
500 fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
501 (char *) target, (char *) data);
502}
503
504/**
505 * commentDebug:
506 * @ctxt: An XML parser context
507 * @value: the comment content
508 *
509 * A comment has been parsed.
510 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000511static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000512commentDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000513{
514 fprintf(stdout, "SAX.comment(%s)\n", value);
515}
516
517/**
518 * warningDebug:
519 * @ctxt: An XML parser context
520 * @msg: the message to display/transmit
521 * @...: extra parameters for the message display
522 *
523 * Display and format a warning messages, gives file, line, position and
524 * extra parameters.
525 */
Daniel Veillardffa3c742005-07-21 13:24:09 +0000526static void XMLCDECL
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000527warningDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000528{
529 va_list args;
530
531 va_start(args, msg);
532 fprintf(stdout, "SAX.warning: ");
533 vfprintf(stdout, msg, args);
534 va_end(args);
535}
536
537/**
538 * errorDebug:
539 * @ctxt: An XML parser context
540 * @msg: the message to display/transmit
541 * @...: extra parameters for the message display
542 *
543 * Display and format a error messages, gives file, line, position and
544 * extra parameters.
545 */
Daniel Veillardffa3c742005-07-21 13:24:09 +0000546static void XMLCDECL
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000547errorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000548{
549 va_list args;
550
551 va_start(args, msg);
552 fprintf(stdout, "SAX.error: ");
553 vfprintf(stdout, msg, args);
554 va_end(args);
555}
556
557/**
558 * fatalErrorDebug:
559 * @ctxt: An XML parser context
560 * @msg: the message to display/transmit
561 * @...: extra parameters for the message display
562 *
563 * Display and format a fatalError messages, gives file, line, position and
564 * extra parameters.
565 */
Daniel Veillardffa3c742005-07-21 13:24:09 +0000566static void XMLCDECL
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000567fatalErrorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000568{
569 va_list args;
570
571 va_start(args, msg);
572 fprintf(stdout, "SAX.fatalError: ");
573 vfprintf(stdout, msg, args);
574 va_end(args);
575}
576
Daniel Veillard24505b02005-07-28 23:49:35 +0000577static xmlSAXHandler debugSAXHandlerStruct = {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000578 internalSubsetDebug,
579 isStandaloneDebug,
580 hasInternalSubsetDebug,
581 hasExternalSubsetDebug,
582 resolveEntityDebug,
583 getEntityDebug,
584 entityDeclDebug,
585 notationDeclDebug,
586 attributeDeclDebug,
587 elementDeclDebug,
588 unparsedEntityDeclDebug,
589 setDocumentLocatorDebug,
590 startDocumentDebug,
591 endDocumentDebug,
592 startElementDebug,
593 endElementDebug,
594 referenceDebug,
595 charactersDebug,
596 ignorableWhitespaceDebug,
597 processingInstructionDebug,
598 commentDebug,
599 warningDebug,
600 errorDebug,
601 fatalErrorDebug,
602 getParameterEntityDebug,
Daniel Veillard7eda8452000-10-14 23:38:43 +0000603 cdataDebug,
Daniel Veillardd0463562001-10-13 09:15:48 +0000604 NULL,
Daniel Veillard092643b2003-09-25 14:29:29 +0000605 1,
606 NULL,
607 NULL,
William M. Brack871611b2003-10-18 04:53:14 +0000608 NULL,
Daniel Veillard092643b2003-09-25 14:29:29 +0000609 NULL
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000610};
611
612xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000613/************************************************************************
614 * *
615 * Debug *
616 * *
617 ************************************************************************/
618
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000619static void
620parseSAXFile(char *filename) {
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000621 htmlDocPtr doc = NULL;
622
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000623 /*
624 * Empty callbacks for checking
625 */
Daniel Veillard73b013f2003-09-30 12:36:01 +0000626#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard87b95392000-08-12 21:12:04 +0000627 if (push) {
628 FILE *f;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000629
William M. Brack3403add2004-06-27 02:07:51 +0000630#if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
631 f = fopen(filename, "rb");
632#else
Daniel Veillard87b95392000-08-12 21:12:04 +0000633 f = fopen(filename, "r");
William M. Brack3403add2004-06-27 02:07:51 +0000634#endif
Daniel Veillard87b95392000-08-12 21:12:04 +0000635 if (f != NULL) {
636 int res, size = 3;
637 char chars[4096];
638 htmlParserCtxtPtr ctxt;
639
640 /* if (repeat) */
641 size = 4096;
642 res = fread(chars, 1, 4, f);
643 if (res > 0) {
644 ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL,
William M. Brack779af002003-08-01 15:55:39 +0000645 chars, res, filename, XML_CHAR_ENCODING_NONE);
Daniel Veillard87b95392000-08-12 21:12:04 +0000646 while ((res = fread(chars, 1, size, f)) > 0) {
647 htmlParseChunk(ctxt, chars, res, 0);
648 }
649 htmlParseChunk(ctxt, chars, 0, 1);
650 doc = ctxt->myDoc;
651 htmlFreeParserCtxt(ctxt);
652 }
653 if (doc != NULL) {
654 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
655 xmlFreeDoc(doc);
656 }
657 fclose(f);
658 }
659 if (!noout) {
William M. Brack3403add2004-06-27 02:07:51 +0000660#if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
661 f = fopen(filename, "rb");
662#else
663 f = fopen(filename, "r");
664#endif
Daniel Veillard87b95392000-08-12 21:12:04 +0000665 if (f != NULL) {
666 int res, size = 3;
667 char chars[4096];
668 htmlParserCtxtPtr ctxt;
669
670 /* if (repeat) */
671 size = 4096;
672 res = fread(chars, 1, 4, f);
673 if (res > 0) {
674 ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL,
William M. Brack779af002003-08-01 15:55:39 +0000675 chars, res, filename, XML_CHAR_ENCODING_NONE);
Daniel Veillard87b95392000-08-12 21:12:04 +0000676 while ((res = fread(chars, 1, size, f)) > 0) {
677 htmlParseChunk(ctxt, chars, res, 0);
678 }
679 htmlParseChunk(ctxt, chars, 0, 1);
680 doc = ctxt->myDoc;
681 htmlFreeParserCtxt(ctxt);
682 }
683 if (doc != NULL) {
684 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
685 xmlFreeDoc(doc);
686 }
687 fclose(f);
688 }
689 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800690 } else {
Daniel Veillard73b013f2003-09-30 12:36:01 +0000691#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillard87b95392000-08-12 21:12:04 +0000692 doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000693 if (doc != NULL) {
694 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
695 xmlFreeDoc(doc);
696 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000697
698 if (!noout) {
699 /*
700 * Debug callback
701 */
702 doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
703 if (doc != NULL) {
704 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
705 xmlFreeDoc(doc);
706 }
707 }
Daniel Veillard73b013f2003-09-30 12:36:01 +0000708#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000709 }
Daniel Veillard73b013f2003-09-30 12:36:01 +0000710#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000711}
712
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000713static void
714parseAndPrintFile(char *filename) {
Daniel Veillard2156d432004-03-04 15:59:36 +0000715 htmlDocPtr doc = NULL;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000716
717 /*
718 * build an HTML tree from a string;
719 */
Daniel Veillardc2c0d142004-01-13 20:51:03 +0000720#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000721 if (push) {
722 FILE *f;
723
William M. Brack3403add2004-06-27 02:07:51 +0000724#if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
725 f = fopen(filename, "rb");
726#else
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000727 f = fopen(filename, "r");
William M. Brack3403add2004-06-27 02:07:51 +0000728#endif
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000729 if (f != NULL) {
730 int res, size = 3;
Daniel Veillard87b95392000-08-12 21:12:04 +0000731 char chars[4096];
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000732 htmlParserCtxtPtr ctxt;
733
Daniel Veillard87b95392000-08-12 21:12:04 +0000734 /* if (repeat) */
735 size = 4096;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000736 res = fread(chars, 1, 4, f);
737 if (res > 0) {
738 ctxt = htmlCreatePushParserCtxt(NULL, NULL,
William M. Brack779af002003-08-01 15:55:39 +0000739 chars, res, filename, XML_CHAR_ENCODING_NONE);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000740 while ((res = fread(chars, 1, size, f)) > 0) {
741 htmlParseChunk(ctxt, chars, res, 0);
742 }
743 htmlParseChunk(ctxt, chars, 0, 1);
744 doc = ctxt->myDoc;
745 htmlFreeParserCtxt(ctxt);
746 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000747 fclose(f);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000748 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800749 } else {
Daniel Veillard9475a352003-09-26 12:47:50 +0000750 doc = htmlReadFile(filename, NULL, options);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000751 }
Daniel Veillardc2c0d142004-01-13 20:51:03 +0000752#else
753 doc = htmlReadFile(filename,NULL,options);
754#endif
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000755 if (doc == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000756 xmlGenericError(xmlGenericErrorContext,
757 "Could not parse %s\n", filename);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000758 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000759
Daniel Veillard2156d432004-03-04 15:59:36 +0000760#ifdef LIBXML_TREE_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000761 /*
762 * test intermediate copy if needed.
763 */
764 if (copy) {
Daniel Veillard2156d432004-03-04 15:59:36 +0000765 htmlDocPtr tmp;
766
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000767 tmp = doc;
768 doc = xmlCopyDoc(doc, 1);
769 xmlFreeDoc(tmp);
770 }
Daniel Veillard2156d432004-03-04 15:59:36 +0000771#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000772
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000773#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000774 /*
775 * print it.
776 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800777 if (!noout) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000778#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000779 if (!debug) {
780 if (encoding)
781 htmlSaveFileEnc("-", doc, encoding);
782 else
783 htmlDocDump(stdout, doc);
784 } else
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000785 xmlDebugDumpDocument(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000786#else
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000787 if (encoding)
788 htmlSaveFileEnc("-", doc, encoding);
789 else
790 htmlDocDump(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000791#endif
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800792 }
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000793#endif /* LIBXML_OUTPUT_ENABLED */
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000794
795 /*
796 * free it.
797 */
798 xmlFreeDoc(doc);
799}
800
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000801int main(int argc, char **argv) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000802 int i, count;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000803 int files = 0;
804
805 for (i = 1; i < argc ; i++) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000806#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000807 if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
808 debug++;
Daniel Veillard361d8452000-04-03 19:48:13 +0000809 else
810#endif
811 if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000812 copy++;
Daniel Veillard73b013f2003-09-30 12:36:01 +0000813#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000814 else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
815 push++;
Daniel Veillard73b013f2003-09-30 12:36:01 +0000816#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000817 else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
818 sax++;
819 else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
820 noout++;
821 else if ((!strcmp(argv[i], "-repeat")) ||
822 (!strcmp(argv[i], "--repeat")))
823 repeat++;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000824 else if ((!strcmp(argv[i], "-encode")) ||
825 (!strcmp(argv[i], "--encode"))) {
826 i++;
827 encoding = argv[i];
828 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000829 }
830 for (i = 1; i < argc ; i++) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000831 if ((!strcmp(argv[i], "-encode")) ||
832 (!strcmp(argv[i], "--encode"))) {
833 i++;
834 continue;
835 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000836 if (argv[i][0] != '-') {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000837 if (repeat) {
838 for (count = 0;count < 100 * repeat;count++) {
839 if (sax)
840 parseSAXFile(argv[i]);
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800841 else
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000842 parseAndPrintFile(argv[i]);
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800843 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000844 } else {
845 if (sax)
846 parseSAXFile(argv[i]);
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800847 else
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000848 parseAndPrintFile(argv[i]);
849 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000850 files ++;
851 }
852 }
853 if (files == 0) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000854 printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000855 argv[0]);
856 printf("\tParse the HTML files and output the result of the parsing\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000857#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000858 printf("\t--debug : dump a debug tree of the in-memory document\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000859#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000860 printf("\t--copy : used to test the internal copy implementation\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000861 printf("\t--sax : debug the sequence of SAX callbacks\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000862 printf("\t--repeat : parse the file 100 times, for timing\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000863 printf("\t--noout : do not print the result\n");
Daniel Veillard73b013f2003-09-30 12:36:01 +0000864#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000865 printf("\t--push : use the push mode parser\n");
Daniel Veillard73b013f2003-09-30 12:36:01 +0000866#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000867 printf("\t--encode encoding : output in the given encoding\n");
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000868 }
Daniel Veillardf5c2c871999-12-01 09:51:45 +0000869 xmlCleanupParser();
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000870 xmlMemoryDump();
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000871
872 return(0);
873}
Daniel Veillard361d8452000-04-03 19:48:13 +0000874#else /* !LIBXML_HTML_ENABLED */
875#include <stdio.h>
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000876int main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000877 printf("%s : HTML support not compiled in\n", argv[0]);
878 return(0);
879}
880#endif