blob: 4a6074ac1b3d264cd5dfc3bc6c46b1297c1a1bbf [file] [log] [blame]
/*
 * testHTML.c : a small tester program for HTML input.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
8
Bjorn Reese70a9da52001-04-21 16:57:29 +00009#include "libxml.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000010
Daniel Veillard361d8452000-04-03 19:48:13 +000011#ifdef LIBXML_HTML_ENABLED
12
Daniel Veillard7f7d1111999-09-22 09:46:25 +000013#include <string.h>
Daniel Veillard7c1206f1999-10-14 09:10:25 +000014#include <stdarg.h>
15
Daniel Veillard7f7d1111999-09-22 09:46:25 +000016
17#ifdef HAVE_SYS_TYPES_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000018#include <sys/types.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000019#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000020#ifdef HAVE_SYS_STAT_H
21#include <sys/stat.h>
22#endif
23#ifdef HAVE_FCNTL_H
24#include <fcntl.h>
25#endif
26#ifdef HAVE_UNISTD_H
27#include <unistd.h>
28#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000029#ifdef HAVE_STDLIB_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000030#include <stdlib.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000031#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000032
Daniel Veillard361d8452000-04-03 19:48:13 +000033#include <libxml/xmlmemory.h>
34#include <libxml/HTMLparser.h>
35#include <libxml/HTMLtree.h>
36#include <libxml/debugXML.h>
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +000037#include <libxml/xmlerror.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000038#include <libxml/globals.h>
Daniel Veillardbe70ff71999-07-05 16:50:46 +000039
/*
 * Command-line switches.  The integer flags are counters: main() bumps
 * one each time the matching option appears, and zero means "not asked".
 */
#ifdef LIBXML_DEBUG_ENABLED
static int debug = 0;           /* --debug */
#endif
static int copy = 0;            /* --copy */
static int sax = 0;             /* --sax */
static int repeat = 0;          /* --repeat */
static int noout = 0;           /* --noout */
#ifdef LIBXML_PUSH_ENABLED
static int push = 0;            /* --push */
#endif /* LIBXML_PUSH_ENABLED */
static char *encoding = NULL;   /* argument of --encode, NULL if absent */
static int options = 0;         /* handed to htmlReadFile(); no visible option changes it */
Daniel Veillardbe70ff71999-07-05 16:50:46 +000052
Daniel Veillard7c1206f1999-10-14 09:10:25 +000053xmlSAXHandler emptySAXHandlerStruct = {
54 NULL, /* internalSubset */
55 NULL, /* isStandalone */
56 NULL, /* hasInternalSubset */
57 NULL, /* hasExternalSubset */
58 NULL, /* resolveEntity */
59 NULL, /* getEntity */
60 NULL, /* entityDecl */
61 NULL, /* notationDecl */
62 NULL, /* attributeDecl */
63 NULL, /* elementDecl */
64 NULL, /* unparsedEntityDecl */
65 NULL, /* setDocumentLocator */
66 NULL, /* startDocument */
67 NULL, /* endDocument */
68 NULL, /* startElement */
69 NULL, /* endElement */
70 NULL, /* reference */
71 NULL, /* characters */
72 NULL, /* ignorableWhitespace */
73 NULL, /* processingInstruction */
74 NULL, /* comment */
75 NULL, /* xmlParserWarning */
76 NULL, /* xmlParserError */
77 NULL, /* xmlParserError */
78 NULL, /* getParameterEntity */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000079 NULL, /* cdataBlock */
Daniel Veillardd0463562001-10-13 09:15:48 +000080 NULL, /* externalSubset */
Daniel Veillard092643b2003-09-25 14:29:29 +000081 1,
82 NULL,
83 NULL,
84 NULL
Daniel Veillard7c1206f1999-10-14 09:10:25 +000085};
86
87xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
88extern xmlSAXHandlerPtr debugSAXHandler;
89
/************************************************************************
 *                                                                      *
 *                              Debug Handlers                          *
 *                                                                      *
 ************************************************************************/
95
96/**
97 * isStandaloneDebug:
98 * @ctxt: An XML parser context
99 *
100 * Is this document tagged standalone ?
101 *
102 * Returns 1 if true
103 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000104static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000105isStandaloneDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000106{
107 fprintf(stdout, "SAX.isStandalone()\n");
108 return(0);
109}
110
111/**
112 * hasInternalSubsetDebug:
113 * @ctxt: An XML parser context
114 *
115 * Does this document has an internal subset
116 *
117 * Returns 1 if true
118 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000119static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000120hasInternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000121{
122 fprintf(stdout, "SAX.hasInternalSubset()\n");
123 return(0);
124}
125
126/**
127 * hasExternalSubsetDebug:
128 * @ctxt: An XML parser context
129 *
130 * Does this document has an external subset
131 *
132 * Returns 1 if true
133 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000134static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000135hasExternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000136{
137 fprintf(stdout, "SAX.hasExternalSubset()\n");
138 return(0);
139}
140
141/**
142 * hasInternalSubsetDebug:
143 * @ctxt: An XML parser context
144 *
145 * Does this document has an internal subset
146 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000147static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000148internalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000149 const xmlChar *ExternalID, const xmlChar *SystemID)
150{
Daniel Veillard808a3f12000-08-17 13:50:51 +0000151 fprintf(stdout, "SAX.internalSubset(%s,", name);
152 if (ExternalID == NULL)
153 fprintf(stdout, " ,");
154 else
155 fprintf(stdout, " %s,", ExternalID);
156 if (SystemID == NULL)
157 fprintf(stdout, " )\n");
158 else
159 fprintf(stdout, " %s)\n", SystemID);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000160}
161
162/**
163 * resolveEntityDebug:
164 * @ctxt: An XML parser context
165 * @publicId: The public ID of the entity
166 * @systemId: The system ID of the entity
167 *
168 * Special entity resolver, better left to the parser, it has
169 * more context than the application layer.
170 * The default behaviour is to NOT resolve the entities, in that case
171 * the ENTITY_REF nodes are built in the structure (and the parameter
172 * values).
173 *
174 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
175 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000176static xmlParserInputPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000177resolveEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *publicId, const xmlChar *systemId)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000178{
179 /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
180
181
182 fprintf(stdout, "SAX.resolveEntity(");
183 if (publicId != NULL)
184 fprintf(stdout, "%s", (char *)publicId);
185 else
186 fprintf(stdout, " ");
187 if (systemId != NULL)
188 fprintf(stdout, ", %s)\n", (char *)systemId);
189 else
190 fprintf(stdout, ", )\n");
191/*********
192 if (systemId != NULL) {
193 return(xmlNewInputFromFile(ctxt, (char *) systemId));
194 }
195 *********/
196 return(NULL);
197}
198
199/**
200 * getEntityDebug:
201 * @ctxt: An XML parser context
202 * @name: The entity name
203 *
204 * Get an entity by name
205 *
206 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
207 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000208static xmlEntityPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000209getEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000210{
211 fprintf(stdout, "SAX.getEntity(%s)\n", name);
212 return(NULL);
213}
214
215/**
216 * getParameterEntityDebug:
217 * @ctxt: An XML parser context
218 * @name: The entity name
219 *
220 * Get a parameter entity by name
221 *
222 * Returns the xmlParserInputPtr
223 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000224static xmlEntityPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000225getParameterEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000226{
227 fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
228 return(NULL);
229}
230
231
232/**
233 * entityDeclDebug:
234 * @ctxt: An XML parser context
235 * @name: the entity name
236 * @type: the entity type
237 * @publicId: The public ID of the entity
238 * @systemId: The system ID of the entity
239 * @content: the entity value (without processing).
240 *
241 * An entity definition has been parsed
242 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000243static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000244entityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000245 const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
246{
247 fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
248 name, type, publicId, systemId, content);
249}
250
251/**
252 * attributeDeclDebug:
253 * @ctxt: An XML parser context
254 * @name: the attribute name
255 * @type: the attribute type
256 *
257 * An attribute definition has been parsed
258 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000259static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000260attributeDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *elem, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000261 int type, int def, const xmlChar *defaultValue,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000262 xmlEnumerationPtr tree ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000263{
264 fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
265 elem, name, type, def, defaultValue);
266}
267
268/**
269 * elementDeclDebug:
270 * @ctxt: An XML parser context
271 * @name: the element name
272 * @type: the element type
273 * @content: the element value (without processing).
274 *
275 * An element definition has been parsed
276 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000277static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000278elementDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
279 xmlElementContentPtr content ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000280{
281 fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
282 name, type);
283}
284
285/**
286 * notationDeclDebug:
287 * @ctxt: An XML parser context
288 * @name: The name of the notation
289 * @publicId: The public ID of the entity
290 * @systemId: The system ID of the entity
291 *
292 * What to do when a notation declaration has been parsed.
293 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000294static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000295notationDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000296 const xmlChar *publicId, const xmlChar *systemId)
297{
298 fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
299 (char *) name, (char *) publicId, (char *) systemId);
300}
301
302/**
303 * unparsedEntityDeclDebug:
304 * @ctxt: An XML parser context
305 * @name: The name of the entity
306 * @publicId: The public ID of the entity
307 * @systemId: The system ID of the entity
308 * @notationName: the name of the notation
309 *
310 * What to do when an unparsed entity declaration is parsed
311 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000312static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000313unparsedEntityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000314 const xmlChar *publicId, const xmlChar *systemId,
315 const xmlChar *notationName)
316{
317 fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
318 (char *) name, (char *) publicId, (char *) systemId,
319 (char *) notationName);
320}
321
322/**
323 * setDocumentLocatorDebug:
324 * @ctxt: An XML parser context
325 * @loc: A SAX Locator
326 *
327 * Receive the document locator at startup, actually xmlDefaultSAXLocator
328 * Everything is available on the context, so this is useless in our case.
329 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000330static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000331setDocumentLocatorDebug(void *ctx ATTRIBUTE_UNUSED, xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000332{
333 fprintf(stdout, "SAX.setDocumentLocator()\n");
334}
335
336/**
337 * startDocumentDebug:
338 * @ctxt: An XML parser context
339 *
340 * called when the document start being processed.
341 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000342static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000343startDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000344{
345 fprintf(stdout, "SAX.startDocument()\n");
346}
347
348/**
349 * endDocumentDebug:
350 * @ctxt: An XML parser context
351 *
352 * called when the document end has been detected.
353 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000354static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000355endDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000356{
357 fprintf(stdout, "SAX.endDocument()\n");
358}
359
360/**
361 * startElementDebug:
362 * @ctxt: An XML parser context
363 * @name: The element name
364 *
365 * called when an opening tag has been processed.
366 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000367static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000368startElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, const xmlChar **atts)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000369{
370 int i;
371
372 fprintf(stdout, "SAX.startElement(%s", (char *) name);
373 if (atts != NULL) {
374 for (i = 0;(atts[i] != NULL);i++) {
Daniel Veillard808a3f12000-08-17 13:50:51 +0000375 fprintf(stdout, ", %s", atts[i++]);
Daniel Veillarde010c172000-08-28 10:04:51 +0000376 if (atts[i] != NULL) {
377 unsigned char output[40];
378 const unsigned char *att = atts[i];
379 int outlen, attlen;
380 fprintf(stdout, "='");
381 while ((attlen = strlen((char*)att)) > 0) {
382 outlen = sizeof output - 1;
383 htmlEncodeEntities(output, &outlen, att, &attlen, '\'');
Daniel Veillard5f704af2003-03-05 10:01:43 +0000384 output[outlen] = 0;
William M. Brackc1939562003-08-05 15:52:22 +0000385 fprintf(stdout, "%s", (char *) output);
Daniel Veillarde010c172000-08-28 10:04:51 +0000386 att += attlen;
387 }
388 fprintf(stdout, "'");
389 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000390 }
391 }
392 fprintf(stdout, ")\n");
393}
394
395/**
396 * endElementDebug:
397 * @ctxt: An XML parser context
398 * @name: The element name
399 *
400 * called when the end of an element has been detected.
401 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000402static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000403endElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000404{
405 fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
406}
407
408/**
409 * charactersDebug:
410 * @ctxt: An XML parser context
411 * @ch: a xmlChar string
412 * @len: the number of xmlChar
413 *
414 * receiving some chars from the parser.
415 * Question: how much at a time ???
416 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000417static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000418charactersDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000419{
Daniel Veillarde010c172000-08-28 10:04:51 +0000420 unsigned char output[40];
Daniel Veillard4948eb42000-08-29 09:41:15 +0000421 int inlen = len, outlen = 30;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000422
Daniel Veillard4948eb42000-08-29 09:41:15 +0000423 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
Daniel Veillarde010c172000-08-28 10:04:51 +0000424 output[outlen] = 0;
Daniel Veillard87b95392000-08-12 21:12:04 +0000425
426 fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000427}
428
429/**
Daniel Veillard7eda8452000-10-14 23:38:43 +0000430 * cdataDebug:
431 * @ctxt: An XML parser context
432 * @ch: a xmlChar string
433 * @len: the number of xmlChar
434 *
435 * receiving some cdata chars from the parser.
436 * Question: how much at a time ???
437 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000438static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000439cdataDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7eda8452000-10-14 23:38:43 +0000440{
441 unsigned char output[40];
442 int inlen = len, outlen = 30;
443
444 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
445 output[outlen] = 0;
446
447 fprintf(stdout, "SAX.cdata(%s, %d)\n", output, len);
448}
449
450/**
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000451 * referenceDebug:
452 * @ctxt: An XML parser context
453 * @name: The entity name
454 *
455 * called when an entity reference is detected.
456 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000457static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000458referenceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000459{
460 fprintf(stdout, "SAX.reference(%s)\n", name);
461}
462
463/**
464 * ignorableWhitespaceDebug:
465 * @ctxt: An XML parser context
466 * @ch: a xmlChar string
467 * @start: the first char in the string
468 * @len: the number of xmlChar
469 *
470 * receiving some ignorable whitespaces from the parser.
471 * Question: how much at a time ???
472 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000473static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000474ignorableWhitespaceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000475{
Daniel Veillard87b95392000-08-12 21:12:04 +0000476 char output[40];
477 int i;
478
479 for (i = 0;(i<len) && (i < 30);i++)
480 output[i] = ch[i];
481 output[i] = 0;
482
483 fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000484}
485
486/**
487 * processingInstructionDebug:
488 * @ctxt: An XML parser context
489 * @target: the target name
490 * @data: the PI data's
491 * @len: the number of xmlChar
492 *
493 * A processing instruction has been parsed.
494 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000495static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000496processingInstructionDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *target,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000497 const xmlChar *data)
498{
499 fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
500 (char *) target, (char *) data);
501}
502
503/**
504 * commentDebug:
505 * @ctxt: An XML parser context
506 * @value: the comment content
507 *
508 * A comment has been parsed.
509 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000510static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000511commentDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000512{
513 fprintf(stdout, "SAX.comment(%s)\n", value);
514}
515
516/**
517 * warningDebug:
518 * @ctxt: An XML parser context
519 * @msg: the message to display/transmit
520 * @...: extra parameters for the message display
521 *
522 * Display and format a warning messages, gives file, line, position and
523 * extra parameters.
524 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000525static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000526warningDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000527{
528 va_list args;
529
530 va_start(args, msg);
531 fprintf(stdout, "SAX.warning: ");
532 vfprintf(stdout, msg, args);
533 va_end(args);
534}
535
536/**
537 * errorDebug:
538 * @ctxt: An XML parser context
539 * @msg: the message to display/transmit
540 * @...: extra parameters for the message display
541 *
542 * Display and format a error messages, gives file, line, position and
543 * extra parameters.
544 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000545static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000546errorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000547{
548 va_list args;
549
550 va_start(args, msg);
551 fprintf(stdout, "SAX.error: ");
552 vfprintf(stdout, msg, args);
553 va_end(args);
554}
555
556/**
557 * fatalErrorDebug:
558 * @ctxt: An XML parser context
559 * @msg: the message to display/transmit
560 * @...: extra parameters for the message display
561 *
562 * Display and format a fatalError messages, gives file, line, position and
563 * extra parameters.
564 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000565static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000566fatalErrorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000567{
568 va_list args;
569
570 va_start(args, msg);
571 fprintf(stdout, "SAX.fatalError: ");
572 vfprintf(stdout, msg, args);
573 va_end(args);
574}
575
576xmlSAXHandler debugSAXHandlerStruct = {
577 internalSubsetDebug,
578 isStandaloneDebug,
579 hasInternalSubsetDebug,
580 hasExternalSubsetDebug,
581 resolveEntityDebug,
582 getEntityDebug,
583 entityDeclDebug,
584 notationDeclDebug,
585 attributeDeclDebug,
586 elementDeclDebug,
587 unparsedEntityDeclDebug,
588 setDocumentLocatorDebug,
589 startDocumentDebug,
590 endDocumentDebug,
591 startElementDebug,
592 endElementDebug,
593 referenceDebug,
594 charactersDebug,
595 ignorableWhitespaceDebug,
596 processingInstructionDebug,
597 commentDebug,
598 warningDebug,
599 errorDebug,
600 fatalErrorDebug,
601 getParameterEntityDebug,
Daniel Veillard7eda8452000-10-14 23:38:43 +0000602 cdataDebug,
Daniel Veillardd0463562001-10-13 09:15:48 +0000603 NULL,
Daniel Veillard092643b2003-09-25 14:29:29 +0000604 1,
605 NULL,
606 NULL,
607 NULL
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000608};
609
610xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
/************************************************************************
 *                                                                      *
 *                              Debug                                   *
 *                                                                      *
 ************************************************************************/
616
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000617static void
618parseSAXFile(char *filename) {
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000619 htmlDocPtr doc = NULL;
620
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000621 /*
622 * Empty callbacks for checking
623 */
Daniel Veillard73b013f2003-09-30 12:36:01 +0000624#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard87b95392000-08-12 21:12:04 +0000625 if (push) {
626 FILE *f;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000627
Daniel Veillard87b95392000-08-12 21:12:04 +0000628 f = fopen(filename, "r");
629 if (f != NULL) {
630 int res, size = 3;
631 char chars[4096];
632 htmlParserCtxtPtr ctxt;
633
634 /* if (repeat) */
635 size = 4096;
636 res = fread(chars, 1, 4, f);
637 if (res > 0) {
638 ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL,
William M. Brack779af002003-08-01 15:55:39 +0000639 chars, res, filename, XML_CHAR_ENCODING_NONE);
Daniel Veillard87b95392000-08-12 21:12:04 +0000640 while ((res = fread(chars, 1, size, f)) > 0) {
641 htmlParseChunk(ctxt, chars, res, 0);
642 }
643 htmlParseChunk(ctxt, chars, 0, 1);
644 doc = ctxt->myDoc;
645 htmlFreeParserCtxt(ctxt);
646 }
647 if (doc != NULL) {
648 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
649 xmlFreeDoc(doc);
650 }
651 fclose(f);
652 }
653 if (!noout) {
654 f = fopen(filename, "r");
655 if (f != NULL) {
656 int res, size = 3;
657 char chars[4096];
658 htmlParserCtxtPtr ctxt;
659
660 /* if (repeat) */
661 size = 4096;
662 res = fread(chars, 1, 4, f);
663 if (res > 0) {
664 ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL,
William M. Brack779af002003-08-01 15:55:39 +0000665 chars, res, filename, XML_CHAR_ENCODING_NONE);
Daniel Veillard87b95392000-08-12 21:12:04 +0000666 while ((res = fread(chars, 1, size, f)) > 0) {
667 htmlParseChunk(ctxt, chars, res, 0);
668 }
669 htmlParseChunk(ctxt, chars, 0, 1);
670 doc = ctxt->myDoc;
671 htmlFreeParserCtxt(ctxt);
672 }
673 if (doc != NULL) {
674 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
675 xmlFreeDoc(doc);
676 }
677 fclose(f);
678 }
679 }
680 } else {
Daniel Veillard73b013f2003-09-30 12:36:01 +0000681#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillard87b95392000-08-12 21:12:04 +0000682 doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000683 if (doc != NULL) {
684 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
685 xmlFreeDoc(doc);
686 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000687
688 if (!noout) {
689 /*
690 * Debug callback
691 */
692 doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
693 if (doc != NULL) {
694 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
695 xmlFreeDoc(doc);
696 }
697 }
Daniel Veillard73b013f2003-09-30 12:36:01 +0000698#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000699 }
Daniel Veillard73b013f2003-09-30 12:36:01 +0000700#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000701}
702
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000703static void
704parseAndPrintFile(char *filename) {
Daniel Veillard2eac5032000-01-09 21:08:56 +0000705 htmlDocPtr doc = NULL, tmp;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000706
707 /*
708 * build an HTML tree from a string;
709 */
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000710 if (push) {
711 FILE *f;
712
713 f = fopen(filename, "r");
714 if (f != NULL) {
715 int res, size = 3;
Daniel Veillard87b95392000-08-12 21:12:04 +0000716 char chars[4096];
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000717 htmlParserCtxtPtr ctxt;
718
Daniel Veillard87b95392000-08-12 21:12:04 +0000719 /* if (repeat) */
720 size = 4096;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000721 res = fread(chars, 1, 4, f);
722 if (res > 0) {
723 ctxt = htmlCreatePushParserCtxt(NULL, NULL,
William M. Brack779af002003-08-01 15:55:39 +0000724 chars, res, filename, XML_CHAR_ENCODING_NONE);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000725 while ((res = fread(chars, 1, size, f)) > 0) {
726 htmlParseChunk(ctxt, chars, res, 0);
727 }
728 htmlParseChunk(ctxt, chars, 0, 1);
729 doc = ctxt->myDoc;
730 htmlFreeParserCtxt(ctxt);
731 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000732 fclose(f);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000733 }
734 } else {
Daniel Veillard9475a352003-09-26 12:47:50 +0000735 doc = htmlReadFile(filename, NULL, options);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000736 }
737 if (doc == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000738 xmlGenericError(xmlGenericErrorContext,
739 "Could not parse %s\n", filename);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000740 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000741
742 /*
743 * test intermediate copy if needed.
744 */
745 if (copy) {
746 tmp = doc;
747 doc = xmlCopyDoc(doc, 1);
748 xmlFreeDoc(tmp);
749 }
750
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000751#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000752 /*
753 * print it.
754 */
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000755 if (!noout) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000756#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000757 if (!debug) {
758 if (encoding)
759 htmlSaveFileEnc("-", doc, encoding);
760 else
761 htmlDocDump(stdout, doc);
762 } else
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000763 xmlDebugDumpDocument(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000764#else
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000765 if (encoding)
766 htmlSaveFileEnc("-", doc, encoding);
767 else
768 htmlDocDump(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000769#endif
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000770 }
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000771#endif /* LIBXML_OUTPUT_ENABLED */
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000772
773 /*
774 * free it.
775 */
776 xmlFreeDoc(doc);
777}
778
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000779int main(int argc, char **argv) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000780 int i, count;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000781 int files = 0;
782
783 for (i = 1; i < argc ; i++) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000784#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000785 if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
786 debug++;
Daniel Veillard361d8452000-04-03 19:48:13 +0000787 else
788#endif
789 if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000790 copy++;
Daniel Veillard73b013f2003-09-30 12:36:01 +0000791#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000792 else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
793 push++;
Daniel Veillard73b013f2003-09-30 12:36:01 +0000794#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000795 else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
796 sax++;
797 else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
798 noout++;
799 else if ((!strcmp(argv[i], "-repeat")) ||
800 (!strcmp(argv[i], "--repeat")))
801 repeat++;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000802 else if ((!strcmp(argv[i], "-encode")) ||
803 (!strcmp(argv[i], "--encode"))) {
804 i++;
805 encoding = argv[i];
806 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000807 }
808 for (i = 1; i < argc ; i++) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000809 if ((!strcmp(argv[i], "-encode")) ||
810 (!strcmp(argv[i], "--encode"))) {
811 i++;
812 continue;
813 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000814 if (argv[i][0] != '-') {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000815 if (repeat) {
816 for (count = 0;count < 100 * repeat;count++) {
817 if (sax)
818 parseSAXFile(argv[i]);
819 else
820 parseAndPrintFile(argv[i]);
821 }
822 } else {
823 if (sax)
824 parseSAXFile(argv[i]);
825 else
826 parseAndPrintFile(argv[i]);
827 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000828 files ++;
829 }
830 }
831 if (files == 0) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000832 printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000833 argv[0]);
834 printf("\tParse the HTML files and output the result of the parsing\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000835#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000836 printf("\t--debug : dump a debug tree of the in-memory document\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000837#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000838 printf("\t--copy : used to test the internal copy implementation\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000839 printf("\t--sax : debug the sequence of SAX callbacks\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000840 printf("\t--repeat : parse the file 100 times, for timing\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000841 printf("\t--noout : do not print the result\n");
Daniel Veillard73b013f2003-09-30 12:36:01 +0000842#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000843 printf("\t--push : use the push mode parser\n");
Daniel Veillard73b013f2003-09-30 12:36:01 +0000844#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000845 printf("\t--encode encoding : output in the given encoding\n");
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000846 }
Daniel Veillardf5c2c871999-12-01 09:51:45 +0000847 xmlCleanupParser();
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000848 xmlMemoryDump();
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000849
850 return(0);
851}
Daniel Veillard361d8452000-04-03 19:48:13 +0000852#else /* !LIBXML_HTML_ENABLED */
853#include <stdio.h>
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000854int main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000855 printf("%s : HTML support not compiled in\n", argv[0]);
856 return(0);
857}
858#endif