blob: ba33aaa7a00eb794b7dcea22c1fbb81401628c05 [file] [log] [blame]
Daniel Veillardbe70ff71999-07-05 16:50:46 +00001/*
2 * testHTML.c : a small tester program for HTML input.
3 *
4 * See Copyright for the status of this software.
5 *
6 * Daniel.Veillard@w3.org
7 */
8
9#ifdef WIN32
Daniel Veillard3c558c31999-12-22 11:30:41 +000010#include "win32config.h"
Daniel Veillardc2def842000-11-07 14:21:01 +000011#undef LIBXML_DLL_IMPORT
Daniel Veillardbe70ff71999-07-05 16:50:46 +000012#else
Daniel Veillard7f7d1111999-09-22 09:46:25 +000013#include "config.h"
Daniel Veillardbe70ff71999-07-05 16:50:46 +000014#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000015
Daniel Veillardb71379b2000-10-09 12:30:39 +000016#include <libxml/xmlversion.h>
Daniel Veillard361d8452000-04-03 19:48:13 +000017#ifdef LIBXML_HTML_ENABLED
18
Daniel Veillard7f7d1111999-09-22 09:46:25 +000019#include <stdio.h>
20#include <string.h>
Daniel Veillard7c1206f1999-10-14 09:10:25 +000021#include <stdarg.h>
22
Daniel Veillard7f7d1111999-09-22 09:46:25 +000023
24#ifdef HAVE_SYS_TYPES_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000025#include <sys/types.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000026#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000027#ifdef HAVE_SYS_STAT_H
28#include <sys/stat.h>
29#endif
30#ifdef HAVE_FCNTL_H
31#include <fcntl.h>
32#endif
33#ifdef HAVE_UNISTD_H
34#include <unistd.h>
35#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000036#ifdef HAVE_STDLIB_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000037#include <stdlib.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000038#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000039
Daniel Veillard361d8452000-04-03 19:48:13 +000040#include <libxml/xmlmemory.h>
41#include <libxml/HTMLparser.h>
42#include <libxml/HTMLtree.h>
43#include <libxml/debugXML.h>
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +000044#include <libxml/xmlerror.h>
Daniel Veillardbe70ff71999-07-05 16:50:46 +000045
#ifdef LIBXML_DEBUG_ENABLED
static int debug = 0;		/* --debug: dump the tree with xmlDebugDumpDocument() */
#endif
static int copy = 0;		/* --copy: pass the document through xmlCopyDoc() before output */
static int sax = 0;		/* --sax: run parseSAXFile() instead of parseAndPrintFile() */
static int repeat = 0;		/* --repeat: each file is parsed 100 * repeat times */
static int noout = 0;		/* --noout: skip printing / the debug-callback pass */
static int push = 0;		/* --push: feed the input through the push parser */
static char *encoding = NULL;	/* --encode: encoding handed to htmlSaveFileEnc() */
Daniel Veillardbe70ff71999-07-05 16:50:46 +000055
/*
 * SAX handler whose callbacks are all NULL.  parseSAXFile() uses it for a
 * first, silent parsing pass before the pass that uses the debug handler.
 */
xmlSAXHandler emptySAXHandlerStruct = {
    NULL, /* internalSubset */
    NULL, /* isStandalone */
    NULL, /* hasInternalSubset */
    NULL, /* hasExternalSubset */
    NULL, /* resolveEntity */
    NULL, /* getEntity */
    NULL, /* entityDecl */
    NULL, /* notationDecl */
    NULL, /* attributeDecl */
    NULL, /* elementDecl */
    NULL, /* unparsedEntityDecl */
    NULL, /* setDocumentLocator */
    NULL, /* startDocument */
    NULL, /* endDocument */
    NULL, /* startElement */
    NULL, /* endElement */
    NULL, /* reference */
    NULL, /* characters */
    NULL, /* ignorableWhitespace */
    NULL, /* processingInstruction */
    NULL, /* comment */
    NULL, /* warning */
    NULL, /* error */
    NULL, /* fatalError */
    NULL, /* getParameterEntity */
    NULL, /* cdataBlock */
    NULL  /* externalSubset */
};

/* Pointer form of the empty handler, as passed to the parser entry points. */
xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
/* Defined further down, after the debug callbacks it refers to. */
extern xmlSAXHandlerPtr debugSAXHandler;
88
89/************************************************************************
90 * *
91 * Debug Handlers *
92 * *
93 ************************************************************************/
94
95/**
96 * isStandaloneDebug:
97 * @ctxt: An XML parser context
98 *
99 * Is this document tagged standalone ?
100 *
101 * Returns 1 if true
102 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000103static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000104isStandaloneDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000105{
106 fprintf(stdout, "SAX.isStandalone()\n");
107 return(0);
108}
109
110/**
111 * hasInternalSubsetDebug:
112 * @ctxt: An XML parser context
113 *
114 * Does this document has an internal subset
115 *
116 * Returns 1 if true
117 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000118static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000119hasInternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000120{
121 fprintf(stdout, "SAX.hasInternalSubset()\n");
122 return(0);
123}
124
125/**
126 * hasExternalSubsetDebug:
127 * @ctxt: An XML parser context
128 *
129 * Does this document has an external subset
130 *
131 * Returns 1 if true
132 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000133static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000134hasExternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000135{
136 fprintf(stdout, "SAX.hasExternalSubset()\n");
137 return(0);
138}
139
140/**
141 * hasInternalSubsetDebug:
142 * @ctxt: An XML parser context
143 *
144 * Does this document has an internal subset
145 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000146static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000147internalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000148 const xmlChar *ExternalID, const xmlChar *SystemID)
149{
Daniel Veillard808a3f12000-08-17 13:50:51 +0000150 fprintf(stdout, "SAX.internalSubset(%s,", name);
151 if (ExternalID == NULL)
152 fprintf(stdout, " ,");
153 else
154 fprintf(stdout, " %s,", ExternalID);
155 if (SystemID == NULL)
156 fprintf(stdout, " )\n");
157 else
158 fprintf(stdout, " %s)\n", SystemID);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000159}
160
161/**
162 * resolveEntityDebug:
163 * @ctxt: An XML parser context
164 * @publicId: The public ID of the entity
165 * @systemId: The system ID of the entity
166 *
167 * Special entity resolver, better left to the parser, it has
168 * more context than the application layer.
169 * The default behaviour is to NOT resolve the entities, in that case
170 * the ENTITY_REF nodes are built in the structure (and the parameter
171 * values).
172 *
173 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
174 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000175static xmlParserInputPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000176resolveEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *publicId, const xmlChar *systemId)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000177{
178 /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
179
180
181 fprintf(stdout, "SAX.resolveEntity(");
182 if (publicId != NULL)
183 fprintf(stdout, "%s", (char *)publicId);
184 else
185 fprintf(stdout, " ");
186 if (systemId != NULL)
187 fprintf(stdout, ", %s)\n", (char *)systemId);
188 else
189 fprintf(stdout, ", )\n");
190/*********
191 if (systemId != NULL) {
192 return(xmlNewInputFromFile(ctxt, (char *) systemId));
193 }
194 *********/
195 return(NULL);
196}
197
198/**
199 * getEntityDebug:
200 * @ctxt: An XML parser context
201 * @name: The entity name
202 *
203 * Get an entity by name
204 *
205 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
206 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000207static xmlEntityPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000208getEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000209{
210 fprintf(stdout, "SAX.getEntity(%s)\n", name);
211 return(NULL);
212}
213
214/**
215 * getParameterEntityDebug:
216 * @ctxt: An XML parser context
217 * @name: The entity name
218 *
219 * Get a parameter entity by name
220 *
221 * Returns the xmlParserInputPtr
222 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000223static xmlEntityPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000224getParameterEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000225{
226 fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
227 return(NULL);
228}
229
230
231/**
232 * entityDeclDebug:
233 * @ctxt: An XML parser context
234 * @name: the entity name
235 * @type: the entity type
236 * @publicId: The public ID of the entity
237 * @systemId: The system ID of the entity
238 * @content: the entity value (without processing).
239 *
240 * An entity definition has been parsed
241 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000242static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000243entityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000244 const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
245{
246 fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
247 name, type, publicId, systemId, content);
248}
249
250/**
251 * attributeDeclDebug:
252 * @ctxt: An XML parser context
253 * @name: the attribute name
254 * @type: the attribute type
255 *
256 * An attribute definition has been parsed
257 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000258static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000259attributeDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *elem, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000260 int type, int def, const xmlChar *defaultValue,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000261 xmlEnumerationPtr tree ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000262{
263 fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
264 elem, name, type, def, defaultValue);
265}
266
267/**
268 * elementDeclDebug:
269 * @ctxt: An XML parser context
270 * @name: the element name
271 * @type: the element type
272 * @content: the element value (without processing).
273 *
274 * An element definition has been parsed
275 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000276static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000277elementDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
278 xmlElementContentPtr content ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000279{
280 fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
281 name, type);
282}
283
284/**
285 * notationDeclDebug:
286 * @ctxt: An XML parser context
287 * @name: The name of the notation
288 * @publicId: The public ID of the entity
289 * @systemId: The system ID of the entity
290 *
291 * What to do when a notation declaration has been parsed.
292 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000293static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000294notationDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000295 const xmlChar *publicId, const xmlChar *systemId)
296{
297 fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
298 (char *) name, (char *) publicId, (char *) systemId);
299}
300
301/**
302 * unparsedEntityDeclDebug:
303 * @ctxt: An XML parser context
304 * @name: The name of the entity
305 * @publicId: The public ID of the entity
306 * @systemId: The system ID of the entity
307 * @notationName: the name of the notation
308 *
309 * What to do when an unparsed entity declaration is parsed
310 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000311static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000312unparsedEntityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000313 const xmlChar *publicId, const xmlChar *systemId,
314 const xmlChar *notationName)
315{
316 fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
317 (char *) name, (char *) publicId, (char *) systemId,
318 (char *) notationName);
319}
320
321/**
322 * setDocumentLocatorDebug:
323 * @ctxt: An XML parser context
324 * @loc: A SAX Locator
325 *
326 * Receive the document locator at startup, actually xmlDefaultSAXLocator
327 * Everything is available on the context, so this is useless in our case.
328 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000329static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000330setDocumentLocatorDebug(void *ctx ATTRIBUTE_UNUSED, xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000331{
332 fprintf(stdout, "SAX.setDocumentLocator()\n");
333}
334
335/**
336 * startDocumentDebug:
337 * @ctxt: An XML parser context
338 *
339 * called when the document start being processed.
340 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000341static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000342startDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000343{
344 fprintf(stdout, "SAX.startDocument()\n");
345}
346
347/**
348 * endDocumentDebug:
349 * @ctxt: An XML parser context
350 *
351 * called when the document end has been detected.
352 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000353static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000354endDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000355{
356 fprintf(stdout, "SAX.endDocument()\n");
357}
358
359/**
360 * startElementDebug:
361 * @ctxt: An XML parser context
362 * @name: The element name
363 *
364 * called when an opening tag has been processed.
365 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000366static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000367startElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, const xmlChar **atts)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000368{
369 int i;
370
371 fprintf(stdout, "SAX.startElement(%s", (char *) name);
372 if (atts != NULL) {
373 for (i = 0;(atts[i] != NULL);i++) {
Daniel Veillard808a3f12000-08-17 13:50:51 +0000374 fprintf(stdout, ", %s", atts[i++]);
Daniel Veillarde010c172000-08-28 10:04:51 +0000375 if (atts[i] != NULL) {
376 unsigned char output[40];
377 const unsigned char *att = atts[i];
378 int outlen, attlen;
379 fprintf(stdout, "='");
380 while ((attlen = strlen((char*)att)) > 0) {
381 outlen = sizeof output - 1;
382 htmlEncodeEntities(output, &outlen, att, &attlen, '\'');
383 fprintf(stdout, "%.*s", outlen, output);
384 att += attlen;
385 }
386 fprintf(stdout, "'");
387 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000388 }
389 }
390 fprintf(stdout, ")\n");
391}
392
393/**
394 * endElementDebug:
395 * @ctxt: An XML parser context
396 * @name: The element name
397 *
398 * called when the end of an element has been detected.
399 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000400static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000401endElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000402{
403 fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
404}
405
406/**
407 * charactersDebug:
408 * @ctxt: An XML parser context
409 * @ch: a xmlChar string
410 * @len: the number of xmlChar
411 *
412 * receiving some chars from the parser.
413 * Question: how much at a time ???
414 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000415static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000416charactersDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000417{
Daniel Veillarde010c172000-08-28 10:04:51 +0000418 unsigned char output[40];
Daniel Veillard4948eb42000-08-29 09:41:15 +0000419 int inlen = len, outlen = 30;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000420
Daniel Veillard4948eb42000-08-29 09:41:15 +0000421 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
Daniel Veillarde010c172000-08-28 10:04:51 +0000422 output[outlen] = 0;
Daniel Veillard87b95392000-08-12 21:12:04 +0000423
424 fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000425}
426
427/**
Daniel Veillard7eda8452000-10-14 23:38:43 +0000428 * cdataDebug:
429 * @ctxt: An XML parser context
430 * @ch: a xmlChar string
431 * @len: the number of xmlChar
432 *
433 * receiving some cdata chars from the parser.
434 * Question: how much at a time ???
435 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000436static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000437cdataDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7eda8452000-10-14 23:38:43 +0000438{
439 unsigned char output[40];
440 int inlen = len, outlen = 30;
441
442 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
443 output[outlen] = 0;
444
445 fprintf(stdout, "SAX.cdata(%s, %d)\n", output, len);
446}
447
448/**
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000449 * referenceDebug:
450 * @ctxt: An XML parser context
451 * @name: The entity name
452 *
453 * called when an entity reference is detected.
454 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000455static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000456referenceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000457{
458 fprintf(stdout, "SAX.reference(%s)\n", name);
459}
460
461/**
462 * ignorableWhitespaceDebug:
463 * @ctxt: An XML parser context
464 * @ch: a xmlChar string
465 * @start: the first char in the string
466 * @len: the number of xmlChar
467 *
468 * receiving some ignorable whitespaces from the parser.
469 * Question: how much at a time ???
470 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000471static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000472ignorableWhitespaceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000473{
Daniel Veillard87b95392000-08-12 21:12:04 +0000474 char output[40];
475 int i;
476
477 for (i = 0;(i<len) && (i < 30);i++)
478 output[i] = ch[i];
479 output[i] = 0;
480
481 fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000482}
483
484/**
485 * processingInstructionDebug:
486 * @ctxt: An XML parser context
487 * @target: the target name
488 * @data: the PI data's
489 * @len: the number of xmlChar
490 *
491 * A processing instruction has been parsed.
492 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000493static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000494processingInstructionDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *target,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000495 const xmlChar *data)
496{
497 fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
498 (char *) target, (char *) data);
499}
500
501/**
502 * commentDebug:
503 * @ctxt: An XML parser context
504 * @value: the comment content
505 *
506 * A comment has been parsed.
507 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000508static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000509commentDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000510{
511 fprintf(stdout, "SAX.comment(%s)\n", value);
512}
513
514/**
515 * warningDebug:
516 * @ctxt: An XML parser context
517 * @msg: the message to display/transmit
518 * @...: extra parameters for the message display
519 *
520 * Display and format a warning messages, gives file, line, position and
521 * extra parameters.
522 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000523static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000524warningDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000525{
526 va_list args;
527
528 va_start(args, msg);
529 fprintf(stdout, "SAX.warning: ");
530 vfprintf(stdout, msg, args);
531 va_end(args);
532}
533
534/**
535 * errorDebug:
536 * @ctxt: An XML parser context
537 * @msg: the message to display/transmit
538 * @...: extra parameters for the message display
539 *
540 * Display and format a error messages, gives file, line, position and
541 * extra parameters.
542 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000543static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000544errorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000545{
546 va_list args;
547
548 va_start(args, msg);
549 fprintf(stdout, "SAX.error: ");
550 vfprintf(stdout, msg, args);
551 va_end(args);
552}
553
554/**
555 * fatalErrorDebug:
556 * @ctxt: An XML parser context
557 * @msg: the message to display/transmit
558 * @...: extra parameters for the message display
559 *
560 * Display and format a fatalError messages, gives file, line, position and
561 * extra parameters.
562 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000563static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000564fatalErrorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000565{
566 va_list args;
567
568 va_start(args, msg);
569 fprintf(stdout, "SAX.fatalError: ");
570 vfprintf(stdout, msg, args);
571 va_end(args);
572}
573
/*
 * SAX handler that routes each callback slot to the tracing function
 * defined above.  Slots are filled positionally, in the same order as in
 * emptySAXHandlerStruct; the final slot is left NULL.
 */
xmlSAXHandler debugSAXHandlerStruct = {
    internalSubsetDebug,
    isStandaloneDebug,
    hasInternalSubsetDebug,
    hasExternalSubsetDebug,
    resolveEntityDebug,
    getEntityDebug,
    entityDeclDebug,
    notationDeclDebug,
    attributeDeclDebug,
    elementDeclDebug,
    unparsedEntityDeclDebug,
    setDocumentLocatorDebug,
    startDocumentDebug,
    endDocumentDebug,
    startElementDebug,
    endElementDebug,
    referenceDebug,
    charactersDebug,
    ignorableWhitespaceDebug,
    processingInstructionDebug,
    commentDebug,
    warningDebug,
    errorDebug,
    fatalErrorDebug,
    getParameterEntityDebug,
    cdataDebug, /* cdataBlock slot */
    NULL /* externalSubset slot: no tracer installed */
};

/* Definition matching the extern declaration placed before the callbacks. */
xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000605/************************************************************************
606 * *
607 * Debug *
608 * *
609 ************************************************************************/
610
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000611static void
612parseSAXFile(char *filename) {
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000613 htmlDocPtr doc = NULL;
614
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000615 /*
616 * Empty callbacks for checking
617 */
Daniel Veillard87b95392000-08-12 21:12:04 +0000618 if (push) {
619 FILE *f;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000620
Daniel Veillard87b95392000-08-12 21:12:04 +0000621 f = fopen(filename, "r");
622 if (f != NULL) {
623 int res, size = 3;
624 char chars[4096];
625 htmlParserCtxtPtr ctxt;
626
627 /* if (repeat) */
628 size = 4096;
629 res = fread(chars, 1, 4, f);
630 if (res > 0) {
631 ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL,
632 chars, res, filename, 0);
633 while ((res = fread(chars, 1, size, f)) > 0) {
634 htmlParseChunk(ctxt, chars, res, 0);
635 }
636 htmlParseChunk(ctxt, chars, 0, 1);
637 doc = ctxt->myDoc;
638 htmlFreeParserCtxt(ctxt);
639 }
640 if (doc != NULL) {
641 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
642 xmlFreeDoc(doc);
643 }
644 fclose(f);
645 }
646 if (!noout) {
647 f = fopen(filename, "r");
648 if (f != NULL) {
649 int res, size = 3;
650 char chars[4096];
651 htmlParserCtxtPtr ctxt;
652
653 /* if (repeat) */
654 size = 4096;
655 res = fread(chars, 1, 4, f);
656 if (res > 0) {
657 ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL,
658 chars, res, filename, 0);
659 while ((res = fread(chars, 1, size, f)) > 0) {
660 htmlParseChunk(ctxt, chars, res, 0);
661 }
662 htmlParseChunk(ctxt, chars, 0, 1);
663 doc = ctxt->myDoc;
664 htmlFreeParserCtxt(ctxt);
665 }
666 if (doc != NULL) {
667 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
668 xmlFreeDoc(doc);
669 }
670 fclose(f);
671 }
672 }
673 } else {
674 doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000675 if (doc != NULL) {
676 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
677 xmlFreeDoc(doc);
678 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000679
680 if (!noout) {
681 /*
682 * Debug callback
683 */
684 doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
685 if (doc != NULL) {
686 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
687 xmlFreeDoc(doc);
688 }
689 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000690 }
691}
692
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000693static void
694parseAndPrintFile(char *filename) {
Daniel Veillard2eac5032000-01-09 21:08:56 +0000695 htmlDocPtr doc = NULL, tmp;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000696
697 /*
698 * build an HTML tree from a string;
699 */
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000700 if (push) {
701 FILE *f;
702
703 f = fopen(filename, "r");
704 if (f != NULL) {
705 int res, size = 3;
Daniel Veillard87b95392000-08-12 21:12:04 +0000706 char chars[4096];
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000707 htmlParserCtxtPtr ctxt;
708
Daniel Veillard87b95392000-08-12 21:12:04 +0000709 /* if (repeat) */
710 size = 4096;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000711 res = fread(chars, 1, 4, f);
712 if (res > 0) {
713 ctxt = htmlCreatePushParserCtxt(NULL, NULL,
714 chars, res, filename, 0);
715 while ((res = fread(chars, 1, size, f)) > 0) {
716 htmlParseChunk(ctxt, chars, res, 0);
717 }
718 htmlParseChunk(ctxt, chars, 0, 1);
719 doc = ctxt->myDoc;
720 htmlFreeParserCtxt(ctxt);
721 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000722 fclose(f);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000723 }
724 } else {
725 doc = htmlParseFile(filename, NULL);
726 }
727 if (doc == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000728 xmlGenericError(xmlGenericErrorContext,
729 "Could not parse %s\n", filename);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000730 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000731
732 /*
733 * test intermediate copy if needed.
734 */
735 if (copy) {
736 tmp = doc;
737 doc = xmlCopyDoc(doc, 1);
738 xmlFreeDoc(tmp);
739 }
740
741 /*
742 * print it.
743 */
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000744 if (!noout) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000745#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000746 if (!debug) {
747 if (encoding)
748 htmlSaveFileEnc("-", doc, encoding);
749 else
750 htmlDocDump(stdout, doc);
751 } else
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000752 xmlDebugDumpDocument(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000753#else
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000754 if (encoding)
755 htmlSaveFileEnc("-", doc, encoding);
756 else
757 htmlDocDump(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000758#endif
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000759 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000760
761 /*
762 * free it.
763 */
764 xmlFreeDoc(doc);
765}
766
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000767int main(int argc, char **argv) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000768 int i, count;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000769 int files = 0;
770
771 for (i = 1; i < argc ; i++) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000772#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000773 if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
774 debug++;
Daniel Veillard361d8452000-04-03 19:48:13 +0000775 else
776#endif
777 if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000778 copy++;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000779 else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
780 push++;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000781 else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
782 sax++;
783 else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
784 noout++;
785 else if ((!strcmp(argv[i], "-repeat")) ||
786 (!strcmp(argv[i], "--repeat")))
787 repeat++;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000788 else if ((!strcmp(argv[i], "-encode")) ||
789 (!strcmp(argv[i], "--encode"))) {
790 i++;
791 encoding = argv[i];
792 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000793 }
794 for (i = 1; i < argc ; i++) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000795 if ((!strcmp(argv[i], "-encode")) ||
796 (!strcmp(argv[i], "--encode"))) {
797 i++;
798 continue;
799 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000800 if (argv[i][0] != '-') {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000801 if (repeat) {
802 for (count = 0;count < 100 * repeat;count++) {
803 if (sax)
804 parseSAXFile(argv[i]);
805 else
806 parseAndPrintFile(argv[i]);
807 }
808 } else {
809 if (sax)
810 parseSAXFile(argv[i]);
811 else
812 parseAndPrintFile(argv[i]);
813 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000814 files ++;
815 }
816 }
817 if (files == 0) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000818 printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000819 argv[0]);
820 printf("\tParse the HTML files and output the result of the parsing\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000821#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000822 printf("\t--debug : dump a debug tree of the in-memory document\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000823#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000824 printf("\t--copy : used to test the internal copy implementation\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000825 printf("\t--sax : debug the sequence of SAX callbacks\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000826 printf("\t--repeat : parse the file 100 times, for timing\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000827 printf("\t--noout : do not print the result\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000828 printf("\t--push : use the push mode parser\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000829 printf("\t--encode encoding : output in the given encoding\n");
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000830 }
Daniel Veillardf5c2c871999-12-01 09:51:45 +0000831 xmlCleanupParser();
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000832 xmlMemoryDump();
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000833
834 return(0);
835}
Daniel Veillard361d8452000-04-03 19:48:13 +0000836#else /* !LIBXML_HTML_ENABLED */
837#include <stdio.h>
/*
 * Stand-in entry point used when libxml was built without HTML support:
 * says so on stdout and exits successfully.
 */
int main(int argc, char **argv) {
    fprintf(stdout, "%s : HTML support not compiled in\n", argv[0]);
    return 0;
}
842#endif