/*
 * testHTML.c : a small tester program for HTML input.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
8
Bjorn Reese70a9da52001-04-21 16:57:29 +00009#include "libxml.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000010
Daniel Veillard361d8452000-04-03 19:48:13 +000011#ifdef LIBXML_HTML_ENABLED
12
Daniel Veillard7f7d1111999-09-22 09:46:25 +000013#include <string.h>
Daniel Veillard7c1206f1999-10-14 09:10:25 +000014#include <stdarg.h>
15
Daniel Veillard7f7d1111999-09-22 09:46:25 +000016
17#ifdef HAVE_SYS_TYPES_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000018#include <sys/types.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000019#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000020#ifdef HAVE_SYS_STAT_H
21#include <sys/stat.h>
22#endif
23#ifdef HAVE_FCNTL_H
24#include <fcntl.h>
25#endif
26#ifdef HAVE_UNISTD_H
27#include <unistd.h>
28#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000029#ifdef HAVE_STDLIB_H
Daniel Veillardbe70ff71999-07-05 16:50:46 +000030#include <stdlib.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000031#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +000032
Daniel Veillard361d8452000-04-03 19:48:13 +000033#include <libxml/xmlmemory.h>
34#include <libxml/HTMLparser.h>
35#include <libxml/HTMLtree.h>
36#include <libxml/debugXML.h>
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +000037#include <libxml/xmlerror.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000038#include <libxml/globals.h>
Daniel Veillardbe70ff71999-07-05 16:50:46 +000039
/*
 * Command-line switches.  main() bumps each counter once per occurrence
 * of the matching option on the command line.
 */
#ifdef LIBXML_DEBUG_ENABLED
static int debug = 0;           /* --debug: dump with xmlDebugDumpDocument() */
#endif
static int copy = 0;            /* --copy: pass the tree through xmlCopyDoc() */
static int sax = 0;             /* --sax: run parseSAXFile() on each file */
static int repeat = 0;          /* --repeat: parse each file 100 * repeat times */
static int noout = 0;           /* --noout: suppress the printed result */
static int push = 0;            /* --push: feed the file to the push parser */
static char *encoding = NULL;   /* --encode: handed to htmlSaveFileEnc() */
static int options = 0;         /* handed to htmlReadFile(); no option shown here sets it */
Daniel Veillardbe70ff71999-07-05 16:50:46 +000050
Daniel Veillard7c1206f1999-10-14 09:10:25 +000051xmlSAXHandler emptySAXHandlerStruct = {
52 NULL, /* internalSubset */
53 NULL, /* isStandalone */
54 NULL, /* hasInternalSubset */
55 NULL, /* hasExternalSubset */
56 NULL, /* resolveEntity */
57 NULL, /* getEntity */
58 NULL, /* entityDecl */
59 NULL, /* notationDecl */
60 NULL, /* attributeDecl */
61 NULL, /* elementDecl */
62 NULL, /* unparsedEntityDecl */
63 NULL, /* setDocumentLocator */
64 NULL, /* startDocument */
65 NULL, /* endDocument */
66 NULL, /* startElement */
67 NULL, /* endElement */
68 NULL, /* reference */
69 NULL, /* characters */
70 NULL, /* ignorableWhitespace */
71 NULL, /* processingInstruction */
72 NULL, /* comment */
73 NULL, /* xmlParserWarning */
74 NULL, /* xmlParserError */
75 NULL, /* xmlParserError */
76 NULL, /* getParameterEntity */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000077 NULL, /* cdataBlock */
Daniel Veillardd0463562001-10-13 09:15:48 +000078 NULL, /* externalSubset */
Daniel Veillard092643b2003-09-25 14:29:29 +000079 1,
80 NULL,
81 NULL,
82 NULL
Daniel Veillard7c1206f1999-10-14 09:10:25 +000083};
84
85xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
86extern xmlSAXHandlerPtr debugSAXHandler;
87
/************************************************************************
 *                                                                      *
 *                            Debug Handlers                            *
 *                                                                      *
 ************************************************************************/
93
94/**
95 * isStandaloneDebug:
96 * @ctxt: An XML parser context
97 *
98 * Is this document tagged standalone ?
99 *
100 * Returns 1 if true
101 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000102static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000103isStandaloneDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000104{
105 fprintf(stdout, "SAX.isStandalone()\n");
106 return(0);
107}
108
109/**
110 * hasInternalSubsetDebug:
111 * @ctxt: An XML parser context
112 *
113 * Does this document has an internal subset
114 *
115 * Returns 1 if true
116 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000117static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000118hasInternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000119{
120 fprintf(stdout, "SAX.hasInternalSubset()\n");
121 return(0);
122}
123
124/**
125 * hasExternalSubsetDebug:
126 * @ctxt: An XML parser context
127 *
128 * Does this document has an external subset
129 *
130 * Returns 1 if true
131 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000132static int
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000133hasExternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000134{
135 fprintf(stdout, "SAX.hasExternalSubset()\n");
136 return(0);
137}
138
139/**
140 * hasInternalSubsetDebug:
141 * @ctxt: An XML parser context
142 *
143 * Does this document has an internal subset
144 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000145static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000146internalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000147 const xmlChar *ExternalID, const xmlChar *SystemID)
148{
Daniel Veillard808a3f12000-08-17 13:50:51 +0000149 fprintf(stdout, "SAX.internalSubset(%s,", name);
150 if (ExternalID == NULL)
151 fprintf(stdout, " ,");
152 else
153 fprintf(stdout, " %s,", ExternalID);
154 if (SystemID == NULL)
155 fprintf(stdout, " )\n");
156 else
157 fprintf(stdout, " %s)\n", SystemID);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000158}
159
160/**
161 * resolveEntityDebug:
162 * @ctxt: An XML parser context
163 * @publicId: The public ID of the entity
164 * @systemId: The system ID of the entity
165 *
166 * Special entity resolver, better left to the parser, it has
167 * more context than the application layer.
168 * The default behaviour is to NOT resolve the entities, in that case
169 * the ENTITY_REF nodes are built in the structure (and the parameter
170 * values).
171 *
172 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
173 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000174static xmlParserInputPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000175resolveEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *publicId, const xmlChar *systemId)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000176{
177 /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
178
179
180 fprintf(stdout, "SAX.resolveEntity(");
181 if (publicId != NULL)
182 fprintf(stdout, "%s", (char *)publicId);
183 else
184 fprintf(stdout, " ");
185 if (systemId != NULL)
186 fprintf(stdout, ", %s)\n", (char *)systemId);
187 else
188 fprintf(stdout, ", )\n");
189/*********
190 if (systemId != NULL) {
191 return(xmlNewInputFromFile(ctxt, (char *) systemId));
192 }
193 *********/
194 return(NULL);
195}
196
197/**
198 * getEntityDebug:
199 * @ctxt: An XML parser context
200 * @name: The entity name
201 *
202 * Get an entity by name
203 *
204 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
205 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000206static xmlEntityPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000207getEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000208{
209 fprintf(stdout, "SAX.getEntity(%s)\n", name);
210 return(NULL);
211}
212
213/**
214 * getParameterEntityDebug:
215 * @ctxt: An XML parser context
216 * @name: The entity name
217 *
218 * Get a parameter entity by name
219 *
220 * Returns the xmlParserInputPtr
221 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000222static xmlEntityPtr
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000223getParameterEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000224{
225 fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
226 return(NULL);
227}
228
229
230/**
231 * entityDeclDebug:
232 * @ctxt: An XML parser context
233 * @name: the entity name
234 * @type: the entity type
235 * @publicId: The public ID of the entity
236 * @systemId: The system ID of the entity
237 * @content: the entity value (without processing).
238 *
239 * An entity definition has been parsed
240 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000241static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000242entityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000243 const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
244{
245 fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
246 name, type, publicId, systemId, content);
247}
248
249/**
250 * attributeDeclDebug:
251 * @ctxt: An XML parser context
252 * @name: the attribute name
253 * @type: the attribute type
254 *
255 * An attribute definition has been parsed
256 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000257static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000258attributeDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *elem, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000259 int type, int def, const xmlChar *defaultValue,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000260 xmlEnumerationPtr tree ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000261{
262 fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
263 elem, name, type, def, defaultValue);
264}
265
266/**
267 * elementDeclDebug:
268 * @ctxt: An XML parser context
269 * @name: the element name
270 * @type: the element type
271 * @content: the element value (without processing).
272 *
273 * An element definition has been parsed
274 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000275static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000276elementDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
277 xmlElementContentPtr content ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000278{
279 fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
280 name, type);
281}
282
283/**
284 * notationDeclDebug:
285 * @ctxt: An XML parser context
286 * @name: The name of the notation
287 * @publicId: The public ID of the entity
288 * @systemId: The system ID of the entity
289 *
290 * What to do when a notation declaration has been parsed.
291 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000292static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000293notationDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000294 const xmlChar *publicId, const xmlChar *systemId)
295{
296 fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
297 (char *) name, (char *) publicId, (char *) systemId);
298}
299
300/**
301 * unparsedEntityDeclDebug:
302 * @ctxt: An XML parser context
303 * @name: The name of the entity
304 * @publicId: The public ID of the entity
305 * @systemId: The system ID of the entity
306 * @notationName: the name of the notation
307 *
308 * What to do when an unparsed entity declaration is parsed
309 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000310static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000311unparsedEntityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000312 const xmlChar *publicId, const xmlChar *systemId,
313 const xmlChar *notationName)
314{
315 fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
316 (char *) name, (char *) publicId, (char *) systemId,
317 (char *) notationName);
318}
319
320/**
321 * setDocumentLocatorDebug:
322 * @ctxt: An XML parser context
323 * @loc: A SAX Locator
324 *
325 * Receive the document locator at startup, actually xmlDefaultSAXLocator
326 * Everything is available on the context, so this is useless in our case.
327 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000328static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000329setDocumentLocatorDebug(void *ctx ATTRIBUTE_UNUSED, xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000330{
331 fprintf(stdout, "SAX.setDocumentLocator()\n");
332}
333
334/**
335 * startDocumentDebug:
336 * @ctxt: An XML parser context
337 *
338 * called when the document start being processed.
339 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000340static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000341startDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000342{
343 fprintf(stdout, "SAX.startDocument()\n");
344}
345
346/**
347 * endDocumentDebug:
348 * @ctxt: An XML parser context
349 *
350 * called when the document end has been detected.
351 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000352static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000353endDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000354{
355 fprintf(stdout, "SAX.endDocument()\n");
356}
357
358/**
359 * startElementDebug:
360 * @ctxt: An XML parser context
361 * @name: The element name
362 *
363 * called when an opening tag has been processed.
364 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000365static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000366startElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, const xmlChar **atts)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000367{
368 int i;
369
370 fprintf(stdout, "SAX.startElement(%s", (char *) name);
371 if (atts != NULL) {
372 for (i = 0;(atts[i] != NULL);i++) {
Daniel Veillard808a3f12000-08-17 13:50:51 +0000373 fprintf(stdout, ", %s", atts[i++]);
Daniel Veillarde010c172000-08-28 10:04:51 +0000374 if (atts[i] != NULL) {
375 unsigned char output[40];
376 const unsigned char *att = atts[i];
377 int outlen, attlen;
378 fprintf(stdout, "='");
379 while ((attlen = strlen((char*)att)) > 0) {
380 outlen = sizeof output - 1;
381 htmlEncodeEntities(output, &outlen, att, &attlen, '\'');
Daniel Veillard5f704af2003-03-05 10:01:43 +0000382 output[outlen] = 0;
William M. Brackc1939562003-08-05 15:52:22 +0000383 fprintf(stdout, "%s", (char *) output);
Daniel Veillarde010c172000-08-28 10:04:51 +0000384 att += attlen;
385 }
386 fprintf(stdout, "'");
387 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000388 }
389 }
390 fprintf(stdout, ")\n");
391}
392
393/**
394 * endElementDebug:
395 * @ctxt: An XML parser context
396 * @name: The element name
397 *
398 * called when the end of an element has been detected.
399 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000400static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000401endElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000402{
403 fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
404}
405
406/**
407 * charactersDebug:
408 * @ctxt: An XML parser context
409 * @ch: a xmlChar string
410 * @len: the number of xmlChar
411 *
412 * receiving some chars from the parser.
413 * Question: how much at a time ???
414 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000415static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000416charactersDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000417{
Daniel Veillarde010c172000-08-28 10:04:51 +0000418 unsigned char output[40];
Daniel Veillard4948eb42000-08-29 09:41:15 +0000419 int inlen = len, outlen = 30;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000420
Daniel Veillard4948eb42000-08-29 09:41:15 +0000421 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
Daniel Veillarde010c172000-08-28 10:04:51 +0000422 output[outlen] = 0;
Daniel Veillard87b95392000-08-12 21:12:04 +0000423
424 fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000425}
426
427/**
Daniel Veillard7eda8452000-10-14 23:38:43 +0000428 * cdataDebug:
429 * @ctxt: An XML parser context
430 * @ch: a xmlChar string
431 * @len: the number of xmlChar
432 *
433 * receiving some cdata chars from the parser.
434 * Question: how much at a time ???
435 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000436static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000437cdataDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7eda8452000-10-14 23:38:43 +0000438{
439 unsigned char output[40];
440 int inlen = len, outlen = 30;
441
442 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
443 output[outlen] = 0;
444
445 fprintf(stdout, "SAX.cdata(%s, %d)\n", output, len);
446}
447
448/**
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000449 * referenceDebug:
450 * @ctxt: An XML parser context
451 * @name: The entity name
452 *
453 * called when an entity reference is detected.
454 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000455static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000456referenceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000457{
458 fprintf(stdout, "SAX.reference(%s)\n", name);
459}
460
461/**
462 * ignorableWhitespaceDebug:
463 * @ctxt: An XML parser context
464 * @ch: a xmlChar string
465 * @start: the first char in the string
466 * @len: the number of xmlChar
467 *
468 * receiving some ignorable whitespaces from the parser.
469 * Question: how much at a time ???
470 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000471static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000472ignorableWhitespaceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000473{
Daniel Veillard87b95392000-08-12 21:12:04 +0000474 char output[40];
475 int i;
476
477 for (i = 0;(i<len) && (i < 30);i++)
478 output[i] = ch[i];
479 output[i] = 0;
480
481 fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000482}
483
484/**
485 * processingInstructionDebug:
486 * @ctxt: An XML parser context
487 * @target: the target name
488 * @data: the PI data's
489 * @len: the number of xmlChar
490 *
491 * A processing instruction has been parsed.
492 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000493static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000494processingInstructionDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *target,
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000495 const xmlChar *data)
496{
497 fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
498 (char *) target, (char *) data);
499}
500
501/**
502 * commentDebug:
503 * @ctxt: An XML parser context
504 * @value: the comment content
505 *
506 * A comment has been parsed.
507 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000508static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000509commentDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000510{
511 fprintf(stdout, "SAX.comment(%s)\n", value);
512}
513
514/**
515 * warningDebug:
516 * @ctxt: An XML parser context
517 * @msg: the message to display/transmit
518 * @...: extra parameters for the message display
519 *
520 * Display and format a warning messages, gives file, line, position and
521 * extra parameters.
522 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000523static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000524warningDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000525{
526 va_list args;
527
528 va_start(args, msg);
529 fprintf(stdout, "SAX.warning: ");
530 vfprintf(stdout, msg, args);
531 va_end(args);
532}
533
534/**
535 * errorDebug:
536 * @ctxt: An XML parser context
537 * @msg: the message to display/transmit
538 * @...: extra parameters for the message display
539 *
540 * Display and format a error messages, gives file, line, position and
541 * extra parameters.
542 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000543static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000544errorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000545{
546 va_list args;
547
548 va_start(args, msg);
549 fprintf(stdout, "SAX.error: ");
550 vfprintf(stdout, msg, args);
551 va_end(args);
552}
553
554/**
555 * fatalErrorDebug:
556 * @ctxt: An XML parser context
557 * @msg: the message to display/transmit
558 * @...: extra parameters for the message display
559 *
560 * Display and format a fatalError messages, gives file, line, position and
561 * extra parameters.
562 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000563static void
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000564fatalErrorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000565{
566 va_list args;
567
568 va_start(args, msg);
569 fprintf(stdout, "SAX.fatalError: ");
570 vfprintf(stdout, msg, args);
571 va_end(args);
572}
573
574xmlSAXHandler debugSAXHandlerStruct = {
575 internalSubsetDebug,
576 isStandaloneDebug,
577 hasInternalSubsetDebug,
578 hasExternalSubsetDebug,
579 resolveEntityDebug,
580 getEntityDebug,
581 entityDeclDebug,
582 notationDeclDebug,
583 attributeDeclDebug,
584 elementDeclDebug,
585 unparsedEntityDeclDebug,
586 setDocumentLocatorDebug,
587 startDocumentDebug,
588 endDocumentDebug,
589 startElementDebug,
590 endElementDebug,
591 referenceDebug,
592 charactersDebug,
593 ignorableWhitespaceDebug,
594 processingInstructionDebug,
595 commentDebug,
596 warningDebug,
597 errorDebug,
598 fatalErrorDebug,
599 getParameterEntityDebug,
Daniel Veillard7eda8452000-10-14 23:38:43 +0000600 cdataDebug,
Daniel Veillardd0463562001-10-13 09:15:48 +0000601 NULL,
Daniel Veillard092643b2003-09-25 14:29:29 +0000602 1,
603 NULL,
604 NULL,
605 NULL
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000606};
607
608xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
/************************************************************************
 *                                                                      *
 *                                Debug                                 *
 *                                                                      *
 ************************************************************************/
614
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000615static void
616parseSAXFile(char *filename) {
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000617 htmlDocPtr doc = NULL;
618
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000619 /*
620 * Empty callbacks for checking
621 */
Daniel Veillard87b95392000-08-12 21:12:04 +0000622 if (push) {
623 FILE *f;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000624
Daniel Veillard87b95392000-08-12 21:12:04 +0000625 f = fopen(filename, "r");
626 if (f != NULL) {
627 int res, size = 3;
628 char chars[4096];
629 htmlParserCtxtPtr ctxt;
630
631 /* if (repeat) */
632 size = 4096;
633 res = fread(chars, 1, 4, f);
634 if (res > 0) {
635 ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL,
William M. Brack779af002003-08-01 15:55:39 +0000636 chars, res, filename, XML_CHAR_ENCODING_NONE);
Daniel Veillard87b95392000-08-12 21:12:04 +0000637 while ((res = fread(chars, 1, size, f)) > 0) {
638 htmlParseChunk(ctxt, chars, res, 0);
639 }
640 htmlParseChunk(ctxt, chars, 0, 1);
641 doc = ctxt->myDoc;
642 htmlFreeParserCtxt(ctxt);
643 }
644 if (doc != NULL) {
645 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
646 xmlFreeDoc(doc);
647 }
648 fclose(f);
649 }
650 if (!noout) {
651 f = fopen(filename, "r");
652 if (f != NULL) {
653 int res, size = 3;
654 char chars[4096];
655 htmlParserCtxtPtr ctxt;
656
657 /* if (repeat) */
658 size = 4096;
659 res = fread(chars, 1, 4, f);
660 if (res > 0) {
661 ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL,
William M. Brack779af002003-08-01 15:55:39 +0000662 chars, res, filename, XML_CHAR_ENCODING_NONE);
Daniel Veillard87b95392000-08-12 21:12:04 +0000663 while ((res = fread(chars, 1, size, f)) > 0) {
664 htmlParseChunk(ctxt, chars, res, 0);
665 }
666 htmlParseChunk(ctxt, chars, 0, 1);
667 doc = ctxt->myDoc;
668 htmlFreeParserCtxt(ctxt);
669 }
670 if (doc != NULL) {
671 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
672 xmlFreeDoc(doc);
673 }
674 fclose(f);
675 }
676 }
677 } else {
678 doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000679 if (doc != NULL) {
680 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
681 xmlFreeDoc(doc);
682 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000683
684 if (!noout) {
685 /*
686 * Debug callback
687 */
688 doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
689 if (doc != NULL) {
690 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
691 xmlFreeDoc(doc);
692 }
693 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000694 }
695}
696
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000697static void
698parseAndPrintFile(char *filename) {
Daniel Veillard2eac5032000-01-09 21:08:56 +0000699 htmlDocPtr doc = NULL, tmp;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000700
701 /*
702 * build an HTML tree from a string;
703 */
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000704 if (push) {
705 FILE *f;
706
707 f = fopen(filename, "r");
708 if (f != NULL) {
709 int res, size = 3;
Daniel Veillard87b95392000-08-12 21:12:04 +0000710 char chars[4096];
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000711 htmlParserCtxtPtr ctxt;
712
Daniel Veillard87b95392000-08-12 21:12:04 +0000713 /* if (repeat) */
714 size = 4096;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000715 res = fread(chars, 1, 4, f);
716 if (res > 0) {
717 ctxt = htmlCreatePushParserCtxt(NULL, NULL,
William M. Brack779af002003-08-01 15:55:39 +0000718 chars, res, filename, XML_CHAR_ENCODING_NONE);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000719 while ((res = fread(chars, 1, size, f)) > 0) {
720 htmlParseChunk(ctxt, chars, res, 0);
721 }
722 htmlParseChunk(ctxt, chars, 0, 1);
723 doc = ctxt->myDoc;
724 htmlFreeParserCtxt(ctxt);
725 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000726 fclose(f);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000727 }
728 } else {
Daniel Veillard9475a352003-09-26 12:47:50 +0000729 doc = htmlReadFile(filename, NULL, options);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000730 }
731 if (doc == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000732 xmlGenericError(xmlGenericErrorContext,
733 "Could not parse %s\n", filename);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000734 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000735
736 /*
737 * test intermediate copy if needed.
738 */
739 if (copy) {
740 tmp = doc;
741 doc = xmlCopyDoc(doc, 1);
742 xmlFreeDoc(tmp);
743 }
744
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000745#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000746 /*
747 * print it.
748 */
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000749 if (!noout) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000750#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000751 if (!debug) {
752 if (encoding)
753 htmlSaveFileEnc("-", doc, encoding);
754 else
755 htmlDocDump(stdout, doc);
756 } else
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000757 xmlDebugDumpDocument(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000758#else
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000759 if (encoding)
760 htmlSaveFileEnc("-", doc, encoding);
761 else
762 htmlDocDump(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000763#endif
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000764 }
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000765#endif /* LIBXML_OUTPUT_ENABLED */
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000766
767 /*
768 * free it.
769 */
770 xmlFreeDoc(doc);
771}
772
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000773int main(int argc, char **argv) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000774 int i, count;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000775 int files = 0;
776
777 for (i = 1; i < argc ; i++) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000778#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000779 if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
780 debug++;
Daniel Veillard361d8452000-04-03 19:48:13 +0000781 else
782#endif
783 if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000784 copy++;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000785 else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
786 push++;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000787 else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
788 sax++;
789 else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
790 noout++;
791 else if ((!strcmp(argv[i], "-repeat")) ||
792 (!strcmp(argv[i], "--repeat")))
793 repeat++;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000794 else if ((!strcmp(argv[i], "-encode")) ||
795 (!strcmp(argv[i], "--encode"))) {
796 i++;
797 encoding = argv[i];
798 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000799 }
800 for (i = 1; i < argc ; i++) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000801 if ((!strcmp(argv[i], "-encode")) ||
802 (!strcmp(argv[i], "--encode"))) {
803 i++;
804 continue;
805 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000806 if (argv[i][0] != '-') {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000807 if (repeat) {
808 for (count = 0;count < 100 * repeat;count++) {
809 if (sax)
810 parseSAXFile(argv[i]);
811 else
812 parseAndPrintFile(argv[i]);
813 }
814 } else {
815 if (sax)
816 parseSAXFile(argv[i]);
817 else
818 parseAndPrintFile(argv[i]);
819 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000820 files ++;
821 }
822 }
823 if (files == 0) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000824 printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000825 argv[0]);
826 printf("\tParse the HTML files and output the result of the parsing\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000827#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000828 printf("\t--debug : dump a debug tree of the in-memory document\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000829#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000830 printf("\t--copy : used to test the internal copy implementation\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000831 printf("\t--sax : debug the sequence of SAX callbacks\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000832 printf("\t--repeat : parse the file 100 times, for timing\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000833 printf("\t--noout : do not print the result\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000834 printf("\t--push : use the push mode parser\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000835 printf("\t--encode encoding : output in the given encoding\n");
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000836 }
Daniel Veillardf5c2c871999-12-01 09:51:45 +0000837 xmlCleanupParser();
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000838 xmlMemoryDump();
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000839
840 return(0);
841}
Daniel Veillard361d8452000-04-03 19:48:13 +0000842#else /* !LIBXML_HTML_ENABLED */
843#include <stdio.h>
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000844int main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000845 printf("%s : HTML support not compiled in\n", argv[0]);
846 return(0);
847}
848#endif