blob: dcd716122724a4016a755381fd93c866e8bc3746 [file] [log] [blame]
/*
 * xmllint.c : a small tester program for XML input.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
9#ifdef WIN32
10#include "win32config.h"
11#else
12#include "config.h"
13#endif
14
15#include <stdio.h>
16#include <string.h>
17#include <stdio.h>
18#include <stdarg.h>
19
20#ifdef HAVE_SYS_TYPES_H
21#include <sys/types.h>
22#endif
23#ifdef HAVE_SYS_STAT_H
24#include <sys/stat.h>
25#endif
26#ifdef HAVE_FCNTL_H
27#include <fcntl.h>
28#endif
29#ifdef HAVE_UNISTD_H
30#include <unistd.h>
31#endif
32#ifdef HAVE_STDLIB_H
33#include <stdlib.h>
34#endif
35#ifdef HAVE_LIBREADLINE
36#include <readline/readline.h>
37#ifdef HAVE_LIBHISTORY
38#include <readline/history.h>
39#endif
40#endif
41
42#include <libxml/xmlmemory.h>
43#include <libxml/parser.h>
44#include <libxml/parserInternals.h>
45#include <libxml/HTMLparser.h>
46#include <libxml/HTMLtree.h>
47#include <libxml/tree.h>
48#include <libxml/xpath.h>
49#include <libxml/debugXML.h>
50
/*
 * Command-line switches.  Every counter starts at zero and is bumped by
 * main() each time the matching option is seen on the command line.
 */
#ifdef LIBXML_DEBUG_ENABLED
static int debug = 0;		/* --debug    : dump a debug tree */
static int shell = 0;		/* --shell    : run the navigating shell */
static int debugent = 0;	/* --debugent : dump the entities */
#endif
static int copy = 0;		/* --copy      : go through xmlCopyDoc() */
static int recovery = 0;	/* --recover   : parse with xmlRecoverFile() */
static int noent = 0;		/* --noent     : substitute entities */
static int noout = 0;		/* --noout     : do not print the tree */
static int nowrap = 0;		/* --nowrap    : no HTML wrapper on stderr */
static int valid = 0;		/* --valid     : validate while parsing */
static int postvalid = 0;	/* --postvalid : validate after parsing */
static int repeat = 0;		/* --repeat    : process each file 100 times */
static int insert = 0;		/* --insert    : list insertable elements */
static int compress = 0;	/* --compress  : compressed output */
static int html = 0;		/* --html      : use the HTML parser */
static int htmlout = 0;		/* --htmlout   : report errors as HTML */
static int push = 0;		/* --push      : use the push parser */
static int noblanks = 0;	/* --noblanks  : drop blank nodes */
static int testIO = 0;		/* --testIO    : parse through myRead/myClose */
static char *encoding = NULL;	/* --encode    : output encoding name, if any */

/* Parser defaults owned by the library and adjusted from main(). */
extern int xmlDoValidityCheckingDefaultValue;
extern int xmlGetWarningsDefaultValue;
75
/************************************************************************
 *                                                                      *
 *                             HTML output                              *
 *                                                                      *
 ************************************************************************/
81char buffer[50000];
82
83void
84xmlHTMLEncodeSend(void) {
85 char *result;
86
87 result = (char *) xmlEncodeEntitiesReentrant(NULL, BAD_CAST buffer);
88 if (result) {
89 fprintf(stderr, "%s", result);
90 xmlFree(result);
91 }
92 buffer[0] = 0;
93}
94
95/**
96 * xmlHTMLPrintFileInfo:
97 * @input: an xmlParserInputPtr input
98 *
99 * Displays the associated file and line informations for the current input
100 */
101
102void
103xmlHTMLPrintFileInfo(xmlParserInputPtr input) {
104 fprintf(stderr, "<p>");
105 if (input != NULL) {
106 if (input->filename) {
107 sprintf(&buffer[strlen(buffer)], "%s:%d: ", input->filename,
108 input->line);
109 } else {
110 sprintf(&buffer[strlen(buffer)], "Entity: line %d: ", input->line);
111 }
112 }
113 xmlHTMLEncodeSend();
114}
115
116/**
117 * xmlHTMLPrintFileContext:
118 * @input: an xmlParserInputPtr input
119 *
120 * Displays current context within the input content for error tracking
121 */
122
123void
124xmlHTMLPrintFileContext(xmlParserInputPtr input) {
125 const xmlChar *cur, *base;
126 int n;
127
128 if (input == NULL) return;
129 fprintf(stderr, "<pre>\n");
130 cur = input->cur;
131 base = input->base;
132 while ((cur > base) && ((*cur == '\n') || (*cur == '\r'))) {
133 cur--;
134 }
135 n = 0;
136 while ((n++ < 80) && (cur > base) && (*cur != '\n') && (*cur != '\r'))
137 cur--;
138 if ((*cur == '\n') || (*cur == '\r')) cur++;
139 base = cur;
140 n = 0;
141 while ((*cur != 0) && (*cur != '\n') && (*cur != '\r') && (n < 79)) {
142 sprintf(&buffer[strlen(buffer)], "%c", (unsigned char) *cur++);
143 n++;
144 }
145 sprintf(&buffer[strlen(buffer)], "\n");
146 cur = input->cur;
147 while ((*cur == '\n') || (*cur == '\r'))
148 cur--;
149 n = 0;
150 while ((cur != base) && (n++ < 80)) {
151 sprintf(&buffer[strlen(buffer)], " ");
152 base++;
153 }
154 sprintf(&buffer[strlen(buffer)],"^\n");
155 xmlHTMLEncodeSend();
156 fprintf(stderr, "</pre>");
157}
158
159/**
160 * xmlHTMLError:
161 * @ctx: an XML parser context
162 * @msg: the message to display/transmit
163 * @...: extra parameters for the message display
164 *
165 * Display and format an error messages, gives file, line, position and
166 * extra parameters.
167 */
168void
169xmlHTMLError(void *ctx, const char *msg, ...)
170{
171 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
172 xmlParserInputPtr input;
173 xmlParserInputPtr cur = NULL;
174 va_list args;
175
176 buffer[0] = 0;
177 input = ctxt->input;
178 if ((input != NULL) && (input->filename == NULL) && (ctxt->inputNr > 1)) {
179 cur = input;
180 input = ctxt->inputTab[ctxt->inputNr - 2];
181 }
182
183 xmlHTMLPrintFileInfo(input);
184
185 fprintf(stderr, "<b>error</b>: ");
186 va_start(args, msg);
187 vsprintf(&buffer[strlen(buffer)], msg, args);
188 va_end(args);
189 xmlHTMLEncodeSend();
190 fprintf(stderr, "</p>\n");
191
192 xmlHTMLPrintFileContext(input);
193 xmlHTMLEncodeSend();
194}
195
196/**
197 * xmlHTMLWarning:
198 * @ctx: an XML parser context
199 * @msg: the message to display/transmit
200 * @...: extra parameters for the message display
201 *
202 * Display and format a warning messages, gives file, line, position and
203 * extra parameters.
204 */
205void
206xmlHTMLWarning(void *ctx, const char *msg, ...)
207{
208 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
209 xmlParserInputPtr input;
210 xmlParserInputPtr cur = NULL;
211 va_list args;
212
213 buffer[0] = 0;
214 input = ctxt->input;
215 if ((input != NULL) && (input->filename == NULL) && (ctxt->inputNr > 1)) {
216 cur = input;
217 input = ctxt->inputTab[ctxt->inputNr - 2];
218 }
219
220
221 xmlHTMLPrintFileInfo(input);
222
223 fprintf(stderr, "<b>warning</b>: ");
224 va_start(args, msg);
225 vsprintf(&buffer[strlen(buffer)], msg, args);
226 va_end(args);
227 xmlHTMLEncodeSend();
228 fprintf(stderr, "</p>\n");
229
230 xmlHTMLPrintFileContext(input);
231 xmlHTMLEncodeSend();
232}
233
234/**
235 * xmlHTMLValidityError:
236 * @ctx: an XML parser context
237 * @msg: the message to display/transmit
238 * @...: extra parameters for the message display
239 *
240 * Display and format an validity error messages, gives file,
241 * line, position and extra parameters.
242 */
243void
244xmlHTMLValidityError(void *ctx, const char *msg, ...)
245{
246 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
247 xmlParserInputPtr input;
248 va_list args;
249
250 buffer[0] = 0;
251 input = ctxt->input;
252 if ((input->filename == NULL) && (ctxt->inputNr > 1))
253 input = ctxt->inputTab[ctxt->inputNr - 2];
254
255 xmlHTMLPrintFileInfo(input);
256
257 fprintf(stderr, "<b>validity error</b>: ");
258 va_start(args, msg);
259 vsprintf(&buffer[strlen(buffer)], msg, args);
260 va_end(args);
261 xmlHTMLEncodeSend();
262 fprintf(stderr, "</p>\n");
263
264 xmlHTMLPrintFileContext(input);
265 xmlHTMLEncodeSend();
266}
267
268/**
269 * xmlHTMLValidityWarning:
270 * @ctx: an XML parser context
271 * @msg: the message to display/transmit
272 * @...: extra parameters for the message display
273 *
274 * Display and format a validity warning messages, gives file, line,
275 * position and extra parameters.
276 */
277void
278xmlHTMLValidityWarning(void *ctx, const char *msg, ...)
279{
280 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
281 xmlParserInputPtr input;
282 va_list args;
283
284 buffer[0] = 0;
285 input = ctxt->input;
286 if ((input->filename == NULL) && (ctxt->inputNr > 1))
287 input = ctxt->inputTab[ctxt->inputNr - 2];
288
289 xmlHTMLPrintFileInfo(input);
290
291 fprintf(stderr, "<b>validity warning</b>: ");
292 va_start(args, msg);
293 vsprintf(&buffer[strlen(buffer)], msg, args);
294 va_end(args);
295 xmlHTMLEncodeSend();
296 fprintf(stderr, "</p>\n");
297
298 xmlHTMLPrintFileContext(input);
299 xmlHTMLEncodeSend();
300}
301
/************************************************************************
 *                                                                      *
 *                           Shell Interface                            *
 *                                                                      *
 ************************************************************************/
/**
 * xmlShellReadline:
 * @prompt:  the prompt value, may be NULL when readline is not used
 *
 * Reads one line of user input.  With HAVE_LIBREADLINE the line comes
 * from readline() and non-empty lines are added to the history;
 * otherwise the prompt is printed on stdout and the line is read from
 * stdin with fgets().
 *
 * Returns a pointer to the line or NULL on EOF; the caller is expected to
 * free the returned string.
 */
char *
xmlShellReadline(char *prompt) {
#ifdef HAVE_LIBREADLINE
    char *entered = readline(prompt);

    /* Only lines holding some text are worth keeping in the history. */
    if ((entered != NULL) && (entered[0] != 0))
        add_history(entered);

    return(entered);
#else
    char typed[501];

    if (prompt != NULL)
        fputs(prompt, stdout);
    if (fgets(typed, 500, stdin) == NULL)
        return(NULL);
    typed[500] = 0;
    return(strdup(typed));
#endif
}
340
/************************************************************************
 *                                                                      *
 *                            I/O Interfaces                            *
 *                                                                      *
 ************************************************************************/
346
/**
 * myRead:
 * @f:  the stdio stream to read from
 * @buffer:  where the bytes read are stored
 * @len:  the maximum number of bytes to read
 *
 * Read callback handed to xmlCreateIOParserCtxt() by the --testIO mode.
 *
 * Returns the number of bytes stored in @buffer, 0 at end of file, or -1
 * when @len is negative or a stream error prevented reading any byte.
 */
int myRead(FILE *f, char * buffer, int len) {
    size_t got;

    /* a negative length would turn into a huge size_t count below */
    if (len < 0)
        return(-1);
    got = fread(buffer, 1, (size_t) len, f);
    /* fread() reports errors and EOF alike as a short count */
    if ((got == 0) && (len > 0) && ferror(f))
        return(-1);
    return((int) got);
}
/**
 * myClose:
 * @f:  the stdio stream to close
 *
 * Close callback handed to xmlCreateIOParserCtxt() by the --testIO mode;
 * the result of fclose() is not reported.
 */
void myClose(FILE *f) {
    (void) fclose(f);
}
353
/************************************************************************
 *                                                                      *
 *                           Test processing                            *
 *                                                                      *
 ************************************************************************/
359void parseAndPrintFile(char *filename) {
360 xmlDocPtr doc = NULL, tmp;
361
362#ifdef LIBXML_HTML_ENABLED
363 if (html) {
364 doc = htmlParseFile(filename, NULL);
365 } else {
366#endif /* LIBXML_HTML_ENABLED */
367 /*
368 * build an XML tree from a string;
369 */
370 if (push) {
371 FILE *f;
372
373 f = fopen(filename, "r");
374 if (f != NULL) {
375 int res, size = 3;
376 char chars[1024];
377 xmlParserCtxtPtr ctxt;
378
379 if (repeat)
380 size = 1024;
381 res = fread(chars, 1, 4, f);
382 if (res > 0) {
383 ctxt = xmlCreatePushParserCtxt(NULL, NULL,
384 chars, res, filename);
385 while ((res = fread(chars, 1, size, f)) > 0) {
386 xmlParseChunk(ctxt, chars, res, 0);
387 }
388 xmlParseChunk(ctxt, chars, 0, 1);
389 doc = ctxt->myDoc;
390 xmlFreeParserCtxt(ctxt);
391 }
392 }
Daniel Veillard5e873c42000-04-12 13:27:38 +0000393 } else if (testIO) {
394 int ret;
395 FILE *f;
396
397 f = fopen(filename, "r");
398 if (f != NULL) {
399 xmlParserCtxtPtr ctxt;
400
401 ctxt = xmlCreateIOParserCtxt(NULL, NULL,
402 (xmlInputReadCallback) myRead,
403 (xmlInputCloseCallback) myClose,
404 f, XML_CHAR_ENCODING_NONE);
405 xmlParseDocument(ctxt);
406
407 ret = ctxt->wellFormed;
408 doc = ctxt->myDoc;
409 xmlFreeParserCtxt(ctxt);
410 if (!ret) {
411 xmlFreeDoc(doc);
412 doc = NULL;
413 }
414 }
Daniel Veillardce8b83b2000-04-05 18:38:42 +0000415 } else if (recovery) {
416 doc = xmlRecoverFile(filename);
417 } else if (htmlout) {
418 int ret;
419 xmlParserCtxtPtr ctxt;
420 xmlSAXHandler silent, *old;
421
422 ctxt = xmlCreateFileParserCtxt(filename);
423 memcpy(&silent, ctxt->sax, sizeof(silent));
424 old = ctxt->sax;
425 silent.error = xmlHTMLError;
426 if (xmlGetWarningsDefaultValue)
427 silent.warning = xmlHTMLWarning;
428 else
429 silent.warning = NULL;
430 silent.fatalError = xmlHTMLError;
431 ctxt->sax = &silent;
432 ctxt->vctxt.error = xmlHTMLValidityError;
433 if (xmlGetWarningsDefaultValue)
434 ctxt->vctxt.warning = xmlHTMLValidityWarning;
435 else
436 ctxt->vctxt.warning = NULL;
437
438 xmlParseDocument(ctxt);
439
440 ret = ctxt->wellFormed;
441 doc = ctxt->myDoc;
442 ctxt->sax = old;
443 xmlFreeParserCtxt(ctxt);
444 if (!ret) {
445 xmlFreeDoc(doc);
446 doc = NULL;
447 }
448 } else
449 doc = xmlParseFile(filename);
450#ifdef LIBXML_HTML_ENABLED
451 }
452#endif
453
454#ifdef LIBXML_DEBUG_ENABLED
455 /*
456 * shell interraction
457 */
458 if (shell)
459 xmlShell(doc, filename, xmlShellReadline, stdout);
460#endif
461
462 /*
463 * test intermediate copy if needed.
464 */
465 if (copy) {
466 tmp = doc;
467 doc = xmlCopyDoc(doc, 1);
468 xmlFreeDoc(tmp);
469 }
470
471 if ((insert) && (!html)) {
472 const xmlChar* list[256];
473 int nb, i;
474 xmlNodePtr node;
475
476 if (doc->children != NULL) {
477 node = doc->children;
478 while ((node != NULL) && (node->last == NULL)) node = node->next;
479 if (node != NULL) {
480 nb = xmlValidGetValidElements(node->last, NULL, list, 256);
481 if (nb < 0) {
482 printf("could not get valid list of elements\n");
483 } else if (nb == 0) {
484 printf("No element can be indersted under root\n");
485 } else {
486 printf("%d element types can be indersted under root:\n",
487 nb);
488 for (i = 0;i < nb;i++) {
489 printf("%s\n", list[i]);
490 }
491 }
492 }
493 }
494 }else if (noout == 0) {
495 /*
496 * print it.
497 */
498#ifdef LIBXML_DEBUG_ENABLED
499 if (!debug) {
500#endif
501 if (compress)
502 xmlSaveFile("-", doc);
Daniel Veillardbe803962000-06-28 23:40:59 +0000503 else if (encoding != NULL)
504 xmlSaveFileEnc("-", doc, encoding);
Daniel Veillardce8b83b2000-04-05 18:38:42 +0000505 else
506 xmlDocDump(stdout, doc);
507#ifdef LIBXML_DEBUG_ENABLED
508 } else
509 xmlDebugDumpDocument(stdout, doc);
510#endif
511 }
512
513 /*
514 * A posteriori validation test
515 */
516 if (postvalid) {
517 xmlValidCtxt cvp;
518 cvp.userData = (void *) stderr; cvp.error = (xmlValidityErrorFunc) fprintf; cvp.warning = (xmlValidityWarningFunc) fprintf;
519 xmlValidateDocument(&cvp, doc);
520 }
521
522#ifdef LIBXML_DEBUG_ENABLED
523 if ((debugent) && (!html))
524 xmlDebugDumpEntities(stdout, doc);
525#endif
526
527 /*
528 * free it.
529 */
530 xmlFreeDoc(doc);
531}
532
533int main(int argc, char **argv) {
534 int i, count;
535 int files = 0;
536
Daniel Veillardbe803962000-06-28 23:40:59 +0000537 LIBXML_TEST_VERSION
Daniel Veillardce8b83b2000-04-05 18:38:42 +0000538 for (i = 1; i < argc ; i++) {
539#ifdef LIBXML_DEBUG_ENABLED
540 if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
541 debug++;
542 else if ((!strcmp(argv[i], "-debugent")) || (!strcmp(argv[i], "--debugent")))
543 debugent++;
544 else if ((!strcmp(argv[i], "-shell")) ||
545 (!strcmp(argv[i], "--shell"))) {
546 shell++;
547 noout = 1;
548 } else
549#endif
550 if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
551 copy++;
552 else if ((!strcmp(argv[i], "-recover")) ||
553 (!strcmp(argv[i], "--recover")))
554 recovery++;
555 else if ((!strcmp(argv[i], "-noent")) ||
556 (!strcmp(argv[i], "--noent")))
557 noent++;
558 else if ((!strcmp(argv[i], "-noout")) ||
559 (!strcmp(argv[i], "--noout")))
560 noout++;
561 else if ((!strcmp(argv[i], "-htmlout")) ||
562 (!strcmp(argv[i], "--htmlout")))
563 htmlout++;
564#ifdef LIBXML_HTML_ENABLED
565 else if ((!strcmp(argv[i], "-html")) ||
566 (!strcmp(argv[i], "--html"))) {
567 html++;
568 }
569#endif /* LIBXML_HTML_ENABLED */
570 else if ((!strcmp(argv[i], "-nowrap")) ||
571 (!strcmp(argv[i], "--nowrap")))
572 nowrap++;
573 else if ((!strcmp(argv[i], "-valid")) ||
574 (!strcmp(argv[i], "--valid")))
575 valid++;
576 else if ((!strcmp(argv[i], "-postvalid")) ||
577 (!strcmp(argv[i], "--postvalid")))
578 postvalid++;
579 else if ((!strcmp(argv[i], "-insert")) ||
580 (!strcmp(argv[i], "--insert")))
581 insert++;
582 else if ((!strcmp(argv[i], "-repeat")) ||
583 (!strcmp(argv[i], "--repeat")))
584 repeat++;
585 else if ((!strcmp(argv[i], "-push")) ||
586 (!strcmp(argv[i], "--push")))
587 push++;
Daniel Veillard5e873c42000-04-12 13:27:38 +0000588 else if ((!strcmp(argv[i], "-testIO")) ||
589 (!strcmp(argv[i], "--testIO")))
590 testIO++;
Daniel Veillardce8b83b2000-04-05 18:38:42 +0000591 else if ((!strcmp(argv[i], "-compress")) ||
592 (!strcmp(argv[i], "--compress"))) {
593 compress++;
594 xmlSetCompressMode(9);
595 }
596 else if ((!strcmp(argv[i], "-nowarning")) ||
597 (!strcmp(argv[i], "--nowarning"))) {
598 xmlGetWarningsDefaultValue = 0;
599 }
Daniel Veillardbe803962000-06-28 23:40:59 +0000600 else if ((!strcmp(argv[i], "-encode")) ||
601 (!strcmp(argv[i], "--encode"))) {
602 i++;
603 encoding = argv[i];
604 }
Daniel Veillardce8b83b2000-04-05 18:38:42 +0000605 else if ((!strcmp(argv[i], "-noblanks")) ||
606 (!strcmp(argv[i], "--noblanks"))) {
607 noblanks++;
608 xmlKeepBlanksDefault(0);
609 }
610 }
611 if (noent != 0) xmlSubstituteEntitiesDefault(1);
612 if (valid != 0) xmlDoValidityCheckingDefaultValue = 1;
613 if ((htmlout) && (!nowrap)) {
614 fprintf(stderr,
615 "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"\n");
616 fprintf(stderr, "\t\"http://www.w3.org/TR/REC-html40/loose.dtd\">\n");
617 fprintf(stderr,
618 "<html><head><title>%s output</title></head>\n",
619 argv[0]);
620 fprintf(stderr,
621 "<body bgcolor=\"#ffffff\"><h1 align=\"center\">%s output</h1>\n",
622 argv[0]);
623 }
624 for (i = 1; i < argc ; i++) {
Daniel Veillardbe803962000-06-28 23:40:59 +0000625 if ((!strcmp(argv[i], "-encode")) ||
626 (!strcmp(argv[i], "--encode"))) {
627 i++;
628 continue;
629 }
Daniel Veillardce8b83b2000-04-05 18:38:42 +0000630 if (argv[i][0] != '-') {
631 if (repeat) {
632 for (count = 0;count < 100 * repeat;count++)
633 parseAndPrintFile(argv[i]);
634 } else
635 parseAndPrintFile(argv[i]);
636 files ++;
637 }
638 }
639 if ((htmlout) && (!nowrap)) {
640 fprintf(stderr, "</body></html>\n");
641 }
642 if (files == 0) {
643 printf("Usage : %s [--debug] [--debugent] [--copy] [--recover] [--noent] [--noout] [--valid] [--repeat] XMLfiles ...\n",
644 argv[0]);
645 printf("\tParse the XML files and output the result of the parsing\n");
646#ifdef LIBXML_DEBUG_ENABLED
647 printf("\t--debug : dump a debug tree of the in-memory document\n");
648 printf("\t--shell : run a navigating shell\n");
649 printf("\t--debugent : debug the entities defined in the document\n");
650#endif
651 printf("\t--copy : used to test the internal copy implementation\n");
652 printf("\t--recover : output what was parsable on broken XML documents\n");
653 printf("\t--noent : substitute entity references by their value\n");
654 printf("\t--noout : don't output the result tree\n");
655 printf("\t--htmlout : output results as HTML\n");
656 printf("\t--nowarp : do not put HTML doc wrapper\n");
657 printf("\t--valid : validate the document in addition to std well-formed check\n");
658 printf("\t--postvalid : do a posteriori validation, i.e after parsing\n");
659 printf("\t--repeat : repeat 100 times, for timing or profiling\n");
660 printf("\t--insert : ad-hoc test for valid insertions\n");
661 printf("\t--compress : turn on gzip compression of output\n");
662#ifdef LIBXML_HTML_ENABLED
663 printf("\t--html : use the HTML parser\n");
664#endif
665 printf("\t--push : use the push mode of the parser\n");
666 printf("\t--nowarning : do not emit warnings from parser/validator\n");
667 printf("\t--noblanks : drop (ignorable?) blanks spaces\n");
Daniel Veillard5e873c42000-04-12 13:27:38 +0000668 printf("\t--testIO : test user I/O support\n");
Daniel Veillardce8b83b2000-04-05 18:38:42 +0000669 }
670 xmlCleanupParser();
671 xmlMemoryDump();
672
673 return(0);
674}