blob: 24c17f71ea4f8a211ec619f0df9cbc4f53972c56 [file] [log] [blame]
Daniel Veillardce8b83b2000-04-05 18:38:42 +00001/*
2 * xmllint.c : a small tester program for XML input.
3 *
4 * See Copyright for the status of this software.
5 *
6 * Daniel.Veillard@w3.org
7 */
8
9#ifdef WIN32
10#include "win32config.h"
11#else
12#include "config.h"
13#endif
14
15#include <stdio.h>
16#include <string.h>
17#include <stdio.h>
18#include <stdarg.h>
19
20#ifdef HAVE_SYS_TYPES_H
21#include <sys/types.h>
22#endif
23#ifdef HAVE_SYS_STAT_H
24#include <sys/stat.h>
25#endif
26#ifdef HAVE_FCNTL_H
27#include <fcntl.h>
28#endif
29#ifdef HAVE_UNISTD_H
30#include <unistd.h>
31#endif
32#ifdef HAVE_STDLIB_H
33#include <stdlib.h>
34#endif
35#ifdef HAVE_LIBREADLINE
36#include <readline/readline.h>
37#ifdef HAVE_LIBHISTORY
38#include <readline/history.h>
39#endif
40#endif
41
42#include <libxml/xmlmemory.h>
43#include <libxml/parser.h>
44#include <libxml/parserInternals.h>
45#include <libxml/HTMLparser.h>
46#include <libxml/HTMLtree.h>
47#include <libxml/tree.h>
48#include <libxml/xpath.h>
49#include <libxml/debugXML.h>
50
/*
 * Command line switches. Each counter is bumped by main() when the
 * matching -name / --name option is seen on the command line.
 */
#ifdef LIBXML_DEBUG_ENABLED
/* switches only compiled in when LIBXML_DEBUG_ENABLED is defined */
static int debug = 0;		/* --debug    */
static int shell = 0;		/* --shell    */
static int debugent = 0;	/* --debugent */
#endif

/* how the input is parsed */
static int recovery = 0;	/* --recover  */
static int noent = 0;		/* --noent    */
static int valid = 0;		/* --valid    */
static int html = 0;		/* --html     */
static int push = 0;		/* --push     */
static int noblanks = 0;	/* --noblanks */
static int testIO = 0;		/* --testIO   */

/* what is done with the resulting tree */
static int copy = 0;		/* --copy      */
static int postvalid = 0;	/* --postvalid */
static int insert = 0;		/* --insert    */
static int repeat = 0;		/* --repeat    */

/* how results are reported */
static int noout = 0;		/* --noout    */
static int nowrap = 0;		/* --nowrap   */
static int compress = 0;	/* --compress */
static int htmlout = 0;		/* --htmlout  */

/* parser defaults defined outside of this file and adjusted by main() */
extern int xmlDoValidityCheckingDefaultValue;
extern int xmlGetWarningsDefaultValue;

74
75/************************************************************************
76 * *
77 * HTML ouput *
78 * *
79 ************************************************************************/
80char buffer[50000];
81
82void
83xmlHTMLEncodeSend(void) {
84 char *result;
85
86 result = (char *) xmlEncodeEntitiesReentrant(NULL, BAD_CAST buffer);
87 if (result) {
88 fprintf(stderr, "%s", result);
89 xmlFree(result);
90 }
91 buffer[0] = 0;
92}
93
94/**
95 * xmlHTMLPrintFileInfo:
96 * @input: an xmlParserInputPtr input
97 *
98 * Displays the associated file and line informations for the current input
99 */
100
101void
102xmlHTMLPrintFileInfo(xmlParserInputPtr input) {
103 fprintf(stderr, "<p>");
104 if (input != NULL) {
105 if (input->filename) {
106 sprintf(&buffer[strlen(buffer)], "%s:%d: ", input->filename,
107 input->line);
108 } else {
109 sprintf(&buffer[strlen(buffer)], "Entity: line %d: ", input->line);
110 }
111 }
112 xmlHTMLEncodeSend();
113}
114
115/**
116 * xmlHTMLPrintFileContext:
117 * @input: an xmlParserInputPtr input
118 *
119 * Displays current context within the input content for error tracking
120 */
121
122void
123xmlHTMLPrintFileContext(xmlParserInputPtr input) {
124 const xmlChar *cur, *base;
125 int n;
126
127 if (input == NULL) return;
128 fprintf(stderr, "<pre>\n");
129 cur = input->cur;
130 base = input->base;
131 while ((cur > base) && ((*cur == '\n') || (*cur == '\r'))) {
132 cur--;
133 }
134 n = 0;
135 while ((n++ < 80) && (cur > base) && (*cur != '\n') && (*cur != '\r'))
136 cur--;
137 if ((*cur == '\n') || (*cur == '\r')) cur++;
138 base = cur;
139 n = 0;
140 while ((*cur != 0) && (*cur != '\n') && (*cur != '\r') && (n < 79)) {
141 sprintf(&buffer[strlen(buffer)], "%c", (unsigned char) *cur++);
142 n++;
143 }
144 sprintf(&buffer[strlen(buffer)], "\n");
145 cur = input->cur;
146 while ((*cur == '\n') || (*cur == '\r'))
147 cur--;
148 n = 0;
149 while ((cur != base) && (n++ < 80)) {
150 sprintf(&buffer[strlen(buffer)], " ");
151 base++;
152 }
153 sprintf(&buffer[strlen(buffer)],"^\n");
154 xmlHTMLEncodeSend();
155 fprintf(stderr, "</pre>");
156}
157
158/**
159 * xmlHTMLError:
160 * @ctx: an XML parser context
161 * @msg: the message to display/transmit
162 * @...: extra parameters for the message display
163 *
164 * Display and format an error messages, gives file, line, position and
165 * extra parameters.
166 */
167void
168xmlHTMLError(void *ctx, const char *msg, ...)
169{
170 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
171 xmlParserInputPtr input;
172 xmlParserInputPtr cur = NULL;
173 va_list args;
174
175 buffer[0] = 0;
176 input = ctxt->input;
177 if ((input != NULL) && (input->filename == NULL) && (ctxt->inputNr > 1)) {
178 cur = input;
179 input = ctxt->inputTab[ctxt->inputNr - 2];
180 }
181
182 xmlHTMLPrintFileInfo(input);
183
184 fprintf(stderr, "<b>error</b>: ");
185 va_start(args, msg);
186 vsprintf(&buffer[strlen(buffer)], msg, args);
187 va_end(args);
188 xmlHTMLEncodeSend();
189 fprintf(stderr, "</p>\n");
190
191 xmlHTMLPrintFileContext(input);
192 xmlHTMLEncodeSend();
193}
194
195/**
196 * xmlHTMLWarning:
197 * @ctx: an XML parser context
198 * @msg: the message to display/transmit
199 * @...: extra parameters for the message display
200 *
201 * Display and format a warning messages, gives file, line, position and
202 * extra parameters.
203 */
204void
205xmlHTMLWarning(void *ctx, const char *msg, ...)
206{
207 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
208 xmlParserInputPtr input;
209 xmlParserInputPtr cur = NULL;
210 va_list args;
211
212 buffer[0] = 0;
213 input = ctxt->input;
214 if ((input != NULL) && (input->filename == NULL) && (ctxt->inputNr > 1)) {
215 cur = input;
216 input = ctxt->inputTab[ctxt->inputNr - 2];
217 }
218
219
220 xmlHTMLPrintFileInfo(input);
221
222 fprintf(stderr, "<b>warning</b>: ");
223 va_start(args, msg);
224 vsprintf(&buffer[strlen(buffer)], msg, args);
225 va_end(args);
226 xmlHTMLEncodeSend();
227 fprintf(stderr, "</p>\n");
228
229 xmlHTMLPrintFileContext(input);
230 xmlHTMLEncodeSend();
231}
232
233/**
234 * xmlHTMLValidityError:
235 * @ctx: an XML parser context
236 * @msg: the message to display/transmit
237 * @...: extra parameters for the message display
238 *
239 * Display and format an validity error messages, gives file,
240 * line, position and extra parameters.
241 */
242void
243xmlHTMLValidityError(void *ctx, const char *msg, ...)
244{
245 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
246 xmlParserInputPtr input;
247 va_list args;
248
249 buffer[0] = 0;
250 input = ctxt->input;
251 if ((input->filename == NULL) && (ctxt->inputNr > 1))
252 input = ctxt->inputTab[ctxt->inputNr - 2];
253
254 xmlHTMLPrintFileInfo(input);
255
256 fprintf(stderr, "<b>validity error</b>: ");
257 va_start(args, msg);
258 vsprintf(&buffer[strlen(buffer)], msg, args);
259 va_end(args);
260 xmlHTMLEncodeSend();
261 fprintf(stderr, "</p>\n");
262
263 xmlHTMLPrintFileContext(input);
264 xmlHTMLEncodeSend();
265}
266
267/**
268 * xmlHTMLValidityWarning:
269 * @ctx: an XML parser context
270 * @msg: the message to display/transmit
271 * @...: extra parameters for the message display
272 *
273 * Display and format a validity warning messages, gives file, line,
274 * position and extra parameters.
275 */
276void
277xmlHTMLValidityWarning(void *ctx, const char *msg, ...)
278{
279 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
280 xmlParserInputPtr input;
281 va_list args;
282
283 buffer[0] = 0;
284 input = ctxt->input;
285 if ((input->filename == NULL) && (ctxt->inputNr > 1))
286 input = ctxt->inputTab[ctxt->inputNr - 2];
287
288 xmlHTMLPrintFileInfo(input);
289
290 fprintf(stderr, "<b>validity warning</b>: ");
291 va_start(args, msg);
292 vsprintf(&buffer[strlen(buffer)], msg, args);
293 va_end(args);
294 xmlHTMLEncodeSend();
295 fprintf(stderr, "</p>\n");
296
297 xmlHTMLPrintFileContext(input);
298 xmlHTMLEncodeSend();
299}
300
301/************************************************************************
302 * *
303 * Shell Interface *
304 * *
305 ************************************************************************/
/**
 * xmlShellReadline:
 * @prompt:  the prompt value, may be NULL when readline is not used
 *
 * Reads one line of input for the shell. With HAVE_LIBREADLINE the line
 * comes from readline() and non-empty lines are added to the history;
 * otherwise it is read from stdin with fgets() and keeps its end of line
 * character.
 *
 * Returns a pointer to the line or NULL on EOF; the caller is expected to
 * free the returned string.
 */
char *
xmlShellReadline(char *prompt) {
#ifdef HAVE_LIBREADLINE
    char *line_read;

    /* Get a line from the user. */
    line_read = readline (prompt);

    /* If the line has any text in it, save it on the history. */
    if (line_read && *line_read)
        add_history (line_read);

    return (line_read);
#else
    char line_read[501];

    if (prompt != NULL) {
        fprintf(stdout, "%s", prompt);
        /*
         * The prompt usually has no end of line, push it out explicitly
         * so it is visible before the read blocks.
         */
        fflush(stdout);
    }
    /* fgets() always terminates what it stores in line_read */
    if (!fgets(line_read, sizeof(line_read), stdin))
        return(NULL);
    return(strdup(line_read));
#endif
}
339
340/************************************************************************
341 * *
Daniel Veillard5e873c42000-04-12 13:27:38 +0000342 * I/O Interfaces *
343 * *
344 ************************************************************************/
345
/**
 * myRead:
 * @f:  the stream to read from
 * @buffer:  where the bytes are stored
 * @len:  the maximum number of bytes to read
 *
 * Input read callback given to xmlCreateIOParserCtxt() for the --testIO
 * mode, built on fread().
 *
 * Returns the number of bytes read, 0 at end of file, or -1 on invalid
 * arguments or when nothing could be read because of a stream error.
 */
int myRead(FILE *f, char * buffer, int len) {
    size_t got;

    /* a negative len would turn into a huge size_t once given to fread() */
    if ((f == NULL) || (buffer == NULL) || (len < 0))
        return(-1);
    if (len == 0)
        return(0);
    got = fread(buffer, 1, (size_t) len, f);
    /* tell a failed read apart from a plain end of file */
    if ((got == 0) && ferror(f))
        return(-1);
    return((int) got);
}
/**
 * myClose:
 * @f:  the stream to close
 *
 * Input close callback given to xmlCreateIOParserCtxt() for the --testIO
 * mode; closes the stream and ignores the result of fclose().
 */
void
myClose(FILE *f)
{
    (void) fclose(f);
}
352
353/************************************************************************
354 * *
Daniel Veillardce8b83b2000-04-05 18:38:42 +0000355 * Test processing *
356 * *
357 ************************************************************************/
358void parseAndPrintFile(char *filename) {
359 xmlDocPtr doc = NULL, tmp;
360
361#ifdef LIBXML_HTML_ENABLED
362 if (html) {
363 doc = htmlParseFile(filename, NULL);
364 } else {
365#endif /* LIBXML_HTML_ENABLED */
366 /*
367 * build an XML tree from a string;
368 */
369 if (push) {
370 FILE *f;
371
372 f = fopen(filename, "r");
373 if (f != NULL) {
374 int res, size = 3;
375 char chars[1024];
376 xmlParserCtxtPtr ctxt;
377
378 if (repeat)
379 size = 1024;
380 res = fread(chars, 1, 4, f);
381 if (res > 0) {
382 ctxt = xmlCreatePushParserCtxt(NULL, NULL,
383 chars, res, filename);
384 while ((res = fread(chars, 1, size, f)) > 0) {
385 xmlParseChunk(ctxt, chars, res, 0);
386 }
387 xmlParseChunk(ctxt, chars, 0, 1);
388 doc = ctxt->myDoc;
389 xmlFreeParserCtxt(ctxt);
390 }
391 }
Daniel Veillard5e873c42000-04-12 13:27:38 +0000392 } else if (testIO) {
393 int ret;
394 FILE *f;
395
396 f = fopen(filename, "r");
397 if (f != NULL) {
398 xmlParserCtxtPtr ctxt;
399
400 ctxt = xmlCreateIOParserCtxt(NULL, NULL,
401 (xmlInputReadCallback) myRead,
402 (xmlInputCloseCallback) myClose,
403 f, XML_CHAR_ENCODING_NONE);
404 xmlParseDocument(ctxt);
405
406 ret = ctxt->wellFormed;
407 doc = ctxt->myDoc;
408 xmlFreeParserCtxt(ctxt);
409 if (!ret) {
410 xmlFreeDoc(doc);
411 doc = NULL;
412 }
413 }
Daniel Veillardce8b83b2000-04-05 18:38:42 +0000414 } else if (recovery) {
415 doc = xmlRecoverFile(filename);
416 } else if (htmlout) {
417 int ret;
418 xmlParserCtxtPtr ctxt;
419 xmlSAXHandler silent, *old;
420
421 ctxt = xmlCreateFileParserCtxt(filename);
422 memcpy(&silent, ctxt->sax, sizeof(silent));
423 old = ctxt->sax;
424 silent.error = xmlHTMLError;
425 if (xmlGetWarningsDefaultValue)
426 silent.warning = xmlHTMLWarning;
427 else
428 silent.warning = NULL;
429 silent.fatalError = xmlHTMLError;
430 ctxt->sax = &silent;
431 ctxt->vctxt.error = xmlHTMLValidityError;
432 if (xmlGetWarningsDefaultValue)
433 ctxt->vctxt.warning = xmlHTMLValidityWarning;
434 else
435 ctxt->vctxt.warning = NULL;
436
437 xmlParseDocument(ctxt);
438
439 ret = ctxt->wellFormed;
440 doc = ctxt->myDoc;
441 ctxt->sax = old;
442 xmlFreeParserCtxt(ctxt);
443 if (!ret) {
444 xmlFreeDoc(doc);
445 doc = NULL;
446 }
447 } else
448 doc = xmlParseFile(filename);
449#ifdef LIBXML_HTML_ENABLED
450 }
451#endif
452
453#ifdef LIBXML_DEBUG_ENABLED
454 /*
455 * shell interraction
456 */
457 if (shell)
458 xmlShell(doc, filename, xmlShellReadline, stdout);
459#endif
460
461 /*
462 * test intermediate copy if needed.
463 */
464 if (copy) {
465 tmp = doc;
466 doc = xmlCopyDoc(doc, 1);
467 xmlFreeDoc(tmp);
468 }
469
470 if ((insert) && (!html)) {
471 const xmlChar* list[256];
472 int nb, i;
473 xmlNodePtr node;
474
475 if (doc->children != NULL) {
476 node = doc->children;
477 while ((node != NULL) && (node->last == NULL)) node = node->next;
478 if (node != NULL) {
479 nb = xmlValidGetValidElements(node->last, NULL, list, 256);
480 if (nb < 0) {
481 printf("could not get valid list of elements\n");
482 } else if (nb == 0) {
483 printf("No element can be indersted under root\n");
484 } else {
485 printf("%d element types can be indersted under root:\n",
486 nb);
487 for (i = 0;i < nb;i++) {
488 printf("%s\n", list[i]);
489 }
490 }
491 }
492 }
493 }else if (noout == 0) {
494 /*
495 * print it.
496 */
497#ifdef LIBXML_DEBUG_ENABLED
498 if (!debug) {
499#endif
500 if (compress)
501 xmlSaveFile("-", doc);
502 else
503 xmlDocDump(stdout, doc);
504#ifdef LIBXML_DEBUG_ENABLED
505 } else
506 xmlDebugDumpDocument(stdout, doc);
507#endif
508 }
509
510 /*
511 * A posteriori validation test
512 */
513 if (postvalid) {
514 xmlValidCtxt cvp;
515 cvp.userData = (void *) stderr; cvp.error = (xmlValidityErrorFunc) fprintf; cvp.warning = (xmlValidityWarningFunc) fprintf;
516 xmlValidateDocument(&cvp, doc);
517 }
518
519#ifdef LIBXML_DEBUG_ENABLED
520 if ((debugent) && (!html))
521 xmlDebugDumpEntities(stdout, doc);
522#endif
523
524 /*
525 * free it.
526 */
527 xmlFreeDoc(doc);
528}
529
530int main(int argc, char **argv) {
531 int i, count;
532 int files = 0;
533
534 for (i = 1; i < argc ; i++) {
535#ifdef LIBXML_DEBUG_ENABLED
536 if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
537 debug++;
538 else if ((!strcmp(argv[i], "-debugent")) || (!strcmp(argv[i], "--debugent")))
539 debugent++;
540 else if ((!strcmp(argv[i], "-shell")) ||
541 (!strcmp(argv[i], "--shell"))) {
542 shell++;
543 noout = 1;
544 } else
545#endif
546 if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
547 copy++;
548 else if ((!strcmp(argv[i], "-recover")) ||
549 (!strcmp(argv[i], "--recover")))
550 recovery++;
551 else if ((!strcmp(argv[i], "-noent")) ||
552 (!strcmp(argv[i], "--noent")))
553 noent++;
554 else if ((!strcmp(argv[i], "-noout")) ||
555 (!strcmp(argv[i], "--noout")))
556 noout++;
557 else if ((!strcmp(argv[i], "-htmlout")) ||
558 (!strcmp(argv[i], "--htmlout")))
559 htmlout++;
560#ifdef LIBXML_HTML_ENABLED
561 else if ((!strcmp(argv[i], "-html")) ||
562 (!strcmp(argv[i], "--html"))) {
563 html++;
564 }
565#endif /* LIBXML_HTML_ENABLED */
566 else if ((!strcmp(argv[i], "-nowrap")) ||
567 (!strcmp(argv[i], "--nowrap")))
568 nowrap++;
569 else if ((!strcmp(argv[i], "-valid")) ||
570 (!strcmp(argv[i], "--valid")))
571 valid++;
572 else if ((!strcmp(argv[i], "-postvalid")) ||
573 (!strcmp(argv[i], "--postvalid")))
574 postvalid++;
575 else if ((!strcmp(argv[i], "-insert")) ||
576 (!strcmp(argv[i], "--insert")))
577 insert++;
578 else if ((!strcmp(argv[i], "-repeat")) ||
579 (!strcmp(argv[i], "--repeat")))
580 repeat++;
581 else if ((!strcmp(argv[i], "-push")) ||
582 (!strcmp(argv[i], "--push")))
583 push++;
Daniel Veillard5e873c42000-04-12 13:27:38 +0000584 else if ((!strcmp(argv[i], "-testIO")) ||
585 (!strcmp(argv[i], "--testIO")))
586 testIO++;
Daniel Veillardce8b83b2000-04-05 18:38:42 +0000587 else if ((!strcmp(argv[i], "-compress")) ||
588 (!strcmp(argv[i], "--compress"))) {
589 compress++;
590 xmlSetCompressMode(9);
591 }
592 else if ((!strcmp(argv[i], "-nowarning")) ||
593 (!strcmp(argv[i], "--nowarning"))) {
594 xmlGetWarningsDefaultValue = 0;
595 }
596 else if ((!strcmp(argv[i], "-noblanks")) ||
597 (!strcmp(argv[i], "--noblanks"))) {
598 noblanks++;
599 xmlKeepBlanksDefault(0);
600 }
601 }
602 if (noent != 0) xmlSubstituteEntitiesDefault(1);
603 if (valid != 0) xmlDoValidityCheckingDefaultValue = 1;
604 if ((htmlout) && (!nowrap)) {
605 fprintf(stderr,
606 "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"\n");
607 fprintf(stderr, "\t\"http://www.w3.org/TR/REC-html40/loose.dtd\">\n");
608 fprintf(stderr,
609 "<html><head><title>%s output</title></head>\n",
610 argv[0]);
611 fprintf(stderr,
612 "<body bgcolor=\"#ffffff\"><h1 align=\"center\">%s output</h1>\n",
613 argv[0]);
614 }
615 for (i = 1; i < argc ; i++) {
616 if (argv[i][0] != '-') {
617 if (repeat) {
618 for (count = 0;count < 100 * repeat;count++)
619 parseAndPrintFile(argv[i]);
620 } else
621 parseAndPrintFile(argv[i]);
622 files ++;
623 }
624 }
625 if ((htmlout) && (!nowrap)) {
626 fprintf(stderr, "</body></html>\n");
627 }
628 if (files == 0) {
629 printf("Usage : %s [--debug] [--debugent] [--copy] [--recover] [--noent] [--noout] [--valid] [--repeat] XMLfiles ...\n",
630 argv[0]);
631 printf("\tParse the XML files and output the result of the parsing\n");
632#ifdef LIBXML_DEBUG_ENABLED
633 printf("\t--debug : dump a debug tree of the in-memory document\n");
634 printf("\t--shell : run a navigating shell\n");
635 printf("\t--debugent : debug the entities defined in the document\n");
636#endif
637 printf("\t--copy : used to test the internal copy implementation\n");
638 printf("\t--recover : output what was parsable on broken XML documents\n");
639 printf("\t--noent : substitute entity references by their value\n");
640 printf("\t--noout : don't output the result tree\n");
641 printf("\t--htmlout : output results as HTML\n");
642 printf("\t--nowarp : do not put HTML doc wrapper\n");
643 printf("\t--valid : validate the document in addition to std well-formed check\n");
644 printf("\t--postvalid : do a posteriori validation, i.e after parsing\n");
645 printf("\t--repeat : repeat 100 times, for timing or profiling\n");
646 printf("\t--insert : ad-hoc test for valid insertions\n");
647 printf("\t--compress : turn on gzip compression of output\n");
648#ifdef LIBXML_HTML_ENABLED
649 printf("\t--html : use the HTML parser\n");
650#endif
651 printf("\t--push : use the push mode of the parser\n");
652 printf("\t--nowarning : do not emit warnings from parser/validator\n");
653 printf("\t--noblanks : drop (ignorable?) blanks spaces\n");
Daniel Veillard5e873c42000-04-12 13:27:38 +0000654 printf("\t--testIO : test user I/O support\n");
Daniel Veillardce8b83b2000-04-05 18:38:42 +0000655 }
656 xmlCleanupParser();
657 xmlMemoryDump();
658
659 return(0);
660}