Daniel Veillard | be70ff7 | 1999-07-05 16:50:46 +0000 | [diff] [blame] | 1 | /* |
| 2 | * testHTML.c : a small tester program for HTML input. |
| 3 | * |
| 4 | * See Copyright for the status of this software. |
| 5 | * |
| 6 | * Daniel.Veillard@w3.org |
| 7 | */ |
| 8 | |
| 9 | #ifdef WIN32 |
| 10 | #define HAVE_FCNTL_H |
| 11 | #include <io.h> |
| 12 | #else |
Daniel Veillard | 7f7d111 | 1999-09-22 09:46:25 +0000 | [diff] [blame^] | 13 | #include "config.h" |
Daniel Veillard | be70ff7 | 1999-07-05 16:50:46 +0000 | [diff] [blame] | 14 | #endif |
Daniel Veillard | 7f7d111 | 1999-09-22 09:46:25 +0000 | [diff] [blame^] | 15 | |
| 16 | #include <stdio.h> |
| 17 | #include <string.h> |
| 18 | |
| 19 | #ifdef HAVE_SYS_TYPES_H |
Daniel Veillard | be70ff7 | 1999-07-05 16:50:46 +0000 | [diff] [blame] | 20 | #include <sys/types.h> |
Daniel Veillard | 7f7d111 | 1999-09-22 09:46:25 +0000 | [diff] [blame^] | 21 | #endif |
Daniel Veillard | be70ff7 | 1999-07-05 16:50:46 +0000 | [diff] [blame] | 22 | #ifdef HAVE_SYS_STAT_H |
| 23 | #include <sys/stat.h> |
| 24 | #endif |
| 25 | #ifdef HAVE_FCNTL_H |
| 26 | #include <fcntl.h> |
| 27 | #endif |
| 28 | #ifdef HAVE_UNISTD_H |
| 29 | #include <unistd.h> |
| 30 | #endif |
Daniel Veillard | 7f7d111 | 1999-09-22 09:46:25 +0000 | [diff] [blame^] | 31 | #ifdef HAVE_STDLIB_H |
Daniel Veillard | be70ff7 | 1999-07-05 16:50:46 +0000 | [diff] [blame] | 32 | #include <stdlib.h> |
Daniel Veillard | 7f7d111 | 1999-09-22 09:46:25 +0000 | [diff] [blame^] | 33 | #endif |
Daniel Veillard | be70ff7 | 1999-07-05 16:50:46 +0000 | [diff] [blame] | 34 | |
| 35 | #include "HTMLparser.h" |
Daniel Veillard | 82150d8 | 1999-07-07 07:32:15 +0000 | [diff] [blame] | 36 | #include "HTMLtree.h" |
Daniel Veillard | be70ff7 | 1999-07-05 16:50:46 +0000 | [diff] [blame] | 37 | #include "debugXML.h" |
| 38 | |
/* Incremented for each -debug / --debug argument; when non-zero the
 * document is printed with xmlDebugDumpDocument() instead of htmlDocDump(). */
static int debug = 0;
/* Incremented for each -copy / --copy argument; when non-zero the parsed
 * document is replaced by an xmlCopyDoc() copy before being printed. */
static int copy = 0;
| 41 | |
| 42 | /* |
| 43 | * Note: this is perfectly clean HTML, i.e. not a useful test. |
Daniel Veillard | be70ff7 | 1999-07-05 16:50:46 +0000 | [diff] [blame] | 44 | static CHAR buffer[] = |
| 45 | "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"\n\ |
| 46 | \"http://www.w3.org/TR/REC-html40/loose.dtd\">\n\ |
| 47 | <html>\n\ |
| 48 | <head>\n\ |
| 49 | <title>This service is temporary down</title>\n\ |
| 50 | </head>\n\ |
| 51 | \n\ |
| 52 | <body bgcolor=\"#FFFFFF\">\n\ |
| 53 | <h1 align=\"center\">Sorry, this service is temporary down</h1>\n\ |
| 54 | We are doing our best to get it back on-line,\n\ |
| 55 | \n\ |
| 56 | <p>The W3C system administrators</p>\n\ |
| 57 | </body>\n\ |
| 58 | </html>\n\ |
| 59 | "; |
Daniel Veillard | b96e643 | 1999-08-29 21:02:19 +0000 | [diff] [blame] | 60 | */ |
Daniel Veillard | be70ff7 | 1999-07-05 16:50:46 +0000 | [diff] [blame] | 61 | |
| 62 | /************************************************************************ |
| 63 | * * |
| 64 | * Debug * |
| 65 | * * |
| 66 | ************************************************************************/ |
| 67 | |
| 68 | void parseAndPrintFile(char *filename) { |
| 69 | htmlDocPtr doc, tmp; |
| 70 | |
| 71 | /* |
| 72 | * build an HTML tree from a string; |
| 73 | */ |
| 74 | doc = htmlParseFile(filename, NULL); |
| 75 | |
| 76 | /* |
| 77 | * test intermediate copy if needed. |
| 78 | */ |
| 79 | if (copy) { |
| 80 | tmp = doc; |
| 81 | doc = xmlCopyDoc(doc, 1); |
| 82 | xmlFreeDoc(tmp); |
| 83 | } |
| 84 | |
| 85 | /* |
| 86 | * print it. |
| 87 | */ |
| 88 | if (!debug) |
Daniel Veillard | 82150d8 | 1999-07-07 07:32:15 +0000 | [diff] [blame] | 89 | htmlDocDump(stdout, doc); |
Daniel Veillard | be70ff7 | 1999-07-05 16:50:46 +0000 | [diff] [blame] | 90 | else |
| 91 | xmlDebugDumpDocument(stdout, doc); |
| 92 | |
| 93 | /* |
| 94 | * free it. |
| 95 | */ |
| 96 | xmlFreeDoc(doc); |
| 97 | } |
| 98 | |
| 99 | void parseAndPrintBuffer(CHAR *buf) { |
| 100 | htmlDocPtr doc, tmp; |
| 101 | |
| 102 | /* |
| 103 | * build an HTML tree from a string; |
| 104 | */ |
| 105 | doc = htmlParseDoc(buf, NULL); |
| 106 | |
| 107 | /* |
| 108 | * test intermediate copy if needed. |
| 109 | */ |
| 110 | if (copy) { |
| 111 | tmp = doc; |
| 112 | doc = xmlCopyDoc(doc, 1); |
| 113 | xmlFreeDoc(tmp); |
| 114 | } |
| 115 | |
| 116 | /* |
| 117 | * print it. |
| 118 | */ |
| 119 | if (!debug) |
Daniel Veillard | 82150d8 | 1999-07-07 07:32:15 +0000 | [diff] [blame] | 120 | htmlDocDump(stdout, doc); |
Daniel Veillard | be70ff7 | 1999-07-05 16:50:46 +0000 | [diff] [blame] | 121 | else |
| 122 | xmlDebugDumpDocument(stdout, doc); |
| 123 | |
| 124 | /* |
| 125 | * free it. |
| 126 | */ |
| 127 | xmlFreeDoc(doc); |
| 128 | } |
| 129 | |
| 130 | int main(int argc, char **argv) { |
| 131 | int i; |
| 132 | int files = 0; |
| 133 | |
| 134 | for (i = 1; i < argc ; i++) { |
| 135 | if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug"))) |
| 136 | debug++; |
| 137 | else if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy"))) |
| 138 | copy++; |
| 139 | } |
| 140 | for (i = 1; i < argc ; i++) { |
| 141 | if (argv[i][0] != '-') { |
| 142 | parseAndPrintFile(argv[i]); |
| 143 | files ++; |
| 144 | } |
| 145 | } |
| 146 | if (files == 0) { |
| 147 | printf("Usage : %s [--debug] [--copy] HTMLfiles ...\n", |
| 148 | argv[0]); |
| 149 | printf("\tParse the HTML files and output the result of the parsing\n"); |
| 150 | printf("\t--debug : dump a debug tree of the in-memory document\n"); |
| 151 | printf("\t--copy : used to test the internal copy implementation\n"); |
| 152 | } |
| 153 | |
| 154 | return(0); |
| 155 | } |