blob: fe9c0ae313cc54e056e29ac67e69830e900eefe7 [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
Daniel Veillard7f7d1111999-09-22 09:46:25 +00009
Daniel Veillard3c558c31999-12-22 11:30:41 +000010#ifdef WIN32
11#include "win32config.h"
12#else
Daniel Veillard167b5091999-07-07 04:19:20 +000013#include "config.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000014#endif
Daniel Veillard167b5091999-07-07 04:19:20 +000015#include <stdio.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000016#include <string.h> /* for memset() only ! */
17
Daniel Veillard7f7d1111999-09-22 09:46:25 +000018#ifdef HAVE_CTYPE_H
19#include <ctype.h>
20#endif
21#ifdef HAVE_STDLIB_H
22#include <stdlib.h>
23#endif
24
Daniel Veillard6454aec1999-09-02 22:04:43 +000025#include "xmlmemory.h"
Daniel Veillard82150d81999-07-07 07:32:15 +000026#include "HTMLparser.h"
27#include "HTMLtree.h"
Daniel Veillard167b5091999-07-07 04:19:20 +000028#include "entities.h"
29#include "valid.h"
30
Daniel Veillarddbfd6411999-12-28 16:35:14 +000031static void
32htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
33
Daniel Veillard167b5091999-07-07 04:19:20 +000034/**
35 * htmlDtdDump:
36 * @buf: the HTML buffer output
37 * @doc: the document
38 *
39 * Dump the HTML document DTD, if any.
40 */
41static void
42htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
43 xmlDtdPtr cur = doc->intSubset;
44
45 if (cur == NULL) {
46 fprintf(stderr, "htmlDtdDump : no internal subset\n");
47 return;
48 }
49 xmlBufferWriteChar(buf, "<!DOCTYPE ");
50 xmlBufferWriteCHAR(buf, cur->name);
51 if (cur->ExternalID != NULL) {
52 xmlBufferWriteChar(buf, " PUBLIC ");
53 xmlBufferWriteQuotedString(buf, cur->ExternalID);
Daniel Veillard1566d3a1999-07-15 14:24:29 +000054 if (cur->SystemID != NULL) {
55 xmlBufferWriteChar(buf, " ");
56 xmlBufferWriteQuotedString(buf, cur->SystemID);
57 }
Daniel Veillard167b5091999-07-07 04:19:20 +000058 } else if (cur->SystemID != NULL) {
59 xmlBufferWriteChar(buf, " SYSTEM ");
60 xmlBufferWriteQuotedString(buf, cur->SystemID);
61 }
Daniel Veillard167b5091999-07-07 04:19:20 +000062 xmlBufferWriteChar(buf, ">\n");
63}
64
65/**
66 * htmlAttrDump:
67 * @buf: the HTML buffer output
68 * @doc: the document
69 * @cur: the attribute pointer
70 *
71 * Dump an HTML attribute
72 */
73static void
74htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +000075 xmlChar *value;
Daniel Veillard167b5091999-07-07 04:19:20 +000076
77 if (cur == NULL) {
78 fprintf(stderr, "htmlAttrDump : property == NULL\n");
79 return;
80 }
81 xmlBufferWriteChar(buf, " ");
82 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillardcf461992000-03-14 18:30:20 +000083 value = xmlNodeListGetString(doc, cur->children, 0);
Daniel Veillard167b5091999-07-07 04:19:20 +000084 if (value) {
85 xmlBufferWriteChar(buf, "=");
86 xmlBufferWriteQuotedString(buf, value);
Daniel Veillard6454aec1999-09-02 22:04:43 +000087 xmlFree(value);
Daniel Veillard167b5091999-07-07 04:19:20 +000088 } else {
89 xmlBufferWriteChar(buf, "=\"\"");
90 }
91}
92
93/**
94 * htmlAttrListDump:
95 * @buf: the HTML buffer output
96 * @doc: the document
97 * @cur: the first attribute pointer
98 *
99 * Dump a list of HTML attributes
100 */
101static void
102htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
103 if (cur == NULL) {
104 fprintf(stderr, "htmlAttrListDump : property == NULL\n");
105 return;
106 }
107 while (cur != NULL) {
108 htmlAttrDump(buf, doc, cur);
109 cur = cur->next;
110 }
111}
112
113
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000114void
Daniel Veillard82150d81999-07-07 07:32:15 +0000115htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000116/**
117 * htmlNodeListDump:
118 * @buf: the HTML buffer output
119 * @doc: the document
120 * @cur: the first node
Daniel Veillard167b5091999-07-07 04:19:20 +0000121 *
122 * Dump an HTML node list, recursive behaviour,children are printed too.
123 */
124static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000125htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000126 if (cur == NULL) {
127 fprintf(stderr, "htmlNodeListDump : node == NULL\n");
128 return;
129 }
130 while (cur != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000131 htmlNodeDump(buf, doc, cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000132 cur = cur->next;
133 }
134}
135
136/**
137 * htmlNodeDump:
138 * @buf: the HTML buffer output
139 * @doc: the document
140 * @cur: the current node
Daniel Veillard167b5091999-07-07 04:19:20 +0000141 *
142 * Dump an HTML node, recursive behaviour,children are printed too.
143 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000144void
Daniel Veillard82150d81999-07-07 07:32:15 +0000145htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000146 htmlElemDescPtr info;
Daniel Veillard167b5091999-07-07 04:19:20 +0000147
148 if (cur == NULL) {
149 fprintf(stderr, "htmlNodeDump : node == NULL\n");
150 return;
151 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000152 /*
153 * Special cases.
154 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000155 if (cur->type == XML_HTML_DOCUMENT_NODE) {
156 htmlDocContentDump(buf, (xmlDocPtr) cur);
157 return;
158 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000159 if (cur->type == HTML_TEXT_NODE) {
160 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000161 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000162
Daniel Veillard82150d81999-07-07 07:32:15 +0000163 /* uses the HTML encoding routine !!!!!!!!!! */
Daniel Veillardd293fd11999-12-01 09:51:45 +0000164#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000165 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000166#else
167 buffer = xmlEncodeEntitiesReentrant(doc,
168 xmlBufferContent(cur->content));
169#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000170 if (buffer != NULL) {
171 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000172 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000173 }
174 }
175 return;
176 }
177 if (cur->type == HTML_COMMENT_NODE) {
178 if (cur->content != NULL) {
179 xmlBufferWriteChar(buf, "<!--");
Daniel Veillardd293fd11999-12-01 09:51:45 +0000180#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000181 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000182#else
183 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
184#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000185 xmlBufferWriteChar(buf, "-->");
186 }
187 return;
188 }
189 if (cur->type == HTML_ENTITY_REF_NODE) {
190 xmlBufferWriteChar(buf, "&");
191 xmlBufferWriteCHAR(buf, cur->name);
192 xmlBufferWriteChar(buf, ";");
193 return;
194 }
195
Daniel Veillard82150d81999-07-07 07:32:15 +0000196 /*
197 * Get specific HTmL info for taht node.
198 */
199 info = htmlTagLookup(cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000200
Daniel Veillard82150d81999-07-07 07:32:15 +0000201 xmlBufferWriteChar(buf, "<");
Daniel Veillard167b5091999-07-07 04:19:20 +0000202 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000203 if (cur->properties != NULL)
204 htmlAttrListDump(buf, doc, cur->properties);
205
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000206 if ((info != NULL) && (info->empty)) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000207 xmlBufferWriteChar(buf, ">");
208 if (cur->next != NULL) {
209 if ((cur->next->type != HTML_TEXT_NODE) &&
210 (cur->next->type != HTML_ENTITY_REF_NODE))
211 xmlBufferWriteChar(buf, "\n");
212 }
213 return;
214 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000215 if ((cur->content == NULL) && (cur->children == NULL)) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000216 if ((info != NULL) && (info->endTag != 0))
Daniel Veillard82150d81999-07-07 07:32:15 +0000217 xmlBufferWriteChar(buf, ">");
218 else {
219 xmlBufferWriteChar(buf, "></");
220 xmlBufferWriteCHAR(buf, cur->name);
221 xmlBufferWriteChar(buf, ">");
222 }
223 if (cur->next != NULL) {
224 if ((cur->next->type != HTML_TEXT_NODE) &&
225 (cur->next->type != HTML_ENTITY_REF_NODE))
226 xmlBufferWriteChar(buf, "\n");
227 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000228 return;
229 }
230 xmlBufferWriteChar(buf, ">");
231 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000232 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000233
Daniel Veillardd293fd11999-12-01 09:51:45 +0000234#ifndef XML_USE_BUFFER_CONTENT
235 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
236#else
237 buffer = xmlEncodeEntitiesReentrant(doc,
238 xmlBufferContent(cur->content));
239#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000240 if (buffer != NULL) {
241 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000242 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000243 }
244 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000245 if (cur->children != NULL) {
246 if ((cur->children->type != HTML_TEXT_NODE) &&
247 (cur->children->type != HTML_ENTITY_REF_NODE) &&
248 (cur->children != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000249 xmlBufferWriteChar(buf, "\n");
Daniel Veillardcf461992000-03-14 18:30:20 +0000250 htmlNodeListDump(buf, doc, cur->children);
Daniel Veillard82150d81999-07-07 07:32:15 +0000251 if ((cur->last->type != HTML_TEXT_NODE) &&
Chris Lahey6dff2141999-12-01 09:51:45 +0000252 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardcf461992000-03-14 18:30:20 +0000253 (cur->children != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000254 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000255 }
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000256 if (!htmlIsAutoClosed(doc, cur)) {
257 xmlBufferWriteChar(buf, "</");
258 xmlBufferWriteCHAR(buf, cur->name);
259 xmlBufferWriteChar(buf, ">");
260 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000261 if (cur->next != NULL) {
262 if ((cur->next->type != HTML_TEXT_NODE) &&
263 (cur->next->type != HTML_ENTITY_REF_NODE))
264 xmlBufferWriteChar(buf, "\n");
265 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000266}
267
268/**
Daniel Veillard5feb8492000-02-02 17:15:36 +0000269 * htmlNodeDumpFile:
270 * @out: the FILE pointer
271 * @doc: the document
272 * @cur: the current node
273 *
274 * Dump an HTML node, recursive behaviour,children are printed too.
275 */
276void
277htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
278 xmlBufferPtr buf;
279
280 buf = xmlBufferCreate();
281 if (buf == NULL) return;
282 htmlNodeDump(buf, doc, cur);
283 xmlBufferDump(out, buf);
284 xmlBufferFree(buf);
285}
286
287/**
Daniel Veillard167b5091999-07-07 04:19:20 +0000288 * htmlDocContentDump:
289 * @buf: the HTML buffer output
290 * @cur: the document
291 *
292 * Dump an HTML document.
293 */
294static void
295htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000296 int type;
297
298 /*
299 * force to output the stuff as HTML, especially for entities
300 */
301 type = cur->type;
302 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard167b5091999-07-07 04:19:20 +0000303 if (cur->intSubset != NULL)
304 htmlDtdDump(buf, cur);
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000305 else {
306 /* Default to HTML-4.0 transitionnal @@@@ */
307 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
308
309 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000310 if (cur->children != NULL) {
311 htmlNodeListDump(buf, cur, cur->children);
Daniel Veillard167b5091999-07-07 04:19:20 +0000312 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000313 xmlBufferWriteChar(buf, "\n");
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000314 cur->type = type;
Daniel Veillard167b5091999-07-07 04:19:20 +0000315}
316
317/**
318 * htmlDocDumpMemory:
319 * @cur: the document
320 * @mem: OUT: the memory pointer
321 * @size: OUT: the memory lenght
322 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000323 * Dump an HTML document in memory and return the xmlChar * and it's size.
Daniel Veillard167b5091999-07-07 04:19:20 +0000324 * It's up to the caller to free the memory.
325 */
326void
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000327htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000328 xmlBufferPtr buf;
329
330 if (cur == NULL) {
331#ifdef DEBUG_TREE
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000332 fprintf(stderr, "htmlxmlDocDumpMemory : document == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000333#endif
334 *mem = NULL;
335 *size = 0;
336 return;
337 }
338 buf = xmlBufferCreate();
339 if (buf == NULL) {
340 *mem = NULL;
341 *size = 0;
342 return;
343 }
344 htmlDocContentDump(buf, cur);
345 *mem = buf->content;
346 *size = buf->use;
347 memset(buf, -1, sizeof(xmlBuffer));
Daniel Veillard6454aec1999-09-02 22:04:43 +0000348 xmlFree(buf);
Daniel Veillard167b5091999-07-07 04:19:20 +0000349}
350
351
352/**
353 * htmlDocDump:
354 * @f: the FILE*
355 * @cur: the document
356 *
357 * Dump an HTML document to an open FILE.
358 */
359void
360htmlDocDump(FILE *f, xmlDocPtr cur) {
361 xmlBufferPtr buf;
362
363 if (cur == NULL) {
364#ifdef DEBUG_TREE
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000365 fprintf(stderr, "htmlDocDump : document == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000366#endif
367 return;
368 }
369 buf = xmlBufferCreate();
370 if (buf == NULL) return;
371 htmlDocContentDump(buf, cur);
372 xmlBufferDump(f, buf);
373 xmlBufferFree(buf);
374}
375
376/**
377 * htmlSaveFile:
378 * @filename: the filename
379 * @cur: the document
380 *
381 * Dump an HTML document to a file.
382 *
383 * returns: the number of byte written or -1 in case of failure.
384 */
385int
386htmlSaveFile(const char *filename, xmlDocPtr cur) {
387 xmlBufferPtr buf;
388 FILE *output = NULL;
389 int ret;
390
391 /*
392 * save the content to a temp buffer.
393 */
394 buf = xmlBufferCreate();
395 if (buf == NULL) return(0);
396 htmlDocContentDump(buf, cur);
397
398 output = fopen(filename, "w");
399 if (output == NULL) return(-1);
400 ret = xmlBufferDump(output, buf);
401 fclose(output);
402
403 xmlBufferFree(buf);
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000404 return(ret * sizeof(xmlChar));
Daniel Veillard167b5091999-07-07 04:19:20 +0000405}
406