blob: 84271c4c0e7365442c87da811f2a121100b3bb3a [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
Daniel Veillard7f7d1111999-09-22 09:46:25 +00009
Daniel Veillard3c558c31999-12-22 11:30:41 +000010#ifdef WIN32
11#include "win32config.h"
12#else
Daniel Veillard167b5091999-07-07 04:19:20 +000013#include "config.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000014#endif
Daniel Veillard167b5091999-07-07 04:19:20 +000015#include <stdio.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000016#include <string.h> /* for memset() only ! */
17
Daniel Veillard7f7d1111999-09-22 09:46:25 +000018#ifdef HAVE_CTYPE_H
19#include <ctype.h>
20#endif
21#ifdef HAVE_STDLIB_H
22#include <stdlib.h>
23#endif
24
Daniel Veillard6454aec1999-09-02 22:04:43 +000025#include "xmlmemory.h"
Daniel Veillard82150d81999-07-07 07:32:15 +000026#include "HTMLparser.h"
27#include "HTMLtree.h"
Daniel Veillard167b5091999-07-07 04:19:20 +000028#include "entities.h"
29#include "valid.h"
30
Daniel Veillarddbfd6411999-12-28 16:35:14 +000031static void
32htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
33
Daniel Veillard167b5091999-07-07 04:19:20 +000034/**
35 * htmlDtdDump:
36 * @buf: the HTML buffer output
37 * @doc: the document
38 *
39 * Dump the HTML document DTD, if any.
40 */
41static void
42htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
43 xmlDtdPtr cur = doc->intSubset;
44
45 if (cur == NULL) {
46 fprintf(stderr, "htmlDtdDump : no internal subset\n");
47 return;
48 }
49 xmlBufferWriteChar(buf, "<!DOCTYPE ");
50 xmlBufferWriteCHAR(buf, cur->name);
51 if (cur->ExternalID != NULL) {
52 xmlBufferWriteChar(buf, " PUBLIC ");
53 xmlBufferWriteQuotedString(buf, cur->ExternalID);
Daniel Veillard1566d3a1999-07-15 14:24:29 +000054 if (cur->SystemID != NULL) {
55 xmlBufferWriteChar(buf, " ");
56 xmlBufferWriteQuotedString(buf, cur->SystemID);
57 }
Daniel Veillard167b5091999-07-07 04:19:20 +000058 } else if (cur->SystemID != NULL) {
59 xmlBufferWriteChar(buf, " SYSTEM ");
60 xmlBufferWriteQuotedString(buf, cur->SystemID);
61 }
Daniel Veillard167b5091999-07-07 04:19:20 +000062 xmlBufferWriteChar(buf, ">\n");
63}
64
65/**
66 * htmlAttrDump:
67 * @buf: the HTML buffer output
68 * @doc: the document
69 * @cur: the attribute pointer
70 *
71 * Dump an HTML attribute
72 */
73static void
74htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +000075 xmlChar *value;
Daniel Veillard167b5091999-07-07 04:19:20 +000076
77 if (cur == NULL) {
78 fprintf(stderr, "htmlAttrDump : property == NULL\n");
79 return;
80 }
81 xmlBufferWriteChar(buf, " ");
82 xmlBufferWriteCHAR(buf, cur->name);
83 value = xmlNodeListGetString(doc, cur->val, 0);
84 if (value) {
85 xmlBufferWriteChar(buf, "=");
86 xmlBufferWriteQuotedString(buf, value);
Daniel Veillard6454aec1999-09-02 22:04:43 +000087 xmlFree(value);
Daniel Veillard167b5091999-07-07 04:19:20 +000088 } else {
89 xmlBufferWriteChar(buf, "=\"\"");
90 }
91}
92
93/**
94 * htmlAttrListDump:
95 * @buf: the HTML buffer output
96 * @doc: the document
97 * @cur: the first attribute pointer
98 *
99 * Dump a list of HTML attributes
100 */
101static void
102htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
103 if (cur == NULL) {
104 fprintf(stderr, "htmlAttrListDump : property == NULL\n");
105 return;
106 }
107 while (cur != NULL) {
108 htmlAttrDump(buf, doc, cur);
109 cur = cur->next;
110 }
111}
112
113
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000114void
Daniel Veillard82150d81999-07-07 07:32:15 +0000115htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000116/**
117 * htmlNodeListDump:
118 * @buf: the HTML buffer output
119 * @doc: the document
120 * @cur: the first node
Daniel Veillard167b5091999-07-07 04:19:20 +0000121 *
122 * Dump an HTML node list, recursive behaviour,children are printed too.
123 */
124static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000125htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000126 if (cur == NULL) {
127 fprintf(stderr, "htmlNodeListDump : node == NULL\n");
128 return;
129 }
130 while (cur != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000131 htmlNodeDump(buf, doc, cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000132 cur = cur->next;
133 }
134}
135
136/**
137 * htmlNodeDump:
138 * @buf: the HTML buffer output
139 * @doc: the document
140 * @cur: the current node
Daniel Veillard167b5091999-07-07 04:19:20 +0000141 *
142 * Dump an HTML node, recursive behaviour,children are printed too.
143 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000144void
Daniel Veillard82150d81999-07-07 07:32:15 +0000145htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000146 htmlElemDescPtr info;
Daniel Veillard167b5091999-07-07 04:19:20 +0000147
148 if (cur == NULL) {
149 fprintf(stderr, "htmlNodeDump : node == NULL\n");
150 return;
151 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000152 /*
153 * Special cases.
154 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000155 if (cur->type == XML_HTML_DOCUMENT_NODE) {
156 htmlDocContentDump(buf, (xmlDocPtr) cur);
157 return;
158 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000159 if (cur->type == HTML_TEXT_NODE) {
160 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000161 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000162
Daniel Veillard82150d81999-07-07 07:32:15 +0000163 /* uses the HTML encoding routine !!!!!!!!!! */
Daniel Veillardd293fd11999-12-01 09:51:45 +0000164#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000165 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000166#else
167 buffer = xmlEncodeEntitiesReentrant(doc,
168 xmlBufferContent(cur->content));
169#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000170 if (buffer != NULL) {
171 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000172 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000173 }
174 }
175 return;
176 }
177 if (cur->type == HTML_COMMENT_NODE) {
178 if (cur->content != NULL) {
179 xmlBufferWriteChar(buf, "<!--");
Daniel Veillardd293fd11999-12-01 09:51:45 +0000180#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000181 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000182#else
183 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
184#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000185 xmlBufferWriteChar(buf, "-->");
186 }
187 return;
188 }
189 if (cur->type == HTML_ENTITY_REF_NODE) {
190 xmlBufferWriteChar(buf, "&");
191 xmlBufferWriteCHAR(buf, cur->name);
192 xmlBufferWriteChar(buf, ";");
193 return;
194 }
195
Daniel Veillard82150d81999-07-07 07:32:15 +0000196 /*
197 * Get specific HTmL info for taht node.
198 */
199 info = htmlTagLookup(cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000200
Daniel Veillard82150d81999-07-07 07:32:15 +0000201 xmlBufferWriteChar(buf, "<");
Daniel Veillard167b5091999-07-07 04:19:20 +0000202 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000203 if (cur->properties != NULL)
204 htmlAttrListDump(buf, doc, cur->properties);
205
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000206 if ((info != NULL) && (info->empty)) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000207 xmlBufferWriteChar(buf, ">");
208 if (cur->next != NULL) {
209 if ((cur->next->type != HTML_TEXT_NODE) &&
210 (cur->next->type != HTML_ENTITY_REF_NODE))
211 xmlBufferWriteChar(buf, "\n");
212 }
213 return;
214 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000215 if ((cur->content == NULL) && (cur->childs == NULL)) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000216 if ((info != NULL) && (info->endTag != 0))
Daniel Veillard82150d81999-07-07 07:32:15 +0000217 xmlBufferWriteChar(buf, ">");
218 else {
219 xmlBufferWriteChar(buf, "></");
220 xmlBufferWriteCHAR(buf, cur->name);
221 xmlBufferWriteChar(buf, ">");
222 }
223 if (cur->next != NULL) {
224 if ((cur->next->type != HTML_TEXT_NODE) &&
225 (cur->next->type != HTML_ENTITY_REF_NODE))
226 xmlBufferWriteChar(buf, "\n");
227 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000228 return;
229 }
230 xmlBufferWriteChar(buf, ">");
231 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000232 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000233
Daniel Veillardd293fd11999-12-01 09:51:45 +0000234#ifndef XML_USE_BUFFER_CONTENT
235 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
236#else
237 buffer = xmlEncodeEntitiesReentrant(doc,
238 xmlBufferContent(cur->content));
239#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000240 if (buffer != NULL) {
241 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000242 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000243 }
244 }
245 if (cur->childs != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000246 if ((cur->childs->type != HTML_TEXT_NODE) &&
Chris Lahey6dff2141999-12-01 09:51:45 +0000247 (cur->childs->type != HTML_ENTITY_REF_NODE) &&
248 (cur->childs != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000249 xmlBufferWriteChar(buf, "\n");
250 htmlNodeListDump(buf, doc, cur->childs);
251 if ((cur->last->type != HTML_TEXT_NODE) &&
Chris Lahey6dff2141999-12-01 09:51:45 +0000252 (cur->last->type != HTML_ENTITY_REF_NODE) &&
253 (cur->childs != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000254 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000255 }
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000256 if (!htmlIsAutoClosed(doc, cur)) {
257 xmlBufferWriteChar(buf, "</");
258 xmlBufferWriteCHAR(buf, cur->name);
259 xmlBufferWriteChar(buf, ">");
260 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000261 if (cur->next != NULL) {
262 if ((cur->next->type != HTML_TEXT_NODE) &&
263 (cur->next->type != HTML_ENTITY_REF_NODE))
264 xmlBufferWriteChar(buf, "\n");
265 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000266}
267
268/**
269 * htmlDocContentDump:
270 * @buf: the HTML buffer output
271 * @cur: the document
272 *
273 * Dump an HTML document.
274 */
275static void
276htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000277 int type;
278
279 /*
280 * force to output the stuff as HTML, especially for entities
281 */
282 type = cur->type;
283 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard167b5091999-07-07 04:19:20 +0000284 if (cur->intSubset != NULL)
285 htmlDtdDump(buf, cur);
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000286 else {
287 /* Default to HTML-4.0 transitionnal @@@@ */
288 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
289
290 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000291 if (cur->root != NULL) {
Daniel Veillard35008381999-10-25 13:15:52 +0000292 htmlNodeListDump(buf, cur, cur->root);
Daniel Veillard167b5091999-07-07 04:19:20 +0000293 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000294 xmlBufferWriteChar(buf, "\n");
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000295 cur->type = type;
Daniel Veillard167b5091999-07-07 04:19:20 +0000296}
297
298/**
299 * htmlDocDumpMemory:
300 * @cur: the document
301 * @mem: OUT: the memory pointer
302 * @size: OUT: the memory lenght
303 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000304 * Dump an HTML document in memory and return the xmlChar * and it's size.
Daniel Veillard167b5091999-07-07 04:19:20 +0000305 * It's up to the caller to free the memory.
306 */
307void
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000308htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000309 xmlBufferPtr buf;
310
311 if (cur == NULL) {
312#ifdef DEBUG_TREE
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000313 fprintf(stderr, "htmlxmlDocDumpMemory : document == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000314#endif
315 *mem = NULL;
316 *size = 0;
317 return;
318 }
319 buf = xmlBufferCreate();
320 if (buf == NULL) {
321 *mem = NULL;
322 *size = 0;
323 return;
324 }
325 htmlDocContentDump(buf, cur);
326 *mem = buf->content;
327 *size = buf->use;
328 memset(buf, -1, sizeof(xmlBuffer));
Daniel Veillard6454aec1999-09-02 22:04:43 +0000329 xmlFree(buf);
Daniel Veillard167b5091999-07-07 04:19:20 +0000330}
331
332
333/**
334 * htmlDocDump:
335 * @f: the FILE*
336 * @cur: the document
337 *
338 * Dump an HTML document to an open FILE.
339 */
340void
341htmlDocDump(FILE *f, xmlDocPtr cur) {
342 xmlBufferPtr buf;
343
344 if (cur == NULL) {
345#ifdef DEBUG_TREE
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000346 fprintf(stderr, "htmlDocDump : document == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000347#endif
348 return;
349 }
350 buf = xmlBufferCreate();
351 if (buf == NULL) return;
352 htmlDocContentDump(buf, cur);
353 xmlBufferDump(f, buf);
354 xmlBufferFree(buf);
355}
356
357/**
358 * htmlSaveFile:
359 * @filename: the filename
360 * @cur: the document
361 *
362 * Dump an HTML document to a file.
363 *
364 * returns: the number of byte written or -1 in case of failure.
365 */
366int
367htmlSaveFile(const char *filename, xmlDocPtr cur) {
368 xmlBufferPtr buf;
369 FILE *output = NULL;
370 int ret;
371
372 /*
373 * save the content to a temp buffer.
374 */
375 buf = xmlBufferCreate();
376 if (buf == NULL) return(0);
377 htmlDocContentDump(buf, cur);
378
379 output = fopen(filename, "w");
380 if (output == NULL) return(-1);
381 ret = xmlBufferDump(output, buf);
382 fclose(output);
383
384 xmlBufferFree(buf);
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000385 return(ret * sizeof(xmlChar));
Daniel Veillard167b5091999-07-07 04:19:20 +0000386}
387