blob: 4e21c0fcfadac6a37b98f1765e29bbbc92f7bc0f [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
Daniel Veillard7f7d1111999-09-22 09:46:25 +00009
10#ifndef WIN32
Daniel Veillard167b5091999-07-07 04:19:20 +000011#include "config.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000012#endif
Daniel Veillard167b5091999-07-07 04:19:20 +000013#include <stdio.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000014#include <string.h> /* for memset() only ! */
15
Daniel Veillard7f7d1111999-09-22 09:46:25 +000016#ifdef HAVE_CTYPE_H
17#include <ctype.h>
18#endif
19#ifdef HAVE_STDLIB_H
20#include <stdlib.h>
21#endif
22
Daniel Veillard6454aec1999-09-02 22:04:43 +000023#include "xmlmemory.h"
Daniel Veillard82150d81999-07-07 07:32:15 +000024#include "HTMLparser.h"
25#include "HTMLtree.h"
Daniel Veillard167b5091999-07-07 04:19:20 +000026#include "entities.h"
27#include "valid.h"
28
Daniel Veillard167b5091999-07-07 04:19:20 +000029/**
30 * htmlDtdDump:
31 * @buf: the HTML buffer output
32 * @doc: the document
33 *
34 * Dump the HTML document DTD, if any.
35 */
36static void
37htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
38 xmlDtdPtr cur = doc->intSubset;
39
40 if (cur == NULL) {
41 fprintf(stderr, "htmlDtdDump : no internal subset\n");
42 return;
43 }
44 xmlBufferWriteChar(buf, "<!DOCTYPE ");
45 xmlBufferWriteCHAR(buf, cur->name);
46 if (cur->ExternalID != NULL) {
47 xmlBufferWriteChar(buf, " PUBLIC ");
48 xmlBufferWriteQuotedString(buf, cur->ExternalID);
Daniel Veillard1566d3a1999-07-15 14:24:29 +000049 if (cur->SystemID != NULL) {
50 xmlBufferWriteChar(buf, " ");
51 xmlBufferWriteQuotedString(buf, cur->SystemID);
52 }
Daniel Veillard167b5091999-07-07 04:19:20 +000053 } else if (cur->SystemID != NULL) {
54 xmlBufferWriteChar(buf, " SYSTEM ");
55 xmlBufferWriteQuotedString(buf, cur->SystemID);
56 }
Daniel Veillard167b5091999-07-07 04:19:20 +000057 xmlBufferWriteChar(buf, ">\n");
58}
59
60/**
61 * htmlAttrDump:
62 * @buf: the HTML buffer output
63 * @doc: the document
64 * @cur: the attribute pointer
65 *
66 * Dump an HTML attribute
67 */
68static void
69htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +000070 xmlChar *value;
Daniel Veillard167b5091999-07-07 04:19:20 +000071
72 if (cur == NULL) {
73 fprintf(stderr, "htmlAttrDump : property == NULL\n");
74 return;
75 }
76 xmlBufferWriteChar(buf, " ");
77 xmlBufferWriteCHAR(buf, cur->name);
78 value = xmlNodeListGetString(doc, cur->val, 0);
79 if (value) {
80 xmlBufferWriteChar(buf, "=");
81 xmlBufferWriteQuotedString(buf, value);
Daniel Veillard6454aec1999-09-02 22:04:43 +000082 xmlFree(value);
Daniel Veillard167b5091999-07-07 04:19:20 +000083 } else {
84 xmlBufferWriteChar(buf, "=\"\"");
85 }
86}
87
88/**
89 * htmlAttrListDump:
90 * @buf: the HTML buffer output
91 * @doc: the document
92 * @cur: the first attribute pointer
93 *
94 * Dump a list of HTML attributes
95 */
96static void
97htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
98 if (cur == NULL) {
99 fprintf(stderr, "htmlAttrListDump : property == NULL\n");
100 return;
101 }
102 while (cur != NULL) {
103 htmlAttrDump(buf, doc, cur);
104 cur = cur->next;
105 }
106}
107
108
109static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000110htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000111/**
112 * htmlNodeListDump:
113 * @buf: the HTML buffer output
114 * @doc: the document
115 * @cur: the first node
Daniel Veillard167b5091999-07-07 04:19:20 +0000116 *
117 * Dump an HTML node list, recursive behaviour,children are printed too.
118 */
119static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000120htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000121 if (cur == NULL) {
122 fprintf(stderr, "htmlNodeListDump : node == NULL\n");
123 return;
124 }
125 while (cur != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000126 htmlNodeDump(buf, doc, cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000127 cur = cur->next;
128 }
129}
130
131/**
132 * htmlNodeDump:
133 * @buf: the HTML buffer output
134 * @doc: the document
135 * @cur: the current node
Daniel Veillard167b5091999-07-07 04:19:20 +0000136 *
137 * Dump an HTML node, recursive behaviour,children are printed too.
138 */
139static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000140htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000141 htmlElemDescPtr info;
Daniel Veillard167b5091999-07-07 04:19:20 +0000142
143 if (cur == NULL) {
144 fprintf(stderr, "htmlNodeDump : node == NULL\n");
145 return;
146 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000147 /*
148 * Special cases.
149 */
Daniel Veillard167b5091999-07-07 04:19:20 +0000150 if (cur->type == HTML_TEXT_NODE) {
151 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000152 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000153
Daniel Veillard82150d81999-07-07 07:32:15 +0000154 /* uses the HTML encoding routine !!!!!!!!!! */
Daniel Veillard167b5091999-07-07 04:19:20 +0000155 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
156 if (buffer != NULL) {
157 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000158 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000159 }
160 }
161 return;
162 }
163 if (cur->type == HTML_COMMENT_NODE) {
164 if (cur->content != NULL) {
165 xmlBufferWriteChar(buf, "<!--");
166 xmlBufferWriteCHAR(buf, cur->content);
167 xmlBufferWriteChar(buf, "-->");
168 }
169 return;
170 }
171 if (cur->type == HTML_ENTITY_REF_NODE) {
172 xmlBufferWriteChar(buf, "&");
173 xmlBufferWriteCHAR(buf, cur->name);
174 xmlBufferWriteChar(buf, ";");
175 return;
176 }
177
Daniel Veillard82150d81999-07-07 07:32:15 +0000178 /*
179 * Get specific HTmL info for taht node.
180 */
181 info = htmlTagLookup(cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000182
Daniel Veillard82150d81999-07-07 07:32:15 +0000183 xmlBufferWriteChar(buf, "<");
Daniel Veillard167b5091999-07-07 04:19:20 +0000184 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000185 if (cur->properties != NULL)
186 htmlAttrListDump(buf, doc, cur->properties);
187
Daniel Veillard82150d81999-07-07 07:32:15 +0000188 if (info->empty) {
189 xmlBufferWriteChar(buf, ">");
190 if (cur->next != NULL) {
191 if ((cur->next->type != HTML_TEXT_NODE) &&
192 (cur->next->type != HTML_ENTITY_REF_NODE))
193 xmlBufferWriteChar(buf, "\n");
194 }
195 return;
196 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000197 if ((cur->content == NULL) && (cur->childs == NULL)) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000198 if (info->endTag != 0)
199 xmlBufferWriteChar(buf, ">");
200 else {
201 xmlBufferWriteChar(buf, "></");
202 xmlBufferWriteCHAR(buf, cur->name);
203 xmlBufferWriteChar(buf, ">");
204 }
205 if (cur->next != NULL) {
206 if ((cur->next->type != HTML_TEXT_NODE) &&
207 (cur->next->type != HTML_ENTITY_REF_NODE))
208 xmlBufferWriteChar(buf, "\n");
209 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000210 return;
211 }
212 xmlBufferWriteChar(buf, ">");
213 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000214 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000215
216 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
217 if (buffer != NULL) {
218 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000219 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000220 }
221 }
222 if (cur->childs != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000223 if ((cur->childs->type != HTML_TEXT_NODE) &&
224 (cur->childs->type != HTML_ENTITY_REF_NODE))
225 xmlBufferWriteChar(buf, "\n");
226 htmlNodeListDump(buf, doc, cur->childs);
227 if ((cur->last->type != HTML_TEXT_NODE) &&
228 (cur->last->type != HTML_ENTITY_REF_NODE))
229 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000230 }
231 xmlBufferWriteChar(buf, "</");
Daniel Veillard167b5091999-07-07 04:19:20 +0000232 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard82150d81999-07-07 07:32:15 +0000233 xmlBufferWriteChar(buf, ">");
234 if (cur->next != NULL) {
235 if ((cur->next->type != HTML_TEXT_NODE) &&
236 (cur->next->type != HTML_ENTITY_REF_NODE))
237 xmlBufferWriteChar(buf, "\n");
238 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000239}
240
241/**
242 * htmlDocContentDump:
243 * @buf: the HTML buffer output
244 * @cur: the document
245 *
246 * Dump an HTML document.
247 */
248static void
249htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000250 if (cur->intSubset != NULL)
251 htmlDtdDump(buf, cur);
252 if (cur->root != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000253 htmlNodeDump(buf, cur, cur->root);
Daniel Veillard167b5091999-07-07 04:19:20 +0000254 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000255 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000256}
257
258/**
259 * htmlDocDumpMemory:
260 * @cur: the document
261 * @mem: OUT: the memory pointer
262 * @size: OUT: the memory lenght
263 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000264 * Dump an HTML document in memory and return the xmlChar * and it's size.
Daniel Veillard167b5091999-07-07 04:19:20 +0000265 * It's up to the caller to free the memory.
266 */
267void
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000268htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000269 xmlBufferPtr buf;
270
271 if (cur == NULL) {
272#ifdef DEBUG_TREE
273 fprintf(stderr, "xmlDocDumpMemory : document == NULL\n");
274#endif
275 *mem = NULL;
276 *size = 0;
277 return;
278 }
279 buf = xmlBufferCreate();
280 if (buf == NULL) {
281 *mem = NULL;
282 *size = 0;
283 return;
284 }
285 htmlDocContentDump(buf, cur);
286 *mem = buf->content;
287 *size = buf->use;
288 memset(buf, -1, sizeof(xmlBuffer));
Daniel Veillard6454aec1999-09-02 22:04:43 +0000289 xmlFree(buf);
Daniel Veillard167b5091999-07-07 04:19:20 +0000290}
291
292
293/**
294 * htmlDocDump:
295 * @f: the FILE*
296 * @cur: the document
297 *
298 * Dump an HTML document to an open FILE.
299 */
300void
301htmlDocDump(FILE *f, xmlDocPtr cur) {
302 xmlBufferPtr buf;
303
304 if (cur == NULL) {
305#ifdef DEBUG_TREE
306 fprintf(stderr, "xmlDocDump : document == NULL\n");
307#endif
308 return;
309 }
310 buf = xmlBufferCreate();
311 if (buf == NULL) return;
312 htmlDocContentDump(buf, cur);
313 xmlBufferDump(f, buf);
314 xmlBufferFree(buf);
315}
316
317/**
318 * htmlSaveFile:
319 * @filename: the filename
320 * @cur: the document
321 *
322 * Dump an HTML document to a file.
323 *
324 * returns: the number of byte written or -1 in case of failure.
325 */
326int
327htmlSaveFile(const char *filename, xmlDocPtr cur) {
328 xmlBufferPtr buf;
329 FILE *output = NULL;
330 int ret;
331
332 /*
333 * save the content to a temp buffer.
334 */
335 buf = xmlBufferCreate();
336 if (buf == NULL) return(0);
337 htmlDocContentDump(buf, cur);
338
339 output = fopen(filename, "w");
340 if (output == NULL) return(-1);
341 ret = xmlBufferDump(output, buf);
342 fclose(output);
343
344 xmlBufferFree(buf);
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000345 return(ret * sizeof(xmlChar));
Daniel Veillard167b5091999-07-07 04:19:20 +0000346}
347