blob: 2a05b3ed6819cccf4f815badfac72f729445773c [file] [log] [blame]
Daniel Veillard167b5091999-07-07 04:19:20 +00001/*
2 * HTMLtree.c : implementation of access functions for an HTML tree.
3 *
4 * See Copyright for the status of this software.
5 *
6 * Daniel.Veillard@w3.org
7 */
8
9#include "config.h"
10#include <stdio.h>
11#include <ctype.h>
12#include <stdlib.h>
13#include <string.h> /* for memset() only ! */
14
Daniel Veillard82150d81999-07-07 07:32:15 +000015#include "HTMLparser.h"
16#include "HTMLtree.h"
Daniel Veillard167b5091999-07-07 04:19:20 +000017#include "entities.h"
18#include "valid.h"
19
Daniel Veillard167b5091999-07-07 04:19:20 +000020/**
21 * htmlDtdDump:
22 * @buf: the HTML buffer output
23 * @doc: the document
24 *
25 * Dump the HTML document DTD, if any.
26 */
27static void
28htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
29 xmlDtdPtr cur = doc->intSubset;
30
31 if (cur == NULL) {
32 fprintf(stderr, "htmlDtdDump : no internal subset\n");
33 return;
34 }
35 xmlBufferWriteChar(buf, "<!DOCTYPE ");
36 xmlBufferWriteCHAR(buf, cur->name);
37 if (cur->ExternalID != NULL) {
38 xmlBufferWriteChar(buf, " PUBLIC ");
39 xmlBufferWriteQuotedString(buf, cur->ExternalID);
Daniel Veillard1566d3a1999-07-15 14:24:29 +000040 if (cur->SystemID != NULL) {
41 xmlBufferWriteChar(buf, " ");
42 xmlBufferWriteQuotedString(buf, cur->SystemID);
43 }
Daniel Veillard167b5091999-07-07 04:19:20 +000044 } else if (cur->SystemID != NULL) {
45 xmlBufferWriteChar(buf, " SYSTEM ");
46 xmlBufferWriteQuotedString(buf, cur->SystemID);
47 }
Daniel Veillard167b5091999-07-07 04:19:20 +000048 xmlBufferWriteChar(buf, ">\n");
49}
50
51/**
52 * htmlAttrDump:
53 * @buf: the HTML buffer output
54 * @doc: the document
55 * @cur: the attribute pointer
56 *
57 * Dump an HTML attribute
58 */
59static void
60htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
61 CHAR *value;
62
63 if (cur == NULL) {
64 fprintf(stderr, "htmlAttrDump : property == NULL\n");
65 return;
66 }
67 xmlBufferWriteChar(buf, " ");
68 xmlBufferWriteCHAR(buf, cur->name);
69 value = xmlNodeListGetString(doc, cur->val, 0);
70 if (value) {
71 xmlBufferWriteChar(buf, "=");
72 xmlBufferWriteQuotedString(buf, value);
73 free(value);
74 } else {
75 xmlBufferWriteChar(buf, "=\"\"");
76 }
77}
78
79/**
80 * htmlAttrListDump:
81 * @buf: the HTML buffer output
82 * @doc: the document
83 * @cur: the first attribute pointer
84 *
85 * Dump a list of HTML attributes
86 */
87static void
88htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
89 if (cur == NULL) {
90 fprintf(stderr, "htmlAttrListDump : property == NULL\n");
91 return;
92 }
93 while (cur != NULL) {
94 htmlAttrDump(buf, doc, cur);
95 cur = cur->next;
96 }
97}
98
99
100static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000101htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000102/**
103 * htmlNodeListDump:
104 * @buf: the HTML buffer output
105 * @doc: the document
106 * @cur: the first node
Daniel Veillard167b5091999-07-07 04:19:20 +0000107 *
108 * Dump an HTML node list, recursive behaviour,children are printed too.
109 */
110static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000111htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000112 if (cur == NULL) {
113 fprintf(stderr, "htmlNodeListDump : node == NULL\n");
114 return;
115 }
116 while (cur != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000117 htmlNodeDump(buf, doc, cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000118 cur = cur->next;
119 }
120}
121
122/**
123 * htmlNodeDump:
124 * @buf: the HTML buffer output
125 * @doc: the document
126 * @cur: the current node
Daniel Veillard167b5091999-07-07 04:19:20 +0000127 *
128 * Dump an HTML node, recursive behaviour,children are printed too.
129 */
130static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000131htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000132 htmlElemDescPtr info;
Daniel Veillard167b5091999-07-07 04:19:20 +0000133
134 if (cur == NULL) {
135 fprintf(stderr, "htmlNodeDump : node == NULL\n");
136 return;
137 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000138 /*
139 * Special cases.
140 */
Daniel Veillard167b5091999-07-07 04:19:20 +0000141 if (cur->type == HTML_TEXT_NODE) {
142 if (cur->content != NULL) {
143 CHAR *buffer;
144
Daniel Veillard82150d81999-07-07 07:32:15 +0000145 /* uses the HTML encoding routine !!!!!!!!!! */
Daniel Veillard167b5091999-07-07 04:19:20 +0000146 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
147 if (buffer != NULL) {
148 xmlBufferWriteCHAR(buf, buffer);
149 free(buffer);
150 }
151 }
152 return;
153 }
154 if (cur->type == HTML_COMMENT_NODE) {
155 if (cur->content != NULL) {
156 xmlBufferWriteChar(buf, "<!--");
157 xmlBufferWriteCHAR(buf, cur->content);
158 xmlBufferWriteChar(buf, "-->");
159 }
160 return;
161 }
162 if (cur->type == HTML_ENTITY_REF_NODE) {
163 xmlBufferWriteChar(buf, "&");
164 xmlBufferWriteCHAR(buf, cur->name);
165 xmlBufferWriteChar(buf, ";");
166 return;
167 }
168
Daniel Veillard82150d81999-07-07 07:32:15 +0000169 /*
170 * Get specific HTmL info for taht node.
171 */
172 info = htmlTagLookup(cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000173
Daniel Veillard82150d81999-07-07 07:32:15 +0000174 xmlBufferWriteChar(buf, "<");
Daniel Veillard167b5091999-07-07 04:19:20 +0000175 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000176 if (cur->properties != NULL)
177 htmlAttrListDump(buf, doc, cur->properties);
178
Daniel Veillard82150d81999-07-07 07:32:15 +0000179 if (info->empty) {
180 xmlBufferWriteChar(buf, ">");
181 if (cur->next != NULL) {
182 if ((cur->next->type != HTML_TEXT_NODE) &&
183 (cur->next->type != HTML_ENTITY_REF_NODE))
184 xmlBufferWriteChar(buf, "\n");
185 }
186 return;
187 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000188 if ((cur->content == NULL) && (cur->childs == NULL)) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000189 if (info->endTag != 0)
190 xmlBufferWriteChar(buf, ">");
191 else {
192 xmlBufferWriteChar(buf, "></");
193 xmlBufferWriteCHAR(buf, cur->name);
194 xmlBufferWriteChar(buf, ">");
195 }
196 if (cur->next != NULL) {
197 if ((cur->next->type != HTML_TEXT_NODE) &&
198 (cur->next->type != HTML_ENTITY_REF_NODE))
199 xmlBufferWriteChar(buf, "\n");
200 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000201 return;
202 }
203 xmlBufferWriteChar(buf, ">");
204 if (cur->content != NULL) {
205 CHAR *buffer;
206
207 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
208 if (buffer != NULL) {
209 xmlBufferWriteCHAR(buf, buffer);
210 free(buffer);
211 }
212 }
213 if (cur->childs != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000214 if ((cur->childs->type != HTML_TEXT_NODE) &&
215 (cur->childs->type != HTML_ENTITY_REF_NODE))
216 xmlBufferWriteChar(buf, "\n");
217 htmlNodeListDump(buf, doc, cur->childs);
218 if ((cur->last->type != HTML_TEXT_NODE) &&
219 (cur->last->type != HTML_ENTITY_REF_NODE))
220 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000221 }
222 xmlBufferWriteChar(buf, "</");
Daniel Veillard167b5091999-07-07 04:19:20 +0000223 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard82150d81999-07-07 07:32:15 +0000224 xmlBufferWriteChar(buf, ">");
225 if (cur->next != NULL) {
226 if ((cur->next->type != HTML_TEXT_NODE) &&
227 (cur->next->type != HTML_ENTITY_REF_NODE))
228 xmlBufferWriteChar(buf, "\n");
229 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000230}
231
232/**
233 * htmlDocContentDump:
234 * @buf: the HTML buffer output
235 * @cur: the document
236 *
237 * Dump an HTML document.
238 */
239static void
240htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000241 if (cur->intSubset != NULL)
242 htmlDtdDump(buf, cur);
243 if (cur->root != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000244 htmlNodeDump(buf, cur, cur->root);
Daniel Veillard167b5091999-07-07 04:19:20 +0000245 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000246 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000247}
248
249/**
250 * htmlDocDumpMemory:
251 * @cur: the document
252 * @mem: OUT: the memory pointer
253 * @size: OUT: the memory lenght
254 *
255 * Dump an HTML document in memory and return the CHAR * and it's size.
256 * It's up to the caller to free the memory.
257 */
258void
259htmlDocDumpMemory(xmlDocPtr cur, CHAR**mem, int *size) {
260 xmlBufferPtr buf;
261
262 if (cur == NULL) {
263#ifdef DEBUG_TREE
264 fprintf(stderr, "xmlDocDumpMemory : document == NULL\n");
265#endif
266 *mem = NULL;
267 *size = 0;
268 return;
269 }
270 buf = xmlBufferCreate();
271 if (buf == NULL) {
272 *mem = NULL;
273 *size = 0;
274 return;
275 }
276 htmlDocContentDump(buf, cur);
277 *mem = buf->content;
278 *size = buf->use;
279 memset(buf, -1, sizeof(xmlBuffer));
280 free(buf);
281}
282
283
284/**
285 * htmlDocDump:
286 * @f: the FILE*
287 * @cur: the document
288 *
289 * Dump an HTML document to an open FILE.
290 */
291void
292htmlDocDump(FILE *f, xmlDocPtr cur) {
293 xmlBufferPtr buf;
294
295 if (cur == NULL) {
296#ifdef DEBUG_TREE
297 fprintf(stderr, "xmlDocDump : document == NULL\n");
298#endif
299 return;
300 }
301 buf = xmlBufferCreate();
302 if (buf == NULL) return;
303 htmlDocContentDump(buf, cur);
304 xmlBufferDump(f, buf);
305 xmlBufferFree(buf);
306}
307
308/**
309 * htmlSaveFile:
310 * @filename: the filename
311 * @cur: the document
312 *
313 * Dump an HTML document to a file.
314 *
315 * returns: the number of byte written or -1 in case of failure.
316 */
317int
318htmlSaveFile(const char *filename, xmlDocPtr cur) {
319 xmlBufferPtr buf;
320 FILE *output = NULL;
321 int ret;
322
323 /*
324 * save the content to a temp buffer.
325 */
326 buf = xmlBufferCreate();
327 if (buf == NULL) return(0);
328 htmlDocContentDump(buf, cur);
329
330 output = fopen(filename, "w");
331 if (output == NULL) return(-1);
332 ret = xmlBufferDump(output, buf);
333 fclose(output);
334
335 xmlBufferFree(buf);
336 return(ret * sizeof(CHAR));
337}
338