blob: 0d4b45f1794c151d60d103227394062cdf08b549 [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
9#include "config.h"
10#include <stdio.h>
11#include <ctype.h>
12#include <stdlib.h>
13#include <string.h> /* for memset() only ! */
14
Daniel Veillard82150d81999-07-07 07:32:15 +000015#include "HTMLparser.h"
16#include "HTMLtree.h"
Daniel Veillard167b5091999-07-07 04:19:20 +000017#include "entities.h"
18#include "valid.h"
19
Daniel Veillard167b5091999-07-07 04:19:20 +000020/**
21 * htmlDtdDump:
22 * @buf: the HTML buffer output
23 * @doc: the document
24 *
25 * Dump the HTML document DTD, if any.
26 */
27static void
28htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
29 xmlDtdPtr cur = doc->intSubset;
30
31 if (cur == NULL) {
32 fprintf(stderr, "htmlDtdDump : no internal subset\n");
33 return;
34 }
35 xmlBufferWriteChar(buf, "<!DOCTYPE ");
36 xmlBufferWriteCHAR(buf, cur->name);
37 if (cur->ExternalID != NULL) {
38 xmlBufferWriteChar(buf, " PUBLIC ");
39 xmlBufferWriteQuotedString(buf, cur->ExternalID);
40 xmlBufferWriteChar(buf, " ");
41 xmlBufferWriteQuotedString(buf, cur->SystemID);
42 } else if (cur->SystemID != NULL) {
43 xmlBufferWriteChar(buf, " SYSTEM ");
44 xmlBufferWriteQuotedString(buf, cur->SystemID);
45 }
Daniel Veillard167b5091999-07-07 04:19:20 +000046 xmlBufferWriteChar(buf, ">\n");
47}
48
49/**
50 * htmlAttrDump:
51 * @buf: the HTML buffer output
52 * @doc: the document
53 * @cur: the attribute pointer
54 *
55 * Dump an HTML attribute
56 */
57static void
58htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
59 CHAR *value;
60
61 if (cur == NULL) {
62 fprintf(stderr, "htmlAttrDump : property == NULL\n");
63 return;
64 }
65 xmlBufferWriteChar(buf, " ");
66 xmlBufferWriteCHAR(buf, cur->name);
67 value = xmlNodeListGetString(doc, cur->val, 0);
68 if (value) {
69 xmlBufferWriteChar(buf, "=");
70 xmlBufferWriteQuotedString(buf, value);
71 free(value);
72 } else {
73 xmlBufferWriteChar(buf, "=\"\"");
74 }
75}
76
77/**
78 * htmlAttrListDump:
79 * @buf: the HTML buffer output
80 * @doc: the document
81 * @cur: the first attribute pointer
82 *
83 * Dump a list of HTML attributes
84 */
85static void
86htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
87 if (cur == NULL) {
88 fprintf(stderr, "htmlAttrListDump : property == NULL\n");
89 return;
90 }
91 while (cur != NULL) {
92 htmlAttrDump(buf, doc, cur);
93 cur = cur->next;
94 }
95}
96
97
98static void
Daniel Veillard82150d81999-07-07 07:32:15 +000099htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000100/**
101 * htmlNodeListDump:
102 * @buf: the HTML buffer output
103 * @doc: the document
104 * @cur: the first node
Daniel Veillard167b5091999-07-07 04:19:20 +0000105 *
106 * Dump an HTML node list, recursive behaviour,children are printed too.
107 */
108static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000109htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000110 if (cur == NULL) {
111 fprintf(stderr, "htmlNodeListDump : node == NULL\n");
112 return;
113 }
114 while (cur != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000115 htmlNodeDump(buf, doc, cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000116 cur = cur->next;
117 }
118}
119
120/**
121 * htmlNodeDump:
122 * @buf: the HTML buffer output
123 * @doc: the document
124 * @cur: the current node
Daniel Veillard167b5091999-07-07 04:19:20 +0000125 *
126 * Dump an HTML node, recursive behaviour,children are printed too.
127 */
128static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000129htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000130 int i;
Daniel Veillard82150d81999-07-07 07:32:15 +0000131 htmlElemDescPtr info;
Daniel Veillard167b5091999-07-07 04:19:20 +0000132
133 if (cur == NULL) {
134 fprintf(stderr, "htmlNodeDump : node == NULL\n");
135 return;
136 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000137 /*
138 * Special cases.
139 */
Daniel Veillard167b5091999-07-07 04:19:20 +0000140 if (cur->type == HTML_TEXT_NODE) {
141 if (cur->content != NULL) {
142 CHAR *buffer;
143
Daniel Veillard82150d81999-07-07 07:32:15 +0000144 /* uses the HTML encoding routine !!!!!!!!!! */
Daniel Veillard167b5091999-07-07 04:19:20 +0000145 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
146 if (buffer != NULL) {
147 xmlBufferWriteCHAR(buf, buffer);
148 free(buffer);
149 }
150 }
151 return;
152 }
153 if (cur->type == HTML_COMMENT_NODE) {
154 if (cur->content != NULL) {
155 xmlBufferWriteChar(buf, "<!--");
156 xmlBufferWriteCHAR(buf, cur->content);
157 xmlBufferWriteChar(buf, "-->");
158 }
159 return;
160 }
161 if (cur->type == HTML_ENTITY_REF_NODE) {
162 xmlBufferWriteChar(buf, "&");
163 xmlBufferWriteCHAR(buf, cur->name);
164 xmlBufferWriteChar(buf, ";");
165 return;
166 }
167
Daniel Veillard82150d81999-07-07 07:32:15 +0000168 /*
169 * Get specific HTmL info for taht node.
170 */
171 info = htmlTagLookup(cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000172
Daniel Veillard82150d81999-07-07 07:32:15 +0000173 xmlBufferWriteChar(buf, "<");
Daniel Veillard167b5091999-07-07 04:19:20 +0000174 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000175 if (cur->properties != NULL)
176 htmlAttrListDump(buf, doc, cur->properties);
177
Daniel Veillard82150d81999-07-07 07:32:15 +0000178 if (info->empty) {
179 xmlBufferWriteChar(buf, ">");
180 if (cur->next != NULL) {
181 if ((cur->next->type != HTML_TEXT_NODE) &&
182 (cur->next->type != HTML_ENTITY_REF_NODE))
183 xmlBufferWriteChar(buf, "\n");
184 }
185 return;
186 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000187 if ((cur->content == NULL) && (cur->childs == NULL)) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000188 if (info->endTag != 0)
189 xmlBufferWriteChar(buf, ">");
190 else {
191 xmlBufferWriteChar(buf, "></");
192 xmlBufferWriteCHAR(buf, cur->name);
193 xmlBufferWriteChar(buf, ">");
194 }
195 if (cur->next != NULL) {
196 if ((cur->next->type != HTML_TEXT_NODE) &&
197 (cur->next->type != HTML_ENTITY_REF_NODE))
198 xmlBufferWriteChar(buf, "\n");
199 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000200 return;
201 }
202 xmlBufferWriteChar(buf, ">");
203 if (cur->content != NULL) {
204 CHAR *buffer;
205
206 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
207 if (buffer != NULL) {
208 xmlBufferWriteCHAR(buf, buffer);
209 free(buffer);
210 }
211 }
212 if (cur->childs != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000213 if ((cur->childs->type != HTML_TEXT_NODE) &&
214 (cur->childs->type != HTML_ENTITY_REF_NODE))
215 xmlBufferWriteChar(buf, "\n");
216 htmlNodeListDump(buf, doc, cur->childs);
217 if ((cur->last->type != HTML_TEXT_NODE) &&
218 (cur->last->type != HTML_ENTITY_REF_NODE))
219 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000220 }
221 xmlBufferWriteChar(buf, "</");
Daniel Veillard167b5091999-07-07 04:19:20 +0000222 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard82150d81999-07-07 07:32:15 +0000223 xmlBufferWriteChar(buf, ">");
224 if (cur->next != NULL) {
225 if ((cur->next->type != HTML_TEXT_NODE) &&
226 (cur->next->type != HTML_ENTITY_REF_NODE))
227 xmlBufferWriteChar(buf, "\n");
228 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000229}
230
231/**
232 * htmlDocContentDump:
233 * @buf: the HTML buffer output
234 * @cur: the document
235 *
236 * Dump an HTML document.
237 */
238static void
239htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000240 if (cur->intSubset != NULL)
241 htmlDtdDump(buf, cur);
242 if (cur->root != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000243 htmlNodeDump(buf, cur, cur->root);
Daniel Veillard167b5091999-07-07 04:19:20 +0000244 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000245 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000246}
247
248/**
249 * htmlDocDumpMemory:
250 * @cur: the document
251 * @mem: OUT: the memory pointer
252 * @size: OUT: the memory lenght
253 *
254 * Dump an HTML document in memory and return the CHAR * and it's size.
255 * It's up to the caller to free the memory.
256 */
257void
258htmlDocDumpMemory(xmlDocPtr cur, CHAR**mem, int *size) {
259 xmlBufferPtr buf;
260
261 if (cur == NULL) {
262#ifdef DEBUG_TREE
263 fprintf(stderr, "xmlDocDumpMemory : document == NULL\n");
264#endif
265 *mem = NULL;
266 *size = 0;
267 return;
268 }
269 buf = xmlBufferCreate();
270 if (buf == NULL) {
271 *mem = NULL;
272 *size = 0;
273 return;
274 }
275 htmlDocContentDump(buf, cur);
276 *mem = buf->content;
277 *size = buf->use;
278 memset(buf, -1, sizeof(xmlBuffer));
279 free(buf);
280}
281
282
283/**
284 * htmlDocDump:
285 * @f: the FILE*
286 * @cur: the document
287 *
288 * Dump an HTML document to an open FILE.
289 */
290void
291htmlDocDump(FILE *f, xmlDocPtr cur) {
292 xmlBufferPtr buf;
293
294 if (cur == NULL) {
295#ifdef DEBUG_TREE
296 fprintf(stderr, "xmlDocDump : document == NULL\n");
297#endif
298 return;
299 }
300 buf = xmlBufferCreate();
301 if (buf == NULL) return;
302 htmlDocContentDump(buf, cur);
303 xmlBufferDump(f, buf);
304 xmlBufferFree(buf);
305}
306
307/**
308 * htmlSaveFile:
309 * @filename: the filename
310 * @cur: the document
311 *
312 * Dump an HTML document to a file.
313 *
314 * returns: the number of byte written or -1 in case of failure.
315 */
316int
317htmlSaveFile(const char *filename, xmlDocPtr cur) {
318 xmlBufferPtr buf;
319 FILE *output = NULL;
320 int ret;
321
322 /*
323 * save the content to a temp buffer.
324 */
325 buf = xmlBufferCreate();
326 if (buf == NULL) return(0);
327 htmlDocContentDump(buf, cur);
328
329 output = fopen(filename, "w");
330 if (output == NULL) return(-1);
331 ret = xmlBufferDump(output, buf);
332 fclose(output);
333
334 xmlBufferFree(buf);
335 return(ret * sizeof(CHAR));
336}
337