blob: 87363ec4e64553e0c1f3a30eb226f0ffcd3613a0 [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
9#include "config.h"
10#include <stdio.h>
11#include <ctype.h>
12#include <stdlib.h>
13#include <string.h> /* for memset() only ! */
14
Daniel Veillard6454aec1999-09-02 22:04:43 +000015#include "xmlmemory.h"
Daniel Veillard82150d81999-07-07 07:32:15 +000016#include "HTMLparser.h"
17#include "HTMLtree.h"
Daniel Veillard167b5091999-07-07 04:19:20 +000018#include "entities.h"
19#include "valid.h"
20
Daniel Veillard167b5091999-07-07 04:19:20 +000021/**
22 * htmlDtdDump:
23 * @buf: the HTML buffer output
24 * @doc: the document
25 *
26 * Dump the HTML document DTD, if any.
27 */
28static void
29htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
30 xmlDtdPtr cur = doc->intSubset;
31
32 if (cur == NULL) {
33 fprintf(stderr, "htmlDtdDump : no internal subset\n");
34 return;
35 }
36 xmlBufferWriteChar(buf, "<!DOCTYPE ");
37 xmlBufferWriteCHAR(buf, cur->name);
38 if (cur->ExternalID != NULL) {
39 xmlBufferWriteChar(buf, " PUBLIC ");
40 xmlBufferWriteQuotedString(buf, cur->ExternalID);
Daniel Veillard1566d3a1999-07-15 14:24:29 +000041 if (cur->SystemID != NULL) {
42 xmlBufferWriteChar(buf, " ");
43 xmlBufferWriteQuotedString(buf, cur->SystemID);
44 }
Daniel Veillard167b5091999-07-07 04:19:20 +000045 } else if (cur->SystemID != NULL) {
46 xmlBufferWriteChar(buf, " SYSTEM ");
47 xmlBufferWriteQuotedString(buf, cur->SystemID);
48 }
Daniel Veillard167b5091999-07-07 04:19:20 +000049 xmlBufferWriteChar(buf, ">\n");
50}
51
52/**
53 * htmlAttrDump:
54 * @buf: the HTML buffer output
55 * @doc: the document
56 * @cur: the attribute pointer
57 *
58 * Dump an HTML attribute
59 */
60static void
61htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
62 CHAR *value;
63
64 if (cur == NULL) {
65 fprintf(stderr, "htmlAttrDump : property == NULL\n");
66 return;
67 }
68 xmlBufferWriteChar(buf, " ");
69 xmlBufferWriteCHAR(buf, cur->name);
70 value = xmlNodeListGetString(doc, cur->val, 0);
71 if (value) {
72 xmlBufferWriteChar(buf, "=");
73 xmlBufferWriteQuotedString(buf, value);
Daniel Veillard6454aec1999-09-02 22:04:43 +000074 xmlFree(value);
Daniel Veillard167b5091999-07-07 04:19:20 +000075 } else {
76 xmlBufferWriteChar(buf, "=\"\"");
77 }
78}
79
80/**
81 * htmlAttrListDump:
82 * @buf: the HTML buffer output
83 * @doc: the document
84 * @cur: the first attribute pointer
85 *
86 * Dump a list of HTML attributes
87 */
88static void
89htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
90 if (cur == NULL) {
91 fprintf(stderr, "htmlAttrListDump : property == NULL\n");
92 return;
93 }
94 while (cur != NULL) {
95 htmlAttrDump(buf, doc, cur);
96 cur = cur->next;
97 }
98}
99
100
101static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000102htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000103/**
104 * htmlNodeListDump:
105 * @buf: the HTML buffer output
106 * @doc: the document
107 * @cur: the first node
Daniel Veillard167b5091999-07-07 04:19:20 +0000108 *
109 * Dump an HTML node list, recursive behaviour,children are printed too.
110 */
111static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000112htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000113 if (cur == NULL) {
114 fprintf(stderr, "htmlNodeListDump : node == NULL\n");
115 return;
116 }
117 while (cur != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000118 htmlNodeDump(buf, doc, cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000119 cur = cur->next;
120 }
121}
122
123/**
124 * htmlNodeDump:
125 * @buf: the HTML buffer output
126 * @doc: the document
127 * @cur: the current node
Daniel Veillard167b5091999-07-07 04:19:20 +0000128 *
129 * Dump an HTML node, recursive behaviour,children are printed too.
130 */
131static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000132htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000133 htmlElemDescPtr info;
Daniel Veillard167b5091999-07-07 04:19:20 +0000134
135 if (cur == NULL) {
136 fprintf(stderr, "htmlNodeDump : node == NULL\n");
137 return;
138 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000139 /*
140 * Special cases.
141 */
Daniel Veillard167b5091999-07-07 04:19:20 +0000142 if (cur->type == HTML_TEXT_NODE) {
143 if (cur->content != NULL) {
144 CHAR *buffer;
145
Daniel Veillard82150d81999-07-07 07:32:15 +0000146 /* uses the HTML encoding routine !!!!!!!!!! */
Daniel Veillard167b5091999-07-07 04:19:20 +0000147 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
148 if (buffer != NULL) {
149 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000150 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000151 }
152 }
153 return;
154 }
155 if (cur->type == HTML_COMMENT_NODE) {
156 if (cur->content != NULL) {
157 xmlBufferWriteChar(buf, "<!--");
158 xmlBufferWriteCHAR(buf, cur->content);
159 xmlBufferWriteChar(buf, "-->");
160 }
161 return;
162 }
163 if (cur->type == HTML_ENTITY_REF_NODE) {
164 xmlBufferWriteChar(buf, "&");
165 xmlBufferWriteCHAR(buf, cur->name);
166 xmlBufferWriteChar(buf, ";");
167 return;
168 }
169
Daniel Veillard82150d81999-07-07 07:32:15 +0000170 /*
171 * Get specific HTmL info for taht node.
172 */
173 info = htmlTagLookup(cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000174
Daniel Veillard82150d81999-07-07 07:32:15 +0000175 xmlBufferWriteChar(buf, "<");
Daniel Veillard167b5091999-07-07 04:19:20 +0000176 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000177 if (cur->properties != NULL)
178 htmlAttrListDump(buf, doc, cur->properties);
179
Daniel Veillard82150d81999-07-07 07:32:15 +0000180 if (info->empty) {
181 xmlBufferWriteChar(buf, ">");
182 if (cur->next != NULL) {
183 if ((cur->next->type != HTML_TEXT_NODE) &&
184 (cur->next->type != HTML_ENTITY_REF_NODE))
185 xmlBufferWriteChar(buf, "\n");
186 }
187 return;
188 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000189 if ((cur->content == NULL) && (cur->childs == NULL)) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000190 if (info->endTag != 0)
191 xmlBufferWriteChar(buf, ">");
192 else {
193 xmlBufferWriteChar(buf, "></");
194 xmlBufferWriteCHAR(buf, cur->name);
195 xmlBufferWriteChar(buf, ">");
196 }
197 if (cur->next != NULL) {
198 if ((cur->next->type != HTML_TEXT_NODE) &&
199 (cur->next->type != HTML_ENTITY_REF_NODE))
200 xmlBufferWriteChar(buf, "\n");
201 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000202 return;
203 }
204 xmlBufferWriteChar(buf, ">");
205 if (cur->content != NULL) {
206 CHAR *buffer;
207
208 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
209 if (buffer != NULL) {
210 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000211 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000212 }
213 }
214 if (cur->childs != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000215 if ((cur->childs->type != HTML_TEXT_NODE) &&
216 (cur->childs->type != HTML_ENTITY_REF_NODE))
217 xmlBufferWriteChar(buf, "\n");
218 htmlNodeListDump(buf, doc, cur->childs);
219 if ((cur->last->type != HTML_TEXT_NODE) &&
220 (cur->last->type != HTML_ENTITY_REF_NODE))
221 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000222 }
223 xmlBufferWriteChar(buf, "</");
Daniel Veillard167b5091999-07-07 04:19:20 +0000224 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard82150d81999-07-07 07:32:15 +0000225 xmlBufferWriteChar(buf, ">");
226 if (cur->next != NULL) {
227 if ((cur->next->type != HTML_TEXT_NODE) &&
228 (cur->next->type != HTML_ENTITY_REF_NODE))
229 xmlBufferWriteChar(buf, "\n");
230 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000231}
232
233/**
234 * htmlDocContentDump:
235 * @buf: the HTML buffer output
236 * @cur: the document
237 *
238 * Dump an HTML document.
239 */
240static void
241htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000242 if (cur->intSubset != NULL)
243 htmlDtdDump(buf, cur);
244 if (cur->root != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000245 htmlNodeDump(buf, cur, cur->root);
Daniel Veillard167b5091999-07-07 04:19:20 +0000246 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000247 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000248}
249
250/**
251 * htmlDocDumpMemory:
252 * @cur: the document
253 * @mem: OUT: the memory pointer
254 * @size: OUT: the memory lenght
255 *
256 * Dump an HTML document in memory and return the CHAR * and it's size.
257 * It's up to the caller to free the memory.
258 */
259void
260htmlDocDumpMemory(xmlDocPtr cur, CHAR**mem, int *size) {
261 xmlBufferPtr buf;
262
263 if (cur == NULL) {
264#ifdef DEBUG_TREE
265 fprintf(stderr, "xmlDocDumpMemory : document == NULL\n");
266#endif
267 *mem = NULL;
268 *size = 0;
269 return;
270 }
271 buf = xmlBufferCreate();
272 if (buf == NULL) {
273 *mem = NULL;
274 *size = 0;
275 return;
276 }
277 htmlDocContentDump(buf, cur);
278 *mem = buf->content;
279 *size = buf->use;
280 memset(buf, -1, sizeof(xmlBuffer));
Daniel Veillard6454aec1999-09-02 22:04:43 +0000281 xmlFree(buf);
Daniel Veillard167b5091999-07-07 04:19:20 +0000282}
283
284
285/**
286 * htmlDocDump:
287 * @f: the FILE*
288 * @cur: the document
289 *
290 * Dump an HTML document to an open FILE.
291 */
292void
293htmlDocDump(FILE *f, xmlDocPtr cur) {
294 xmlBufferPtr buf;
295
296 if (cur == NULL) {
297#ifdef DEBUG_TREE
298 fprintf(stderr, "xmlDocDump : document == NULL\n");
299#endif
300 return;
301 }
302 buf = xmlBufferCreate();
303 if (buf == NULL) return;
304 htmlDocContentDump(buf, cur);
305 xmlBufferDump(f, buf);
306 xmlBufferFree(buf);
307}
308
309/**
310 * htmlSaveFile:
311 * @filename: the filename
312 * @cur: the document
313 *
314 * Dump an HTML document to a file.
315 *
316 * returns: the number of byte written or -1 in case of failure.
317 */
318int
319htmlSaveFile(const char *filename, xmlDocPtr cur) {
320 xmlBufferPtr buf;
321 FILE *output = NULL;
322 int ret;
323
324 /*
325 * save the content to a temp buffer.
326 */
327 buf = xmlBufferCreate();
328 if (buf == NULL) return(0);
329 htmlDocContentDump(buf, cur);
330
331 output = fopen(filename, "w");
332 if (output == NULL) return(-1);
333 ret = xmlBufferDump(output, buf);
334 fclose(output);
335
336 xmlBufferFree(buf);
337 return(ret * sizeof(CHAR));
338}
339