blob: a114e6519dc1928dd865a9e3b873831e7f1f6635 [file] [log] [blame]
Daniel Veillard167b5091999-07-07 04:19:20 +00001/*
2 * HTMLtree.c : implementation of access functions for an HTML tree.
3 *
4 * See Copyright for the status of this software.
5 *
6 * Daniel.Veillard@w3.org
7 */
8
Daniel Veillard7f7d1111999-09-22 09:46:25 +00009
10#ifndef WIN32
Daniel Veillard167b5091999-07-07 04:19:20 +000011#include "config.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000012#endif
Daniel Veillard167b5091999-07-07 04:19:20 +000013#include <stdio.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000014#include <string.h> /* for memset() only ! */
15
Daniel Veillard7f7d1111999-09-22 09:46:25 +000016#ifdef HAVE_CTYPE_H
17#include <ctype.h>
18#endif
19#ifdef HAVE_STDLIB_H
20#include <stdlib.h>
21#endif
22
Daniel Veillard6454aec1999-09-02 22:04:43 +000023#include "xmlmemory.h"
Daniel Veillard82150d81999-07-07 07:32:15 +000024#include "HTMLparser.h"
25#include "HTMLtree.h"
Daniel Veillard167b5091999-07-07 04:19:20 +000026#include "entities.h"
27#include "valid.h"
28
Daniel Veillard167b5091999-07-07 04:19:20 +000029/**
30 * htmlDtdDump:
31 * @buf: the HTML buffer output
32 * @doc: the document
33 *
34 * Dump the HTML document DTD, if any.
35 */
36static void
37htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
38 xmlDtdPtr cur = doc->intSubset;
39
40 if (cur == NULL) {
41 fprintf(stderr, "htmlDtdDump : no internal subset\n");
42 return;
43 }
44 xmlBufferWriteChar(buf, "<!DOCTYPE ");
45 xmlBufferWriteCHAR(buf, cur->name);
46 if (cur->ExternalID != NULL) {
47 xmlBufferWriteChar(buf, " PUBLIC ");
48 xmlBufferWriteQuotedString(buf, cur->ExternalID);
Daniel Veillard1566d3a1999-07-15 14:24:29 +000049 if (cur->SystemID != NULL) {
50 xmlBufferWriteChar(buf, " ");
51 xmlBufferWriteQuotedString(buf, cur->SystemID);
52 }
Daniel Veillard167b5091999-07-07 04:19:20 +000053 } else if (cur->SystemID != NULL) {
54 xmlBufferWriteChar(buf, " SYSTEM ");
55 xmlBufferWriteQuotedString(buf, cur->SystemID);
56 }
Daniel Veillard167b5091999-07-07 04:19:20 +000057 xmlBufferWriteChar(buf, ">\n");
58}
59
60/**
61 * htmlAttrDump:
62 * @buf: the HTML buffer output
63 * @doc: the document
64 * @cur: the attribute pointer
65 *
66 * Dump an HTML attribute
67 */
68static void
69htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +000070 xmlChar *value;
Daniel Veillard167b5091999-07-07 04:19:20 +000071
72 if (cur == NULL) {
73 fprintf(stderr, "htmlAttrDump : property == NULL\n");
74 return;
75 }
76 xmlBufferWriteChar(buf, " ");
77 xmlBufferWriteCHAR(buf, cur->name);
78 value = xmlNodeListGetString(doc, cur->val, 0);
79 if (value) {
80 xmlBufferWriteChar(buf, "=");
81 xmlBufferWriteQuotedString(buf, value);
Daniel Veillard6454aec1999-09-02 22:04:43 +000082 xmlFree(value);
Daniel Veillard167b5091999-07-07 04:19:20 +000083 } else {
84 xmlBufferWriteChar(buf, "=\"\"");
85 }
86}
87
88/**
89 * htmlAttrListDump:
90 * @buf: the HTML buffer output
91 * @doc: the document
92 * @cur: the first attribute pointer
93 *
94 * Dump a list of HTML attributes
95 */
96static void
97htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
98 if (cur == NULL) {
99 fprintf(stderr, "htmlAttrListDump : property == NULL\n");
100 return;
101 }
102 while (cur != NULL) {
103 htmlAttrDump(buf, doc, cur);
104 cur = cur->next;
105 }
106}
107
108
109static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000110htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000111/**
112 * htmlNodeListDump:
113 * @buf: the HTML buffer output
114 * @doc: the document
115 * @cur: the first node
Daniel Veillard167b5091999-07-07 04:19:20 +0000116 *
117 * Dump an HTML node list, recursive behaviour,children are printed too.
118 */
119static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000120htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000121 if (cur == NULL) {
122 fprintf(stderr, "htmlNodeListDump : node == NULL\n");
123 return;
124 }
125 while (cur != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000126 htmlNodeDump(buf, doc, cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000127 cur = cur->next;
128 }
129}
130
131/**
132 * htmlNodeDump:
133 * @buf: the HTML buffer output
134 * @doc: the document
135 * @cur: the current node
Daniel Veillard167b5091999-07-07 04:19:20 +0000136 *
137 * Dump an HTML node, recursive behaviour,children are printed too.
138 */
139static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000140htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000141 htmlElemDescPtr info;
Daniel Veillard167b5091999-07-07 04:19:20 +0000142
143 if (cur == NULL) {
144 fprintf(stderr, "htmlNodeDump : node == NULL\n");
145 return;
146 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000147 /*
148 * Special cases.
149 */
Daniel Veillard167b5091999-07-07 04:19:20 +0000150 if (cur->type == HTML_TEXT_NODE) {
151 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000152 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000153
Daniel Veillard82150d81999-07-07 07:32:15 +0000154 /* uses the HTML encoding routine !!!!!!!!!! */
Daniel Veillardd293fd11999-12-01 09:51:45 +0000155#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000156 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000157#else
158 buffer = xmlEncodeEntitiesReentrant(doc,
159 xmlBufferContent(cur->content));
160#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000161 if (buffer != NULL) {
162 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000163 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000164 }
165 }
166 return;
167 }
168 if (cur->type == HTML_COMMENT_NODE) {
169 if (cur->content != NULL) {
170 xmlBufferWriteChar(buf, "<!--");
Daniel Veillardd293fd11999-12-01 09:51:45 +0000171#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000172 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000173#else
174 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
175#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000176 xmlBufferWriteChar(buf, "-->");
177 }
178 return;
179 }
180 if (cur->type == HTML_ENTITY_REF_NODE) {
181 xmlBufferWriteChar(buf, "&");
182 xmlBufferWriteCHAR(buf, cur->name);
183 xmlBufferWriteChar(buf, ";");
184 return;
185 }
186
Daniel Veillard82150d81999-07-07 07:32:15 +0000187 /*
188 * Get specific HTmL info for taht node.
189 */
190 info = htmlTagLookup(cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000191
Daniel Veillard82150d81999-07-07 07:32:15 +0000192 xmlBufferWriteChar(buf, "<");
Daniel Veillard167b5091999-07-07 04:19:20 +0000193 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000194 if (cur->properties != NULL)
195 htmlAttrListDump(buf, doc, cur->properties);
196
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000197 if ((info != NULL) && (info->empty)) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000198 xmlBufferWriteChar(buf, ">");
199 if (cur->next != NULL) {
200 if ((cur->next->type != HTML_TEXT_NODE) &&
201 (cur->next->type != HTML_ENTITY_REF_NODE))
202 xmlBufferWriteChar(buf, "\n");
203 }
204 return;
205 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000206 if ((cur->content == NULL) && (cur->childs == NULL)) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000207 if ((info != NULL) && (info->endTag != 0))
Daniel Veillard82150d81999-07-07 07:32:15 +0000208 xmlBufferWriteChar(buf, ">");
209 else {
210 xmlBufferWriteChar(buf, "></");
211 xmlBufferWriteCHAR(buf, cur->name);
212 xmlBufferWriteChar(buf, ">");
213 }
214 if (cur->next != NULL) {
215 if ((cur->next->type != HTML_TEXT_NODE) &&
216 (cur->next->type != HTML_ENTITY_REF_NODE))
217 xmlBufferWriteChar(buf, "\n");
218 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000219 return;
220 }
221 xmlBufferWriteChar(buf, ">");
222 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000223 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000224
Daniel Veillardd293fd11999-12-01 09:51:45 +0000225#ifndef XML_USE_BUFFER_CONTENT
226 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
227#else
228 buffer = xmlEncodeEntitiesReentrant(doc,
229 xmlBufferContent(cur->content));
230#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000231 if (buffer != NULL) {
232 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000233 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000234 }
235 }
236 if (cur->childs != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000237 if ((cur->childs->type != HTML_TEXT_NODE) &&
238 (cur->childs->type != HTML_ENTITY_REF_NODE))
239 xmlBufferWriteChar(buf, "\n");
240 htmlNodeListDump(buf, doc, cur->childs);
241 if ((cur->last->type != HTML_TEXT_NODE) &&
242 (cur->last->type != HTML_ENTITY_REF_NODE))
243 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000244 }
245 xmlBufferWriteChar(buf, "</");
Daniel Veillard167b5091999-07-07 04:19:20 +0000246 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard82150d81999-07-07 07:32:15 +0000247 xmlBufferWriteChar(buf, ">");
248 if (cur->next != NULL) {
249 if ((cur->next->type != HTML_TEXT_NODE) &&
250 (cur->next->type != HTML_ENTITY_REF_NODE))
251 xmlBufferWriteChar(buf, "\n");
252 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000253}
254
255/**
256 * htmlDocContentDump:
257 * @buf: the HTML buffer output
258 * @cur: the document
259 *
260 * Dump an HTML document.
261 */
262static void
263htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000264 if (cur->intSubset != NULL)
265 htmlDtdDump(buf, cur);
266 if (cur->root != NULL) {
Daniel Veillard35008381999-10-25 13:15:52 +0000267 htmlNodeListDump(buf, cur, cur->root);
Daniel Veillard167b5091999-07-07 04:19:20 +0000268 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000269 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000270}
271
272/**
273 * htmlDocDumpMemory:
274 * @cur: the document
275 * @mem: OUT: the memory pointer
276 * @size: OUT: the memory lenght
277 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000278 * Dump an HTML document in memory and return the xmlChar * and it's size.
Daniel Veillard167b5091999-07-07 04:19:20 +0000279 * It's up to the caller to free the memory.
280 */
281void
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000282htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000283 xmlBufferPtr buf;
284
285 if (cur == NULL) {
286#ifdef DEBUG_TREE
287 fprintf(stderr, "xmlDocDumpMemory : document == NULL\n");
288#endif
289 *mem = NULL;
290 *size = 0;
291 return;
292 }
293 buf = xmlBufferCreate();
294 if (buf == NULL) {
295 *mem = NULL;
296 *size = 0;
297 return;
298 }
299 htmlDocContentDump(buf, cur);
300 *mem = buf->content;
301 *size = buf->use;
302 memset(buf, -1, sizeof(xmlBuffer));
Daniel Veillard6454aec1999-09-02 22:04:43 +0000303 xmlFree(buf);
Daniel Veillard167b5091999-07-07 04:19:20 +0000304}
305
306
307/**
308 * htmlDocDump:
309 * @f: the FILE*
310 * @cur: the document
311 *
312 * Dump an HTML document to an open FILE.
313 */
314void
315htmlDocDump(FILE *f, xmlDocPtr cur) {
316 xmlBufferPtr buf;
317
318 if (cur == NULL) {
319#ifdef DEBUG_TREE
320 fprintf(stderr, "xmlDocDump : document == NULL\n");
321#endif
322 return;
323 }
324 buf = xmlBufferCreate();
325 if (buf == NULL) return;
326 htmlDocContentDump(buf, cur);
327 xmlBufferDump(f, buf);
328 xmlBufferFree(buf);
329}
330
331/**
332 * htmlSaveFile:
333 * @filename: the filename
334 * @cur: the document
335 *
336 * Dump an HTML document to a file.
337 *
338 * returns: the number of byte written or -1 in case of failure.
339 */
340int
341htmlSaveFile(const char *filename, xmlDocPtr cur) {
342 xmlBufferPtr buf;
343 FILE *output = NULL;
344 int ret;
345
346 /*
347 * save the content to a temp buffer.
348 */
349 buf = xmlBufferCreate();
350 if (buf == NULL) return(0);
351 htmlDocContentDump(buf, cur);
352
353 output = fopen(filename, "w");
354 if (output == NULL) return(-1);
355 ret = xmlBufferDump(output, buf);
356 fclose(output);
357
358 xmlBufferFree(buf);
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000359 return(ret * sizeof(xmlChar));
Daniel Veillard167b5091999-07-07 04:19:20 +0000360}
361