blob: 4b24f7d9d85958c78478d1fb658e189ca4eb9359 [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
Daniel Veillard7f7d1111999-09-22 09:46:25 +00009
10#ifndef WIN32
Daniel Veillard167b5091999-07-07 04:19:20 +000011#include "config.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000012#endif
Daniel Veillard167b5091999-07-07 04:19:20 +000013#include <stdio.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000014#include <string.h> /* for memset() only ! */
15
Daniel Veillard7f7d1111999-09-22 09:46:25 +000016#ifdef HAVE_CTYPE_H
17#include <ctype.h>
18#endif
19#ifdef HAVE_STDLIB_H
20#include <stdlib.h>
21#endif
22
Daniel Veillard6454aec1999-09-02 22:04:43 +000023#include "xmlmemory.h"
Daniel Veillard82150d81999-07-07 07:32:15 +000024#include "HTMLparser.h"
25#include "HTMLtree.h"
Daniel Veillard167b5091999-07-07 04:19:20 +000026#include "entities.h"
27#include "valid.h"
28
Daniel Veillard167b5091999-07-07 04:19:20 +000029/**
30 * htmlDtdDump:
31 * @buf: the HTML buffer output
32 * @doc: the document
33 *
34 * Dump the HTML document DTD, if any.
35 */
36static void
37htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
38 xmlDtdPtr cur = doc->intSubset;
39
40 if (cur == NULL) {
41 fprintf(stderr, "htmlDtdDump : no internal subset\n");
42 return;
43 }
44 xmlBufferWriteChar(buf, "<!DOCTYPE ");
45 xmlBufferWriteCHAR(buf, cur->name);
46 if (cur->ExternalID != NULL) {
47 xmlBufferWriteChar(buf, " PUBLIC ");
48 xmlBufferWriteQuotedString(buf, cur->ExternalID);
Daniel Veillard1566d3a1999-07-15 14:24:29 +000049 if (cur->SystemID != NULL) {
50 xmlBufferWriteChar(buf, " ");
51 xmlBufferWriteQuotedString(buf, cur->SystemID);
52 }
Daniel Veillard167b5091999-07-07 04:19:20 +000053 } else if (cur->SystemID != NULL) {
54 xmlBufferWriteChar(buf, " SYSTEM ");
55 xmlBufferWriteQuotedString(buf, cur->SystemID);
56 }
Daniel Veillard167b5091999-07-07 04:19:20 +000057 xmlBufferWriteChar(buf, ">\n");
58}
59
60/**
61 * htmlAttrDump:
62 * @buf: the HTML buffer output
63 * @doc: the document
64 * @cur: the attribute pointer
65 *
66 * Dump an HTML attribute
67 */
68static void
69htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +000070 xmlChar *value;
Daniel Veillard167b5091999-07-07 04:19:20 +000071
72 if (cur == NULL) {
73 fprintf(stderr, "htmlAttrDump : property == NULL\n");
74 return;
75 }
76 xmlBufferWriteChar(buf, " ");
77 xmlBufferWriteCHAR(buf, cur->name);
78 value = xmlNodeListGetString(doc, cur->val, 0);
79 if (value) {
80 xmlBufferWriteChar(buf, "=");
81 xmlBufferWriteQuotedString(buf, value);
Daniel Veillard6454aec1999-09-02 22:04:43 +000082 xmlFree(value);
Daniel Veillard167b5091999-07-07 04:19:20 +000083 } else {
84 xmlBufferWriteChar(buf, "=\"\"");
85 }
86}
87
88/**
89 * htmlAttrListDump:
90 * @buf: the HTML buffer output
91 * @doc: the document
92 * @cur: the first attribute pointer
93 *
94 * Dump a list of HTML attributes
95 */
96static void
97htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
98 if (cur == NULL) {
99 fprintf(stderr, "htmlAttrListDump : property == NULL\n");
100 return;
101 }
102 while (cur != NULL) {
103 htmlAttrDump(buf, doc, cur);
104 cur = cur->next;
105 }
106}
107
108
109static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000110htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000111/**
112 * htmlNodeListDump:
113 * @buf: the HTML buffer output
114 * @doc: the document
115 * @cur: the first node
Daniel Veillard167b5091999-07-07 04:19:20 +0000116 *
117 * Dump an HTML node list, recursive behaviour,children are printed too.
118 */
119static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000120htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000121 if (cur == NULL) {
122 fprintf(stderr, "htmlNodeListDump : node == NULL\n");
123 return;
124 }
125 while (cur != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000126 htmlNodeDump(buf, doc, cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000127 cur = cur->next;
128 }
129}
130
131/**
132 * htmlNodeDump:
133 * @buf: the HTML buffer output
134 * @doc: the document
135 * @cur: the current node
Daniel Veillard167b5091999-07-07 04:19:20 +0000136 *
137 * Dump an HTML node, recursive behaviour,children are printed too.
138 */
139static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000140htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000141 htmlElemDescPtr info;
Daniel Veillard167b5091999-07-07 04:19:20 +0000142
143 if (cur == NULL) {
144 fprintf(stderr, "htmlNodeDump : node == NULL\n");
145 return;
146 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000147 /*
148 * Special cases.
149 */
Daniel Veillard167b5091999-07-07 04:19:20 +0000150 if (cur->type == HTML_TEXT_NODE) {
151 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000152 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000153
Daniel Veillard82150d81999-07-07 07:32:15 +0000154 /* uses the HTML encoding routine !!!!!!!!!! */
Daniel Veillardd293fd11999-12-01 09:51:45 +0000155#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000156 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000157#else
158 buffer = xmlEncodeEntitiesReentrant(doc,
159 xmlBufferContent(cur->content));
160#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000161 if (buffer != NULL) {
162 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000163 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000164 }
165 }
166 return;
167 }
168 if (cur->type == HTML_COMMENT_NODE) {
169 if (cur->content != NULL) {
170 xmlBufferWriteChar(buf, "<!--");
Daniel Veillardd293fd11999-12-01 09:51:45 +0000171#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000172 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000173#else
174 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
175#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000176 xmlBufferWriteChar(buf, "-->");
177 }
178 return;
179 }
180 if (cur->type == HTML_ENTITY_REF_NODE) {
181 xmlBufferWriteChar(buf, "&");
182 xmlBufferWriteCHAR(buf, cur->name);
183 xmlBufferWriteChar(buf, ";");
184 return;
185 }
186
Daniel Veillard82150d81999-07-07 07:32:15 +0000187 /*
188 * Get specific HTmL info for taht node.
189 */
190 info = htmlTagLookup(cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000191
Daniel Veillard82150d81999-07-07 07:32:15 +0000192 xmlBufferWriteChar(buf, "<");
Daniel Veillard167b5091999-07-07 04:19:20 +0000193 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000194 if (cur->properties != NULL)
195 htmlAttrListDump(buf, doc, cur->properties);
196
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000197 if ((info != NULL) && (info->empty)) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000198 xmlBufferWriteChar(buf, ">");
199 if (cur->next != NULL) {
200 if ((cur->next->type != HTML_TEXT_NODE) &&
201 (cur->next->type != HTML_ENTITY_REF_NODE))
202 xmlBufferWriteChar(buf, "\n");
203 }
204 return;
205 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000206 if ((cur->content == NULL) && (cur->childs == NULL)) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000207 if ((info != NULL) && (info->endTag != 0))
Daniel Veillard82150d81999-07-07 07:32:15 +0000208 xmlBufferWriteChar(buf, ">");
209 else {
210 xmlBufferWriteChar(buf, "></");
211 xmlBufferWriteCHAR(buf, cur->name);
212 xmlBufferWriteChar(buf, ">");
213 }
214 if (cur->next != NULL) {
215 if ((cur->next->type != HTML_TEXT_NODE) &&
216 (cur->next->type != HTML_ENTITY_REF_NODE))
217 xmlBufferWriteChar(buf, "\n");
218 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000219 return;
220 }
221 xmlBufferWriteChar(buf, ">");
222 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000223 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000224
Daniel Veillardd293fd11999-12-01 09:51:45 +0000225#ifndef XML_USE_BUFFER_CONTENT
226 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
227#else
228 buffer = xmlEncodeEntitiesReentrant(doc,
229 xmlBufferContent(cur->content));
230#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000231 if (buffer != NULL) {
232 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000233 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000234 }
235 }
236 if (cur->childs != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000237 if ((cur->childs->type != HTML_TEXT_NODE) &&
Chris Lahey6dff2141999-12-01 09:51:45 +0000238 (cur->childs->type != HTML_ENTITY_REF_NODE) &&
239 (cur->childs != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000240 xmlBufferWriteChar(buf, "\n");
241 htmlNodeListDump(buf, doc, cur->childs);
242 if ((cur->last->type != HTML_TEXT_NODE) &&
Chris Lahey6dff2141999-12-01 09:51:45 +0000243 (cur->last->type != HTML_ENTITY_REF_NODE) &&
244 (cur->childs != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000245 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000246 }
247 xmlBufferWriteChar(buf, "</");
Daniel Veillard167b5091999-07-07 04:19:20 +0000248 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard82150d81999-07-07 07:32:15 +0000249 xmlBufferWriteChar(buf, ">");
250 if (cur->next != NULL) {
251 if ((cur->next->type != HTML_TEXT_NODE) &&
252 (cur->next->type != HTML_ENTITY_REF_NODE))
253 xmlBufferWriteChar(buf, "\n");
254 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000255}
256
257/**
258 * htmlDocContentDump:
259 * @buf: the HTML buffer output
260 * @cur: the document
261 *
262 * Dump an HTML document.
263 */
264static void
265htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000266 if (cur->intSubset != NULL)
267 htmlDtdDump(buf, cur);
268 if (cur->root != NULL) {
Daniel Veillard35008381999-10-25 13:15:52 +0000269 htmlNodeListDump(buf, cur, cur->root);
Daniel Veillard167b5091999-07-07 04:19:20 +0000270 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000271 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000272}
273
274/**
275 * htmlDocDumpMemory:
276 * @cur: the document
277 * @mem: OUT: the memory pointer
278 * @size: OUT: the memory lenght
279 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000280 * Dump an HTML document in memory and return the xmlChar * and it's size.
Daniel Veillard167b5091999-07-07 04:19:20 +0000281 * It's up to the caller to free the memory.
282 */
283void
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000284htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000285 xmlBufferPtr buf;
286
287 if (cur == NULL) {
288#ifdef DEBUG_TREE
289 fprintf(stderr, "xmlDocDumpMemory : document == NULL\n");
290#endif
291 *mem = NULL;
292 *size = 0;
293 return;
294 }
295 buf = xmlBufferCreate();
296 if (buf == NULL) {
297 *mem = NULL;
298 *size = 0;
299 return;
300 }
301 htmlDocContentDump(buf, cur);
302 *mem = buf->content;
303 *size = buf->use;
304 memset(buf, -1, sizeof(xmlBuffer));
Daniel Veillard6454aec1999-09-02 22:04:43 +0000305 xmlFree(buf);
Daniel Veillard167b5091999-07-07 04:19:20 +0000306}
307
308
309/**
310 * htmlDocDump:
311 * @f: the FILE*
312 * @cur: the document
313 *
314 * Dump an HTML document to an open FILE.
315 */
316void
317htmlDocDump(FILE *f, xmlDocPtr cur) {
318 xmlBufferPtr buf;
319
320 if (cur == NULL) {
321#ifdef DEBUG_TREE
322 fprintf(stderr, "xmlDocDump : document == NULL\n");
323#endif
324 return;
325 }
326 buf = xmlBufferCreate();
327 if (buf == NULL) return;
328 htmlDocContentDump(buf, cur);
329 xmlBufferDump(f, buf);
330 xmlBufferFree(buf);
331}
332
333/**
334 * htmlSaveFile:
335 * @filename: the filename
336 * @cur: the document
337 *
338 * Dump an HTML document to a file.
339 *
340 * returns: the number of byte written or -1 in case of failure.
341 */
342int
343htmlSaveFile(const char *filename, xmlDocPtr cur) {
344 xmlBufferPtr buf;
345 FILE *output = NULL;
346 int ret;
347
348 /*
349 * save the content to a temp buffer.
350 */
351 buf = xmlBufferCreate();
352 if (buf == NULL) return(0);
353 htmlDocContentDump(buf, cur);
354
355 output = fopen(filename, "w");
356 if (output == NULL) return(-1);
357 ret = xmlBufferDump(output, buf);
358 fclose(output);
359
360 xmlBufferFree(buf);
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000361 return(ret * sizeof(xmlChar));
Daniel Veillard167b5091999-07-07 04:19:20 +0000362}
363