blob: c84daea589dd4983c85960eaa3f4aa41f5bbe18c [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
Daniel Veillard7f7d1111999-09-22 09:46:25 +00009
Daniel Veillard3c558c31999-12-22 11:30:41 +000010#ifdef WIN32
11#include "win32config.h"
12#else
Daniel Veillard167b5091999-07-07 04:19:20 +000013#include "config.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000014#endif
Daniel Veillard167b5091999-07-07 04:19:20 +000015#include <stdio.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000016#include <string.h> /* for memset() only ! */
17
Daniel Veillard7f7d1111999-09-22 09:46:25 +000018#ifdef HAVE_CTYPE_H
19#include <ctype.h>
20#endif
21#ifdef HAVE_STDLIB_H
22#include <stdlib.h>
23#endif
24
Daniel Veillard6454aec1999-09-02 22:04:43 +000025#include "xmlmemory.h"
Daniel Veillard82150d81999-07-07 07:32:15 +000026#include "HTMLparser.h"
27#include "HTMLtree.h"
Daniel Veillard167b5091999-07-07 04:19:20 +000028#include "entities.h"
29#include "valid.h"
30
Daniel Veillard167b5091999-07-07 04:19:20 +000031/**
32 * htmlDtdDump:
33 * @buf: the HTML buffer output
34 * @doc: the document
35 *
36 * Dump the HTML document DTD, if any.
37 */
38static void
39htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
40 xmlDtdPtr cur = doc->intSubset;
41
42 if (cur == NULL) {
43 fprintf(stderr, "htmlDtdDump : no internal subset\n");
44 return;
45 }
46 xmlBufferWriteChar(buf, "<!DOCTYPE ");
47 xmlBufferWriteCHAR(buf, cur->name);
48 if (cur->ExternalID != NULL) {
49 xmlBufferWriteChar(buf, " PUBLIC ");
50 xmlBufferWriteQuotedString(buf, cur->ExternalID);
Daniel Veillard1566d3a1999-07-15 14:24:29 +000051 if (cur->SystemID != NULL) {
52 xmlBufferWriteChar(buf, " ");
53 xmlBufferWriteQuotedString(buf, cur->SystemID);
54 }
Daniel Veillard167b5091999-07-07 04:19:20 +000055 } else if (cur->SystemID != NULL) {
56 xmlBufferWriteChar(buf, " SYSTEM ");
57 xmlBufferWriteQuotedString(buf, cur->SystemID);
58 }
Daniel Veillard167b5091999-07-07 04:19:20 +000059 xmlBufferWriteChar(buf, ">\n");
60}
61
62/**
63 * htmlAttrDump:
64 * @buf: the HTML buffer output
65 * @doc: the document
66 * @cur: the attribute pointer
67 *
68 * Dump an HTML attribute
69 */
70static void
71htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +000072 xmlChar *value;
Daniel Veillard167b5091999-07-07 04:19:20 +000073
74 if (cur == NULL) {
75 fprintf(stderr, "htmlAttrDump : property == NULL\n");
76 return;
77 }
78 xmlBufferWriteChar(buf, " ");
79 xmlBufferWriteCHAR(buf, cur->name);
80 value = xmlNodeListGetString(doc, cur->val, 0);
81 if (value) {
82 xmlBufferWriteChar(buf, "=");
83 xmlBufferWriteQuotedString(buf, value);
Daniel Veillard6454aec1999-09-02 22:04:43 +000084 xmlFree(value);
Daniel Veillard167b5091999-07-07 04:19:20 +000085 } else {
86 xmlBufferWriteChar(buf, "=\"\"");
87 }
88}
89
90/**
91 * htmlAttrListDump:
92 * @buf: the HTML buffer output
93 * @doc: the document
94 * @cur: the first attribute pointer
95 *
96 * Dump a list of HTML attributes
97 */
98static void
99htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
100 if (cur == NULL) {
101 fprintf(stderr, "htmlAttrListDump : property == NULL\n");
102 return;
103 }
104 while (cur != NULL) {
105 htmlAttrDump(buf, doc, cur);
106 cur = cur->next;
107 }
108}
109
110
111static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000112htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000113/**
114 * htmlNodeListDump:
115 * @buf: the HTML buffer output
116 * @doc: the document
117 * @cur: the first node
Daniel Veillard167b5091999-07-07 04:19:20 +0000118 *
119 * Dump an HTML node list, recursive behaviour,children are printed too.
120 */
121static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000122htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000123 if (cur == NULL) {
124 fprintf(stderr, "htmlNodeListDump : node == NULL\n");
125 return;
126 }
127 while (cur != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000128 htmlNodeDump(buf, doc, cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000129 cur = cur->next;
130 }
131}
132
133/**
134 * htmlNodeDump:
135 * @buf: the HTML buffer output
136 * @doc: the document
137 * @cur: the current node
Daniel Veillard167b5091999-07-07 04:19:20 +0000138 *
139 * Dump an HTML node, recursive behaviour,children are printed too.
140 */
141static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000142htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000143 htmlElemDescPtr info;
Daniel Veillard167b5091999-07-07 04:19:20 +0000144
145 if (cur == NULL) {
146 fprintf(stderr, "htmlNodeDump : node == NULL\n");
147 return;
148 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000149 /*
150 * Special cases.
151 */
Daniel Veillard167b5091999-07-07 04:19:20 +0000152 if (cur->type == HTML_TEXT_NODE) {
153 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000154 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000155
Daniel Veillard82150d81999-07-07 07:32:15 +0000156 /* uses the HTML encoding routine !!!!!!!!!! */
Daniel Veillardd293fd11999-12-01 09:51:45 +0000157#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000158 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000159#else
160 buffer = xmlEncodeEntitiesReentrant(doc,
161 xmlBufferContent(cur->content));
162#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000163 if (buffer != NULL) {
164 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000165 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000166 }
167 }
168 return;
169 }
170 if (cur->type == HTML_COMMENT_NODE) {
171 if (cur->content != NULL) {
172 xmlBufferWriteChar(buf, "<!--");
Daniel Veillardd293fd11999-12-01 09:51:45 +0000173#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000174 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000175#else
176 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
177#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000178 xmlBufferWriteChar(buf, "-->");
179 }
180 return;
181 }
182 if (cur->type == HTML_ENTITY_REF_NODE) {
183 xmlBufferWriteChar(buf, "&");
184 xmlBufferWriteCHAR(buf, cur->name);
185 xmlBufferWriteChar(buf, ";");
186 return;
187 }
188
Daniel Veillard82150d81999-07-07 07:32:15 +0000189 /*
190 * Get specific HTmL info for taht node.
191 */
192 info = htmlTagLookup(cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000193
Daniel Veillard82150d81999-07-07 07:32:15 +0000194 xmlBufferWriteChar(buf, "<");
Daniel Veillard167b5091999-07-07 04:19:20 +0000195 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000196 if (cur->properties != NULL)
197 htmlAttrListDump(buf, doc, cur->properties);
198
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000199 if ((info != NULL) && (info->empty)) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000200 xmlBufferWriteChar(buf, ">");
201 if (cur->next != NULL) {
202 if ((cur->next->type != HTML_TEXT_NODE) &&
203 (cur->next->type != HTML_ENTITY_REF_NODE))
204 xmlBufferWriteChar(buf, "\n");
205 }
206 return;
207 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000208 if ((cur->content == NULL) && (cur->childs == NULL)) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000209 if ((info != NULL) && (info->endTag != 0))
Daniel Veillard82150d81999-07-07 07:32:15 +0000210 xmlBufferWriteChar(buf, ">");
211 else {
212 xmlBufferWriteChar(buf, "></");
213 xmlBufferWriteCHAR(buf, cur->name);
214 xmlBufferWriteChar(buf, ">");
215 }
216 if (cur->next != NULL) {
217 if ((cur->next->type != HTML_TEXT_NODE) &&
218 (cur->next->type != HTML_ENTITY_REF_NODE))
219 xmlBufferWriteChar(buf, "\n");
220 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000221 return;
222 }
223 xmlBufferWriteChar(buf, ">");
224 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000225 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000226
Daniel Veillardd293fd11999-12-01 09:51:45 +0000227#ifndef XML_USE_BUFFER_CONTENT
228 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
229#else
230 buffer = xmlEncodeEntitiesReentrant(doc,
231 xmlBufferContent(cur->content));
232#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000233 if (buffer != NULL) {
234 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000235 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000236 }
237 }
238 if (cur->childs != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000239 if ((cur->childs->type != HTML_TEXT_NODE) &&
Chris Lahey6dff2141999-12-01 09:51:45 +0000240 (cur->childs->type != HTML_ENTITY_REF_NODE) &&
241 (cur->childs != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000242 xmlBufferWriteChar(buf, "\n");
243 htmlNodeListDump(buf, doc, cur->childs);
244 if ((cur->last->type != HTML_TEXT_NODE) &&
Chris Lahey6dff2141999-12-01 09:51:45 +0000245 (cur->last->type != HTML_ENTITY_REF_NODE) &&
246 (cur->childs != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000247 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000248 }
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000249 if (!htmlIsAutoClosed(doc, cur)) {
250 xmlBufferWriteChar(buf, "</");
251 xmlBufferWriteCHAR(buf, cur->name);
252 xmlBufferWriteChar(buf, ">");
253 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000254 if (cur->next != NULL) {
255 if ((cur->next->type != HTML_TEXT_NODE) &&
256 (cur->next->type != HTML_ENTITY_REF_NODE))
257 xmlBufferWriteChar(buf, "\n");
258 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000259}
260
261/**
262 * htmlDocContentDump:
263 * @buf: the HTML buffer output
264 * @cur: the document
265 *
266 * Dump an HTML document.
267 */
268static void
269htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000270 int type;
271
272 /*
273 * force to output the stuff as HTML, especially for entities
274 */
275 type = cur->type;
276 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard167b5091999-07-07 04:19:20 +0000277 if (cur->intSubset != NULL)
278 htmlDtdDump(buf, cur);
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000279 else {
280 /* Default to HTML-4.0 transitionnal @@@@ */
281 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
282
283 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000284 if (cur->root != NULL) {
Daniel Veillard35008381999-10-25 13:15:52 +0000285 htmlNodeListDump(buf, cur, cur->root);
Daniel Veillard167b5091999-07-07 04:19:20 +0000286 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000287 xmlBufferWriteChar(buf, "\n");
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000288 cur->type = type;
Daniel Veillard167b5091999-07-07 04:19:20 +0000289}
290
291/**
292 * htmlDocDumpMemory:
293 * @cur: the document
294 * @mem: OUT: the memory pointer
295 * @size: OUT: the memory lenght
296 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000297 * Dump an HTML document in memory and return the xmlChar * and it's size.
Daniel Veillard167b5091999-07-07 04:19:20 +0000298 * It's up to the caller to free the memory.
299 */
300void
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000301htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000302 xmlBufferPtr buf;
303
304 if (cur == NULL) {
305#ifdef DEBUG_TREE
306 fprintf(stderr, "xmlDocDumpMemory : document == NULL\n");
307#endif
308 *mem = NULL;
309 *size = 0;
310 return;
311 }
312 buf = xmlBufferCreate();
313 if (buf == NULL) {
314 *mem = NULL;
315 *size = 0;
316 return;
317 }
318 htmlDocContentDump(buf, cur);
319 *mem = buf->content;
320 *size = buf->use;
321 memset(buf, -1, sizeof(xmlBuffer));
Daniel Veillard6454aec1999-09-02 22:04:43 +0000322 xmlFree(buf);
Daniel Veillard167b5091999-07-07 04:19:20 +0000323}
324
325
326/**
327 * htmlDocDump:
328 * @f: the FILE*
329 * @cur: the document
330 *
331 * Dump an HTML document to an open FILE.
332 */
333void
334htmlDocDump(FILE *f, xmlDocPtr cur) {
335 xmlBufferPtr buf;
336
337 if (cur == NULL) {
338#ifdef DEBUG_TREE
339 fprintf(stderr, "xmlDocDump : document == NULL\n");
340#endif
341 return;
342 }
343 buf = xmlBufferCreate();
344 if (buf == NULL) return;
345 htmlDocContentDump(buf, cur);
346 xmlBufferDump(f, buf);
347 xmlBufferFree(buf);
348}
349
350/**
351 * htmlSaveFile:
352 * @filename: the filename
353 * @cur: the document
354 *
355 * Dump an HTML document to a file.
356 *
357 * returns: the number of byte written or -1 in case of failure.
358 */
359int
360htmlSaveFile(const char *filename, xmlDocPtr cur) {
361 xmlBufferPtr buf;
362 FILE *output = NULL;
363 int ret;
364
365 /*
366 * save the content to a temp buffer.
367 */
368 buf = xmlBufferCreate();
369 if (buf == NULL) return(0);
370 htmlDocContentDump(buf, cur);
371
372 output = fopen(filename, "w");
373 if (output == NULL) return(-1);
374 ret = xmlBufferDump(output, buf);
375 fclose(output);
376
377 xmlBufferFree(buf);
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000378 return(ret * sizeof(xmlChar));
Daniel Veillard167b5091999-07-07 04:19:20 +0000379}
380