blob: 1265a0ab6f19dc537493273c5c05116ff9c43dcf [file] [log] [blame]
Daniel Veillard167b5091999-07-07 04:19:20 +00001/*
2 * HTMLtree.c : implementation of access functions for an HTML tree.
3 *
4 * See Copyright for the status of this software.
5 *
6 * Daniel.Veillard@w3.org
7 */
8
Daniel Veillard7f7d1111999-09-22 09:46:25 +00009
10#ifndef WIN32
Daniel Veillard167b5091999-07-07 04:19:20 +000011#include "config.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000012#endif
Daniel Veillard167b5091999-07-07 04:19:20 +000013#include <stdio.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000014#include <string.h> /* for memset() only ! */
15
Daniel Veillard7f7d1111999-09-22 09:46:25 +000016#ifdef HAVE_CTYPE_H
17#include <ctype.h>
18#endif
19#ifdef HAVE_STDLIB_H
20#include <stdlib.h>
21#endif
22
Daniel Veillard6454aec1999-09-02 22:04:43 +000023#include "xmlmemory.h"
Daniel Veillard82150d81999-07-07 07:32:15 +000024#include "HTMLparser.h"
25#include "HTMLtree.h"
Daniel Veillard167b5091999-07-07 04:19:20 +000026#include "entities.h"
27#include "valid.h"
28
Daniel Veillard167b5091999-07-07 04:19:20 +000029/**
30 * htmlDtdDump:
31 * @buf: the HTML buffer output
32 * @doc: the document
33 *
34 * Dump the HTML document DTD, if any.
35 */
36static void
37htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
38 xmlDtdPtr cur = doc->intSubset;
39
40 if (cur == NULL) {
41 fprintf(stderr, "htmlDtdDump : no internal subset\n");
42 return;
43 }
44 xmlBufferWriteChar(buf, "<!DOCTYPE ");
45 xmlBufferWriteCHAR(buf, cur->name);
46 if (cur->ExternalID != NULL) {
47 xmlBufferWriteChar(buf, " PUBLIC ");
48 xmlBufferWriteQuotedString(buf, cur->ExternalID);
Daniel Veillard1566d3a1999-07-15 14:24:29 +000049 if (cur->SystemID != NULL) {
50 xmlBufferWriteChar(buf, " ");
51 xmlBufferWriteQuotedString(buf, cur->SystemID);
52 }
Daniel Veillard167b5091999-07-07 04:19:20 +000053 } else if (cur->SystemID != NULL) {
54 xmlBufferWriteChar(buf, " SYSTEM ");
55 xmlBufferWriteQuotedString(buf, cur->SystemID);
56 }
Daniel Veillard167b5091999-07-07 04:19:20 +000057 xmlBufferWriteChar(buf, ">\n");
58}
59
60/**
61 * htmlAttrDump:
62 * @buf: the HTML buffer output
63 * @doc: the document
64 * @cur: the attribute pointer
65 *
66 * Dump an HTML attribute
67 */
68static void
69htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +000070 xmlChar *value;
Daniel Veillard167b5091999-07-07 04:19:20 +000071
72 if (cur == NULL) {
73 fprintf(stderr, "htmlAttrDump : property == NULL\n");
74 return;
75 }
76 xmlBufferWriteChar(buf, " ");
77 xmlBufferWriteCHAR(buf, cur->name);
78 value = xmlNodeListGetString(doc, cur->val, 0);
79 if (value) {
80 xmlBufferWriteChar(buf, "=");
81 xmlBufferWriteQuotedString(buf, value);
Daniel Veillard6454aec1999-09-02 22:04:43 +000082 xmlFree(value);
Daniel Veillard167b5091999-07-07 04:19:20 +000083 } else {
84 xmlBufferWriteChar(buf, "=\"\"");
85 }
86}
87
88/**
89 * htmlAttrListDump:
90 * @buf: the HTML buffer output
91 * @doc: the document
92 * @cur: the first attribute pointer
93 *
94 * Dump a list of HTML attributes
95 */
96static void
97htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
98 if (cur == NULL) {
99 fprintf(stderr, "htmlAttrListDump : property == NULL\n");
100 return;
101 }
102 while (cur != NULL) {
103 htmlAttrDump(buf, doc, cur);
104 cur = cur->next;
105 }
106}
107
108
109static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000110htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000111/**
112 * htmlNodeListDump:
113 * @buf: the HTML buffer output
114 * @doc: the document
115 * @cur: the first node
Daniel Veillard167b5091999-07-07 04:19:20 +0000116 *
117 * Dump an HTML node list, recursive behaviour,children are printed too.
118 */
119static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000120htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000121 if (cur == NULL) {
122 fprintf(stderr, "htmlNodeListDump : node == NULL\n");
123 return;
124 }
125 while (cur != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000126 htmlNodeDump(buf, doc, cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000127 cur = cur->next;
128 }
129}
130
131/**
132 * htmlNodeDump:
133 * @buf: the HTML buffer output
134 * @doc: the document
135 * @cur: the current node
Daniel Veillard167b5091999-07-07 04:19:20 +0000136 *
137 * Dump an HTML node, recursive behaviour,children are printed too.
138 */
139static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000140htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000141 htmlElemDescPtr info;
Daniel Veillard167b5091999-07-07 04:19:20 +0000142
143 if (cur == NULL) {
144 fprintf(stderr, "htmlNodeDump : node == NULL\n");
145 return;
146 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000147 /*
148 * Special cases.
149 */
Daniel Veillard167b5091999-07-07 04:19:20 +0000150 if (cur->type == HTML_TEXT_NODE) {
151 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000152 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000153
Daniel Veillard82150d81999-07-07 07:32:15 +0000154 /* uses the HTML encoding routine !!!!!!!!!! */
Daniel Veillardd293fd11999-12-01 09:51:45 +0000155#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000156 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000157#else
158 buffer = xmlEncodeEntitiesReentrant(doc,
159 xmlBufferContent(cur->content));
160#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000161 if (buffer != NULL) {
162 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000163 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000164 }
165 }
166 return;
167 }
168 if (cur->type == HTML_COMMENT_NODE) {
169 if (cur->content != NULL) {
170 xmlBufferWriteChar(buf, "<!--");
Daniel Veillardd293fd11999-12-01 09:51:45 +0000171#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000172 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000173#else
174 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
175#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000176 xmlBufferWriteChar(buf, "-->");
177 }
178 return;
179 }
180 if (cur->type == HTML_ENTITY_REF_NODE) {
181 xmlBufferWriteChar(buf, "&");
182 xmlBufferWriteCHAR(buf, cur->name);
183 xmlBufferWriteChar(buf, ";");
184 return;
185 }
186
Daniel Veillard82150d81999-07-07 07:32:15 +0000187 /*
188 * Get specific HTmL info for taht node.
189 */
190 info = htmlTagLookup(cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000191
Daniel Veillard82150d81999-07-07 07:32:15 +0000192 xmlBufferWriteChar(buf, "<");
Daniel Veillard167b5091999-07-07 04:19:20 +0000193 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000194 if (cur->properties != NULL)
195 htmlAttrListDump(buf, doc, cur->properties);
196
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000197 if ((info != NULL) && (info->empty)) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000198 xmlBufferWriteChar(buf, ">");
199 if (cur->next != NULL) {
200 if ((cur->next->type != HTML_TEXT_NODE) &&
201 (cur->next->type != HTML_ENTITY_REF_NODE))
202 xmlBufferWriteChar(buf, "\n");
203 }
204 return;
205 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000206 if ((cur->content == NULL) && (cur->childs == NULL)) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000207 if ((info != NULL) && (info->endTag != 0))
Daniel Veillard82150d81999-07-07 07:32:15 +0000208 xmlBufferWriteChar(buf, ">");
209 else {
210 xmlBufferWriteChar(buf, "></");
211 xmlBufferWriteCHAR(buf, cur->name);
212 xmlBufferWriteChar(buf, ">");
213 }
214 if (cur->next != NULL) {
215 if ((cur->next->type != HTML_TEXT_NODE) &&
216 (cur->next->type != HTML_ENTITY_REF_NODE))
217 xmlBufferWriteChar(buf, "\n");
218 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000219 return;
220 }
221 xmlBufferWriteChar(buf, ">");
222 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000223 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000224
Daniel Veillardd293fd11999-12-01 09:51:45 +0000225#ifndef XML_USE_BUFFER_CONTENT
226 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
227#else
228 buffer = xmlEncodeEntitiesReentrant(doc,
229 xmlBufferContent(cur->content));
230#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000231 if (buffer != NULL) {
232 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000233 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000234 }
235 }
236 if (cur->childs != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000237 if ((cur->childs->type != HTML_TEXT_NODE) &&
Chris Lahey6dff2141999-12-01 09:51:45 +0000238 (cur->childs->type != HTML_ENTITY_REF_NODE) &&
239 (cur->childs != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000240 xmlBufferWriteChar(buf, "\n");
241 htmlNodeListDump(buf, doc, cur->childs);
242 if ((cur->last->type != HTML_TEXT_NODE) &&
Chris Lahey6dff2141999-12-01 09:51:45 +0000243 (cur->last->type != HTML_ENTITY_REF_NODE) &&
244 (cur->childs != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000245 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000246 }
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000247 if (!htmlIsAutoClosed(doc, cur)) {
248 xmlBufferWriteChar(buf, "</");
249 xmlBufferWriteCHAR(buf, cur->name);
250 xmlBufferWriteChar(buf, ">");
251 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000252 if (cur->next != NULL) {
253 if ((cur->next->type != HTML_TEXT_NODE) &&
254 (cur->next->type != HTML_ENTITY_REF_NODE))
255 xmlBufferWriteChar(buf, "\n");
256 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000257}
258
259/**
260 * htmlDocContentDump:
261 * @buf: the HTML buffer output
262 * @cur: the document
263 *
264 * Dump an HTML document.
265 */
266static void
267htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000268 int type;
269
270 /*
271 * force to output the stuff as HTML, especially for entities
272 */
273 type = cur->type;
274 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard167b5091999-07-07 04:19:20 +0000275 if (cur->intSubset != NULL)
276 htmlDtdDump(buf, cur);
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000277 else {
278 /* Default to HTML-4.0 transitionnal @@@@ */
279 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
280
281 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000282 if (cur->root != NULL) {
Daniel Veillard35008381999-10-25 13:15:52 +0000283 htmlNodeListDump(buf, cur, cur->root);
Daniel Veillard167b5091999-07-07 04:19:20 +0000284 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000285 xmlBufferWriteChar(buf, "\n");
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000286 cur->type = type;
Daniel Veillard167b5091999-07-07 04:19:20 +0000287}
288
289/**
290 * htmlDocDumpMemory:
291 * @cur: the document
292 * @mem: OUT: the memory pointer
293 * @size: OUT: the memory lenght
294 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000295 * Dump an HTML document in memory and return the xmlChar * and it's size.
Daniel Veillard167b5091999-07-07 04:19:20 +0000296 * It's up to the caller to free the memory.
297 */
298void
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000299htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000300 xmlBufferPtr buf;
301
302 if (cur == NULL) {
303#ifdef DEBUG_TREE
304 fprintf(stderr, "xmlDocDumpMemory : document == NULL\n");
305#endif
306 *mem = NULL;
307 *size = 0;
308 return;
309 }
310 buf = xmlBufferCreate();
311 if (buf == NULL) {
312 *mem = NULL;
313 *size = 0;
314 return;
315 }
316 htmlDocContentDump(buf, cur);
317 *mem = buf->content;
318 *size = buf->use;
319 memset(buf, -1, sizeof(xmlBuffer));
Daniel Veillard6454aec1999-09-02 22:04:43 +0000320 xmlFree(buf);
Daniel Veillard167b5091999-07-07 04:19:20 +0000321}
322
323
324/**
325 * htmlDocDump:
326 * @f: the FILE*
327 * @cur: the document
328 *
329 * Dump an HTML document to an open FILE.
330 */
331void
332htmlDocDump(FILE *f, xmlDocPtr cur) {
333 xmlBufferPtr buf;
334
335 if (cur == NULL) {
336#ifdef DEBUG_TREE
337 fprintf(stderr, "xmlDocDump : document == NULL\n");
338#endif
339 return;
340 }
341 buf = xmlBufferCreate();
342 if (buf == NULL) return;
343 htmlDocContentDump(buf, cur);
344 xmlBufferDump(f, buf);
345 xmlBufferFree(buf);
346}
347
348/**
349 * htmlSaveFile:
350 * @filename: the filename
351 * @cur: the document
352 *
353 * Dump an HTML document to a file.
354 *
355 * returns: the number of byte written or -1 in case of failure.
356 */
357int
358htmlSaveFile(const char *filename, xmlDocPtr cur) {
359 xmlBufferPtr buf;
360 FILE *output = NULL;
361 int ret;
362
363 /*
364 * save the content to a temp buffer.
365 */
366 buf = xmlBufferCreate();
367 if (buf == NULL) return(0);
368 htmlDocContentDump(buf, cur);
369
370 output = fopen(filename, "w");
371 if (output == NULL) return(-1);
372 ret = xmlBufferDump(output, buf);
373 fclose(output);
374
375 xmlBufferFree(buf);
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000376 return(ret * sizeof(xmlChar));
Daniel Veillard167b5091999-07-07 04:19:20 +0000377}
378