blob: 24a90ba74e4694cfedb88c5223b31d01e803d2b4 [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
Daniel Veillard7f7d1111999-09-22 09:46:25 +00009
Daniel Veillard3c558c31999-12-22 11:30:41 +000010#ifdef WIN32
11#include "win32config.h"
12#else
Daniel Veillard167b5091999-07-07 04:19:20 +000013#include "config.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000014#endif
Daniel Veillard361d8452000-04-03 19:48:13 +000015
16#include "xmlversion.h"
17#ifdef LIBXML_HTML_ENABLED
18
Daniel Veillard167b5091999-07-07 04:19:20 +000019#include <stdio.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000020#include <string.h> /* for memset() only ! */
21
Daniel Veillard7f7d1111999-09-22 09:46:25 +000022#ifdef HAVE_CTYPE_H
23#include <ctype.h>
24#endif
25#ifdef HAVE_STDLIB_H
26#include <stdlib.h>
27#endif
28
Daniel Veillard361d8452000-04-03 19:48:13 +000029#include <libxml/xmlmemory.h>
30#include <libxml/HTMLparser.h>
31#include <libxml/HTMLtree.h>
32#include <libxml/entities.h>
33#include <libxml/valid.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000034
Daniel Veillarddbfd6411999-12-28 16:35:14 +000035static void
36htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
37
Daniel Veillard167b5091999-07-07 04:19:20 +000038/**
39 * htmlDtdDump:
40 * @buf: the HTML buffer output
41 * @doc: the document
42 *
43 * Dump the HTML document DTD, if any.
44 */
45static void
46htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
47 xmlDtdPtr cur = doc->intSubset;
48
49 if (cur == NULL) {
50 fprintf(stderr, "htmlDtdDump : no internal subset\n");
51 return;
52 }
53 xmlBufferWriteChar(buf, "<!DOCTYPE ");
54 xmlBufferWriteCHAR(buf, cur->name);
55 if (cur->ExternalID != NULL) {
56 xmlBufferWriteChar(buf, " PUBLIC ");
57 xmlBufferWriteQuotedString(buf, cur->ExternalID);
Daniel Veillard1566d3a1999-07-15 14:24:29 +000058 if (cur->SystemID != NULL) {
59 xmlBufferWriteChar(buf, " ");
60 xmlBufferWriteQuotedString(buf, cur->SystemID);
61 }
Daniel Veillard167b5091999-07-07 04:19:20 +000062 } else if (cur->SystemID != NULL) {
63 xmlBufferWriteChar(buf, " SYSTEM ");
64 xmlBufferWriteQuotedString(buf, cur->SystemID);
65 }
Daniel Veillard167b5091999-07-07 04:19:20 +000066 xmlBufferWriteChar(buf, ">\n");
67}
68
69/**
70 * htmlAttrDump:
71 * @buf: the HTML buffer output
72 * @doc: the document
73 * @cur: the attribute pointer
74 *
75 * Dump an HTML attribute
76 */
77static void
78htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +000079 xmlChar *value;
Daniel Veillard167b5091999-07-07 04:19:20 +000080
81 if (cur == NULL) {
82 fprintf(stderr, "htmlAttrDump : property == NULL\n");
83 return;
84 }
85 xmlBufferWriteChar(buf, " ");
86 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillardbe803962000-06-28 23:40:59 +000087 if (cur->children != NULL) {
88 value = xmlNodeListGetString(doc, cur->children, 0);
89 if (value) {
90 xmlBufferWriteChar(buf, "=");
91 xmlBufferWriteQuotedString(buf, value);
92 xmlFree(value);
93 } else {
94 xmlBufferWriteChar(buf, "=\"\"");
95 }
Daniel Veillard167b5091999-07-07 04:19:20 +000096 }
97}
98
99/**
100 * htmlAttrListDump:
101 * @buf: the HTML buffer output
102 * @doc: the document
103 * @cur: the first attribute pointer
104 *
105 * Dump a list of HTML attributes
106 */
107static void
108htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
109 if (cur == NULL) {
110 fprintf(stderr, "htmlAttrListDump : property == NULL\n");
111 return;
112 }
113 while (cur != NULL) {
114 htmlAttrDump(buf, doc, cur);
115 cur = cur->next;
116 }
117}
118
119
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000120void
Daniel Veillard82150d81999-07-07 07:32:15 +0000121htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000122/**
123 * htmlNodeListDump:
124 * @buf: the HTML buffer output
125 * @doc: the document
126 * @cur: the first node
Daniel Veillard167b5091999-07-07 04:19:20 +0000127 *
128 * Dump an HTML node list, recursive behaviour,children are printed too.
129 */
130static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000131htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000132 if (cur == NULL) {
133 fprintf(stderr, "htmlNodeListDump : node == NULL\n");
134 return;
135 }
136 while (cur != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000137 htmlNodeDump(buf, doc, cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000138 cur = cur->next;
139 }
140}
141
142/**
143 * htmlNodeDump:
144 * @buf: the HTML buffer output
145 * @doc: the document
146 * @cur: the current node
Daniel Veillard167b5091999-07-07 04:19:20 +0000147 *
148 * Dump an HTML node, recursive behaviour,children are printed too.
149 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000150void
Daniel Veillard82150d81999-07-07 07:32:15 +0000151htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000152 htmlElemDescPtr info;
Daniel Veillard167b5091999-07-07 04:19:20 +0000153
154 if (cur == NULL) {
155 fprintf(stderr, "htmlNodeDump : node == NULL\n");
156 return;
157 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000158 /*
159 * Special cases.
160 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000161 if (cur->type == XML_HTML_DOCUMENT_NODE) {
162 htmlDocContentDump(buf, (xmlDocPtr) cur);
163 return;
164 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000165 if (cur->type == HTML_TEXT_NODE) {
166 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000167 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000168
Daniel Veillard82150d81999-07-07 07:32:15 +0000169 /* uses the HTML encoding routine !!!!!!!!!! */
Daniel Veillardd293fd11999-12-01 09:51:45 +0000170#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000171 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000172#else
173 buffer = xmlEncodeEntitiesReentrant(doc,
174 xmlBufferContent(cur->content));
175#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000176 if (buffer != NULL) {
177 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000178 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000179 }
180 }
181 return;
182 }
183 if (cur->type == HTML_COMMENT_NODE) {
184 if (cur->content != NULL) {
185 xmlBufferWriteChar(buf, "<!--");
Daniel Veillardd293fd11999-12-01 09:51:45 +0000186#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000187 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000188#else
189 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
190#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000191 xmlBufferWriteChar(buf, "-->");
192 }
193 return;
194 }
195 if (cur->type == HTML_ENTITY_REF_NODE) {
196 xmlBufferWriteChar(buf, "&");
197 xmlBufferWriteCHAR(buf, cur->name);
198 xmlBufferWriteChar(buf, ";");
199 return;
200 }
201
Daniel Veillard82150d81999-07-07 07:32:15 +0000202 /*
203 * Get specific HTmL info for taht node.
204 */
205 info = htmlTagLookup(cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000206
Daniel Veillard82150d81999-07-07 07:32:15 +0000207 xmlBufferWriteChar(buf, "<");
Daniel Veillard167b5091999-07-07 04:19:20 +0000208 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000209 if (cur->properties != NULL)
210 htmlAttrListDump(buf, doc, cur->properties);
211
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000212 if ((info != NULL) && (info->empty)) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000213 xmlBufferWriteChar(buf, ">");
214 if (cur->next != NULL) {
215 if ((cur->next->type != HTML_TEXT_NODE) &&
216 (cur->next->type != HTML_ENTITY_REF_NODE))
217 xmlBufferWriteChar(buf, "\n");
218 }
219 return;
220 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000221 if ((cur->content == NULL) && (cur->children == NULL)) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000222 if ((info != NULL) && (info->endTag != 0))
Daniel Veillard82150d81999-07-07 07:32:15 +0000223 xmlBufferWriteChar(buf, ">");
224 else {
225 xmlBufferWriteChar(buf, "></");
226 xmlBufferWriteCHAR(buf, cur->name);
227 xmlBufferWriteChar(buf, ">");
228 }
229 if (cur->next != NULL) {
230 if ((cur->next->type != HTML_TEXT_NODE) &&
231 (cur->next->type != HTML_ENTITY_REF_NODE))
232 xmlBufferWriteChar(buf, "\n");
233 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000234 return;
235 }
236 xmlBufferWriteChar(buf, ">");
237 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000238 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000239
Daniel Veillardd293fd11999-12-01 09:51:45 +0000240#ifndef XML_USE_BUFFER_CONTENT
241 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
242#else
243 buffer = xmlEncodeEntitiesReentrant(doc,
244 xmlBufferContent(cur->content));
245#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000246 if (buffer != NULL) {
247 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000248 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000249 }
250 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000251 if (cur->children != NULL) {
252 if ((cur->children->type != HTML_TEXT_NODE) &&
253 (cur->children->type != HTML_ENTITY_REF_NODE) &&
254 (cur->children != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000255 xmlBufferWriteChar(buf, "\n");
Daniel Veillardcf461992000-03-14 18:30:20 +0000256 htmlNodeListDump(buf, doc, cur->children);
Daniel Veillard82150d81999-07-07 07:32:15 +0000257 if ((cur->last->type != HTML_TEXT_NODE) &&
Chris Lahey6dff2141999-12-01 09:51:45 +0000258 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardcf461992000-03-14 18:30:20 +0000259 (cur->children != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000260 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000261 }
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000262 if (!htmlIsAutoClosed(doc, cur)) {
263 xmlBufferWriteChar(buf, "</");
264 xmlBufferWriteCHAR(buf, cur->name);
265 xmlBufferWriteChar(buf, ">");
266 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000267 if (cur->next != NULL) {
268 if ((cur->next->type != HTML_TEXT_NODE) &&
269 (cur->next->type != HTML_ENTITY_REF_NODE))
270 xmlBufferWriteChar(buf, "\n");
271 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000272}
273
274/**
Daniel Veillard5feb8492000-02-02 17:15:36 +0000275 * htmlNodeDumpFile:
276 * @out: the FILE pointer
277 * @doc: the document
278 * @cur: the current node
279 *
280 * Dump an HTML node, recursive behaviour,children are printed too.
281 */
282void
283htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
284 xmlBufferPtr buf;
285
286 buf = xmlBufferCreate();
287 if (buf == NULL) return;
288 htmlNodeDump(buf, doc, cur);
289 xmlBufferDump(out, buf);
290 xmlBufferFree(buf);
291}
292
293/**
Daniel Veillard167b5091999-07-07 04:19:20 +0000294 * htmlDocContentDump:
295 * @buf: the HTML buffer output
296 * @cur: the document
297 *
298 * Dump an HTML document.
299 */
300static void
301htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000302 int type;
303
304 /*
305 * force to output the stuff as HTML, especially for entities
306 */
307 type = cur->type;
308 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard167b5091999-07-07 04:19:20 +0000309 if (cur->intSubset != NULL)
310 htmlDtdDump(buf, cur);
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000311 else {
312 /* Default to HTML-4.0 transitionnal @@@@ */
313 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
314
315 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000316 if (cur->children != NULL) {
317 htmlNodeListDump(buf, cur, cur->children);
Daniel Veillard167b5091999-07-07 04:19:20 +0000318 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000319 xmlBufferWriteChar(buf, "\n");
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000320 cur->type = type;
Daniel Veillard167b5091999-07-07 04:19:20 +0000321}
322
323/**
324 * htmlDocDumpMemory:
325 * @cur: the document
326 * @mem: OUT: the memory pointer
327 * @size: OUT: the memory lenght
328 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000329 * Dump an HTML document in memory and return the xmlChar * and it's size.
Daniel Veillard167b5091999-07-07 04:19:20 +0000330 * It's up to the caller to free the memory.
331 */
332void
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000333htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000334 xmlBufferPtr buf;
335
336 if (cur == NULL) {
337#ifdef DEBUG_TREE
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000338 fprintf(stderr, "htmlxmlDocDumpMemory : document == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000339#endif
340 *mem = NULL;
341 *size = 0;
342 return;
343 }
344 buf = xmlBufferCreate();
345 if (buf == NULL) {
346 *mem = NULL;
347 *size = 0;
348 return;
349 }
350 htmlDocContentDump(buf, cur);
351 *mem = buf->content;
352 *size = buf->use;
353 memset(buf, -1, sizeof(xmlBuffer));
Daniel Veillard6454aec1999-09-02 22:04:43 +0000354 xmlFree(buf);
Daniel Veillard167b5091999-07-07 04:19:20 +0000355}
356
357
358/**
359 * htmlDocDump:
360 * @f: the FILE*
361 * @cur: the document
362 *
363 * Dump an HTML document to an open FILE.
364 */
365void
366htmlDocDump(FILE *f, xmlDocPtr cur) {
367 xmlBufferPtr buf;
368
369 if (cur == NULL) {
370#ifdef DEBUG_TREE
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000371 fprintf(stderr, "htmlDocDump : document == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000372#endif
373 return;
374 }
375 buf = xmlBufferCreate();
376 if (buf == NULL) return;
377 htmlDocContentDump(buf, cur);
378 xmlBufferDump(f, buf);
379 xmlBufferFree(buf);
380}
381
382/**
383 * htmlSaveFile:
384 * @filename: the filename
385 * @cur: the document
386 *
387 * Dump an HTML document to a file.
388 *
389 * returns: the number of byte written or -1 in case of failure.
390 */
391int
392htmlSaveFile(const char *filename, xmlDocPtr cur) {
393 xmlBufferPtr buf;
394 FILE *output = NULL;
395 int ret;
396
397 /*
398 * save the content to a temp buffer.
399 */
400 buf = xmlBufferCreate();
401 if (buf == NULL) return(0);
402 htmlDocContentDump(buf, cur);
403
404 output = fopen(filename, "w");
405 if (output == NULL) return(-1);
406 ret = xmlBufferDump(output, buf);
407 fclose(output);
408
409 xmlBufferFree(buf);
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000410 return(ret * sizeof(xmlChar));
Daniel Veillard167b5091999-07-07 04:19:20 +0000411}
412
Daniel Veillard361d8452000-04-03 19:48:13 +0000413#endif /* LIBXML_HTML_ENABLED */