blob: e4a955ccf3c93bd240d49809b6ee5ee968c6ba55 [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
Daniel Veillard7f7d1111999-09-22 09:46:25 +00009
Daniel Veillard3c558c31999-12-22 11:30:41 +000010#ifdef WIN32
11#include "win32config.h"
12#else
Daniel Veillard167b5091999-07-07 04:19:20 +000013#include "config.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000014#endif
Daniel Veillard361d8452000-04-03 19:48:13 +000015
16#include "xmlversion.h"
17#ifdef LIBXML_HTML_ENABLED
18
Daniel Veillard167b5091999-07-07 04:19:20 +000019#include <stdio.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000020#include <string.h> /* for memset() only ! */
21
Daniel Veillard7f7d1111999-09-22 09:46:25 +000022#ifdef HAVE_CTYPE_H
23#include <ctype.h>
24#endif
25#ifdef HAVE_STDLIB_H
26#include <stdlib.h>
27#endif
28
Daniel Veillard361d8452000-04-03 19:48:13 +000029#include <libxml/xmlmemory.h>
30#include <libxml/HTMLparser.h>
31#include <libxml/HTMLtree.h>
32#include <libxml/entities.h>
33#include <libxml/valid.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000034
Daniel Veillarddbfd6411999-12-28 16:35:14 +000035static void
36htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
37
Daniel Veillard167b5091999-07-07 04:19:20 +000038/**
39 * htmlDtdDump:
40 * @buf: the HTML buffer output
41 * @doc: the document
42 *
43 * Dump the HTML document DTD, if any.
44 */
45static void
46htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
47 xmlDtdPtr cur = doc->intSubset;
48
49 if (cur == NULL) {
50 fprintf(stderr, "htmlDtdDump : no internal subset\n");
51 return;
52 }
53 xmlBufferWriteChar(buf, "<!DOCTYPE ");
54 xmlBufferWriteCHAR(buf, cur->name);
55 if (cur->ExternalID != NULL) {
56 xmlBufferWriteChar(buf, " PUBLIC ");
57 xmlBufferWriteQuotedString(buf, cur->ExternalID);
Daniel Veillard1566d3a1999-07-15 14:24:29 +000058 if (cur->SystemID != NULL) {
59 xmlBufferWriteChar(buf, " ");
60 xmlBufferWriteQuotedString(buf, cur->SystemID);
61 }
Daniel Veillard167b5091999-07-07 04:19:20 +000062 } else if (cur->SystemID != NULL) {
63 xmlBufferWriteChar(buf, " SYSTEM ");
64 xmlBufferWriteQuotedString(buf, cur->SystemID);
65 }
Daniel Veillard167b5091999-07-07 04:19:20 +000066 xmlBufferWriteChar(buf, ">\n");
67}
68
69/**
70 * htmlAttrDump:
71 * @buf: the HTML buffer output
72 * @doc: the document
73 * @cur: the attribute pointer
74 *
75 * Dump an HTML attribute
76 */
77static void
78htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +000079 xmlChar *value;
Daniel Veillard167b5091999-07-07 04:19:20 +000080
81 if (cur == NULL) {
82 fprintf(stderr, "htmlAttrDump : property == NULL\n");
83 return;
84 }
85 xmlBufferWriteChar(buf, " ");
86 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillardcf461992000-03-14 18:30:20 +000087 value = xmlNodeListGetString(doc, cur->children, 0);
Daniel Veillard167b5091999-07-07 04:19:20 +000088 if (value) {
89 xmlBufferWriteChar(buf, "=");
90 xmlBufferWriteQuotedString(buf, value);
Daniel Veillard6454aec1999-09-02 22:04:43 +000091 xmlFree(value);
Daniel Veillard167b5091999-07-07 04:19:20 +000092 } else {
93 xmlBufferWriteChar(buf, "=\"\"");
94 }
95}
96
97/**
98 * htmlAttrListDump:
99 * @buf: the HTML buffer output
100 * @doc: the document
101 * @cur: the first attribute pointer
102 *
103 * Dump a list of HTML attributes
104 */
105static void
106htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
107 if (cur == NULL) {
108 fprintf(stderr, "htmlAttrListDump : property == NULL\n");
109 return;
110 }
111 while (cur != NULL) {
112 htmlAttrDump(buf, doc, cur);
113 cur = cur->next;
114 }
115}
116
117
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000118void
Daniel Veillard82150d81999-07-07 07:32:15 +0000119htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000120/**
121 * htmlNodeListDump:
122 * @buf: the HTML buffer output
123 * @doc: the document
124 * @cur: the first node
Daniel Veillard167b5091999-07-07 04:19:20 +0000125 *
126 * Dump an HTML node list, recursive behaviour,children are printed too.
127 */
128static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000129htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000130 if (cur == NULL) {
131 fprintf(stderr, "htmlNodeListDump : node == NULL\n");
132 return;
133 }
134 while (cur != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000135 htmlNodeDump(buf, doc, cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000136 cur = cur->next;
137 }
138}
139
140/**
141 * htmlNodeDump:
142 * @buf: the HTML buffer output
143 * @doc: the document
144 * @cur: the current node
Daniel Veillard167b5091999-07-07 04:19:20 +0000145 *
146 * Dump an HTML node, recursive behaviour,children are printed too.
147 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000148void
Daniel Veillard82150d81999-07-07 07:32:15 +0000149htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000150 htmlElemDescPtr info;
Daniel Veillard167b5091999-07-07 04:19:20 +0000151
152 if (cur == NULL) {
153 fprintf(stderr, "htmlNodeDump : node == NULL\n");
154 return;
155 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000156 /*
157 * Special cases.
158 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000159 if (cur->type == XML_HTML_DOCUMENT_NODE) {
160 htmlDocContentDump(buf, (xmlDocPtr) cur);
161 return;
162 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000163 if (cur->type == HTML_TEXT_NODE) {
164 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000165 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000166
Daniel Veillard82150d81999-07-07 07:32:15 +0000167 /* uses the HTML encoding routine !!!!!!!!!! */
Daniel Veillardd293fd11999-12-01 09:51:45 +0000168#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000169 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000170#else
171 buffer = xmlEncodeEntitiesReentrant(doc,
172 xmlBufferContent(cur->content));
173#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000174 if (buffer != NULL) {
175 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000176 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000177 }
178 }
179 return;
180 }
181 if (cur->type == HTML_COMMENT_NODE) {
182 if (cur->content != NULL) {
183 xmlBufferWriteChar(buf, "<!--");
Daniel Veillardd293fd11999-12-01 09:51:45 +0000184#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000185 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000186#else
187 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
188#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000189 xmlBufferWriteChar(buf, "-->");
190 }
191 return;
192 }
193 if (cur->type == HTML_ENTITY_REF_NODE) {
194 xmlBufferWriteChar(buf, "&");
195 xmlBufferWriteCHAR(buf, cur->name);
196 xmlBufferWriteChar(buf, ";");
197 return;
198 }
199
Daniel Veillard82150d81999-07-07 07:32:15 +0000200 /*
201 * Get specific HTmL info for taht node.
202 */
203 info = htmlTagLookup(cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000204
Daniel Veillard82150d81999-07-07 07:32:15 +0000205 xmlBufferWriteChar(buf, "<");
Daniel Veillard167b5091999-07-07 04:19:20 +0000206 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000207 if (cur->properties != NULL)
208 htmlAttrListDump(buf, doc, cur->properties);
209
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000210 if ((info != NULL) && (info->empty)) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000211 xmlBufferWriteChar(buf, ">");
212 if (cur->next != NULL) {
213 if ((cur->next->type != HTML_TEXT_NODE) &&
214 (cur->next->type != HTML_ENTITY_REF_NODE))
215 xmlBufferWriteChar(buf, "\n");
216 }
217 return;
218 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000219 if ((cur->content == NULL) && (cur->children == NULL)) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000220 if ((info != NULL) && (info->endTag != 0))
Daniel Veillard82150d81999-07-07 07:32:15 +0000221 xmlBufferWriteChar(buf, ">");
222 else {
223 xmlBufferWriteChar(buf, "></");
224 xmlBufferWriteCHAR(buf, cur->name);
225 xmlBufferWriteChar(buf, ">");
226 }
227 if (cur->next != NULL) {
228 if ((cur->next->type != HTML_TEXT_NODE) &&
229 (cur->next->type != HTML_ENTITY_REF_NODE))
230 xmlBufferWriteChar(buf, "\n");
231 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000232 return;
233 }
234 xmlBufferWriteChar(buf, ">");
235 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000236 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000237
Daniel Veillardd293fd11999-12-01 09:51:45 +0000238#ifndef XML_USE_BUFFER_CONTENT
239 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
240#else
241 buffer = xmlEncodeEntitiesReentrant(doc,
242 xmlBufferContent(cur->content));
243#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000244 if (buffer != NULL) {
245 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000246 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000247 }
248 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000249 if (cur->children != NULL) {
250 if ((cur->children->type != HTML_TEXT_NODE) &&
251 (cur->children->type != HTML_ENTITY_REF_NODE) &&
252 (cur->children != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000253 xmlBufferWriteChar(buf, "\n");
Daniel Veillardcf461992000-03-14 18:30:20 +0000254 htmlNodeListDump(buf, doc, cur->children);
Daniel Veillard82150d81999-07-07 07:32:15 +0000255 if ((cur->last->type != HTML_TEXT_NODE) &&
Chris Lahey6dff2141999-12-01 09:51:45 +0000256 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardcf461992000-03-14 18:30:20 +0000257 (cur->children != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000258 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000259 }
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000260 if (!htmlIsAutoClosed(doc, cur)) {
261 xmlBufferWriteChar(buf, "</");
262 xmlBufferWriteCHAR(buf, cur->name);
263 xmlBufferWriteChar(buf, ">");
264 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000265 if (cur->next != NULL) {
266 if ((cur->next->type != HTML_TEXT_NODE) &&
267 (cur->next->type != HTML_ENTITY_REF_NODE))
268 xmlBufferWriteChar(buf, "\n");
269 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000270}
271
272/**
Daniel Veillard5feb8492000-02-02 17:15:36 +0000273 * htmlNodeDumpFile:
274 * @out: the FILE pointer
275 * @doc: the document
276 * @cur: the current node
277 *
278 * Dump an HTML node, recursive behaviour,children are printed too.
279 */
280void
281htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
282 xmlBufferPtr buf;
283
284 buf = xmlBufferCreate();
285 if (buf == NULL) return;
286 htmlNodeDump(buf, doc, cur);
287 xmlBufferDump(out, buf);
288 xmlBufferFree(buf);
289}
290
291/**
Daniel Veillard167b5091999-07-07 04:19:20 +0000292 * htmlDocContentDump:
293 * @buf: the HTML buffer output
294 * @cur: the document
295 *
296 * Dump an HTML document.
297 */
298static void
299htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000300 int type;
301
302 /*
303 * force to output the stuff as HTML, especially for entities
304 */
305 type = cur->type;
306 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard167b5091999-07-07 04:19:20 +0000307 if (cur->intSubset != NULL)
308 htmlDtdDump(buf, cur);
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000309 else {
310 /* Default to HTML-4.0 transitionnal @@@@ */
311 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
312
313 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000314 if (cur->children != NULL) {
315 htmlNodeListDump(buf, cur, cur->children);
Daniel Veillard167b5091999-07-07 04:19:20 +0000316 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000317 xmlBufferWriteChar(buf, "\n");
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000318 cur->type = type;
Daniel Veillard167b5091999-07-07 04:19:20 +0000319}
320
321/**
322 * htmlDocDumpMemory:
323 * @cur: the document
324 * @mem: OUT: the memory pointer
325 * @size: OUT: the memory lenght
326 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000327 * Dump an HTML document in memory and return the xmlChar * and it's size.
Daniel Veillard167b5091999-07-07 04:19:20 +0000328 * It's up to the caller to free the memory.
329 */
330void
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000331htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000332 xmlBufferPtr buf;
333
334 if (cur == NULL) {
335#ifdef DEBUG_TREE
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000336 fprintf(stderr, "htmlxmlDocDumpMemory : document == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000337#endif
338 *mem = NULL;
339 *size = 0;
340 return;
341 }
342 buf = xmlBufferCreate();
343 if (buf == NULL) {
344 *mem = NULL;
345 *size = 0;
346 return;
347 }
348 htmlDocContentDump(buf, cur);
349 *mem = buf->content;
350 *size = buf->use;
351 memset(buf, -1, sizeof(xmlBuffer));
Daniel Veillard6454aec1999-09-02 22:04:43 +0000352 xmlFree(buf);
Daniel Veillard167b5091999-07-07 04:19:20 +0000353}
354
355
356/**
357 * htmlDocDump:
358 * @f: the FILE*
359 * @cur: the document
360 *
361 * Dump an HTML document to an open FILE.
362 */
363void
364htmlDocDump(FILE *f, xmlDocPtr cur) {
365 xmlBufferPtr buf;
366
367 if (cur == NULL) {
368#ifdef DEBUG_TREE
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000369 fprintf(stderr, "htmlDocDump : document == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000370#endif
371 return;
372 }
373 buf = xmlBufferCreate();
374 if (buf == NULL) return;
375 htmlDocContentDump(buf, cur);
376 xmlBufferDump(f, buf);
377 xmlBufferFree(buf);
378}
379
380/**
381 * htmlSaveFile:
382 * @filename: the filename
383 * @cur: the document
384 *
385 * Dump an HTML document to a file.
386 *
387 * returns: the number of byte written or -1 in case of failure.
388 */
389int
390htmlSaveFile(const char *filename, xmlDocPtr cur) {
391 xmlBufferPtr buf;
392 FILE *output = NULL;
393 int ret;
394
395 /*
396 * save the content to a temp buffer.
397 */
398 buf = xmlBufferCreate();
399 if (buf == NULL) return(0);
400 htmlDocContentDump(buf, cur);
401
402 output = fopen(filename, "w");
403 if (output == NULL) return(-1);
404 ret = xmlBufferDump(output, buf);
405 fclose(output);
406
407 xmlBufferFree(buf);
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000408 return(ret * sizeof(xmlChar));
Daniel Veillard167b5091999-07-07 04:19:20 +0000409}
410
Daniel Veillard361d8452000-04-03 19:48:13 +0000411#endif /* LIBXML_HTML_ENABLED */