blob: d981ec0ea7538dd9cefea8727d5976a67d8877dc [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
Daniel Veillard7f7d1111999-09-22 09:46:25 +00009
Daniel Veillard3c558c31999-12-22 11:30:41 +000010#ifdef WIN32
11#include "win32config.h"
12#else
Daniel Veillard167b5091999-07-07 04:19:20 +000013#include "config.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000014#endif
Daniel Veillard361d8452000-04-03 19:48:13 +000015
16#include "xmlversion.h"
17#ifdef LIBXML_HTML_ENABLED
18
Daniel Veillard167b5091999-07-07 04:19:20 +000019#include <stdio.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000020#include <string.h> /* for memset() only ! */
21
Daniel Veillard7f7d1111999-09-22 09:46:25 +000022#ifdef HAVE_CTYPE_H
23#include <ctype.h>
24#endif
25#ifdef HAVE_STDLIB_H
26#include <stdlib.h>
27#endif
28
Daniel Veillard361d8452000-04-03 19:48:13 +000029#include <libxml/xmlmemory.h>
30#include <libxml/HTMLparser.h>
31#include <libxml/HTMLtree.h>
32#include <libxml/entities.h>
33#include <libxml/valid.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000034
Daniel Veillarddbfd6411999-12-28 16:35:14 +000035static void
36htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
37
Daniel Veillard167b5091999-07-07 04:19:20 +000038/**
39 * htmlDtdDump:
40 * @buf: the HTML buffer output
41 * @doc: the document
42 *
43 * Dump the HTML document DTD, if any.
44 */
45static void
46htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
47 xmlDtdPtr cur = doc->intSubset;
48
49 if (cur == NULL) {
50 fprintf(stderr, "htmlDtdDump : no internal subset\n");
51 return;
52 }
53 xmlBufferWriteChar(buf, "<!DOCTYPE ");
54 xmlBufferWriteCHAR(buf, cur->name);
55 if (cur->ExternalID != NULL) {
56 xmlBufferWriteChar(buf, " PUBLIC ");
57 xmlBufferWriteQuotedString(buf, cur->ExternalID);
Daniel Veillard1566d3a1999-07-15 14:24:29 +000058 if (cur->SystemID != NULL) {
59 xmlBufferWriteChar(buf, " ");
60 xmlBufferWriteQuotedString(buf, cur->SystemID);
61 }
Daniel Veillard167b5091999-07-07 04:19:20 +000062 } else if (cur->SystemID != NULL) {
63 xmlBufferWriteChar(buf, " SYSTEM ");
64 xmlBufferWriteQuotedString(buf, cur->SystemID);
65 }
Daniel Veillard167b5091999-07-07 04:19:20 +000066 xmlBufferWriteChar(buf, ">\n");
67}
68
69/**
70 * htmlAttrDump:
71 * @buf: the HTML buffer output
72 * @doc: the document
73 * @cur: the attribute pointer
74 *
75 * Dump an HTML attribute
76 */
77static void
78htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +000079 xmlChar *value;
Daniel Veillard167b5091999-07-07 04:19:20 +000080
81 if (cur == NULL) {
82 fprintf(stderr, "htmlAttrDump : property == NULL\n");
83 return;
84 }
85 xmlBufferWriteChar(buf, " ");
86 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillardbe803962000-06-28 23:40:59 +000087 if (cur->children != NULL) {
88 value = xmlNodeListGetString(doc, cur->children, 0);
89 if (value) {
90 xmlBufferWriteChar(buf, "=");
91 xmlBufferWriteQuotedString(buf, value);
92 xmlFree(value);
93 } else {
94 xmlBufferWriteChar(buf, "=\"\"");
95 }
Daniel Veillard167b5091999-07-07 04:19:20 +000096 }
97}
98
99/**
100 * htmlAttrListDump:
101 * @buf: the HTML buffer output
102 * @doc: the document
103 * @cur: the first attribute pointer
104 *
105 * Dump a list of HTML attributes
106 */
107static void
108htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
109 if (cur == NULL) {
110 fprintf(stderr, "htmlAttrListDump : property == NULL\n");
111 return;
112 }
113 while (cur != NULL) {
114 htmlAttrDump(buf, doc, cur);
115 cur = cur->next;
116 }
117}
118
119
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000120void
Daniel Veillard82150d81999-07-07 07:32:15 +0000121htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000122/**
123 * htmlNodeListDump:
124 * @buf: the HTML buffer output
125 * @doc: the document
126 * @cur: the first node
Daniel Veillard167b5091999-07-07 04:19:20 +0000127 *
128 * Dump an HTML node list, recursive behaviour,children are printed too.
129 */
130static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000131htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000132 if (cur == NULL) {
133 fprintf(stderr, "htmlNodeListDump : node == NULL\n");
134 return;
135 }
136 while (cur != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000137 htmlNodeDump(buf, doc, cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000138 cur = cur->next;
139 }
140}
141
142/**
143 * htmlNodeDump:
144 * @buf: the HTML buffer output
145 * @doc: the document
146 * @cur: the current node
Daniel Veillard167b5091999-07-07 04:19:20 +0000147 *
148 * Dump an HTML node, recursive behaviour,children are printed too.
149 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000150void
Daniel Veillard82150d81999-07-07 07:32:15 +0000151htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000152 htmlElemDescPtr info;
Daniel Veillard167b5091999-07-07 04:19:20 +0000153
154 if (cur == NULL) {
155 fprintf(stderr, "htmlNodeDump : node == NULL\n");
156 return;
157 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000158 /*
159 * Special cases.
160 */
Daniel Veillardd83eb822000-06-30 18:39:56 +0000161 if (cur->type == XML_DTD_NODE)
162 return;
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000163 if (cur->type == XML_HTML_DOCUMENT_NODE) {
164 htmlDocContentDump(buf, (xmlDocPtr) cur);
165 return;
166 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000167 if (cur->type == HTML_TEXT_NODE) {
168 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000169 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000170
Daniel Veillard82150d81999-07-07 07:32:15 +0000171 /* uses the HTML encoding routine !!!!!!!!!! */
Daniel Veillardd293fd11999-12-01 09:51:45 +0000172#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000173 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000174#else
175 buffer = xmlEncodeEntitiesReentrant(doc,
176 xmlBufferContent(cur->content));
177#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000178 if (buffer != NULL) {
179 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000180 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000181 }
182 }
183 return;
184 }
185 if (cur->type == HTML_COMMENT_NODE) {
186 if (cur->content != NULL) {
187 xmlBufferWriteChar(buf, "<!--");
Daniel Veillardd293fd11999-12-01 09:51:45 +0000188#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000189 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000190#else
191 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
192#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000193 xmlBufferWriteChar(buf, "-->");
194 }
195 return;
196 }
197 if (cur->type == HTML_ENTITY_REF_NODE) {
198 xmlBufferWriteChar(buf, "&");
199 xmlBufferWriteCHAR(buf, cur->name);
200 xmlBufferWriteChar(buf, ";");
201 return;
202 }
203
Daniel Veillard82150d81999-07-07 07:32:15 +0000204 /*
205 * Get specific HTmL info for taht node.
206 */
207 info = htmlTagLookup(cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000208
Daniel Veillard82150d81999-07-07 07:32:15 +0000209 xmlBufferWriteChar(buf, "<");
Daniel Veillard167b5091999-07-07 04:19:20 +0000210 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000211 if (cur->properties != NULL)
212 htmlAttrListDump(buf, doc, cur->properties);
213
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000214 if ((info != NULL) && (info->empty)) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000215 xmlBufferWriteChar(buf, ">");
216 if (cur->next != NULL) {
217 if ((cur->next->type != HTML_TEXT_NODE) &&
218 (cur->next->type != HTML_ENTITY_REF_NODE))
219 xmlBufferWriteChar(buf, "\n");
220 }
221 return;
222 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000223 if ((cur->content == NULL) && (cur->children == NULL)) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000224 if ((info != NULL) && (info->endTag != 0))
Daniel Veillard82150d81999-07-07 07:32:15 +0000225 xmlBufferWriteChar(buf, ">");
226 else {
227 xmlBufferWriteChar(buf, "></");
228 xmlBufferWriteCHAR(buf, cur->name);
229 xmlBufferWriteChar(buf, ">");
230 }
231 if (cur->next != NULL) {
232 if ((cur->next->type != HTML_TEXT_NODE) &&
233 (cur->next->type != HTML_ENTITY_REF_NODE))
234 xmlBufferWriteChar(buf, "\n");
235 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000236 return;
237 }
238 xmlBufferWriteChar(buf, ">");
239 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000240 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000241
Daniel Veillardd293fd11999-12-01 09:51:45 +0000242#ifndef XML_USE_BUFFER_CONTENT
243 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
244#else
245 buffer = xmlEncodeEntitiesReentrant(doc,
246 xmlBufferContent(cur->content));
247#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000248 if (buffer != NULL) {
249 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000250 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000251 }
252 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000253 if (cur->children != NULL) {
254 if ((cur->children->type != HTML_TEXT_NODE) &&
255 (cur->children->type != HTML_ENTITY_REF_NODE) &&
256 (cur->children != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000257 xmlBufferWriteChar(buf, "\n");
Daniel Veillardcf461992000-03-14 18:30:20 +0000258 htmlNodeListDump(buf, doc, cur->children);
Daniel Veillard82150d81999-07-07 07:32:15 +0000259 if ((cur->last->type != HTML_TEXT_NODE) &&
Chris Lahey6dff2141999-12-01 09:51:45 +0000260 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardcf461992000-03-14 18:30:20 +0000261 (cur->children != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000262 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000263 }
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000264 if (!htmlIsAutoClosed(doc, cur)) {
265 xmlBufferWriteChar(buf, "</");
266 xmlBufferWriteCHAR(buf, cur->name);
267 xmlBufferWriteChar(buf, ">");
268 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000269 if (cur->next != NULL) {
270 if ((cur->next->type != HTML_TEXT_NODE) &&
271 (cur->next->type != HTML_ENTITY_REF_NODE))
272 xmlBufferWriteChar(buf, "\n");
273 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000274}
275
276/**
Daniel Veillard5feb8492000-02-02 17:15:36 +0000277 * htmlNodeDumpFile:
278 * @out: the FILE pointer
279 * @doc: the document
280 * @cur: the current node
281 *
282 * Dump an HTML node, recursive behaviour,children are printed too.
283 */
284void
285htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
286 xmlBufferPtr buf;
287
288 buf = xmlBufferCreate();
289 if (buf == NULL) return;
290 htmlNodeDump(buf, doc, cur);
291 xmlBufferDump(out, buf);
292 xmlBufferFree(buf);
293}
294
295/**
Daniel Veillard167b5091999-07-07 04:19:20 +0000296 * htmlDocContentDump:
297 * @buf: the HTML buffer output
298 * @cur: the document
299 *
300 * Dump an HTML document.
301 */
302static void
303htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000304 int type;
305
306 /*
307 * force to output the stuff as HTML, especially for entities
308 */
309 type = cur->type;
310 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard167b5091999-07-07 04:19:20 +0000311 if (cur->intSubset != NULL)
312 htmlDtdDump(buf, cur);
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000313 else {
314 /* Default to HTML-4.0 transitionnal @@@@ */
315 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
316
317 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000318 if (cur->children != NULL) {
319 htmlNodeListDump(buf, cur, cur->children);
Daniel Veillard167b5091999-07-07 04:19:20 +0000320 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000321 xmlBufferWriteChar(buf, "\n");
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000322 cur->type = type;
Daniel Veillard167b5091999-07-07 04:19:20 +0000323}
324
325/**
326 * htmlDocDumpMemory:
327 * @cur: the document
328 * @mem: OUT: the memory pointer
329 * @size: OUT: the memory lenght
330 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000331 * Dump an HTML document in memory and return the xmlChar * and it's size.
Daniel Veillard167b5091999-07-07 04:19:20 +0000332 * It's up to the caller to free the memory.
333 */
334void
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000335htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000336 xmlBufferPtr buf;
337
338 if (cur == NULL) {
339#ifdef DEBUG_TREE
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000340 fprintf(stderr, "htmlxmlDocDumpMemory : document == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000341#endif
342 *mem = NULL;
343 *size = 0;
344 return;
345 }
346 buf = xmlBufferCreate();
347 if (buf == NULL) {
348 *mem = NULL;
349 *size = 0;
350 return;
351 }
352 htmlDocContentDump(buf, cur);
353 *mem = buf->content;
354 *size = buf->use;
355 memset(buf, -1, sizeof(xmlBuffer));
Daniel Veillard6454aec1999-09-02 22:04:43 +0000356 xmlFree(buf);
Daniel Veillard167b5091999-07-07 04:19:20 +0000357}
358
359
360/**
361 * htmlDocDump:
362 * @f: the FILE*
363 * @cur: the document
364 *
365 * Dump an HTML document to an open FILE.
366 */
367void
368htmlDocDump(FILE *f, xmlDocPtr cur) {
369 xmlBufferPtr buf;
370
371 if (cur == NULL) {
372#ifdef DEBUG_TREE
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000373 fprintf(stderr, "htmlDocDump : document == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000374#endif
375 return;
376 }
377 buf = xmlBufferCreate();
378 if (buf == NULL) return;
379 htmlDocContentDump(buf, cur);
380 xmlBufferDump(f, buf);
381 xmlBufferFree(buf);
382}
383
384/**
385 * htmlSaveFile:
386 * @filename: the filename
387 * @cur: the document
388 *
389 * Dump an HTML document to a file.
390 *
391 * returns: the number of byte written or -1 in case of failure.
392 */
393int
394htmlSaveFile(const char *filename, xmlDocPtr cur) {
395 xmlBufferPtr buf;
396 FILE *output = NULL;
397 int ret;
398
399 /*
400 * save the content to a temp buffer.
401 */
402 buf = xmlBufferCreate();
403 if (buf == NULL) return(0);
404 htmlDocContentDump(buf, cur);
405
406 output = fopen(filename, "w");
407 if (output == NULL) return(-1);
408 ret = xmlBufferDump(output, buf);
409 fclose(output);
410
411 xmlBufferFree(buf);
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000412 return(ret * sizeof(xmlChar));
Daniel Veillard167b5091999-07-07 04:19:20 +0000413}
414
Daniel Veillard361d8452000-04-03 19:48:13 +0000415#endif /* LIBXML_HTML_ENABLED */