blob: e02465dd2ac029b0656945498991e8d24ca71ab7 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002 * HTMLtree.c : implementation of access function for an HTML tree.
Owen Taylor3473f882001-02-23 17:55:21 +00003 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00006 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00007 */
8
9
Daniel Veillard34ce8be2002-03-18 19:37:11 +000010#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000012#ifdef LIBXML_HTML_ENABLED
13
Owen Taylor3473f882001-02-23 17:55:21 +000014#ifdef HAVE_CTYPE_H
15#include <ctype.h>
16#endif
17#ifdef HAVE_STDLIB_H
18#include <stdlib.h>
19#endif
20
21#include <libxml/xmlmemory.h>
22#include <libxml/HTMLparser.h>
23#include <libxml/HTMLtree.h>
24#include <libxml/entities.h>
25#include <libxml/valid.h>
26#include <libxml/xmlerror.h>
27#include <libxml/parserInternals.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000028#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000029
30/************************************************************************
31 * *
32 * Getting/Setting encoding meta tags *
33 * *
34 ************************************************************************/
35
36/**
37 * htmlGetMetaEncoding:
38 * @doc: the document
39 *
40 * Encoding definition lookup in the Meta tags
41 *
42 * Returns the current encoding as flagged in the HTML source
43 */
44const xmlChar *
45htmlGetMetaEncoding(htmlDocPtr doc) {
46 htmlNodePtr cur;
47 const xmlChar *content;
48 const xmlChar *encoding;
49
50 if (doc == NULL)
51 return(NULL);
52 cur = doc->children;
53
54 /*
55 * Search the html
56 */
57 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000058 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000059 if (xmlStrEqual(cur->name, BAD_CAST"html"))
60 break;
61 if (xmlStrEqual(cur->name, BAD_CAST"head"))
62 goto found_head;
63 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
64 goto found_meta;
65 }
66 cur = cur->next;
67 }
68 if (cur == NULL)
69 return(NULL);
70 cur = cur->children;
71
72 /*
73 * Search the head
74 */
75 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000076 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000077 if (xmlStrEqual(cur->name, BAD_CAST"head"))
78 break;
79 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
80 goto found_meta;
81 }
82 cur = cur->next;
83 }
84 if (cur == NULL)
85 return(NULL);
86found_head:
87 cur = cur->children;
88
89 /*
90 * Search the meta elements
91 */
92found_meta:
93 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000094 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000095 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
96 xmlAttrPtr attr = cur->properties;
97 int http;
98 const xmlChar *value;
99
100 content = NULL;
101 http = 0;
102 while (attr != NULL) {
103 if ((attr->children != NULL) &&
104 (attr->children->type == XML_TEXT_NODE) &&
105 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000106 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000107 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
108 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
109 http = 1;
110 else if ((value != NULL)
111 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
112 content = value;
113 if ((http != 0) && (content != NULL))
114 goto found_content;
115 }
116 attr = attr->next;
117 }
118 }
119 }
120 cur = cur->next;
121 }
122 return(NULL);
123
124found_content:
125 encoding = xmlStrstr(content, BAD_CAST"charset=");
126 if (encoding == NULL)
127 encoding = xmlStrstr(content, BAD_CAST"Charset=");
128 if (encoding == NULL)
129 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
130 if (encoding != NULL) {
131 encoding += 8;
132 } else {
133 encoding = xmlStrstr(content, BAD_CAST"charset =");
134 if (encoding == NULL)
135 encoding = xmlStrstr(content, BAD_CAST"Charset =");
136 if (encoding == NULL)
137 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
138 if (encoding != NULL)
139 encoding += 9;
140 }
141 if (encoding != NULL) {
142 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
143 }
144 return(encoding);
145}
146
147/**
148 * htmlSetMetaEncoding:
149 * @doc: the document
150 * @encoding: the encoding string
151 *
152 * Sets the current encoding in the Meta tags
153 * NOTE: this will not change the document content encoding, just
154 * the META flag associated.
155 *
156 * Returns 0 in case of success and -1 in case of error
157 */
158int
159htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
160 htmlNodePtr cur, meta;
161 const xmlChar *content;
162 char newcontent[100];
163
164
165 if (doc == NULL)
166 return(-1);
167
168 if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000169 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
170 encoding);
Owen Taylor3473f882001-02-23 17:55:21 +0000171 newcontent[sizeof(newcontent) - 1] = 0;
172 }
173
174 cur = doc->children;
175
176 /*
177 * Search the html
178 */
179 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000180 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000181 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
182 break;
183 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
184 goto found_head;
185 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
186 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000187 }
188 cur = cur->next;
189 }
190 if (cur == NULL)
191 return(-1);
192 cur = cur->children;
193
194 /*
195 * Search the head
196 */
197 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000198 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000199 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
200 break;
201 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
202 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000203 }
204 cur = cur->next;
205 }
206 if (cur == NULL)
207 return(-1);
208found_head:
209 if (cur->children == NULL) {
210 if (encoding == NULL)
211 return(0);
212 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
213 xmlAddChild(cur, meta);
Owen Taylor3473f882001-02-23 17:55:21 +0000214 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000215 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Owen Taylor3473f882001-02-23 17:55:21 +0000216 return(0);
217 }
218 cur = cur->children;
219
220found_meta:
221 if (encoding != NULL) {
222 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000223 * Create a new Meta element with the right attributes
Owen Taylor3473f882001-02-23 17:55:21 +0000224 */
225
226 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
227 xmlAddPrevSibling(cur, meta);
Owen Taylor3473f882001-02-23 17:55:21 +0000228 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000229 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Owen Taylor3473f882001-02-23 17:55:21 +0000230 }
231
232 /*
233 * Search and destroy all the remaining the meta elements carrying
234 * encoding informations
235 */
236 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000237 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000238 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000239 xmlAttrPtr attr = cur->properties;
240 int http;
241 const xmlChar *value;
242
243 content = NULL;
244 http = 0;
245 while (attr != NULL) {
246 if ((attr->children != NULL) &&
247 (attr->children->type == XML_TEXT_NODE) &&
248 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000249 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000250 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
251 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
252 http = 1;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000253 else
254 {
255 if ((value != NULL) &&
256 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
257 content = value;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000258 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000259 if ((http != 0) && (content != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +0000260 break;
261 }
262 attr = attr->next;
263 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000264 if ((http != 0) && (content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000265 meta = cur;
266 cur = cur->next;
267 xmlUnlinkNode(meta);
268 xmlFreeNode(meta);
269 continue;
270 }
271
272 }
273 }
274 cur = cur->next;
275 }
276 return(0);
277}
278
279/************************************************************************
280 * *
281 * Dumping HTML tree content to a simple buffer *
282 * *
283 ************************************************************************/
284
285static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000286htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format);
Daniel Veillard86fd5a72001-12-13 14:55:21 +0000287static void
288htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
289 int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000290
291/**
292 * htmlDtdDump:
293 * @buf: the HTML buffer output
294 * @doc: the document
295 *
296 * Dump the HTML document DTD, if any.
297 */
298static void
299htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
300 xmlDtdPtr cur = doc->intSubset;
301
302 if (cur == NULL) {
303 xmlGenericError(xmlGenericErrorContext,
304 "htmlDtdDump : no internal subset\n");
305 return;
306 }
307 xmlBufferWriteChar(buf, "<!DOCTYPE ");
308 xmlBufferWriteCHAR(buf, cur->name);
309 if (cur->ExternalID != NULL) {
310 xmlBufferWriteChar(buf, " PUBLIC ");
311 xmlBufferWriteQuotedString(buf, cur->ExternalID);
312 if (cur->SystemID != NULL) {
313 xmlBufferWriteChar(buf, " ");
314 xmlBufferWriteQuotedString(buf, cur->SystemID);
315 }
316 } else if (cur->SystemID != NULL) {
317 xmlBufferWriteChar(buf, " SYSTEM ");
318 xmlBufferWriteQuotedString(buf, cur->SystemID);
319 }
320 xmlBufferWriteChar(buf, ">\n");
321}
322
323/**
324 * htmlAttrDump:
325 * @buf: the HTML buffer output
326 * @doc: the document
327 * @cur: the attribute pointer
328 *
329 * Dump an HTML attribute
330 */
331static void
332htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
333 xmlChar *value;
334
Daniel Veillardeca60d02001-06-13 07:45:41 +0000335 /*
336 * TODO: The html output method should not escape a & character
337 * occurring in an attribute value immediately followed by
338 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
339 */
340
Owen Taylor3473f882001-02-23 17:55:21 +0000341 if (cur == NULL) {
342 xmlGenericError(xmlGenericErrorContext,
343 "htmlAttrDump : property == NULL\n");
344 return;
345 }
346 xmlBufferWriteChar(buf, " ");
347 xmlBufferWriteCHAR(buf, cur->name);
348 if (cur->children != NULL) {
349 value = xmlNodeListGetString(doc, cur->children, 0);
350 if (value) {
351 xmlBufferWriteChar(buf, "=");
352 xmlBufferWriteQuotedString(buf, value);
353 xmlFree(value);
354 } else {
355 xmlBufferWriteChar(buf, "=\"\"");
356 }
357 }
358}
359
360/**
361 * htmlAttrListDump:
362 * @buf: the HTML buffer output
363 * @doc: the document
364 * @cur: the first attribute pointer
365 *
366 * Dump a list of HTML attributes
367 */
368static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000369htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, int format) {
370 int i = 0;
371
Owen Taylor3473f882001-02-23 17:55:21 +0000372 if (cur == NULL) {
373 xmlGenericError(xmlGenericErrorContext,
374 "htmlAttrListDump : property == NULL\n");
375 return;
376 }
377 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000378 i++;
379 if ((format) && (i >= 5)) {
380 i = 0;
381 xmlBufferWriteChar(buf, "\n");
382 }
Owen Taylor3473f882001-02-23 17:55:21 +0000383 htmlAttrDump(buf, doc, cur);
384 cur = cur->next;
385 }
386}
387
Owen Taylor3473f882001-02-23 17:55:21 +0000388/**
389 * htmlNodeListDump:
390 * @buf: the HTML buffer output
391 * @doc: the document
392 * @cur: the first node
393 *
394 * Dump an HTML node list, recursive behaviour,children are printed too.
395 */
396static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000397htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000398 if (cur == NULL) {
399 xmlGenericError(xmlGenericErrorContext,
400 "htmlNodeListDump : node == NULL\n");
401 return;
402 }
403 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000404 htmlNodeDumpFormat(buf, doc, cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000405 cur = cur->next;
406 }
407}
408
409/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000410 * htmlNodeDumpFormat:
Owen Taylor3473f882001-02-23 17:55:21 +0000411 * @buf: the HTML buffer output
412 * @doc: the document
413 * @cur: the current node
Daniel Veillard95d845f2001-06-13 13:48:46 +0000414 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000415 *
416 * Dump an HTML node, recursive behaviour,children are printed too.
417 */
Daniel Veillard86fd5a72001-12-13 14:55:21 +0000418static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000419htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
420 int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000421 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000422
423 if (cur == NULL) {
424 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000425 "htmlNodeDumpFormat : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000426 return;
427 }
428 /*
429 * Special cases.
430 */
431 if (cur->type == XML_DTD_NODE)
432 return;
433 if (cur->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000434 htmlDocContentDump(buf, (xmlDocPtr) cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000435 return;
436 }
437 if (cur->type == HTML_TEXT_NODE) {
438 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000439 if (((cur->name == (const xmlChar *)xmlStringText) ||
440 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000441 ((cur->parent == NULL) ||
442 (!xmlStrEqual(cur->parent->name, BAD_CAST "script")))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000443 xmlChar *buffer;
444
Owen Taylor3473f882001-02-23 17:55:21 +0000445 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000446 if (buffer != NULL) {
447 xmlBufferWriteCHAR(buf, buffer);
448 xmlFree(buffer);
449 }
450 } else {
451 xmlBufferWriteCHAR(buf, cur->content);
452 }
453 }
454 return;
455 }
456 if (cur->type == HTML_COMMENT_NODE) {
457 if (cur->content != NULL) {
458 xmlBufferWriteChar(buf, "<!--");
Owen Taylor3473f882001-02-23 17:55:21 +0000459 xmlBufferWriteCHAR(buf, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000460 xmlBufferWriteChar(buf, "-->");
461 }
462 return;
463 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000464 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000465 if (cur->name == NULL)
466 return;
467 xmlBufferWriteChar(buf, "<?");
468 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000469 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000470 xmlBufferWriteChar(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000471 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000472 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000473 xmlBufferWriteChar(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000474 return;
475 }
Owen Taylor3473f882001-02-23 17:55:21 +0000476 if (cur->type == HTML_ENTITY_REF_NODE) {
477 xmlBufferWriteChar(buf, "&");
478 xmlBufferWriteCHAR(buf, cur->name);
479 xmlBufferWriteChar(buf, ";");
480 return;
481 }
Daniel Veillard083c2662001-05-08 08:27:14 +0000482 if (cur->type == HTML_PRESERVE_NODE) {
483 if (cur->content != NULL) {
Daniel Veillard083c2662001-05-08 08:27:14 +0000484 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillard083c2662001-05-08 08:27:14 +0000485 }
486 return;
487 }
Owen Taylor3473f882001-02-23 17:55:21 +0000488
489 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000490 * Get specific HTML info for that node.
Owen Taylor3473f882001-02-23 17:55:21 +0000491 */
492 info = htmlTagLookup(cur->name);
493
494 xmlBufferWriteChar(buf, "<");
495 xmlBufferWriteCHAR(buf, cur->name);
496 if (cur->properties != NULL)
Daniel Veillard95d845f2001-06-13 13:48:46 +0000497 htmlAttrListDump(buf, doc, cur->properties, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000498
499 if ((info != NULL) && (info->empty)) {
500 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000501 if ((format) && (info != NULL) && (!info->isinline) &&
502 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000503 if ((cur->next->type != HTML_TEXT_NODE) &&
504 (cur->next->type != HTML_ENTITY_REF_NODE))
505 xmlBufferWriteChar(buf, "\n");
506 }
507 return;
508 }
Daniel Veillard7db37732001-07-12 01:20:08 +0000509 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
510 (cur->children == NULL)) {
Daniel Veillard083c2662001-05-08 08:27:14 +0000511 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +0000512 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
513 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000514 xmlBufferWriteChar(buf, ">");
Daniel Veillard083c2662001-05-08 08:27:14 +0000515 } else {
Owen Taylor3473f882001-02-23 17:55:21 +0000516 xmlBufferWriteChar(buf, "></");
517 xmlBufferWriteCHAR(buf, cur->name);
518 xmlBufferWriteChar(buf, ">");
519 }
Daniel Veillard02bb1702001-06-13 21:11:59 +0000520 if ((format) && (info != NULL) && (!info->isinline) &&
521 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000522 if ((cur->next->type != HTML_TEXT_NODE) &&
523 (cur->next->type != HTML_ENTITY_REF_NODE))
524 xmlBufferWriteChar(buf, "\n");
525 }
526 return;
527 }
528 xmlBufferWriteChar(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +0000529 if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000530 xmlChar *buffer;
531
Daniel Veillard083c2662001-05-08 08:27:14 +0000532 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000533 if (buffer != NULL) {
534 xmlBufferWriteCHAR(buf, buffer);
535 xmlFree(buffer);
536 }
537 }
538 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +0000539 if ((format) && (info != NULL) && (!info->isinline) &&
540 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000541 (cur->children->type != HTML_ENTITY_REF_NODE) &&
542 (cur->children != cur->last))
543 xmlBufferWriteChar(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +0000544 htmlNodeListDump(buf, doc, cur->children, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +0000545 if ((format) && (info != NULL) && (!info->isinline) &&
546 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000547 (cur->last->type != HTML_ENTITY_REF_NODE) &&
548 (cur->children != cur->last))
549 xmlBufferWriteChar(buf, "\n");
550 }
Owen Taylor3473f882001-02-23 17:55:21 +0000551 xmlBufferWriteChar(buf, "</");
552 xmlBufferWriteCHAR(buf, cur->name);
553 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000554 if ((format) && (info != NULL) && (!info->isinline) &&
555 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000556 if ((cur->next->type != HTML_TEXT_NODE) &&
557 (cur->next->type != HTML_ENTITY_REF_NODE))
558 xmlBufferWriteChar(buf, "\n");
559 }
560}
561
562/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000563 * htmlNodeDump:
564 * @buf: the HTML buffer output
565 * @doc: the document
566 * @cur: the current node
567 *
568 * Dump an HTML node, recursive behaviour,children are printed too,
569 * and formatting returns are added.
570 */
571void
572htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
573 htmlNodeDumpFormat(buf, doc, cur, 1);
574}
575
576/**
577 * htmlNodeDumpFileFormat:
578 * @out: the FILE pointer
579 * @doc: the document
580 * @cur: the current node
581 * @encoding: the document encoding
582 * @format: should formatting spaces been added
583 *
584 * Dump an HTML node, recursive behaviour,children are printed too.
585 *
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000586 * TODO: if encoding == NULL try to save in the doc encoding
587 *
588 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard95d845f2001-06-13 13:48:46 +0000589 */
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000590int
591htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
592 xmlNodePtr cur, const char *encoding, int format) {
593 xmlOutputBufferPtr buf;
594 xmlCharEncodingHandlerPtr handler = NULL;
595 int ret;
Daniel Veillard95d845f2001-06-13 13:48:46 +0000596
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000597 if (encoding != NULL) {
598 xmlCharEncoding enc;
599
600 enc = xmlParseCharEncoding(encoding);
601 if (enc != XML_CHAR_ENCODING_UTF8) {
602 handler = xmlFindCharEncodingHandler(encoding);
603 if (handler == NULL)
604 return(-1);
605 }
606 }
607
608 /*
609 * Fallback to HTML or ASCII when the encoding is unspecified
610 */
611 if (handler == NULL)
612 handler = xmlFindCharEncodingHandler("HTML");
613 if (handler == NULL)
614 handler = xmlFindCharEncodingHandler("ascii");
615
616 /*
617 * save the content to a temp buffer.
618 */
619 buf = xmlOutputBufferCreateFile(out, handler);
620 if (buf == NULL) return(0);
621
622 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
623
624 ret = xmlOutputBufferClose(buf);
625 return(ret);
Daniel Veillard95d845f2001-06-13 13:48:46 +0000626}
627
628/**
Owen Taylor3473f882001-02-23 17:55:21 +0000629 * htmlNodeDumpFile:
630 * @out: the FILE pointer
631 * @doc: the document
632 * @cur: the current node
633 *
Daniel Veillard95d845f2001-06-13 13:48:46 +0000634 * Dump an HTML node, recursive behaviour,children are printed too,
635 * and formatting returns are added.
Owen Taylor3473f882001-02-23 17:55:21 +0000636 */
637void
638htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000639 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000640}
641
642/**
643 * htmlDocContentDump:
644 * @buf: the HTML buffer output
645 * @cur: the document
646 *
647 * Dump an HTML document.
648 */
649static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000650htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000651 int type;
652
653 /*
654 * force to output the stuff as HTML, especially for entities
655 */
656 type = cur->type;
657 cur->type = XML_HTML_DOCUMENT_NODE;
658 if (cur->intSubset != NULL)
659 htmlDtdDump(buf, cur);
660 else {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000661 /* Default to HTML-4.0 transitional @@@@ */
Owen Taylor3473f882001-02-23 17:55:21 +0000662 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
663
664 }
665 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000666 htmlNodeListDump(buf, cur, cur->children, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000667 }
668 xmlBufferWriteChar(buf, "\n");
669 cur->type = (xmlElementType) type;
670}
671
672/**
673 * htmlDocDumpMemory:
674 * @cur: the document
675 * @mem: OUT: the memory pointer
Daniel Veillard2d703722001-05-30 18:32:34 +0000676 * @size: OUT: the memory length
Owen Taylor3473f882001-02-23 17:55:21 +0000677 *
678 * Dump an HTML document in memory and return the xmlChar * and it's size.
679 * It's up to the caller to free the memory.
680 */
681void
682htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard2d703722001-05-30 18:32:34 +0000683 xmlOutputBufferPtr buf;
684 xmlCharEncodingHandlerPtr handler = NULL;
685 const char *encoding;
Owen Taylor3473f882001-02-23 17:55:21 +0000686
687 if (cur == NULL) {
688#ifdef DEBUG_TREE
689 xmlGenericError(xmlGenericErrorContext,
Daniel Veillard2d703722001-05-30 18:32:34 +0000690 "htmlDocDumpMemory : document == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000691#endif
692 *mem = NULL;
693 *size = 0;
694 return;
695 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000696
697 encoding = (const char *) htmlGetMetaEncoding(cur);
698
699 if (encoding != NULL) {
700 xmlCharEncoding enc;
701
702 enc = xmlParseCharEncoding(encoding);
703 if (enc != cur->charset) {
704 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
705 /*
706 * Not supported yet
707 */
708 *mem = NULL;
709 *size = 0;
710 return;
711 }
712
713 handler = xmlFindCharEncodingHandler(encoding);
714 if (handler == NULL) {
715 *mem = NULL;
716 *size = 0;
717 return;
718 }
719 }
720 }
721
722 /*
723 * Fallback to HTML or ASCII when the encoding is unspecified
724 */
725 if (handler == NULL)
726 handler = xmlFindCharEncodingHandler("HTML");
727 if (handler == NULL)
728 handler = xmlFindCharEncodingHandler("ascii");
729
730 buf = xmlAllocOutputBuffer(handler);
Owen Taylor3473f882001-02-23 17:55:21 +0000731 if (buf == NULL) {
732 *mem = NULL;
733 *size = 0;
734 return;
735 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000736
737 htmlDocContentDumpOutput(buf, cur, NULL);
738 xmlOutputBufferFlush(buf);
739 if (buf->conv != NULL) {
740 *size = buf->conv->use;
741 *mem = xmlStrndup(buf->conv->content, *size);
742 } else {
743 *size = buf->buffer->use;
744 *mem = xmlStrndup(buf->buffer->content, *size);
745 }
746 (void)xmlOutputBufferClose(buf);
Owen Taylor3473f882001-02-23 17:55:21 +0000747}
748
749
750/************************************************************************
751 * *
752 * Dumping HTML tree content to an I/O output buffer *
753 * *
754 ************************************************************************/
755
756/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000757 * htmlDtdDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000758 * @buf: the HTML buffer output
759 * @doc: the document
760 * @encoding: the encoding string
761 *
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000762 * TODO: check whether encoding is needed
763 *
Owen Taylor3473f882001-02-23 17:55:21 +0000764 * Dump the HTML document DTD, if any.
765 */
766static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000767htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000768 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000769 xmlDtdPtr cur = doc->intSubset;
770
771 if (cur == NULL) {
772 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000773 "htmlDtdDumpOutput : no internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000774 return;
775 }
776 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
777 xmlOutputBufferWriteString(buf, (const char *)cur->name);
778 if (cur->ExternalID != NULL) {
779 xmlOutputBufferWriteString(buf, " PUBLIC ");
780 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
781 if (cur->SystemID != NULL) {
782 xmlOutputBufferWriteString(buf, " ");
783 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
784 }
785 } else if (cur->SystemID != NULL) {
786 xmlOutputBufferWriteString(buf, " SYSTEM ");
787 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
788 }
789 xmlOutputBufferWriteString(buf, ">\n");
790}
791
792/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000793 * htmlAttrDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000794 * @buf: the HTML buffer output
795 * @doc: the document
796 * @cur: the attribute pointer
797 * @encoding: the encoding string
798 *
799 * Dump an HTML attribute
800 */
801static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000802htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000803 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000804 xmlChar *value;
805
Daniel Veillardeca60d02001-06-13 07:45:41 +0000806 /*
807 * TODO: The html output method should not escape a & character
808 * occurring in an attribute value immediately followed by
809 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
810 */
811
Owen Taylor3473f882001-02-23 17:55:21 +0000812 if (cur == NULL) {
813 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000814 "htmlAttrDumpOutput : property == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000815 return;
816 }
817 xmlOutputBufferWriteString(buf, " ");
818 xmlOutputBufferWriteString(buf, (const char *)cur->name);
819 if (cur->children != NULL) {
820 value = xmlNodeListGetString(doc, cur->children, 0);
821 if (value) {
822 xmlOutputBufferWriteString(buf, "=");
823 xmlBufferWriteQuotedString(buf->buffer, value);
824 xmlFree(value);
825 } else {
826 xmlOutputBufferWriteString(buf, "=\"\"");
827 }
828 }
829}
830
831/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000832 * htmlAttrListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000833 * @buf: the HTML buffer output
834 * @doc: the document
835 * @cur: the first attribute pointer
836 * @encoding: the encoding string
837 *
838 * Dump a list of HTML attributes
839 */
840static void
841htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
842 if (cur == NULL) {
843 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000844 "htmlAttrListDumpOutput : property == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000845 return;
846 }
847 while (cur != NULL) {
848 htmlAttrDumpOutput(buf, doc, cur, encoding);
849 cur = cur->next;
850 }
851}
852
853
854void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
855 xmlNodePtr cur, const char *encoding);
856
857/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000858 * htmlNodeListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000859 * @buf: the HTML buffer output
860 * @doc: the document
861 * @cur: the first node
862 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000863 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000864 *
865 * Dump an HTML node list, recursive behaviour,children are printed too.
866 */
867static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000868htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
869 xmlNodePtr cur, const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000870 if (cur == NULL) {
871 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000872 "htmlNodeListDumpOutput : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000873 return;
874 }
875 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000876 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000877 cur = cur->next;
878 }
879}
880
881/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000882 * htmlNodeDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000883 * @buf: the HTML buffer output
884 * @doc: the document
885 * @cur: the current node
886 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000887 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000888 *
889 * Dump an HTML node, recursive behaviour,children are printed too.
890 */
891void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000892htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
893 xmlNodePtr cur, const char *encoding, int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000894 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000895
896 if (cur == NULL) {
897 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000898 "htmlNodeDumpFormatOutput : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000899 return;
900 }
901 /*
902 * Special cases.
903 */
904 if (cur->type == XML_DTD_NODE)
905 return;
906 if (cur->type == XML_HTML_DOCUMENT_NODE) {
907 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
908 return;
909 }
910 if (cur->type == HTML_TEXT_NODE) {
911 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000912 if (((cur->name == (const xmlChar *)xmlStringText) ||
913 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000914 ((cur->parent == NULL) ||
915 (!xmlStrEqual(cur->parent->name, BAD_CAST "script")))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000916 xmlChar *buffer;
917
Owen Taylor3473f882001-02-23 17:55:21 +0000918 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000919 if (buffer != NULL) {
920 xmlOutputBufferWriteString(buf, (const char *)buffer);
921 xmlFree(buffer);
922 }
923 } else {
924 xmlOutputBufferWriteString(buf, (const char *)cur->content);
925 }
926 }
927 return;
928 }
929 if (cur->type == HTML_COMMENT_NODE) {
930 if (cur->content != NULL) {
931 xmlOutputBufferWriteString(buf, "<!--");
Owen Taylor3473f882001-02-23 17:55:21 +0000932 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000933 xmlOutputBufferWriteString(buf, "-->");
934 }
935 return;
936 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000937 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000938 if (cur->name == NULL)
939 return;
940 xmlOutputBufferWriteString(buf, "<?");
941 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000942 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000943 xmlOutputBufferWriteString(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000944 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000945 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000946 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000947 return;
948 }
Owen Taylor3473f882001-02-23 17:55:21 +0000949 if (cur->type == HTML_ENTITY_REF_NODE) {
950 xmlOutputBufferWriteString(buf, "&");
951 xmlOutputBufferWriteString(buf, (const char *)cur->name);
952 xmlOutputBufferWriteString(buf, ";");
953 return;
954 }
955 if (cur->type == HTML_PRESERVE_NODE) {
956 if (cur->content != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000957 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000958 }
959 return;
960 }
961
962 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000963 * Get specific HTML info for that node.
Owen Taylor3473f882001-02-23 17:55:21 +0000964 */
965 info = htmlTagLookup(cur->name);
966
967 xmlOutputBufferWriteString(buf, "<");
968 xmlOutputBufferWriteString(buf, (const char *)cur->name);
969 if (cur->properties != NULL)
970 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
971
972 if ((info != NULL) && (info->empty)) {
973 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000974 if ((format) && (!info->isinline) && (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000975 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +0000976 (cur->next->type != HTML_ENTITY_REF_NODE) &&
977 (cur->parent != NULL) &&
978 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +0000979 xmlOutputBufferWriteString(buf, "\n");
980 }
981 return;
982 }
Daniel Veillard7db37732001-07-12 01:20:08 +0000983 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
984 (cur->children == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000985 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +0000986 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
987 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000988 xmlOutputBufferWriteString(buf, ">");
989 } else {
990 xmlOutputBufferWriteString(buf, "></");
991 xmlOutputBufferWriteString(buf, (const char *)cur->name);
992 xmlOutputBufferWriteString(buf, ">");
993 }
Daniel Veillard02bb1702001-06-13 21:11:59 +0000994 if ((format) && (cur->next != NULL) &&
995 (info != NULL) && (!info->isinline)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000996 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +0000997 (cur->next->type != HTML_ENTITY_REF_NODE) &&
998 (cur->parent != NULL) &&
999 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001000 xmlOutputBufferWriteString(buf, "\n");
1001 }
1002 return;
1003 }
1004 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +00001005 if ((cur->type != XML_ELEMENT_NODE) &&
1006 (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001007 /*
1008 * Uses the OutputBuffer property to automatically convert
1009 * invalids to charrefs
1010 */
1011
Owen Taylor3473f882001-02-23 17:55:21 +00001012 xmlOutputBufferWriteString(buf, (const char *) cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +00001013 }
1014 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +00001015 if ((format) && (info != NULL) && (!info->isinline) &&
1016 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001017 (cur->children->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001018 (cur->children != cur->last) &&
1019 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001020 xmlOutputBufferWriteString(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +00001021 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +00001022 if ((format) && (info != NULL) && (!info->isinline) &&
1023 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001024 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001025 (cur->children != cur->last) &&
1026 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001027 xmlOutputBufferWriteString(buf, "\n");
1028 }
Owen Taylor3473f882001-02-23 17:55:21 +00001029 xmlOutputBufferWriteString(buf, "</");
1030 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1031 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +00001032 if ((format) && (info != NULL) && (!info->isinline) &&
1033 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001034 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001035 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1036 (cur->parent != NULL) &&
1037 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001038 xmlOutputBufferWriteString(buf, "\n");
1039 }
1040}
1041
1042/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001043 * htmlNodeDumpOutput:
1044 * @buf: the HTML buffer output
1045 * @doc: the document
1046 * @cur: the current node
1047 * @encoding: the encoding string
1048 *
1049 * Dump an HTML node, recursive behaviour,children are printed too,
1050 * and formatting returns/spaces are added.
1051 */
1052void
1053htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
1054 xmlNodePtr cur, const char *encoding) {
1055 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
1056}
1057
1058/**
1059 * htmlDocContentDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +00001060 * @buf: the HTML buffer output
1061 * @cur: the document
1062 * @encoding: the encoding string
Daniel Veillard9d06d302002-01-22 18:15:52 +00001063 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +00001064 *
1065 * Dump an HTML document.
1066 */
1067void
Daniel Veillard95d845f2001-06-13 13:48:46 +00001068htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1069 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001070 int type;
1071
1072 /*
1073 * force to output the stuff as HTML, especially for entities
1074 */
1075 type = cur->type;
1076 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard4dd93462001-04-02 15:16:19 +00001077 if (cur->intSubset != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001078 htmlDtdDumpOutput(buf, cur, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001079 }
1080 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +00001081 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001082 }
1083 xmlOutputBufferWriteString(buf, "\n");
1084 cur->type = (xmlElementType) type;
1085}
1086
Daniel Veillard95d845f2001-06-13 13:48:46 +00001087/**
1088 * htmlDocContentDumpOutput:
1089 * @buf: the HTML buffer output
1090 * @cur: the document
1091 * @encoding: the encoding string
1092 *
1093 * Dump an HTML document. Formating return/spaces are added.
1094 */
1095void
1096htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1097 const char *encoding) {
1098 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1099}
1100
Owen Taylor3473f882001-02-23 17:55:21 +00001101/************************************************************************
1102 * *
1103 * Saving functions front-ends *
1104 * *
1105 ************************************************************************/
1106
1107/**
1108 * htmlDocDump:
1109 * @f: the FILE*
1110 * @cur: the document
1111 *
1112 * Dump an HTML document to an open FILE.
1113 *
1114 * returns: the number of byte written or -1 in case of failure.
1115 */
1116int
1117htmlDocDump(FILE *f, xmlDocPtr cur) {
1118 xmlOutputBufferPtr buf;
1119 xmlCharEncodingHandlerPtr handler = NULL;
1120 const char *encoding;
1121 int ret;
1122
1123 if (cur == NULL) {
1124#ifdef DEBUG_TREE
1125 xmlGenericError(xmlGenericErrorContext,
1126 "htmlDocDump : document == NULL\n");
1127#endif
1128 return(-1);
1129 }
1130
1131 encoding = (const char *) htmlGetMetaEncoding(cur);
1132
1133 if (encoding != NULL) {
1134 xmlCharEncoding enc;
1135
1136 enc = xmlParseCharEncoding(encoding);
1137 if (enc != cur->charset) {
1138 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1139 /*
1140 * Not supported yet
1141 */
1142 return(-1);
1143 }
1144
1145 handler = xmlFindCharEncodingHandler(encoding);
1146 if (handler == NULL)
1147 return(-1);
1148 }
1149 }
1150
1151 /*
1152 * Fallback to HTML or ASCII when the encoding is unspecified
1153 */
1154 if (handler == NULL)
1155 handler = xmlFindCharEncodingHandler("HTML");
1156 if (handler == NULL)
1157 handler = xmlFindCharEncodingHandler("ascii");
1158
1159 buf = xmlOutputBufferCreateFile(f, handler);
1160 if (buf == NULL) return(-1);
1161 htmlDocContentDumpOutput(buf, cur, NULL);
1162
1163 ret = xmlOutputBufferClose(buf);
1164 return(ret);
1165}
1166
1167/**
1168 * htmlSaveFile:
1169 * @filename: the filename (or URL)
1170 * @cur: the document
1171 *
1172 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1173 * used.
1174 * returns: the number of byte written or -1 in case of failure.
1175 */
1176int
1177htmlSaveFile(const char *filename, xmlDocPtr cur) {
1178 xmlOutputBufferPtr buf;
1179 xmlCharEncodingHandlerPtr handler = NULL;
1180 const char *encoding;
1181 int ret;
1182
1183 encoding = (const char *) htmlGetMetaEncoding(cur);
1184
1185 if (encoding != NULL) {
1186 xmlCharEncoding enc;
1187
1188 enc = xmlParseCharEncoding(encoding);
1189 if (enc != cur->charset) {
1190 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1191 /*
1192 * Not supported yet
1193 */
1194 return(-1);
1195 }
1196
1197 handler = xmlFindCharEncodingHandler(encoding);
1198 if (handler == NULL)
1199 return(-1);
1200 }
1201 }
1202
1203 /*
1204 * Fallback to HTML or ASCII when the encoding is unspecified
1205 */
1206 if (handler == NULL)
1207 handler = xmlFindCharEncodingHandler("HTML");
1208 if (handler == NULL)
1209 handler = xmlFindCharEncodingHandler("ascii");
1210
1211 /*
1212 * save the content to a temp buffer.
1213 */
1214 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1215 if (buf == NULL) return(0);
1216
1217 htmlDocContentDumpOutput(buf, cur, NULL);
1218
1219 ret = xmlOutputBufferClose(buf);
1220 return(ret);
1221}
1222
1223/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001224 * htmlSaveFileFormat:
Owen Taylor3473f882001-02-23 17:55:21 +00001225 * @filename: the filename
1226 * @cur: the document
Daniel Veillard95d845f2001-06-13 13:48:46 +00001227 * @format: should formatting spaces been added
1228 * @encoding: the document encoding
Owen Taylor3473f882001-02-23 17:55:21 +00001229 *
1230 * Dump an HTML document to a file using a given encoding.
1231 *
1232 * returns: the number of byte written or -1 in case of failure.
1233 */
1234int
Daniel Veillard95d845f2001-06-13 13:48:46 +00001235htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1236 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001237 xmlOutputBufferPtr buf;
1238 xmlCharEncodingHandlerPtr handler = NULL;
1239 int ret;
1240
1241 if (encoding != NULL) {
1242 xmlCharEncoding enc;
1243
1244 enc = xmlParseCharEncoding(encoding);
1245 if (enc != cur->charset) {
1246 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1247 /*
1248 * Not supported yet
1249 */
1250 return(-1);
1251 }
1252
1253 handler = xmlFindCharEncodingHandler(encoding);
1254 if (handler == NULL)
1255 return(-1);
1256 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1257 }
Daniel Veillard4dd93462001-04-02 15:16:19 +00001258 } else {
1259 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
Owen Taylor3473f882001-02-23 17:55:21 +00001260 }
1261
1262 /*
1263 * Fallback to HTML or ASCII when the encoding is unspecified
1264 */
1265 if (handler == NULL)
1266 handler = xmlFindCharEncodingHandler("HTML");
1267 if (handler == NULL)
1268 handler = xmlFindCharEncodingHandler("ascii");
1269
1270 /*
1271 * save the content to a temp buffer.
1272 */
1273 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1274 if (buf == NULL) return(0);
1275
Daniel Veillard95d845f2001-06-13 13:48:46 +00001276 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001277
1278 ret = xmlOutputBufferClose(buf);
1279 return(ret);
1280}
Daniel Veillard95d845f2001-06-13 13:48:46 +00001281
1282/**
1283 * htmlSaveFileEnc:
1284 * @filename: the filename
1285 * @cur: the document
1286 * @encoding: the document encoding
1287 *
1288 * Dump an HTML document to a file using a given encoding
1289 * and formatting returns/spaces are added.
1290 *
1291 * returns: the number of byte written or -1 in case of failure.
1292 */
1293int
1294htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1295 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1296}
1297
Owen Taylor3473f882001-02-23 17:55:21 +00001298#endif /* LIBXML_HTML_ENABLED */