blob: 7868d644b69709f9ea34a9311cc80ad20ce085ec [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
8
9
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000011#ifdef LIBXML_HTML_ENABLED
12
Owen Taylor3473f882001-02-23 17:55:21 +000013#ifdef HAVE_CTYPE_H
14#include <ctype.h>
15#endif
16#ifdef HAVE_STDLIB_H
17#include <stdlib.h>
18#endif
19
20#include <libxml/xmlmemory.h>
21#include <libxml/HTMLparser.h>
22#include <libxml/HTMLtree.h>
23#include <libxml/entities.h>
24#include <libxml/valid.h>
25#include <libxml/xmlerror.h>
26#include <libxml/parserInternals.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000027#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000028
/************************************************************************
 *                                                                      *
 *              Getting/Setting encoding meta tags                      *
 *                                                                      *
 ************************************************************************/
34
35/**
36 * htmlGetMetaEncoding:
37 * @doc: the document
38 *
39 * Encoding definition lookup in the Meta tags
40 *
41 * Returns the current encoding as flagged in the HTML source
42 */
43const xmlChar *
44htmlGetMetaEncoding(htmlDocPtr doc) {
45 htmlNodePtr cur;
46 const xmlChar *content;
47 const xmlChar *encoding;
48
49 if (doc == NULL)
50 return(NULL);
51 cur = doc->children;
52
53 /*
54 * Search the html
55 */
56 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000057 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000058 if (xmlStrEqual(cur->name, BAD_CAST"html"))
59 break;
60 if (xmlStrEqual(cur->name, BAD_CAST"head"))
61 goto found_head;
62 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
63 goto found_meta;
64 }
65 cur = cur->next;
66 }
67 if (cur == NULL)
68 return(NULL);
69 cur = cur->children;
70
71 /*
72 * Search the head
73 */
74 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000075 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000076 if (xmlStrEqual(cur->name, BAD_CAST"head"))
77 break;
78 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
79 goto found_meta;
80 }
81 cur = cur->next;
82 }
83 if (cur == NULL)
84 return(NULL);
85found_head:
86 cur = cur->children;
87
88 /*
89 * Search the meta elements
90 */
91found_meta:
92 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000093 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000094 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
95 xmlAttrPtr attr = cur->properties;
96 int http;
97 const xmlChar *value;
98
99 content = NULL;
100 http = 0;
101 while (attr != NULL) {
102 if ((attr->children != NULL) &&
103 (attr->children->type == XML_TEXT_NODE) &&
104 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000105 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000106 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
107 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
108 http = 1;
109 else if ((value != NULL)
110 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
111 content = value;
112 if ((http != 0) && (content != NULL))
113 goto found_content;
114 }
115 attr = attr->next;
116 }
117 }
118 }
119 cur = cur->next;
120 }
121 return(NULL);
122
123found_content:
124 encoding = xmlStrstr(content, BAD_CAST"charset=");
125 if (encoding == NULL)
126 encoding = xmlStrstr(content, BAD_CAST"Charset=");
127 if (encoding == NULL)
128 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
129 if (encoding != NULL) {
130 encoding += 8;
131 } else {
132 encoding = xmlStrstr(content, BAD_CAST"charset =");
133 if (encoding == NULL)
134 encoding = xmlStrstr(content, BAD_CAST"Charset =");
135 if (encoding == NULL)
136 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
137 if (encoding != NULL)
138 encoding += 9;
139 }
140 if (encoding != NULL) {
141 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
142 }
143 return(encoding);
144}
145
146/**
147 * htmlSetMetaEncoding:
148 * @doc: the document
149 * @encoding: the encoding string
150 *
151 * Sets the current encoding in the Meta tags
152 * NOTE: this will not change the document content encoding, just
153 * the META flag associated.
154 *
155 * Returns 0 in case of success and -1 in case of error
156 */
157int
158htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
159 htmlNodePtr cur, meta;
160 const xmlChar *content;
161 char newcontent[100];
162
163
164 if (doc == NULL)
165 return(-1);
166
167 if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000168 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
169 encoding);
Owen Taylor3473f882001-02-23 17:55:21 +0000170 newcontent[sizeof(newcontent) - 1] = 0;
171 }
172
173 cur = doc->children;
174
175 /*
176 * Search the html
177 */
178 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000179 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000180 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
181 break;
182 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
183 goto found_head;
184 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
185 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000186 }
187 cur = cur->next;
188 }
189 if (cur == NULL)
190 return(-1);
191 cur = cur->children;
192
193 /*
194 * Search the head
195 */
196 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000197 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000198 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
199 break;
200 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
201 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000202 }
203 cur = cur->next;
204 }
205 if (cur == NULL)
206 return(-1);
207found_head:
208 if (cur->children == NULL) {
209 if (encoding == NULL)
210 return(0);
211 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
212 xmlAddChild(cur, meta);
Owen Taylor3473f882001-02-23 17:55:21 +0000213 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000214 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Owen Taylor3473f882001-02-23 17:55:21 +0000215 return(0);
216 }
217 cur = cur->children;
218
219found_meta:
220 if (encoding != NULL) {
221 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000222 * Create a new Meta element with the right attributes
Owen Taylor3473f882001-02-23 17:55:21 +0000223 */
224
225 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
226 xmlAddPrevSibling(cur, meta);
Owen Taylor3473f882001-02-23 17:55:21 +0000227 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000228 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Owen Taylor3473f882001-02-23 17:55:21 +0000229 }
230
231 /*
232 * Search and destroy all the remaining the meta elements carrying
233 * encoding informations
234 */
235 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000236 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000237 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000238 xmlAttrPtr attr = cur->properties;
239 int http;
240 const xmlChar *value;
241
242 content = NULL;
243 http = 0;
244 while (attr != NULL) {
245 if ((attr->children != NULL) &&
246 (attr->children->type == XML_TEXT_NODE) &&
247 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000248 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000249 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
250 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
251 http = 1;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000252 else
253 {
254 if ((value != NULL) &&
255 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
256 content = value;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000257 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000258 if ((http != 0) && (content != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +0000259 break;
260 }
261 attr = attr->next;
262 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000263 if ((http != 0) && (content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000264 meta = cur;
265 cur = cur->next;
266 xmlUnlinkNode(meta);
267 xmlFreeNode(meta);
268 continue;
269 }
270
271 }
272 }
273 cur = cur->next;
274 }
275 return(0);
276}
277
/************************************************************************
 *                                                                      *
 *              Dumping HTML tree content to a simple buffer            *
 *                                                                      *
 ************************************************************************/
283
284static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000285htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format);
Daniel Veillard86fd5a72001-12-13 14:55:21 +0000286static void
287htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
288 int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000289
290/**
291 * htmlDtdDump:
292 * @buf: the HTML buffer output
293 * @doc: the document
294 *
295 * Dump the HTML document DTD, if any.
296 */
297static void
298htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
299 xmlDtdPtr cur = doc->intSubset;
300
301 if (cur == NULL) {
302 xmlGenericError(xmlGenericErrorContext,
303 "htmlDtdDump : no internal subset\n");
304 return;
305 }
306 xmlBufferWriteChar(buf, "<!DOCTYPE ");
307 xmlBufferWriteCHAR(buf, cur->name);
308 if (cur->ExternalID != NULL) {
309 xmlBufferWriteChar(buf, " PUBLIC ");
310 xmlBufferWriteQuotedString(buf, cur->ExternalID);
311 if (cur->SystemID != NULL) {
312 xmlBufferWriteChar(buf, " ");
313 xmlBufferWriteQuotedString(buf, cur->SystemID);
314 }
315 } else if (cur->SystemID != NULL) {
316 xmlBufferWriteChar(buf, " SYSTEM ");
317 xmlBufferWriteQuotedString(buf, cur->SystemID);
318 }
319 xmlBufferWriteChar(buf, ">\n");
320}
321
322/**
323 * htmlAttrDump:
324 * @buf: the HTML buffer output
325 * @doc: the document
326 * @cur: the attribute pointer
327 *
328 * Dump an HTML attribute
329 */
330static void
331htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
332 xmlChar *value;
333
Daniel Veillardeca60d02001-06-13 07:45:41 +0000334 /*
335 * TODO: The html output method should not escape a & character
336 * occurring in an attribute value immediately followed by
337 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
338 */
339
Owen Taylor3473f882001-02-23 17:55:21 +0000340 if (cur == NULL) {
341 xmlGenericError(xmlGenericErrorContext,
342 "htmlAttrDump : property == NULL\n");
343 return;
344 }
345 xmlBufferWriteChar(buf, " ");
346 xmlBufferWriteCHAR(buf, cur->name);
347 if (cur->children != NULL) {
348 value = xmlNodeListGetString(doc, cur->children, 0);
349 if (value) {
350 xmlBufferWriteChar(buf, "=");
351 xmlBufferWriteQuotedString(buf, value);
352 xmlFree(value);
353 } else {
354 xmlBufferWriteChar(buf, "=\"\"");
355 }
356 }
357}
358
359/**
360 * htmlAttrListDump:
361 * @buf: the HTML buffer output
362 * @doc: the document
363 * @cur: the first attribute pointer
364 *
365 * Dump a list of HTML attributes
366 */
367static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000368htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, int format) {
369 int i = 0;
370
Owen Taylor3473f882001-02-23 17:55:21 +0000371 if (cur == NULL) {
372 xmlGenericError(xmlGenericErrorContext,
373 "htmlAttrListDump : property == NULL\n");
374 return;
375 }
376 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000377 i++;
378 if ((format) && (i >= 5)) {
379 i = 0;
380 xmlBufferWriteChar(buf, "\n");
381 }
Owen Taylor3473f882001-02-23 17:55:21 +0000382 htmlAttrDump(buf, doc, cur);
383 cur = cur->next;
384 }
385}
386
Owen Taylor3473f882001-02-23 17:55:21 +0000387/**
388 * htmlNodeListDump:
389 * @buf: the HTML buffer output
390 * @doc: the document
391 * @cur: the first node
392 *
393 * Dump an HTML node list, recursive behaviour,children are printed too.
394 */
395static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000396htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000397 if (cur == NULL) {
398 xmlGenericError(xmlGenericErrorContext,
399 "htmlNodeListDump : node == NULL\n");
400 return;
401 }
402 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000403 htmlNodeDumpFormat(buf, doc, cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000404 cur = cur->next;
405 }
406}
407
408/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000409 * htmlNodeDumpFormat:
Owen Taylor3473f882001-02-23 17:55:21 +0000410 * @buf: the HTML buffer output
411 * @doc: the document
412 * @cur: the current node
Daniel Veillard95d845f2001-06-13 13:48:46 +0000413 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000414 *
415 * Dump an HTML node, recursive behaviour,children are printed too.
416 */
Daniel Veillard86fd5a72001-12-13 14:55:21 +0000417static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000418htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
419 int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000420 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000421
422 if (cur == NULL) {
423 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000424 "htmlNodeDumpFormat : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000425 return;
426 }
427 /*
428 * Special cases.
429 */
430 if (cur->type == XML_DTD_NODE)
431 return;
432 if (cur->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000433 htmlDocContentDump(buf, (xmlDocPtr) cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000434 return;
435 }
436 if (cur->type == HTML_TEXT_NODE) {
437 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000438 if (((cur->name == (const xmlChar *)xmlStringText) ||
439 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000440 ((cur->parent == NULL) ||
441 (!xmlStrEqual(cur->parent->name, BAD_CAST "script")))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000442 xmlChar *buffer;
443
Owen Taylor3473f882001-02-23 17:55:21 +0000444 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000445 if (buffer != NULL) {
446 xmlBufferWriteCHAR(buf, buffer);
447 xmlFree(buffer);
448 }
449 } else {
450 xmlBufferWriteCHAR(buf, cur->content);
451 }
452 }
453 return;
454 }
455 if (cur->type == HTML_COMMENT_NODE) {
456 if (cur->content != NULL) {
457 xmlBufferWriteChar(buf, "<!--");
Owen Taylor3473f882001-02-23 17:55:21 +0000458 xmlBufferWriteCHAR(buf, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000459 xmlBufferWriteChar(buf, "-->");
460 }
461 return;
462 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000463 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000464 if (cur->name == NULL)
465 return;
466 xmlBufferWriteChar(buf, "<?");
467 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000468 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000469 xmlBufferWriteChar(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000470 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000471 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000472 xmlBufferWriteChar(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000473 return;
474 }
Owen Taylor3473f882001-02-23 17:55:21 +0000475 if (cur->type == HTML_ENTITY_REF_NODE) {
476 xmlBufferWriteChar(buf, "&");
477 xmlBufferWriteCHAR(buf, cur->name);
478 xmlBufferWriteChar(buf, ";");
479 return;
480 }
Daniel Veillard083c2662001-05-08 08:27:14 +0000481 if (cur->type == HTML_PRESERVE_NODE) {
482 if (cur->content != NULL) {
Daniel Veillard083c2662001-05-08 08:27:14 +0000483 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillard083c2662001-05-08 08:27:14 +0000484 }
485 return;
486 }
Owen Taylor3473f882001-02-23 17:55:21 +0000487
488 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000489 * Get specific HTML info for that node.
Owen Taylor3473f882001-02-23 17:55:21 +0000490 */
491 info = htmlTagLookup(cur->name);
492
493 xmlBufferWriteChar(buf, "<");
494 xmlBufferWriteCHAR(buf, cur->name);
495 if (cur->properties != NULL)
Daniel Veillard95d845f2001-06-13 13:48:46 +0000496 htmlAttrListDump(buf, doc, cur->properties, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000497
498 if ((info != NULL) && (info->empty)) {
499 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000500 if ((format) && (info != NULL) && (!info->isinline) &&
501 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000502 if ((cur->next->type != HTML_TEXT_NODE) &&
503 (cur->next->type != HTML_ENTITY_REF_NODE))
504 xmlBufferWriteChar(buf, "\n");
505 }
506 return;
507 }
Daniel Veillard7db37732001-07-12 01:20:08 +0000508 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
509 (cur->children == NULL)) {
Daniel Veillard083c2662001-05-08 08:27:14 +0000510 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +0000511 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
512 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000513 xmlBufferWriteChar(buf, ">");
Daniel Veillard083c2662001-05-08 08:27:14 +0000514 } else {
Owen Taylor3473f882001-02-23 17:55:21 +0000515 xmlBufferWriteChar(buf, "></");
516 xmlBufferWriteCHAR(buf, cur->name);
517 xmlBufferWriteChar(buf, ">");
518 }
Daniel Veillard02bb1702001-06-13 21:11:59 +0000519 if ((format) && (info != NULL) && (!info->isinline) &&
520 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000521 if ((cur->next->type != HTML_TEXT_NODE) &&
522 (cur->next->type != HTML_ENTITY_REF_NODE))
523 xmlBufferWriteChar(buf, "\n");
524 }
525 return;
526 }
527 xmlBufferWriteChar(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +0000528 if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000529 xmlChar *buffer;
530
Daniel Veillard083c2662001-05-08 08:27:14 +0000531 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000532 if (buffer != NULL) {
533 xmlBufferWriteCHAR(buf, buffer);
534 xmlFree(buffer);
535 }
536 }
537 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +0000538 if ((format) && (info != NULL) && (!info->isinline) &&
539 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000540 (cur->children->type != HTML_ENTITY_REF_NODE) &&
541 (cur->children != cur->last))
542 xmlBufferWriteChar(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +0000543 htmlNodeListDump(buf, doc, cur->children, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +0000544 if ((format) && (info != NULL) && (!info->isinline) &&
545 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000546 (cur->last->type != HTML_ENTITY_REF_NODE) &&
547 (cur->children != cur->last))
548 xmlBufferWriteChar(buf, "\n");
549 }
Owen Taylor3473f882001-02-23 17:55:21 +0000550 xmlBufferWriteChar(buf, "</");
551 xmlBufferWriteCHAR(buf, cur->name);
552 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000553 if ((format) && (info != NULL) && (!info->isinline) &&
554 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000555 if ((cur->next->type != HTML_TEXT_NODE) &&
556 (cur->next->type != HTML_ENTITY_REF_NODE))
557 xmlBufferWriteChar(buf, "\n");
558 }
559}
560
561/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000562 * htmlNodeDump:
563 * @buf: the HTML buffer output
564 * @doc: the document
565 * @cur: the current node
566 *
567 * Dump an HTML node, recursive behaviour,children are printed too,
568 * and formatting returns are added.
569 */
570void
571htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
572 htmlNodeDumpFormat(buf, doc, cur, 1);
573}
574
575/**
576 * htmlNodeDumpFileFormat:
577 * @out: the FILE pointer
578 * @doc: the document
579 * @cur: the current node
580 * @encoding: the document encoding
581 * @format: should formatting spaces been added
582 *
583 * Dump an HTML node, recursive behaviour,children are printed too.
584 *
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000585 * TODO: if encoding == NULL try to save in the doc encoding
586 *
587 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard95d845f2001-06-13 13:48:46 +0000588 */
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000589int
590htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
591 xmlNodePtr cur, const char *encoding, int format) {
592 xmlOutputBufferPtr buf;
593 xmlCharEncodingHandlerPtr handler = NULL;
594 int ret;
Daniel Veillard95d845f2001-06-13 13:48:46 +0000595
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000596 if (encoding != NULL) {
597 xmlCharEncoding enc;
598
599 enc = xmlParseCharEncoding(encoding);
600 if (enc != XML_CHAR_ENCODING_UTF8) {
601 handler = xmlFindCharEncodingHandler(encoding);
602 if (handler == NULL)
603 return(-1);
604 }
605 }
606
607 /*
608 * Fallback to HTML or ASCII when the encoding is unspecified
609 */
610 if (handler == NULL)
611 handler = xmlFindCharEncodingHandler("HTML");
612 if (handler == NULL)
613 handler = xmlFindCharEncodingHandler("ascii");
614
615 /*
616 * save the content to a temp buffer.
617 */
618 buf = xmlOutputBufferCreateFile(out, handler);
619 if (buf == NULL) return(0);
620
621 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
622
623 ret = xmlOutputBufferClose(buf);
624 return(ret);
Daniel Veillard95d845f2001-06-13 13:48:46 +0000625}
626
627/**
Owen Taylor3473f882001-02-23 17:55:21 +0000628 * htmlNodeDumpFile:
629 * @out: the FILE pointer
630 * @doc: the document
631 * @cur: the current node
632 *
Daniel Veillard95d845f2001-06-13 13:48:46 +0000633 * Dump an HTML node, recursive behaviour,children are printed too,
634 * and formatting returns are added.
Owen Taylor3473f882001-02-23 17:55:21 +0000635 */
636void
637htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000638 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000639}
640
641/**
642 * htmlDocContentDump:
643 * @buf: the HTML buffer output
644 * @cur: the document
645 *
646 * Dump an HTML document.
647 */
648static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000649htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000650 int type;
651
652 /*
653 * force to output the stuff as HTML, especially for entities
654 */
655 type = cur->type;
656 cur->type = XML_HTML_DOCUMENT_NODE;
657 if (cur->intSubset != NULL)
658 htmlDtdDump(buf, cur);
659 else {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000660 /* Default to HTML-4.0 transitional @@@@ */
Owen Taylor3473f882001-02-23 17:55:21 +0000661 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
662
663 }
664 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000665 htmlNodeListDump(buf, cur, cur->children, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000666 }
667 xmlBufferWriteChar(buf, "\n");
668 cur->type = (xmlElementType) type;
669}
670
671/**
672 * htmlDocDumpMemory:
673 * @cur: the document
674 * @mem: OUT: the memory pointer
Daniel Veillard2d703722001-05-30 18:32:34 +0000675 * @size: OUT: the memory length
Owen Taylor3473f882001-02-23 17:55:21 +0000676 *
677 * Dump an HTML document in memory and return the xmlChar * and it's size.
678 * It's up to the caller to free the memory.
679 */
680void
681htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard2d703722001-05-30 18:32:34 +0000682 xmlOutputBufferPtr buf;
683 xmlCharEncodingHandlerPtr handler = NULL;
684 const char *encoding;
Owen Taylor3473f882001-02-23 17:55:21 +0000685
686 if (cur == NULL) {
687#ifdef DEBUG_TREE
688 xmlGenericError(xmlGenericErrorContext,
Daniel Veillard2d703722001-05-30 18:32:34 +0000689 "htmlDocDumpMemory : document == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000690#endif
691 *mem = NULL;
692 *size = 0;
693 return;
694 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000695
696 encoding = (const char *) htmlGetMetaEncoding(cur);
697
698 if (encoding != NULL) {
699 xmlCharEncoding enc;
700
701 enc = xmlParseCharEncoding(encoding);
702 if (enc != cur->charset) {
703 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
704 /*
705 * Not supported yet
706 */
707 *mem = NULL;
708 *size = 0;
709 return;
710 }
711
712 handler = xmlFindCharEncodingHandler(encoding);
713 if (handler == NULL) {
714 *mem = NULL;
715 *size = 0;
716 return;
717 }
718 }
719 }
720
721 /*
722 * Fallback to HTML or ASCII when the encoding is unspecified
723 */
724 if (handler == NULL)
725 handler = xmlFindCharEncodingHandler("HTML");
726 if (handler == NULL)
727 handler = xmlFindCharEncodingHandler("ascii");
728
729 buf = xmlAllocOutputBuffer(handler);
Owen Taylor3473f882001-02-23 17:55:21 +0000730 if (buf == NULL) {
731 *mem = NULL;
732 *size = 0;
733 return;
734 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000735
736 htmlDocContentDumpOutput(buf, cur, NULL);
737 xmlOutputBufferFlush(buf);
738 if (buf->conv != NULL) {
739 *size = buf->conv->use;
740 *mem = xmlStrndup(buf->conv->content, *size);
741 } else {
742 *size = buf->buffer->use;
743 *mem = xmlStrndup(buf->buffer->content, *size);
744 }
745 (void)xmlOutputBufferClose(buf);
Owen Taylor3473f882001-02-23 17:55:21 +0000746}
747
748
/************************************************************************
 *                                                                      *
 *              Dumping HTML tree content to an I/O output buffer       *
 *                                                                      *
 ************************************************************************/
754
755/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000756 * htmlDtdDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000757 * @buf: the HTML buffer output
758 * @doc: the document
759 * @encoding: the encoding string
760 *
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000761 * TODO: check whether encoding is needed
762 *
Owen Taylor3473f882001-02-23 17:55:21 +0000763 * Dump the HTML document DTD, if any.
764 */
765static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000766htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000767 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000768 xmlDtdPtr cur = doc->intSubset;
769
770 if (cur == NULL) {
771 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000772 "htmlDtdDumpOutput : no internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000773 return;
774 }
775 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
776 xmlOutputBufferWriteString(buf, (const char *)cur->name);
777 if (cur->ExternalID != NULL) {
778 xmlOutputBufferWriteString(buf, " PUBLIC ");
779 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
780 if (cur->SystemID != NULL) {
781 xmlOutputBufferWriteString(buf, " ");
782 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
783 }
784 } else if (cur->SystemID != NULL) {
785 xmlOutputBufferWriteString(buf, " SYSTEM ");
786 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
787 }
788 xmlOutputBufferWriteString(buf, ">\n");
789}
790
791/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000792 * htmlAttrDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000793 * @buf: the HTML buffer output
794 * @doc: the document
795 * @cur: the attribute pointer
796 * @encoding: the encoding string
797 *
798 * Dump an HTML attribute
799 */
800static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000801htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000802 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000803 xmlChar *value;
804
Daniel Veillardeca60d02001-06-13 07:45:41 +0000805 /*
806 * TODO: The html output method should not escape a & character
807 * occurring in an attribute value immediately followed by
808 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
809 */
810
Owen Taylor3473f882001-02-23 17:55:21 +0000811 if (cur == NULL) {
812 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000813 "htmlAttrDumpOutput : property == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000814 return;
815 }
816 xmlOutputBufferWriteString(buf, " ");
817 xmlOutputBufferWriteString(buf, (const char *)cur->name);
818 if (cur->children != NULL) {
819 value = xmlNodeListGetString(doc, cur->children, 0);
820 if (value) {
821 xmlOutputBufferWriteString(buf, "=");
822 xmlBufferWriteQuotedString(buf->buffer, value);
823 xmlFree(value);
824 } else {
825 xmlOutputBufferWriteString(buf, "=\"\"");
826 }
827 }
828}
829
830/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000831 * htmlAttrListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000832 * @buf: the HTML buffer output
833 * @doc: the document
834 * @cur: the first attribute pointer
835 * @encoding: the encoding string
836 *
837 * Dump a list of HTML attributes
838 */
839static void
840htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
841 if (cur == NULL) {
842 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000843 "htmlAttrListDumpOutput : property == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000844 return;
845 }
846 while (cur != NULL) {
847 htmlAttrDumpOutput(buf, doc, cur, encoding);
848 cur = cur->next;
849 }
850}
851
852
853void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
854 xmlNodePtr cur, const char *encoding);
855
856/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000857 * htmlNodeListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000858 * @buf: the HTML buffer output
859 * @doc: the document
860 * @cur: the first node
861 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000862 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000863 *
864 * Dump an HTML node list, recursive behaviour,children are printed too.
865 */
866static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000867htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
868 xmlNodePtr cur, const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000869 if (cur == NULL) {
870 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000871 "htmlNodeListDumpOutput : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000872 return;
873 }
874 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000875 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000876 cur = cur->next;
877 }
878}
879
880/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000881 * htmlNodeDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000882 * @buf: the HTML buffer output
883 * @doc: the document
884 * @cur: the current node
885 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000886 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000887 *
888 * Dump an HTML node, recursive behaviour,children are printed too.
889 */
890void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000891htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
892 xmlNodePtr cur, const char *encoding, int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000893 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000894
895 if (cur == NULL) {
896 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000897 "htmlNodeDumpFormatOutput : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000898 return;
899 }
900 /*
901 * Special cases.
902 */
903 if (cur->type == XML_DTD_NODE)
904 return;
905 if (cur->type == XML_HTML_DOCUMENT_NODE) {
906 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
907 return;
908 }
909 if (cur->type == HTML_TEXT_NODE) {
910 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000911 if (((cur->name == (const xmlChar *)xmlStringText) ||
912 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000913 ((cur->parent == NULL) ||
914 (!xmlStrEqual(cur->parent->name, BAD_CAST "script")))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000915 xmlChar *buffer;
916
Owen Taylor3473f882001-02-23 17:55:21 +0000917 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000918 if (buffer != NULL) {
919 xmlOutputBufferWriteString(buf, (const char *)buffer);
920 xmlFree(buffer);
921 }
922 } else {
923 xmlOutputBufferWriteString(buf, (const char *)cur->content);
924 }
925 }
926 return;
927 }
928 if (cur->type == HTML_COMMENT_NODE) {
929 if (cur->content != NULL) {
930 xmlOutputBufferWriteString(buf, "<!--");
Owen Taylor3473f882001-02-23 17:55:21 +0000931 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000932 xmlOutputBufferWriteString(buf, "-->");
933 }
934 return;
935 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000936 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000937 if (cur->name == NULL)
938 return;
939 xmlOutputBufferWriteString(buf, "<?");
940 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000941 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000942 xmlOutputBufferWriteString(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000943 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000944 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000945 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000946 return;
947 }
Owen Taylor3473f882001-02-23 17:55:21 +0000948 if (cur->type == HTML_ENTITY_REF_NODE) {
949 xmlOutputBufferWriteString(buf, "&");
950 xmlOutputBufferWriteString(buf, (const char *)cur->name);
951 xmlOutputBufferWriteString(buf, ";");
952 return;
953 }
954 if (cur->type == HTML_PRESERVE_NODE) {
955 if (cur->content != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000956 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000957 }
958 return;
959 }
960
961 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000962 * Get specific HTML info for that node.
Owen Taylor3473f882001-02-23 17:55:21 +0000963 */
964 info = htmlTagLookup(cur->name);
965
966 xmlOutputBufferWriteString(buf, "<");
967 xmlOutputBufferWriteString(buf, (const char *)cur->name);
968 if (cur->properties != NULL)
969 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
970
971 if ((info != NULL) && (info->empty)) {
972 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000973 if ((format) && (!info->isinline) && (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000974 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +0000975 (cur->next->type != HTML_ENTITY_REF_NODE) &&
976 (cur->parent != NULL) &&
977 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +0000978 xmlOutputBufferWriteString(buf, "\n");
979 }
980 return;
981 }
Daniel Veillard7db37732001-07-12 01:20:08 +0000982 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
983 (cur->children == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000984 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +0000985 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
986 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000987 xmlOutputBufferWriteString(buf, ">");
988 } else {
989 xmlOutputBufferWriteString(buf, "></");
990 xmlOutputBufferWriteString(buf, (const char *)cur->name);
991 xmlOutputBufferWriteString(buf, ">");
992 }
Daniel Veillard02bb1702001-06-13 21:11:59 +0000993 if ((format) && (cur->next != NULL) &&
994 (info != NULL) && (!info->isinline)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000995 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +0000996 (cur->next->type != HTML_ENTITY_REF_NODE) &&
997 (cur->parent != NULL) &&
998 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +0000999 xmlOutputBufferWriteString(buf, "\n");
1000 }
1001 return;
1002 }
1003 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +00001004 if ((cur->type != XML_ELEMENT_NODE) &&
1005 (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001006 /*
1007 * Uses the OutputBuffer property to automatically convert
1008 * invalids to charrefs
1009 */
1010
Owen Taylor3473f882001-02-23 17:55:21 +00001011 xmlOutputBufferWriteString(buf, (const char *) cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +00001012 }
1013 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +00001014 if ((format) && (info != NULL) && (!info->isinline) &&
1015 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001016 (cur->children->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001017 (cur->children != cur->last) &&
1018 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001019 xmlOutputBufferWriteString(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +00001020 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +00001021 if ((format) && (info != NULL) && (!info->isinline) &&
1022 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001023 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001024 (cur->children != cur->last) &&
1025 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001026 xmlOutputBufferWriteString(buf, "\n");
1027 }
Owen Taylor3473f882001-02-23 17:55:21 +00001028 xmlOutputBufferWriteString(buf, "</");
1029 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1030 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +00001031 if ((format) && (info != NULL) && (!info->isinline) &&
1032 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001033 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001034 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1035 (cur->parent != NULL) &&
1036 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001037 xmlOutputBufferWriteString(buf, "\n");
1038 }
1039}
1040
1041/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001042 * htmlNodeDumpOutput:
1043 * @buf: the HTML buffer output
1044 * @doc: the document
1045 * @cur: the current node
1046 * @encoding: the encoding string
1047 *
1048 * Dump an HTML node, recursive behaviour,children are printed too,
1049 * and formatting returns/spaces are added.
1050 */
1051void
1052htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
1053 xmlNodePtr cur, const char *encoding) {
1054 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
1055}
1056
1057/**
1058 * htmlDocContentDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +00001059 * @buf: the HTML buffer output
1060 * @cur: the document
1061 * @encoding: the encoding string
Daniel Veillard9d06d302002-01-22 18:15:52 +00001062 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +00001063 *
1064 * Dump an HTML document.
1065 */
1066void
Daniel Veillard95d845f2001-06-13 13:48:46 +00001067htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1068 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001069 int type;
1070
1071 /*
1072 * force to output the stuff as HTML, especially for entities
1073 */
1074 type = cur->type;
1075 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard4dd93462001-04-02 15:16:19 +00001076 if (cur->intSubset != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001077 htmlDtdDumpOutput(buf, cur, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001078 }
1079 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +00001080 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001081 }
1082 xmlOutputBufferWriteString(buf, "\n");
1083 cur->type = (xmlElementType) type;
1084}
1085
Daniel Veillard95d845f2001-06-13 13:48:46 +00001086/**
1087 * htmlDocContentDumpOutput:
1088 * @buf: the HTML buffer output
1089 * @cur: the document
1090 * @encoding: the encoding string
1091 *
1092 * Dump an HTML document. Formating return/spaces are added.
1093 */
1094void
1095htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1096 const char *encoding) {
1097 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1098}
1099
Owen Taylor3473f882001-02-23 17:55:21 +00001100/************************************************************************
1101 * *
1102 * Saving functions front-ends *
1103 * *
1104 ************************************************************************/
1105
1106/**
1107 * htmlDocDump:
1108 * @f: the FILE*
1109 * @cur: the document
1110 *
1111 * Dump an HTML document to an open FILE.
1112 *
1113 * returns: the number of byte written or -1 in case of failure.
1114 */
1115int
1116htmlDocDump(FILE *f, xmlDocPtr cur) {
1117 xmlOutputBufferPtr buf;
1118 xmlCharEncodingHandlerPtr handler = NULL;
1119 const char *encoding;
1120 int ret;
1121
1122 if (cur == NULL) {
1123#ifdef DEBUG_TREE
1124 xmlGenericError(xmlGenericErrorContext,
1125 "htmlDocDump : document == NULL\n");
1126#endif
1127 return(-1);
1128 }
1129
1130 encoding = (const char *) htmlGetMetaEncoding(cur);
1131
1132 if (encoding != NULL) {
1133 xmlCharEncoding enc;
1134
1135 enc = xmlParseCharEncoding(encoding);
1136 if (enc != cur->charset) {
1137 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1138 /*
1139 * Not supported yet
1140 */
1141 return(-1);
1142 }
1143
1144 handler = xmlFindCharEncodingHandler(encoding);
1145 if (handler == NULL)
1146 return(-1);
1147 }
1148 }
1149
1150 /*
1151 * Fallback to HTML or ASCII when the encoding is unspecified
1152 */
1153 if (handler == NULL)
1154 handler = xmlFindCharEncodingHandler("HTML");
1155 if (handler == NULL)
1156 handler = xmlFindCharEncodingHandler("ascii");
1157
1158 buf = xmlOutputBufferCreateFile(f, handler);
1159 if (buf == NULL) return(-1);
1160 htmlDocContentDumpOutput(buf, cur, NULL);
1161
1162 ret = xmlOutputBufferClose(buf);
1163 return(ret);
1164}
1165
1166/**
1167 * htmlSaveFile:
1168 * @filename: the filename (or URL)
1169 * @cur: the document
1170 *
1171 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1172 * used.
1173 * returns: the number of byte written or -1 in case of failure.
1174 */
1175int
1176htmlSaveFile(const char *filename, xmlDocPtr cur) {
1177 xmlOutputBufferPtr buf;
1178 xmlCharEncodingHandlerPtr handler = NULL;
1179 const char *encoding;
1180 int ret;
1181
1182 encoding = (const char *) htmlGetMetaEncoding(cur);
1183
1184 if (encoding != NULL) {
1185 xmlCharEncoding enc;
1186
1187 enc = xmlParseCharEncoding(encoding);
1188 if (enc != cur->charset) {
1189 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1190 /*
1191 * Not supported yet
1192 */
1193 return(-1);
1194 }
1195
1196 handler = xmlFindCharEncodingHandler(encoding);
1197 if (handler == NULL)
1198 return(-1);
1199 }
1200 }
1201
1202 /*
1203 * Fallback to HTML or ASCII when the encoding is unspecified
1204 */
1205 if (handler == NULL)
1206 handler = xmlFindCharEncodingHandler("HTML");
1207 if (handler == NULL)
1208 handler = xmlFindCharEncodingHandler("ascii");
1209
1210 /*
1211 * save the content to a temp buffer.
1212 */
1213 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1214 if (buf == NULL) return(0);
1215
1216 htmlDocContentDumpOutput(buf, cur, NULL);
1217
1218 ret = xmlOutputBufferClose(buf);
1219 return(ret);
1220}
1221
1222/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001223 * htmlSaveFileFormat:
Owen Taylor3473f882001-02-23 17:55:21 +00001224 * @filename: the filename
1225 * @cur: the document
Daniel Veillard95d845f2001-06-13 13:48:46 +00001226 * @format: should formatting spaces been added
1227 * @encoding: the document encoding
Owen Taylor3473f882001-02-23 17:55:21 +00001228 *
1229 * Dump an HTML document to a file using a given encoding.
1230 *
1231 * returns: the number of byte written or -1 in case of failure.
1232 */
1233int
Daniel Veillard95d845f2001-06-13 13:48:46 +00001234htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1235 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001236 xmlOutputBufferPtr buf;
1237 xmlCharEncodingHandlerPtr handler = NULL;
1238 int ret;
1239
1240 if (encoding != NULL) {
1241 xmlCharEncoding enc;
1242
1243 enc = xmlParseCharEncoding(encoding);
1244 if (enc != cur->charset) {
1245 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1246 /*
1247 * Not supported yet
1248 */
1249 return(-1);
1250 }
1251
1252 handler = xmlFindCharEncodingHandler(encoding);
1253 if (handler == NULL)
1254 return(-1);
1255 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1256 }
Daniel Veillard4dd93462001-04-02 15:16:19 +00001257 } else {
1258 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
Owen Taylor3473f882001-02-23 17:55:21 +00001259 }
1260
1261 /*
1262 * Fallback to HTML or ASCII when the encoding is unspecified
1263 */
1264 if (handler == NULL)
1265 handler = xmlFindCharEncodingHandler("HTML");
1266 if (handler == NULL)
1267 handler = xmlFindCharEncodingHandler("ascii");
1268
1269 /*
1270 * save the content to a temp buffer.
1271 */
1272 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1273 if (buf == NULL) return(0);
1274
Daniel Veillard95d845f2001-06-13 13:48:46 +00001275 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001276
1277 ret = xmlOutputBufferClose(buf);
1278 return(ret);
1279}
Daniel Veillard95d845f2001-06-13 13:48:46 +00001280
1281/**
1282 * htmlSaveFileEnc:
1283 * @filename: the filename
1284 * @cur: the document
1285 * @encoding: the document encoding
1286 *
1287 * Dump an HTML document to a file using a given encoding
1288 * and formatting returns/spaces are added.
1289 *
1290 * returns: the number of byte written or -1 in case of failure.
1291 */
1292int
1293htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1294 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1295}
1296
Owen Taylor3473f882001-02-23 17:55:21 +00001297#endif /* LIBXML_HTML_ENABLED */