blob: 82687acacdea38fb730763871456452ba6d951dd [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
Daniel Veillard7f7d1111999-09-22 09:46:25 +00009
Daniel Veillard3c558c31999-12-22 11:30:41 +000010#ifdef WIN32
11#include "win32config.h"
12#else
Daniel Veillard167b5091999-07-07 04:19:20 +000013#include "config.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000014#endif
Daniel Veillard361d8452000-04-03 19:48:13 +000015
Daniel Veillardb71379b2000-10-09 12:30:39 +000016#include <libxml/xmlversion.h>
Daniel Veillard361d8452000-04-03 19:48:13 +000017#ifdef LIBXML_HTML_ENABLED
18
Daniel Veillard167b5091999-07-07 04:19:20 +000019#include <stdio.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000020#include <string.h> /* for memset() only ! */
21
Daniel Veillard7f7d1111999-09-22 09:46:25 +000022#ifdef HAVE_CTYPE_H
23#include <ctype.h>
24#endif
25#ifdef HAVE_STDLIB_H
26#include <stdlib.h>
27#endif
28
Daniel Veillard361d8452000-04-03 19:48:13 +000029#include <libxml/xmlmemory.h>
30#include <libxml/HTMLparser.h>
31#include <libxml/HTMLtree.h>
32#include <libxml/entities.h>
33#include <libxml/valid.h>
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +000034#include <libxml/xmlerror.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000035
/************************************************************************
 *                                                                      *
 *              Getting/Setting encoding meta tags                      *
 *                                                                      *
 ************************************************************************/
41
42/**
43 * htmlGetMetaEncoding:
44 * @doc: the document
45 *
46 * Encoding definition lookup in the Meta tags
47 *
48 * Returns the current encoding as flagged in the HTML source
49 */
50const xmlChar *
51htmlGetMetaEncoding(htmlDocPtr doc) {
52 htmlNodePtr cur;
53 const xmlChar *content;
54 const xmlChar *encoding;
55
56 if (doc == NULL)
57 return(NULL);
58 cur = doc->children;
59
60 /*
61 * Search the html
62 */
63 while (cur != NULL) {
64 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +000065 if (xmlStrEqual(cur->name, BAD_CAST"html"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +000066 break;
Daniel Veillard8b5dd832000-10-01 20:28:44 +000067 if (xmlStrEqual(cur->name, BAD_CAST"head"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +000068 goto found_head;
Daniel Veillard8b5dd832000-10-01 20:28:44 +000069 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +000070 goto found_meta;
71 }
72 cur = cur->next;
73 }
74 if (cur == NULL)
75 return(NULL);
76 cur = cur->children;
77
78 /*
79 * Search the head
80 */
81 while (cur != NULL) {
82 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +000083 if (xmlStrEqual(cur->name, BAD_CAST"head"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +000084 break;
Daniel Veillard8b5dd832000-10-01 20:28:44 +000085 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +000086 goto found_meta;
87 }
88 cur = cur->next;
89 }
90 if (cur == NULL)
91 return(NULL);
92found_head:
93 cur = cur->children;
94
95 /*
96 * Search the meta elements
97 */
98found_meta:
99 while (cur != NULL) {
100 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000101 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000102 xmlAttrPtr attr = cur->properties;
103 int http;
104 const xmlChar *value;
105
106 content = NULL;
107 http = 0;
108 while (attr != NULL) {
109 if ((attr->children != NULL) &&
110 (attr->children->type == XML_TEXT_NODE) &&
111 (attr->children->next == NULL)) {
112#ifndef XML_USE_BUFFER_CONTENT
113 value = attr->children->content;
114#else
115 value = xmlBufferContent(attr->children->content);
116#endif
Daniel Veillardb656ebe2000-09-22 13:51:48 +0000117 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
118 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000119 http = 1;
Daniel Veillardb656ebe2000-09-22 13:51:48 +0000120 else if ((value != NULL)
121 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000122 content = value;
123 if ((http != 0) && (content != NULL))
124 goto found_content;
125 }
126 attr = attr->next;
127 }
128 }
129 }
130 cur = cur->next;
131 }
132 return(NULL);
133
134found_content:
135 encoding = xmlStrstr(content, BAD_CAST"charset=");
136 if (encoding == NULL)
137 encoding = xmlStrstr(content, BAD_CAST"Charset=");
138 if (encoding == NULL)
139 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
140 if (encoding != NULL) {
141 encoding += 8;
142 } else {
143 encoding = xmlStrstr(content, BAD_CAST"charset =");
144 if (encoding == NULL)
145 encoding = xmlStrstr(content, BAD_CAST"Charset =");
146 if (encoding == NULL)
147 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
148 if (encoding != NULL)
149 encoding += 9;
150 }
151 if (encoding != NULL) {
152 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
153 }
154 return(encoding);
155}
156
157/**
158 * htmlSetMetaEncoding:
159 * @doc: the document
160 * @encoding: the encoding string
161 *
162 * Sets the current encoding in the Meta tags
163 * NOTE: this will not change the document content encoding, just
164 * the META flag associated.
165 *
166 * Returns 0 in case of success and -1 in case of error
167 */
168int
169htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
170 htmlNodePtr cur, meta;
171 const xmlChar *content;
172 char newcontent[100];
173
174
175 if (doc == NULL)
176 return(-1);
177
178 if (encoding != NULL) {
Daniel Veillard39c7d712000-09-10 16:14:55 +0000179#ifdef HAVE_SNPRINTF
180 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
181 encoding);
182#else
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000183 sprintf(newcontent, "text/html; charset=%s", encoding);
Daniel Veillard39c7d712000-09-10 16:14:55 +0000184#endif
185 newcontent[sizeof(newcontent) - 1] = 0;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000186 }
187
188 cur = doc->children;
189
190 /*
191 * Search the html
192 */
193 while (cur != NULL) {
194 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000195 if (xmlStrEqual(cur->name, BAD_CAST"html"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000196 break;
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000197 if (xmlStrEqual(cur->name, BAD_CAST"body")) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000198 if (encoding == NULL)
199 return(0);
200 meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL);
201 xmlAddPrevSibling(cur, meta);
202 cur = meta;
203 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
204 xmlAddChild(cur, meta);
205 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
206 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
207 return(0);
208 }
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000209 if (xmlStrEqual(cur->name, BAD_CAST"head"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000210 goto found_head;
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000211 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000212 goto found_meta;
213 }
214 cur = cur->next;
215 }
216 if (cur == NULL)
217 return(-1);
218 cur = cur->children;
219
220 /*
221 * Search the head
222 */
223 while (cur != NULL) {
224 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000225 if (xmlStrEqual(cur->name, BAD_CAST"head"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000226 break;
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000227 if (xmlStrEqual(cur->name, BAD_CAST"body")) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000228 if (encoding == NULL)
229 return(0);
230 meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL);
231 xmlAddPrevSibling(cur, meta);
232 cur = meta;
233 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
234 xmlAddChild(cur, meta);
235 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
236 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
237 return(0);
238 }
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000239 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000240 goto found_meta;
241 }
242 cur = cur->next;
243 }
244 if (cur == NULL)
245 return(-1);
246found_head:
247 if (cur->children == NULL) {
248 if (encoding == NULL)
249 return(0);
250 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
251 xmlAddChild(cur, meta);
252 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
253 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
254 return(0);
255 }
256 cur = cur->children;
257
258found_meta:
259 if (encoding != NULL) {
260 /*
261 * Create a new Meta element with the right aatributes
262 */
263
264 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
265 xmlAddPrevSibling(cur, meta);
266 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
267 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
268 }
269
270 /*
271 * Search and destroy all the remaining the meta elements carrying
272 * encoding informations
273 */
274 while (cur != NULL) {
275 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000276 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000277 xmlAttrPtr attr = cur->properties;
278 int http;
279 const xmlChar *value;
280
281 content = NULL;
282 http = 0;
283 while (attr != NULL) {
284 if ((attr->children != NULL) &&
285 (attr->children->type == XML_TEXT_NODE) &&
286 (attr->children->next == NULL)) {
287#ifndef XML_USE_BUFFER_CONTENT
288 value = attr->children->content;
289#else
290 value = xmlBufferContent(attr->children->content);
291#endif
Daniel Veillardb656ebe2000-09-22 13:51:48 +0000292 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
293 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000294 http = 1;
Daniel Veillardb656ebe2000-09-22 13:51:48 +0000295 else if ((value != NULL)
296 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000297 content = value;
298 if ((http != 0) && (content != NULL))
299 break;
300 }
301 attr = attr->next;
302 }
303 if ((http != 0) && (content != NULL)) {
304 meta = cur;
305 cur = cur->next;
306 xmlUnlinkNode(meta);
307 xmlFreeNode(meta);
308 continue;
309 }
310
311 }
312 }
313 cur = cur->next;
314 }
315 return(0);
316}
317
/************************************************************************
 *                                                                      *
 *              Dumping HTML tree content to a simple buffer            *
 *                                                                      *
 ************************************************************************/
323
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000324static void
325htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
326
Daniel Veillard167b5091999-07-07 04:19:20 +0000327/**
328 * htmlDtdDump:
329 * @buf: the HTML buffer output
330 * @doc: the document
331 *
332 * Dump the HTML document DTD, if any.
333 */
334static void
335htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
336 xmlDtdPtr cur = doc->intSubset;
337
338 if (cur == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000339 xmlGenericError(xmlGenericErrorContext,
340 "htmlDtdDump : no internal subset\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000341 return;
342 }
343 xmlBufferWriteChar(buf, "<!DOCTYPE ");
344 xmlBufferWriteCHAR(buf, cur->name);
345 if (cur->ExternalID != NULL) {
346 xmlBufferWriteChar(buf, " PUBLIC ");
347 xmlBufferWriteQuotedString(buf, cur->ExternalID);
Daniel Veillard1566d3a1999-07-15 14:24:29 +0000348 if (cur->SystemID != NULL) {
349 xmlBufferWriteChar(buf, " ");
350 xmlBufferWriteQuotedString(buf, cur->SystemID);
351 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000352 } else if (cur->SystemID != NULL) {
353 xmlBufferWriteChar(buf, " SYSTEM ");
354 xmlBufferWriteQuotedString(buf, cur->SystemID);
355 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000356 xmlBufferWriteChar(buf, ">\n");
357}
358
359/**
360 * htmlAttrDump:
361 * @buf: the HTML buffer output
362 * @doc: the document
363 * @cur: the attribute pointer
364 *
365 * Dump an HTML attribute
366 */
367static void
368htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000369 xmlChar *value;
Daniel Veillard167b5091999-07-07 04:19:20 +0000370
371 if (cur == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000372 xmlGenericError(xmlGenericErrorContext,
373 "htmlAttrDump : property == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000374 return;
375 }
376 xmlBufferWriteChar(buf, " ");
377 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillardbe803962000-06-28 23:40:59 +0000378 if (cur->children != NULL) {
379 value = xmlNodeListGetString(doc, cur->children, 0);
380 if (value) {
381 xmlBufferWriteChar(buf, "=");
382 xmlBufferWriteQuotedString(buf, value);
383 xmlFree(value);
384 } else {
385 xmlBufferWriteChar(buf, "=\"\"");
386 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000387 }
388}
389
390/**
391 * htmlAttrListDump:
392 * @buf: the HTML buffer output
393 * @doc: the document
394 * @cur: the first attribute pointer
395 *
396 * Dump a list of HTML attributes
397 */
398static void
399htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
400 if (cur == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000401 xmlGenericError(xmlGenericErrorContext,
402 "htmlAttrListDump : property == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000403 return;
404 }
405 while (cur != NULL) {
406 htmlAttrDump(buf, doc, cur);
407 cur = cur->next;
408 }
409}
410
411
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000412void
Daniel Veillard82150d81999-07-07 07:32:15 +0000413htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000414/**
415 * htmlNodeListDump:
416 * @buf: the HTML buffer output
417 * @doc: the document
418 * @cur: the first node
Daniel Veillard167b5091999-07-07 04:19:20 +0000419 *
420 * Dump an HTML node list, recursive behaviour,children are printed too.
421 */
422static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000423htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000424 if (cur == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000425 xmlGenericError(xmlGenericErrorContext,
426 "htmlNodeListDump : node == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000427 return;
428 }
429 while (cur != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000430 htmlNodeDump(buf, doc, cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000431 cur = cur->next;
432 }
433}
434
435/**
436 * htmlNodeDump:
437 * @buf: the HTML buffer output
438 * @doc: the document
439 * @cur: the current node
Daniel Veillard167b5091999-07-07 04:19:20 +0000440 *
441 * Dump an HTML node, recursive behaviour,children are printed too.
442 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000443void
Daniel Veillard82150d81999-07-07 07:32:15 +0000444htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000445 htmlElemDescPtr info;
Daniel Veillard167b5091999-07-07 04:19:20 +0000446
447 if (cur == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000448 xmlGenericError(xmlGenericErrorContext,
449 "htmlNodeDump : node == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000450 return;
451 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000452 /*
453 * Special cases.
454 */
Daniel Veillardd83eb822000-06-30 18:39:56 +0000455 if (cur->type == XML_DTD_NODE)
456 return;
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000457 if (cur->type == XML_HTML_DOCUMENT_NODE) {
458 htmlDocContentDump(buf, (xmlDocPtr) cur);
459 return;
460 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000461 if (cur->type == HTML_TEXT_NODE) {
462 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000463 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000464
Daniel Veillardd293fd11999-12-01 09:51:45 +0000465#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000466 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000467#else
468 buffer = xmlEncodeEntitiesReentrant(doc,
469 xmlBufferContent(cur->content));
470#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000471 if (buffer != NULL) {
472 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000473 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000474 }
475 }
476 return;
477 }
478 if (cur->type == HTML_COMMENT_NODE) {
479 if (cur->content != NULL) {
480 xmlBufferWriteChar(buf, "<!--");
Daniel Veillardd293fd11999-12-01 09:51:45 +0000481#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000482 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000483#else
484 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
485#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000486 xmlBufferWriteChar(buf, "-->");
487 }
488 return;
489 }
490 if (cur->type == HTML_ENTITY_REF_NODE) {
491 xmlBufferWriteChar(buf, "&");
492 xmlBufferWriteCHAR(buf, cur->name);
493 xmlBufferWriteChar(buf, ";");
494 return;
495 }
496
Daniel Veillard82150d81999-07-07 07:32:15 +0000497 /*
498 * Get specific HTmL info for taht node.
499 */
500 info = htmlTagLookup(cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000501
Daniel Veillard82150d81999-07-07 07:32:15 +0000502 xmlBufferWriteChar(buf, "<");
Daniel Veillard167b5091999-07-07 04:19:20 +0000503 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000504 if (cur->properties != NULL)
505 htmlAttrListDump(buf, doc, cur->properties);
506
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000507 if ((info != NULL) && (info->empty)) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000508 xmlBufferWriteChar(buf, ">");
509 if (cur->next != NULL) {
510 if ((cur->next->type != HTML_TEXT_NODE) &&
511 (cur->next->type != HTML_ENTITY_REF_NODE))
512 xmlBufferWriteChar(buf, "\n");
513 }
514 return;
515 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000516 if ((cur->content == NULL) && (cur->children == NULL)) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000517 if ((info != NULL) && (info->endTag != 0))
Daniel Veillard82150d81999-07-07 07:32:15 +0000518 xmlBufferWriteChar(buf, ">");
519 else {
520 xmlBufferWriteChar(buf, "></");
521 xmlBufferWriteCHAR(buf, cur->name);
522 xmlBufferWriteChar(buf, ">");
523 }
524 if (cur->next != NULL) {
525 if ((cur->next->type != HTML_TEXT_NODE) &&
526 (cur->next->type != HTML_ENTITY_REF_NODE))
527 xmlBufferWriteChar(buf, "\n");
528 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000529 return;
530 }
531 xmlBufferWriteChar(buf, ">");
532 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000533 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000534
Daniel Veillardd293fd11999-12-01 09:51:45 +0000535#ifndef XML_USE_BUFFER_CONTENT
536 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
537#else
538 buffer = xmlEncodeEntitiesReentrant(doc,
539 xmlBufferContent(cur->content));
540#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000541 if (buffer != NULL) {
542 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000543 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000544 }
545 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000546 if (cur->children != NULL) {
547 if ((cur->children->type != HTML_TEXT_NODE) &&
548 (cur->children->type != HTML_ENTITY_REF_NODE) &&
549 (cur->children != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000550 xmlBufferWriteChar(buf, "\n");
Daniel Veillardcf461992000-03-14 18:30:20 +0000551 htmlNodeListDump(buf, doc, cur->children);
Daniel Veillard82150d81999-07-07 07:32:15 +0000552 if ((cur->last->type != HTML_TEXT_NODE) &&
Chris Lahey6dff2141999-12-01 09:51:45 +0000553 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardcf461992000-03-14 18:30:20 +0000554 (cur->children != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000555 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000556 }
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000557 if (!htmlIsAutoClosed(doc, cur)) {
558 xmlBufferWriteChar(buf, "</");
559 xmlBufferWriteCHAR(buf, cur->name);
560 xmlBufferWriteChar(buf, ">");
561 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000562 if (cur->next != NULL) {
563 if ((cur->next->type != HTML_TEXT_NODE) &&
564 (cur->next->type != HTML_ENTITY_REF_NODE))
565 xmlBufferWriteChar(buf, "\n");
566 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000567}
568
569/**
Daniel Veillard5feb8492000-02-02 17:15:36 +0000570 * htmlNodeDumpFile:
571 * @out: the FILE pointer
572 * @doc: the document
573 * @cur: the current node
574 *
575 * Dump an HTML node, recursive behaviour,children are printed too.
576 */
577void
578htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
579 xmlBufferPtr buf;
580
581 buf = xmlBufferCreate();
582 if (buf == NULL) return;
583 htmlNodeDump(buf, doc, cur);
584 xmlBufferDump(out, buf);
585 xmlBufferFree(buf);
586}
587
588/**
Daniel Veillard167b5091999-07-07 04:19:20 +0000589 * htmlDocContentDump:
590 * @buf: the HTML buffer output
591 * @cur: the document
592 *
593 * Dump an HTML document.
594 */
595static void
596htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000597 int type;
598
599 /*
600 * force to output the stuff as HTML, especially for entities
601 */
602 type = cur->type;
603 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard167b5091999-07-07 04:19:20 +0000604 if (cur->intSubset != NULL)
605 htmlDtdDump(buf, cur);
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000606 else {
607 /* Default to HTML-4.0 transitionnal @@@@ */
608 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
609
610 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000611 if (cur->children != NULL) {
612 htmlNodeListDump(buf, cur, cur->children);
Daniel Veillard167b5091999-07-07 04:19:20 +0000613 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000614 xmlBufferWriteChar(buf, "\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000615 cur->type = (xmlElementType) type;
Daniel Veillard167b5091999-07-07 04:19:20 +0000616}
617
618/**
619 * htmlDocDumpMemory:
620 * @cur: the document
621 * @mem: OUT: the memory pointer
622 * @size: OUT: the memory lenght
623 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000624 * Dump an HTML document in memory and return the xmlChar * and it's size.
Daniel Veillard167b5091999-07-07 04:19:20 +0000625 * It's up to the caller to free the memory.
626 */
627void
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000628htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000629 xmlBufferPtr buf;
630
631 if (cur == NULL) {
632#ifdef DEBUG_TREE
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000633 xmlGenericError(xmlGenericErrorContext,
634 "htmlxmlDocDumpMemory : document == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000635#endif
636 *mem = NULL;
637 *size = 0;
638 return;
639 }
640 buf = xmlBufferCreate();
641 if (buf == NULL) {
642 *mem = NULL;
643 *size = 0;
644 return;
645 }
646 htmlDocContentDump(buf, cur);
647 *mem = buf->content;
648 *size = buf->use;
649 memset(buf, -1, sizeof(xmlBuffer));
Daniel Veillard6454aec1999-09-02 22:04:43 +0000650 xmlFree(buf);
Daniel Veillard167b5091999-07-07 04:19:20 +0000651}
652
653
/************************************************************************
 *                                                                      *
 *              Dumping HTML tree content to an I/O output buffer       *
 *                                                                      *
 ************************************************************************/
659
660static void
661htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding);
662
663/**
664 * htmlDtdDump:
665 * @buf: the HTML buffer output
666 * @doc: the document
667 *
668 * Dump the HTML document DTD, if any.
669 */
670static void
671htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, const char *encoding) {
672 xmlDtdPtr cur = doc->intSubset;
673
674 if (cur == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000675 xmlGenericError(xmlGenericErrorContext,
676 "htmlDtdDump : no internal subset\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000677 return;
678 }
679 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
680 xmlOutputBufferWriteString(buf, (const char *)cur->name);
681 if (cur->ExternalID != NULL) {
682 xmlOutputBufferWriteString(buf, " PUBLIC ");
683 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
684 if (cur->SystemID != NULL) {
685 xmlOutputBufferWriteString(buf, " ");
686 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
687 }
688 } else if (cur->SystemID != NULL) {
689 xmlOutputBufferWriteString(buf, " SYSTEM ");
690 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
691 }
692 xmlOutputBufferWriteString(buf, ">\n");
693}
694
695/**
696 * htmlAttrDump:
697 * @buf: the HTML buffer output
698 * @doc: the document
699 * @cur: the attribute pointer
700 *
701 * Dump an HTML attribute
702 */
703static void
704htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
705 xmlChar *value;
706
707 if (cur == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000708 xmlGenericError(xmlGenericErrorContext,
709 "htmlAttrDump : property == NULL\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000710 return;
711 }
712 xmlOutputBufferWriteString(buf, " ");
713 xmlOutputBufferWriteString(buf, (const char *)cur->name);
714 if (cur->children != NULL) {
715 value = xmlNodeListGetString(doc, cur->children, 0);
716 if (value) {
717 xmlOutputBufferWriteString(buf, "=");
718 xmlBufferWriteQuotedString(buf->buffer, value);
719 xmlFree(value);
720 } else {
721 xmlOutputBufferWriteString(buf, "=\"\"");
722 }
723 }
724}
725
726/**
727 * htmlAttrListDump:
728 * @buf: the HTML buffer output
729 * @doc: the document
730 * @cur: the first attribute pointer
731 *
732 * Dump a list of HTML attributes
733 */
734static void
735htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
736 if (cur == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000737 xmlGenericError(xmlGenericErrorContext,
738 "htmlAttrListDump : property == NULL\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000739 return;
740 }
741 while (cur != NULL) {
742 htmlAttrDumpOutput(buf, doc, cur, encoding);
743 cur = cur->next;
744 }
745}
746
747
748void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
749 xmlNodePtr cur, const char *encoding);
750
751/**
752 * htmlNodeListDump:
753 * @buf: the HTML buffer output
754 * @doc: the document
755 * @cur: the first node
756 *
757 * Dump an HTML node list, recursive behaviour,children are printed too.
758 */
759static void
760htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
761 if (cur == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000762 xmlGenericError(xmlGenericErrorContext,
763 "htmlNodeListDump : node == NULL\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000764 return;
765 }
766 while (cur != NULL) {
767 htmlNodeDumpOutput(buf, doc, cur, encoding);
768 cur = cur->next;
769 }
770}
771
772/**
773 * htmlNodeDump:
774 * @buf: the HTML buffer output
775 * @doc: the document
776 * @cur: the current node
777 *
778 * Dump an HTML node, recursive behaviour,children are printed too.
779 */
780void
781htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
782 htmlElemDescPtr info;
783
784 if (cur == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000785 xmlGenericError(xmlGenericErrorContext,
786 "htmlNodeDump : node == NULL\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000787 return;
788 }
789 /*
790 * Special cases.
791 */
792 if (cur->type == XML_DTD_NODE)
793 return;
794 if (cur->type == XML_HTML_DOCUMENT_NODE) {
795 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
796 return;
797 }
798 if (cur->type == HTML_TEXT_NODE) {
799 if (cur->content != NULL) {
800 xmlChar *buffer;
801
802#ifndef XML_USE_BUFFER_CONTENT
803 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
804#else
805 buffer = xmlEncodeEntitiesReentrant(doc,
806 xmlBufferContent(cur->content));
807#endif
808 if (buffer != NULL) {
809 xmlOutputBufferWriteString(buf, (const char *)buffer);
810 xmlFree(buffer);
811 }
812 }
813 return;
814 }
815 if (cur->type == HTML_COMMENT_NODE) {
816 if (cur->content != NULL) {
817 xmlOutputBufferWriteString(buf, "<!--");
818#ifndef XML_USE_BUFFER_CONTENT
819 xmlOutputBufferWriteString(buf, (const char *)cur->content);
820#else
Daniel Veillard9e8bfae2000-11-06 16:43:11 +0000821 xmlOutputBufferWriteString(buf, (const char *)
822 xmlBufferContent(cur->content));
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000823#endif
824 xmlOutputBufferWriteString(buf, "-->");
825 }
826 return;
827 }
828 if (cur->type == HTML_ENTITY_REF_NODE) {
829 xmlOutputBufferWriteString(buf, "&");
830 xmlOutputBufferWriteString(buf, (const char *)cur->name);
831 xmlOutputBufferWriteString(buf, ";");
832 return;
833 }
Daniel Veillard7eda8452000-10-14 23:38:43 +0000834 if (cur->type == HTML_PRESERVE_NODE) {
835 if (cur->content != NULL) {
836#ifndef XML_USE_BUFFER_CONTENT
837 xmlOutputBufferWriteString(buf, (const char *)cur->content);
838#else
Daniel Veillard9e8bfae2000-11-06 16:43:11 +0000839 xmlOutputBufferWriteString(buf, (const char *)
840 xmlBufferContent(cur->content));
Daniel Veillard7eda8452000-10-14 23:38:43 +0000841#endif
842 }
843 return;
844 }
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000845
846 /*
847 * Get specific HTmL info for taht node.
848 */
849 info = htmlTagLookup(cur->name);
850
851 xmlOutputBufferWriteString(buf, "<");
852 xmlOutputBufferWriteString(buf, (const char *)cur->name);
853 if (cur->properties != NULL)
854 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
855
856 if ((info != NULL) && (info->empty)) {
857 xmlOutputBufferWriteString(buf, ">");
858 if (cur->next != NULL) {
859 if ((cur->next->type != HTML_TEXT_NODE) &&
860 (cur->next->type != HTML_ENTITY_REF_NODE))
861 xmlOutputBufferWriteString(buf, "\n");
862 }
863 return;
864 }
865 if ((cur->content == NULL) && (cur->children == NULL)) {
Daniel Veillard683cb022000-10-22 12:04:13 +0000866 if ((info != NULL) && (info->endTag != 0) &&
867 (strcmp(info->name, "html")) && (strcmp(info->name, "body"))) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000868 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard683cb022000-10-22 12:04:13 +0000869 } else {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000870 xmlOutputBufferWriteString(buf, "></");
871 xmlOutputBufferWriteString(buf, (const char *)cur->name);
872 xmlOutputBufferWriteString(buf, ">");
873 }
874 if (cur->next != NULL) {
875 if ((cur->next->type != HTML_TEXT_NODE) &&
876 (cur->next->type != HTML_ENTITY_REF_NODE))
877 xmlOutputBufferWriteString(buf, "\n");
878 }
879 return;
880 }
881 xmlOutputBufferWriteString(buf, ">");
882 if (cur->content != NULL) {
883#if 0
884 xmlChar *buffer;
885
886#ifndef XML_USE_BUFFER_CONTENT
887 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
888#else
889 buffer = xmlEncodeEntitiesReentrant(doc,
890 xmlBufferContent(cur->content));
891#endif
892 if (buffer != NULL) {
893 xmlOutputBufferWriteString(buf, buffer);
894 xmlFree(buffer);
895 }
896#else
897 /*
898 * Uses the OutputBuffer property to automatically convert
899 * invalids to charrefs
900 */
901
902#ifndef XML_USE_BUFFER_CONTENT
903 xmlOutputBufferWriteString(buf, (const char *) cur->content);
904#else
905 xmlOutputBufferWriteString(buf,
906 (const char *) xmlBufferContent(cur->content));
907#endif
908#endif
909 }
910 if (cur->children != NULL) {
911 if ((cur->children->type != HTML_TEXT_NODE) &&
912 (cur->children->type != HTML_ENTITY_REF_NODE) &&
913 (cur->children != cur->last))
914 xmlOutputBufferWriteString(buf, "\n");
915 htmlNodeListDumpOutput(buf, doc, cur->children, encoding);
916 if ((cur->last->type != HTML_TEXT_NODE) &&
917 (cur->last->type != HTML_ENTITY_REF_NODE) &&
918 (cur->children != cur->last))
919 xmlOutputBufferWriteString(buf, "\n");
920 }
921 if (!htmlIsAutoClosed(doc, cur)) {
922 xmlOutputBufferWriteString(buf, "</");
923 xmlOutputBufferWriteString(buf, (const char *)cur->name);
924 xmlOutputBufferWriteString(buf, ">");
925 }
926 if (cur->next != NULL) {
927 if ((cur->next->type != HTML_TEXT_NODE) &&
928 (cur->next->type != HTML_ENTITY_REF_NODE))
929 xmlOutputBufferWriteString(buf, "\n");
930 }
931}
932
933/**
934 * htmlDocContentDump:
935 * @buf: the HTML buffer output
936 * @cur: the document
937 *
938 * Dump an HTML document.
939 */
940static void
941htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding) {
942 int type;
943
944 /*
945 * force to output the stuff as HTML, especially for entities
946 */
947 type = cur->type;
948 cur->type = XML_HTML_DOCUMENT_NODE;
949 if (cur->intSubset != NULL)
950 htmlDtdDumpOutput(buf, cur, NULL);
951 else {
952 /* Default to HTML-4.0 transitionnal @@@@ */
953 xmlOutputBufferWriteString(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
954
955 }
956 if (cur->children != NULL) {
957 htmlNodeListDumpOutput(buf, cur, cur->children, encoding);
958 }
959 xmlOutputBufferWriteString(buf, "\n");
960 cur->type = (xmlElementType) type;
961}
962
963
964/************************************************************************
965 * *
966 * Saving functions front-ends *
967 * *
968 ************************************************************************/
969
Daniel Veillard167b5091999-07-07 04:19:20 +0000970/**
971 * htmlDocDump:
972 * @f: the FILE*
973 * @cur: the document
974 *
975 * Dump an HTML document to an open FILE.
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000976 *
977 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard167b5091999-07-07 04:19:20 +0000978 */
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000979int
Daniel Veillard167b5091999-07-07 04:19:20 +0000980htmlDocDump(FILE *f, xmlDocPtr cur) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000981 xmlOutputBufferPtr buf;
982 xmlCharEncodingHandlerPtr handler = NULL;
983 const char *encoding;
984 int ret;
Daniel Veillard167b5091999-07-07 04:19:20 +0000985
986 if (cur == NULL) {
987#ifdef DEBUG_TREE
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000988 xmlGenericError(xmlGenericErrorContext,
989 "htmlDocDump : document == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000990#endif
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000991 return(-1);
Daniel Veillard167b5091999-07-07 04:19:20 +0000992 }
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000993
994 encoding = (const char *) htmlGetMetaEncoding(cur);
995
996 if (encoding != NULL) {
997 xmlCharEncoding enc;
998
999 enc = xmlParseCharEncoding(encoding);
1000 if (enc != cur->charset) {
1001 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1002 /*
1003 * Not supported yet
1004 */
1005 return(-1);
1006 }
1007
1008 handler = xmlFindCharEncodingHandler(encoding);
1009 if (handler == NULL)
1010 return(-1);
1011 }
1012 }
1013
1014 /*
1015 * Fallback to HTML or ASCII when the encoding is unspecified
1016 */
1017 if (handler == NULL)
1018 handler = xmlFindCharEncodingHandler("HTML");
1019 if (handler == NULL)
1020 handler = xmlFindCharEncodingHandler("ascii");
1021
1022 buf = xmlOutputBufferCreateFile(f, handler);
1023 if (buf == NULL) return(-1);
1024 htmlDocContentDumpOutput(buf, cur, NULL);
1025
1026 ret = xmlOutputBufferClose(buf);
1027 return(ret);
Daniel Veillard167b5091999-07-07 04:19:20 +00001028}
1029
1030/**
1031 * htmlSaveFile:
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001032 * @filename: the filename (or URL)
Daniel Veillard167b5091999-07-07 04:19:20 +00001033 * @cur: the document
1034 *
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001035 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1036 * used.
Daniel Veillard167b5091999-07-07 04:19:20 +00001037 * returns: the number of byte written or -1 in case of failure.
1038 */
1039int
1040htmlSaveFile(const char *filename, xmlDocPtr cur) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001041 xmlOutputBufferPtr buf;
1042 xmlCharEncodingHandlerPtr handler = NULL;
1043 const char *encoding;
Daniel Veillard167b5091999-07-07 04:19:20 +00001044 int ret;
1045
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001046 encoding = (const char *) htmlGetMetaEncoding(cur);
1047
1048 if (encoding != NULL) {
1049 xmlCharEncoding enc;
1050
1051 enc = xmlParseCharEncoding(encoding);
1052 if (enc != cur->charset) {
1053 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1054 /*
1055 * Not supported yet
1056 */
1057 return(-1);
1058 }
1059
1060 handler = xmlFindCharEncodingHandler(encoding);
1061 if (handler == NULL)
1062 return(-1);
1063 }
1064 }
1065
1066 /*
1067 * Fallback to HTML or ASCII when the encoding is unspecified
1068 */
1069 if (handler == NULL)
1070 handler = xmlFindCharEncodingHandler("HTML");
1071 if (handler == NULL)
1072 handler = xmlFindCharEncodingHandler("ascii");
1073
Daniel Veillard167b5091999-07-07 04:19:20 +00001074 /*
1075 * save the content to a temp buffer.
1076 */
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001077 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
Daniel Veillard167b5091999-07-07 04:19:20 +00001078 if (buf == NULL) return(0);
Daniel Veillard167b5091999-07-07 04:19:20 +00001079
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001080 htmlDocContentDumpOutput(buf, cur, NULL);
Daniel Veillard167b5091999-07-07 04:19:20 +00001081
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001082 ret = xmlOutputBufferClose(buf);
1083 return(ret);
Daniel Veillard167b5091999-07-07 04:19:20 +00001084}
1085
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001086/**
1087 * htmlSaveFileEnc:
1088 * @filename: the filename
1089 * @cur: the document
1090 *
1091 * Dump an HTML document to a file using a given encoding.
1092 *
1093 * returns: the number of byte written or -1 in case of failure.
1094 */
1095int
1096htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1097 xmlOutputBufferPtr buf;
1098 xmlCharEncodingHandlerPtr handler = NULL;
1099 int ret;
1100
1101 if (encoding != NULL) {
1102 xmlCharEncoding enc;
1103
1104 enc = xmlParseCharEncoding(encoding);
1105 if (enc != cur->charset) {
1106 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1107 /*
1108 * Not supported yet
1109 */
1110 return(-1);
1111 }
1112
1113 handler = xmlFindCharEncodingHandler(encoding);
1114 if (handler == NULL)
1115 return(-1);
1116 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1117 }
1118 }
1119
1120 /*
1121 * Fallback to HTML or ASCII when the encoding is unspecified
1122 */
1123 if (handler == NULL)
1124 handler = xmlFindCharEncodingHandler("HTML");
1125 if (handler == NULL)
1126 handler = xmlFindCharEncodingHandler("ascii");
1127
1128 /*
1129 * save the content to a temp buffer.
1130 */
1131 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1132 if (buf == NULL) return(0);
1133
1134 htmlDocContentDumpOutput(buf, cur, encoding);
1135
1136 ret = xmlOutputBufferClose(buf);
1137 return(ret);
1138}
Daniel Veillard361d8452000-04-03 19:48:13 +00001139#endif /* LIBXML_HTML_ENABLED */