blob: 17fef14ef3977f2455c73b92436ab98a2e4bec24 [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
Daniel Veillard7f7d1111999-09-22 09:46:25 +00009
Daniel Veillard3c558c31999-12-22 11:30:41 +000010#ifdef WIN32
11#include "win32config.h"
12#else
Daniel Veillard167b5091999-07-07 04:19:20 +000013#include "config.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000014#endif
Daniel Veillard361d8452000-04-03 19:48:13 +000015
Daniel Veillardb71379b2000-10-09 12:30:39 +000016#include <libxml/xmlversion.h>
Daniel Veillard361d8452000-04-03 19:48:13 +000017#ifdef LIBXML_HTML_ENABLED
18
Daniel Veillard167b5091999-07-07 04:19:20 +000019#include <stdio.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000020#include <string.h> /* for memset() only ! */
21
Daniel Veillard7f7d1111999-09-22 09:46:25 +000022#ifdef HAVE_CTYPE_H
23#include <ctype.h>
24#endif
25#ifdef HAVE_STDLIB_H
26#include <stdlib.h>
27#endif
28
Daniel Veillard361d8452000-04-03 19:48:13 +000029#include <libxml/xmlmemory.h>
30#include <libxml/HTMLparser.h>
31#include <libxml/HTMLtree.h>
32#include <libxml/entities.h>
33#include <libxml/valid.h>
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +000034#include <libxml/xmlerror.h>
Daniel Veillard0f2a53c2001-02-05 17:57:33 +000035#include <libxml/parserInternals.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000036
Daniel Veillard32bc74e2000-07-14 14:49:25 +000037/************************************************************************
38 * *
39 * Getting/Setting encoding meta tags *
40 * *
41 ************************************************************************/
42
43/**
44 * htmlGetMetaEncoding:
45 * @doc: the document
46 *
47 * Encoding definition lookup in the Meta tags
48 *
49 * Returns the current encoding as flagged in the HTML source
50 */
51const xmlChar *
52htmlGetMetaEncoding(htmlDocPtr doc) {
53 htmlNodePtr cur;
54 const xmlChar *content;
55 const xmlChar *encoding;
56
57 if (doc == NULL)
58 return(NULL);
59 cur = doc->children;
60
61 /*
62 * Search the html
63 */
64 while (cur != NULL) {
65 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +000066 if (xmlStrEqual(cur->name, BAD_CAST"html"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +000067 break;
Daniel Veillard8b5dd832000-10-01 20:28:44 +000068 if (xmlStrEqual(cur->name, BAD_CAST"head"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +000069 goto found_head;
Daniel Veillard8b5dd832000-10-01 20:28:44 +000070 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +000071 goto found_meta;
72 }
73 cur = cur->next;
74 }
75 if (cur == NULL)
76 return(NULL);
77 cur = cur->children;
78
79 /*
80 * Search the head
81 */
82 while (cur != NULL) {
83 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +000084 if (xmlStrEqual(cur->name, BAD_CAST"head"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +000085 break;
Daniel Veillard8b5dd832000-10-01 20:28:44 +000086 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +000087 goto found_meta;
88 }
89 cur = cur->next;
90 }
91 if (cur == NULL)
92 return(NULL);
93found_head:
94 cur = cur->children;
95
96 /*
97 * Search the meta elements
98 */
99found_meta:
100 while (cur != NULL) {
101 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000102 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000103 xmlAttrPtr attr = cur->properties;
104 int http;
105 const xmlChar *value;
106
107 content = NULL;
108 http = 0;
109 while (attr != NULL) {
110 if ((attr->children != NULL) &&
111 (attr->children->type == XML_TEXT_NODE) &&
112 (attr->children->next == NULL)) {
113#ifndef XML_USE_BUFFER_CONTENT
114 value = attr->children->content;
115#else
116 value = xmlBufferContent(attr->children->content);
117#endif
Daniel Veillardb656ebe2000-09-22 13:51:48 +0000118 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
119 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000120 http = 1;
Daniel Veillardb656ebe2000-09-22 13:51:48 +0000121 else if ((value != NULL)
122 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000123 content = value;
124 if ((http != 0) && (content != NULL))
125 goto found_content;
126 }
127 attr = attr->next;
128 }
129 }
130 }
131 cur = cur->next;
132 }
133 return(NULL);
134
135found_content:
136 encoding = xmlStrstr(content, BAD_CAST"charset=");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"Charset=");
139 if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
141 if (encoding != NULL) {
142 encoding += 8;
143 } else {
144 encoding = xmlStrstr(content, BAD_CAST"charset =");
145 if (encoding == NULL)
146 encoding = xmlStrstr(content, BAD_CAST"Charset =");
147 if (encoding == NULL)
148 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
149 if (encoding != NULL)
150 encoding += 9;
151 }
152 if (encoding != NULL) {
153 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
154 }
155 return(encoding);
156}
157
158/**
159 * htmlSetMetaEncoding:
160 * @doc: the document
161 * @encoding: the encoding string
162 *
163 * Sets the current encoding in the Meta tags
164 * NOTE: this will not change the document content encoding, just
165 * the META flag associated.
166 *
167 * Returns 0 in case of success and -1 in case of error
168 */
169int
170htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
171 htmlNodePtr cur, meta;
172 const xmlChar *content;
173 char newcontent[100];
174
175
176 if (doc == NULL)
177 return(-1);
178
179 if (encoding != NULL) {
Daniel Veillard39c7d712000-09-10 16:14:55 +0000180#ifdef HAVE_SNPRINTF
181 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
182 encoding);
183#else
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000184 sprintf(newcontent, "text/html; charset=%s", encoding);
Daniel Veillard39c7d712000-09-10 16:14:55 +0000185#endif
186 newcontent[sizeof(newcontent) - 1] = 0;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000187 }
188
189 cur = doc->children;
190
191 /*
192 * Search the html
193 */
194 while (cur != NULL) {
195 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000196 if (xmlStrEqual(cur->name, BAD_CAST"html"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000197 break;
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000198 if (xmlStrEqual(cur->name, BAD_CAST"body")) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000199 if (encoding == NULL)
200 return(0);
201 meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL);
202 xmlAddPrevSibling(cur, meta);
203 cur = meta;
204 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
205 xmlAddChild(cur, meta);
206 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
207 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
208 return(0);
209 }
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000210 if (xmlStrEqual(cur->name, BAD_CAST"head"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000211 goto found_head;
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000212 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000213 goto found_meta;
214 }
215 cur = cur->next;
216 }
217 if (cur == NULL)
218 return(-1);
219 cur = cur->children;
220
221 /*
222 * Search the head
223 */
224 while (cur != NULL) {
225 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000226 if (xmlStrEqual(cur->name, BAD_CAST"head"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000227 break;
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000228 if (xmlStrEqual(cur->name, BAD_CAST"body")) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000229 if (encoding == NULL)
230 return(0);
231 meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL);
232 xmlAddPrevSibling(cur, meta);
233 cur = meta;
234 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
235 xmlAddChild(cur, meta);
236 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
237 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
238 return(0);
239 }
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000240 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000241 goto found_meta;
242 }
243 cur = cur->next;
244 }
245 if (cur == NULL)
246 return(-1);
247found_head:
248 if (cur->children == NULL) {
249 if (encoding == NULL)
250 return(0);
251 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
252 xmlAddChild(cur, meta);
253 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
254 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
255 return(0);
256 }
257 cur = cur->children;
258
259found_meta:
260 if (encoding != NULL) {
261 /*
262 * Create a new Meta element with the right aatributes
263 */
264
265 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
266 xmlAddPrevSibling(cur, meta);
267 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
268 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
269 }
270
271 /*
272 * Search and destroy all the remaining the meta elements carrying
273 * encoding informations
274 */
275 while (cur != NULL) {
276 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000277 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000278 xmlAttrPtr attr = cur->properties;
279 int http;
280 const xmlChar *value;
281
282 content = NULL;
283 http = 0;
284 while (attr != NULL) {
285 if ((attr->children != NULL) &&
286 (attr->children->type == XML_TEXT_NODE) &&
287 (attr->children->next == NULL)) {
288#ifndef XML_USE_BUFFER_CONTENT
289 value = attr->children->content;
290#else
291 value = xmlBufferContent(attr->children->content);
292#endif
Daniel Veillardb656ebe2000-09-22 13:51:48 +0000293 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
294 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000295 http = 1;
Daniel Veillardb656ebe2000-09-22 13:51:48 +0000296 else if ((value != NULL)
297 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000298 content = value;
299 if ((http != 0) && (content != NULL))
300 break;
301 }
302 attr = attr->next;
303 }
304 if ((http != 0) && (content != NULL)) {
305 meta = cur;
306 cur = cur->next;
307 xmlUnlinkNode(meta);
308 xmlFreeNode(meta);
309 continue;
310 }
311
312 }
313 }
314 cur = cur->next;
315 }
316 return(0);
317}
318
319/************************************************************************
320 * *
321 * Dumping HTML tree content to a simple buffer *
322 * *
323 ************************************************************************/
324
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000325static void
326htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
327
Daniel Veillard167b5091999-07-07 04:19:20 +0000328/**
329 * htmlDtdDump:
330 * @buf: the HTML buffer output
331 * @doc: the document
332 *
333 * Dump the HTML document DTD, if any.
334 */
335static void
336htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
337 xmlDtdPtr cur = doc->intSubset;
338
339 if (cur == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000340 xmlGenericError(xmlGenericErrorContext,
341 "htmlDtdDump : no internal subset\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000342 return;
343 }
344 xmlBufferWriteChar(buf, "<!DOCTYPE ");
345 xmlBufferWriteCHAR(buf, cur->name);
346 if (cur->ExternalID != NULL) {
347 xmlBufferWriteChar(buf, " PUBLIC ");
348 xmlBufferWriteQuotedString(buf, cur->ExternalID);
Daniel Veillard1566d3a1999-07-15 14:24:29 +0000349 if (cur->SystemID != NULL) {
350 xmlBufferWriteChar(buf, " ");
351 xmlBufferWriteQuotedString(buf, cur->SystemID);
352 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000353 } else if (cur->SystemID != NULL) {
354 xmlBufferWriteChar(buf, " SYSTEM ");
355 xmlBufferWriteQuotedString(buf, cur->SystemID);
356 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000357 xmlBufferWriteChar(buf, ">\n");
358}
359
360/**
361 * htmlAttrDump:
362 * @buf: the HTML buffer output
363 * @doc: the document
364 * @cur: the attribute pointer
365 *
366 * Dump an HTML attribute
367 */
368static void
369htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000370 xmlChar *value;
Daniel Veillard167b5091999-07-07 04:19:20 +0000371
372 if (cur == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000373 xmlGenericError(xmlGenericErrorContext,
374 "htmlAttrDump : property == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000375 return;
376 }
377 xmlBufferWriteChar(buf, " ");
378 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillardbe803962000-06-28 23:40:59 +0000379 if (cur->children != NULL) {
380 value = xmlNodeListGetString(doc, cur->children, 0);
381 if (value) {
382 xmlBufferWriteChar(buf, "=");
383 xmlBufferWriteQuotedString(buf, value);
384 xmlFree(value);
385 } else {
386 xmlBufferWriteChar(buf, "=\"\"");
387 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000388 }
389}
390
391/**
392 * htmlAttrListDump:
393 * @buf: the HTML buffer output
394 * @doc: the document
395 * @cur: the first attribute pointer
396 *
397 * Dump a list of HTML attributes
398 */
399static void
400htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
401 if (cur == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000402 xmlGenericError(xmlGenericErrorContext,
403 "htmlAttrListDump : property == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000404 return;
405 }
406 while (cur != NULL) {
407 htmlAttrDump(buf, doc, cur);
408 cur = cur->next;
409 }
410}
411
412
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000413void
Daniel Veillard82150d81999-07-07 07:32:15 +0000414htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000415/**
416 * htmlNodeListDump:
417 * @buf: the HTML buffer output
418 * @doc: the document
419 * @cur: the first node
Daniel Veillard167b5091999-07-07 04:19:20 +0000420 *
421 * Dump an HTML node list, recursive behaviour,children are printed too.
422 */
423static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000424htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000425 if (cur == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000426 xmlGenericError(xmlGenericErrorContext,
427 "htmlNodeListDump : node == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000428 return;
429 }
430 while (cur != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000431 htmlNodeDump(buf, doc, cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000432 cur = cur->next;
433 }
434}
435
436/**
437 * htmlNodeDump:
438 * @buf: the HTML buffer output
439 * @doc: the document
440 * @cur: the current node
Daniel Veillard167b5091999-07-07 04:19:20 +0000441 *
442 * Dump an HTML node, recursive behaviour,children are printed too.
443 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000444void
Daniel Veillard82150d81999-07-07 07:32:15 +0000445htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000446 htmlElemDescPtr info;
Daniel Veillard167b5091999-07-07 04:19:20 +0000447
448 if (cur == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000449 xmlGenericError(xmlGenericErrorContext,
450 "htmlNodeDump : node == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000451 return;
452 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000453 /*
454 * Special cases.
455 */
Daniel Veillardd83eb822000-06-30 18:39:56 +0000456 if (cur->type == XML_DTD_NODE)
457 return;
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000458 if (cur->type == XML_HTML_DOCUMENT_NODE) {
459 htmlDocContentDump(buf, (xmlDocPtr) cur);
460 return;
461 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000462 if (cur->type == HTML_TEXT_NODE) {
463 if (cur->content != NULL) {
Daniel Veillard0f2a53c2001-02-05 17:57:33 +0000464 if ((cur->name == xmlStringText) ||
465 (cur->name != xmlStringTextNoenc)) {
466 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000467
Daniel Veillardd293fd11999-12-01 09:51:45 +0000468#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard0f2a53c2001-02-05 17:57:33 +0000469 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000470#else
Daniel Veillard0f2a53c2001-02-05 17:57:33 +0000471 buffer = xmlEncodeEntitiesReentrant(doc,
472 xmlBufferContent(cur->content));
Daniel Veillardd293fd11999-12-01 09:51:45 +0000473#endif
Daniel Veillard0f2a53c2001-02-05 17:57:33 +0000474 if (buffer != NULL) {
475 xmlBufferWriteCHAR(buf, buffer);
476 xmlFree(buffer);
477 }
478 } else {
479 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillard167b5091999-07-07 04:19:20 +0000480 }
481 }
482 return;
483 }
484 if (cur->type == HTML_COMMENT_NODE) {
485 if (cur->content != NULL) {
486 xmlBufferWriteChar(buf, "<!--");
Daniel Veillardd293fd11999-12-01 09:51:45 +0000487#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000488 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000489#else
490 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
491#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000492 xmlBufferWriteChar(buf, "-->");
493 }
494 return;
495 }
496 if (cur->type == HTML_ENTITY_REF_NODE) {
497 xmlBufferWriteChar(buf, "&");
498 xmlBufferWriteCHAR(buf, cur->name);
499 xmlBufferWriteChar(buf, ";");
500 return;
501 }
502
Daniel Veillard82150d81999-07-07 07:32:15 +0000503 /*
504 * Get specific HTmL info for taht node.
505 */
506 info = htmlTagLookup(cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000507
Daniel Veillard82150d81999-07-07 07:32:15 +0000508 xmlBufferWriteChar(buf, "<");
Daniel Veillard167b5091999-07-07 04:19:20 +0000509 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000510 if (cur->properties != NULL)
511 htmlAttrListDump(buf, doc, cur->properties);
512
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000513 if ((info != NULL) && (info->empty)) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000514 xmlBufferWriteChar(buf, ">");
515 if (cur->next != NULL) {
516 if ((cur->next->type != HTML_TEXT_NODE) &&
517 (cur->next->type != HTML_ENTITY_REF_NODE))
518 xmlBufferWriteChar(buf, "\n");
519 }
520 return;
521 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000522 if ((cur->content == NULL) && (cur->children == NULL)) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000523 if ((info != NULL) && (info->endTag != 0))
Daniel Veillard82150d81999-07-07 07:32:15 +0000524 xmlBufferWriteChar(buf, ">");
525 else {
526 xmlBufferWriteChar(buf, "></");
527 xmlBufferWriteCHAR(buf, cur->name);
528 xmlBufferWriteChar(buf, ">");
529 }
530 if (cur->next != NULL) {
531 if ((cur->next->type != HTML_TEXT_NODE) &&
532 (cur->next->type != HTML_ENTITY_REF_NODE))
533 xmlBufferWriteChar(buf, "\n");
534 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000535 return;
536 }
537 xmlBufferWriteChar(buf, ">");
538 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000539 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000540
Daniel Veillardd293fd11999-12-01 09:51:45 +0000541#ifndef XML_USE_BUFFER_CONTENT
542 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
543#else
544 buffer = xmlEncodeEntitiesReentrant(doc,
545 xmlBufferContent(cur->content));
546#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000547 if (buffer != NULL) {
548 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000549 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000550 }
551 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000552 if (cur->children != NULL) {
553 if ((cur->children->type != HTML_TEXT_NODE) &&
554 (cur->children->type != HTML_ENTITY_REF_NODE) &&
555 (cur->children != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000556 xmlBufferWriteChar(buf, "\n");
Daniel Veillardcf461992000-03-14 18:30:20 +0000557 htmlNodeListDump(buf, doc, cur->children);
Daniel Veillard82150d81999-07-07 07:32:15 +0000558 if ((cur->last->type != HTML_TEXT_NODE) &&
Chris Lahey6dff2141999-12-01 09:51:45 +0000559 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardcf461992000-03-14 18:30:20 +0000560 (cur->children != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000561 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000562 }
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000563 if (!htmlIsAutoClosed(doc, cur)) {
564 xmlBufferWriteChar(buf, "</");
565 xmlBufferWriteCHAR(buf, cur->name);
566 xmlBufferWriteChar(buf, ">");
567 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000568 if (cur->next != NULL) {
569 if ((cur->next->type != HTML_TEXT_NODE) &&
570 (cur->next->type != HTML_ENTITY_REF_NODE))
571 xmlBufferWriteChar(buf, "\n");
572 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000573}
574
575/**
Daniel Veillard5feb8492000-02-02 17:15:36 +0000576 * htmlNodeDumpFile:
577 * @out: the FILE pointer
578 * @doc: the document
579 * @cur: the current node
580 *
581 * Dump an HTML node, recursive behaviour,children are printed too.
582 */
583void
584htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
585 xmlBufferPtr buf;
586
587 buf = xmlBufferCreate();
588 if (buf == NULL) return;
589 htmlNodeDump(buf, doc, cur);
590 xmlBufferDump(out, buf);
591 xmlBufferFree(buf);
592}
593
594/**
Daniel Veillard167b5091999-07-07 04:19:20 +0000595 * htmlDocContentDump:
596 * @buf: the HTML buffer output
597 * @cur: the document
598 *
599 * Dump an HTML document.
600 */
601static void
602htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000603 int type;
604
605 /*
606 * force to output the stuff as HTML, especially for entities
607 */
608 type = cur->type;
609 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard167b5091999-07-07 04:19:20 +0000610 if (cur->intSubset != NULL)
611 htmlDtdDump(buf, cur);
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000612 else {
613 /* Default to HTML-4.0 transitionnal @@@@ */
614 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
615
616 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000617 if (cur->children != NULL) {
618 htmlNodeListDump(buf, cur, cur->children);
Daniel Veillard167b5091999-07-07 04:19:20 +0000619 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000620 xmlBufferWriteChar(buf, "\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000621 cur->type = (xmlElementType) type;
Daniel Veillard167b5091999-07-07 04:19:20 +0000622}
623
624/**
625 * htmlDocDumpMemory:
626 * @cur: the document
627 * @mem: OUT: the memory pointer
628 * @size: OUT: the memory lenght
629 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000630 * Dump an HTML document in memory and return the xmlChar * and it's size.
Daniel Veillard167b5091999-07-07 04:19:20 +0000631 * It's up to the caller to free the memory.
632 */
633void
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000634htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000635 xmlBufferPtr buf;
636
637 if (cur == NULL) {
638#ifdef DEBUG_TREE
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000639 xmlGenericError(xmlGenericErrorContext,
640 "htmlxmlDocDumpMemory : document == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000641#endif
642 *mem = NULL;
643 *size = 0;
644 return;
645 }
646 buf = xmlBufferCreate();
647 if (buf == NULL) {
648 *mem = NULL;
649 *size = 0;
650 return;
651 }
652 htmlDocContentDump(buf, cur);
653 *mem = buf->content;
654 *size = buf->use;
655 memset(buf, -1, sizeof(xmlBuffer));
Daniel Veillard6454aec1999-09-02 22:04:43 +0000656 xmlFree(buf);
Daniel Veillard167b5091999-07-07 04:19:20 +0000657}
658
659
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000660/************************************************************************
661 * *
662 * Dumping HTML tree content to an I/O output buffer *
663 * *
664 ************************************************************************/
665
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000666/**
667 * htmlDtdDump:
668 * @buf: the HTML buffer output
669 * @doc: the document
670 *
671 * Dump the HTML document DTD, if any.
672 */
673static void
674htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, const char *encoding) {
675 xmlDtdPtr cur = doc->intSubset;
676
677 if (cur == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000678 xmlGenericError(xmlGenericErrorContext,
679 "htmlDtdDump : no internal subset\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000680 return;
681 }
682 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
683 xmlOutputBufferWriteString(buf, (const char *)cur->name);
684 if (cur->ExternalID != NULL) {
685 xmlOutputBufferWriteString(buf, " PUBLIC ");
686 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
687 if (cur->SystemID != NULL) {
688 xmlOutputBufferWriteString(buf, " ");
689 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
690 }
691 } else if (cur->SystemID != NULL) {
692 xmlOutputBufferWriteString(buf, " SYSTEM ");
693 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
694 }
695 xmlOutputBufferWriteString(buf, ">\n");
696}
697
698/**
699 * htmlAttrDump:
700 * @buf: the HTML buffer output
701 * @doc: the document
702 * @cur: the attribute pointer
703 *
704 * Dump an HTML attribute
705 */
706static void
707htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
708 xmlChar *value;
709
710 if (cur == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000711 xmlGenericError(xmlGenericErrorContext,
712 "htmlAttrDump : property == NULL\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000713 return;
714 }
715 xmlOutputBufferWriteString(buf, " ");
716 xmlOutputBufferWriteString(buf, (const char *)cur->name);
717 if (cur->children != NULL) {
718 value = xmlNodeListGetString(doc, cur->children, 0);
719 if (value) {
720 xmlOutputBufferWriteString(buf, "=");
721 xmlBufferWriteQuotedString(buf->buffer, value);
722 xmlFree(value);
723 } else {
724 xmlOutputBufferWriteString(buf, "=\"\"");
725 }
726 }
727}
728
729/**
730 * htmlAttrListDump:
731 * @buf: the HTML buffer output
732 * @doc: the document
733 * @cur: the first attribute pointer
734 *
735 * Dump a list of HTML attributes
736 */
737static void
738htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
739 if (cur == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000740 xmlGenericError(xmlGenericErrorContext,
741 "htmlAttrListDump : property == NULL\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000742 return;
743 }
744 while (cur != NULL) {
745 htmlAttrDumpOutput(buf, doc, cur, encoding);
746 cur = cur->next;
747 }
748}
749
750
751void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
752 xmlNodePtr cur, const char *encoding);
753
754/**
755 * htmlNodeListDump:
756 * @buf: the HTML buffer output
757 * @doc: the document
758 * @cur: the first node
759 *
760 * Dump an HTML node list, recursive behaviour,children are printed too.
761 */
762static void
763htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
764 if (cur == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000765 xmlGenericError(xmlGenericErrorContext,
766 "htmlNodeListDump : node == NULL\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000767 return;
768 }
769 while (cur != NULL) {
770 htmlNodeDumpOutput(buf, doc, cur, encoding);
771 cur = cur->next;
772 }
773}
774
775/**
776 * htmlNodeDump:
777 * @buf: the HTML buffer output
778 * @doc: the document
779 * @cur: the current node
780 *
781 * Dump an HTML node, recursive behaviour,children are printed too.
782 */
783void
784htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
785 htmlElemDescPtr info;
786
787 if (cur == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000788 xmlGenericError(xmlGenericErrorContext,
789 "htmlNodeDump : node == NULL\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000790 return;
791 }
792 /*
793 * Special cases.
794 */
795 if (cur->type == XML_DTD_NODE)
796 return;
797 if (cur->type == XML_HTML_DOCUMENT_NODE) {
798 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
799 return;
800 }
801 if (cur->type == HTML_TEXT_NODE) {
802 if (cur->content != NULL) {
Daniel Veillard0f2a53c2001-02-05 17:57:33 +0000803 if ((cur->name == xmlStringText) ||
804 (cur->name != xmlStringTextNoenc)) {
805 xmlChar *buffer;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000806
807#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard0f2a53c2001-02-05 17:57:33 +0000808 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000809#else
Daniel Veillard0f2a53c2001-02-05 17:57:33 +0000810 buffer = xmlEncodeEntitiesReentrant(doc,
811 xmlBufferContent(cur->content));
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000812#endif
Daniel Veillard0f2a53c2001-02-05 17:57:33 +0000813 if (buffer != NULL) {
814 xmlOutputBufferWriteString(buf, (const char *)buffer);
815 xmlFree(buffer);
816 }
817 } else {
818 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000819 }
820 }
821 return;
822 }
823 if (cur->type == HTML_COMMENT_NODE) {
824 if (cur->content != NULL) {
825 xmlOutputBufferWriteString(buf, "<!--");
826#ifndef XML_USE_BUFFER_CONTENT
827 xmlOutputBufferWriteString(buf, (const char *)cur->content);
828#else
Daniel Veillard9e8bfae2000-11-06 16:43:11 +0000829 xmlOutputBufferWriteString(buf, (const char *)
830 xmlBufferContent(cur->content));
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000831#endif
832 xmlOutputBufferWriteString(buf, "-->");
833 }
834 return;
835 }
836 if (cur->type == HTML_ENTITY_REF_NODE) {
837 xmlOutputBufferWriteString(buf, "&");
838 xmlOutputBufferWriteString(buf, (const char *)cur->name);
839 xmlOutputBufferWriteString(buf, ";");
840 return;
841 }
Daniel Veillard7eda8452000-10-14 23:38:43 +0000842 if (cur->type == HTML_PRESERVE_NODE) {
843 if (cur->content != NULL) {
844#ifndef XML_USE_BUFFER_CONTENT
845 xmlOutputBufferWriteString(buf, (const char *)cur->content);
846#else
Daniel Veillard9e8bfae2000-11-06 16:43:11 +0000847 xmlOutputBufferWriteString(buf, (const char *)
848 xmlBufferContent(cur->content));
Daniel Veillard7eda8452000-10-14 23:38:43 +0000849#endif
850 }
851 return;
852 }
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000853
854 /*
855 * Get specific HTmL info for taht node.
856 */
857 info = htmlTagLookup(cur->name);
858
859 xmlOutputBufferWriteString(buf, "<");
860 xmlOutputBufferWriteString(buf, (const char *)cur->name);
861 if (cur->properties != NULL)
862 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
863
864 if ((info != NULL) && (info->empty)) {
865 xmlOutputBufferWriteString(buf, ">");
866 if (cur->next != NULL) {
867 if ((cur->next->type != HTML_TEXT_NODE) &&
868 (cur->next->type != HTML_ENTITY_REF_NODE))
869 xmlOutputBufferWriteString(buf, "\n");
870 }
871 return;
872 }
873 if ((cur->content == NULL) && (cur->children == NULL)) {
Daniel Veillard683cb022000-10-22 12:04:13 +0000874 if ((info != NULL) && (info->endTag != 0) &&
875 (strcmp(info->name, "html")) && (strcmp(info->name, "body"))) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000876 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard683cb022000-10-22 12:04:13 +0000877 } else {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000878 xmlOutputBufferWriteString(buf, "></");
879 xmlOutputBufferWriteString(buf, (const char *)cur->name);
880 xmlOutputBufferWriteString(buf, ">");
881 }
882 if (cur->next != NULL) {
883 if ((cur->next->type != HTML_TEXT_NODE) &&
884 (cur->next->type != HTML_ENTITY_REF_NODE))
885 xmlOutputBufferWriteString(buf, "\n");
886 }
887 return;
888 }
889 xmlOutputBufferWriteString(buf, ">");
890 if (cur->content != NULL) {
891#if 0
892 xmlChar *buffer;
893
894#ifndef XML_USE_BUFFER_CONTENT
895 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
896#else
897 buffer = xmlEncodeEntitiesReentrant(doc,
898 xmlBufferContent(cur->content));
899#endif
900 if (buffer != NULL) {
901 xmlOutputBufferWriteString(buf, buffer);
902 xmlFree(buffer);
903 }
904#else
905 /*
906 * Uses the OutputBuffer property to automatically convert
907 * invalids to charrefs
908 */
909
910#ifndef XML_USE_BUFFER_CONTENT
911 xmlOutputBufferWriteString(buf, (const char *) cur->content);
912#else
913 xmlOutputBufferWriteString(buf,
914 (const char *) xmlBufferContent(cur->content));
915#endif
916#endif
917 }
918 if (cur->children != NULL) {
919 if ((cur->children->type != HTML_TEXT_NODE) &&
920 (cur->children->type != HTML_ENTITY_REF_NODE) &&
921 (cur->children != cur->last))
922 xmlOutputBufferWriteString(buf, "\n");
923 htmlNodeListDumpOutput(buf, doc, cur->children, encoding);
924 if ((cur->last->type != HTML_TEXT_NODE) &&
925 (cur->last->type != HTML_ENTITY_REF_NODE) &&
926 (cur->children != cur->last))
927 xmlOutputBufferWriteString(buf, "\n");
928 }
929 if (!htmlIsAutoClosed(doc, cur)) {
930 xmlOutputBufferWriteString(buf, "</");
931 xmlOutputBufferWriteString(buf, (const char *)cur->name);
932 xmlOutputBufferWriteString(buf, ">");
933 }
934 if (cur->next != NULL) {
935 if ((cur->next->type != HTML_TEXT_NODE) &&
936 (cur->next->type != HTML_ENTITY_REF_NODE))
937 xmlOutputBufferWriteString(buf, "\n");
938 }
939}
940
941/**
942 * htmlDocContentDump:
943 * @buf: the HTML buffer output
944 * @cur: the document
945 *
946 * Dump an HTML document.
947 */
Daniel Veillard701c7362001-01-21 09:48:59 +0000948void
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000949htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding) {
950 int type;
951
952 /*
953 * force to output the stuff as HTML, especially for entities
954 */
955 type = cur->type;
956 cur->type = XML_HTML_DOCUMENT_NODE;
957 if (cur->intSubset != NULL)
958 htmlDtdDumpOutput(buf, cur, NULL);
959 else {
960 /* Default to HTML-4.0 transitionnal @@@@ */
Daniel Veillarde4566462001-01-22 09:58:39 +0000961 xmlOutputBufferWriteString(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000962
963 }
964 if (cur->children != NULL) {
965 htmlNodeListDumpOutput(buf, cur, cur->children, encoding);
966 }
967 xmlOutputBufferWriteString(buf, "\n");
968 cur->type = (xmlElementType) type;
969}
970
971
972/************************************************************************
973 * *
974 * Saving functions front-ends *
975 * *
976 ************************************************************************/
977
Daniel Veillard167b5091999-07-07 04:19:20 +0000978/**
979 * htmlDocDump:
980 * @f: the FILE*
981 * @cur: the document
982 *
983 * Dump an HTML document to an open FILE.
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000984 *
985 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard167b5091999-07-07 04:19:20 +0000986 */
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000987int
Daniel Veillard167b5091999-07-07 04:19:20 +0000988htmlDocDump(FILE *f, xmlDocPtr cur) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000989 xmlOutputBufferPtr buf;
990 xmlCharEncodingHandlerPtr handler = NULL;
991 const char *encoding;
992 int ret;
Daniel Veillard167b5091999-07-07 04:19:20 +0000993
994 if (cur == NULL) {
995#ifdef DEBUG_TREE
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000996 xmlGenericError(xmlGenericErrorContext,
997 "htmlDocDump : document == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000998#endif
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000999 return(-1);
Daniel Veillard167b5091999-07-07 04:19:20 +00001000 }
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001001
1002 encoding = (const char *) htmlGetMetaEncoding(cur);
1003
1004 if (encoding != NULL) {
1005 xmlCharEncoding enc;
1006
1007 enc = xmlParseCharEncoding(encoding);
1008 if (enc != cur->charset) {
1009 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1010 /*
1011 * Not supported yet
1012 */
1013 return(-1);
1014 }
1015
1016 handler = xmlFindCharEncodingHandler(encoding);
1017 if (handler == NULL)
1018 return(-1);
1019 }
1020 }
1021
1022 /*
1023 * Fallback to HTML or ASCII when the encoding is unspecified
1024 */
1025 if (handler == NULL)
1026 handler = xmlFindCharEncodingHandler("HTML");
1027 if (handler == NULL)
1028 handler = xmlFindCharEncodingHandler("ascii");
1029
1030 buf = xmlOutputBufferCreateFile(f, handler);
1031 if (buf == NULL) return(-1);
1032 htmlDocContentDumpOutput(buf, cur, NULL);
1033
1034 ret = xmlOutputBufferClose(buf);
1035 return(ret);
Daniel Veillard167b5091999-07-07 04:19:20 +00001036}
1037
1038/**
1039 * htmlSaveFile:
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001040 * @filename: the filename (or URL)
Daniel Veillard167b5091999-07-07 04:19:20 +00001041 * @cur: the document
1042 *
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001043 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1044 * used.
Daniel Veillard167b5091999-07-07 04:19:20 +00001045 * returns: the number of byte written or -1 in case of failure.
1046 */
1047int
1048htmlSaveFile(const char *filename, xmlDocPtr cur) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001049 xmlOutputBufferPtr buf;
1050 xmlCharEncodingHandlerPtr handler = NULL;
1051 const char *encoding;
Daniel Veillard167b5091999-07-07 04:19:20 +00001052 int ret;
1053
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001054 encoding = (const char *) htmlGetMetaEncoding(cur);
1055
1056 if (encoding != NULL) {
1057 xmlCharEncoding enc;
1058
1059 enc = xmlParseCharEncoding(encoding);
1060 if (enc != cur->charset) {
1061 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1062 /*
1063 * Not supported yet
1064 */
1065 return(-1);
1066 }
1067
1068 handler = xmlFindCharEncodingHandler(encoding);
1069 if (handler == NULL)
1070 return(-1);
1071 }
1072 }
1073
1074 /*
1075 * Fallback to HTML or ASCII when the encoding is unspecified
1076 */
1077 if (handler == NULL)
1078 handler = xmlFindCharEncodingHandler("HTML");
1079 if (handler == NULL)
1080 handler = xmlFindCharEncodingHandler("ascii");
1081
Daniel Veillard167b5091999-07-07 04:19:20 +00001082 /*
1083 * save the content to a temp buffer.
1084 */
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001085 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
Daniel Veillard167b5091999-07-07 04:19:20 +00001086 if (buf == NULL) return(0);
Daniel Veillard167b5091999-07-07 04:19:20 +00001087
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001088 htmlDocContentDumpOutput(buf, cur, NULL);
Daniel Veillard167b5091999-07-07 04:19:20 +00001089
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001090 ret = xmlOutputBufferClose(buf);
1091 return(ret);
Daniel Veillard167b5091999-07-07 04:19:20 +00001092}
1093
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001094/**
1095 * htmlSaveFileEnc:
1096 * @filename: the filename
1097 * @cur: the document
1098 *
1099 * Dump an HTML document to a file using a given encoding.
1100 *
1101 * returns: the number of byte written or -1 in case of failure.
1102 */
1103int
1104htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1105 xmlOutputBufferPtr buf;
1106 xmlCharEncodingHandlerPtr handler = NULL;
1107 int ret;
1108
1109 if (encoding != NULL) {
1110 xmlCharEncoding enc;
1111
1112 enc = xmlParseCharEncoding(encoding);
1113 if (enc != cur->charset) {
1114 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1115 /*
1116 * Not supported yet
1117 */
1118 return(-1);
1119 }
1120
1121 handler = xmlFindCharEncodingHandler(encoding);
1122 if (handler == NULL)
1123 return(-1);
1124 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1125 }
1126 }
1127
1128 /*
1129 * Fallback to HTML or ASCII when the encoding is unspecified
1130 */
1131 if (handler == NULL)
1132 handler = xmlFindCharEncodingHandler("HTML");
1133 if (handler == NULL)
1134 handler = xmlFindCharEncodingHandler("ascii");
1135
1136 /*
1137 * save the content to a temp buffer.
1138 */
1139 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1140 if (buf == NULL) return(0);
1141
1142 htmlDocContentDumpOutput(buf, cur, encoding);
1143
1144 ret = xmlOutputBufferClose(buf);
1145 return(ret);
1146}
Daniel Veillard361d8452000-04-03 19:48:13 +00001147#endif /* LIBXML_HTML_ENABLED */