blob: 4efce27f2ab3775e339093b0e6ce3a2558964721 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002 * HTMLtree.c : implementation of access function for an HTML tree.
Owen Taylor3473f882001-02-23 17:55:21 +00003 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00006 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00007 */
8
9
Daniel Veillard34ce8be2002-03-18 19:37:11 +000010#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000012#ifdef LIBXML_HTML_ENABLED
13
Daniel Veillard8db67d22002-11-27 19:39:27 +000014#include <string.h> /* for memset() only ! */
15
Owen Taylor3473f882001-02-23 17:55:21 +000016#ifdef HAVE_CTYPE_H
17#include <ctype.h>
18#endif
19#ifdef HAVE_STDLIB_H
20#include <stdlib.h>
21#endif
22
23#include <libxml/xmlmemory.h>
24#include <libxml/HTMLparser.h>
25#include <libxml/HTMLtree.h>
26#include <libxml/entities.h>
27#include <libxml/valid.h>
28#include <libxml/xmlerror.h>
29#include <libxml/parserInternals.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000030#include <libxml/globals.h>
Daniel Veillardeb475a32002-04-14 22:00:22 +000031#include <libxml/uri.h>
Owen Taylor3473f882001-02-23 17:55:21 +000032
33/************************************************************************
34 * *
35 * Getting/Setting encoding meta tags *
36 * *
37 ************************************************************************/
38
39/**
40 * htmlGetMetaEncoding:
41 * @doc: the document
42 *
43 * Encoding definition lookup in the Meta tags
44 *
45 * Returns the current encoding as flagged in the HTML source
46 */
47const xmlChar *
48htmlGetMetaEncoding(htmlDocPtr doc) {
49 htmlNodePtr cur;
50 const xmlChar *content;
51 const xmlChar *encoding;
52
53 if (doc == NULL)
54 return(NULL);
55 cur = doc->children;
56
57 /*
58 * Search the html
59 */
60 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000061 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000062 if (xmlStrEqual(cur->name, BAD_CAST"html"))
63 break;
64 if (xmlStrEqual(cur->name, BAD_CAST"head"))
65 goto found_head;
66 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
67 goto found_meta;
68 }
69 cur = cur->next;
70 }
71 if (cur == NULL)
72 return(NULL);
73 cur = cur->children;
74
75 /*
76 * Search the head
77 */
78 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000079 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000080 if (xmlStrEqual(cur->name, BAD_CAST"head"))
81 break;
82 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
83 goto found_meta;
84 }
85 cur = cur->next;
86 }
87 if (cur == NULL)
88 return(NULL);
89found_head:
90 cur = cur->children;
91
92 /*
93 * Search the meta elements
94 */
95found_meta:
96 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000097 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000098 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
99 xmlAttrPtr attr = cur->properties;
100 int http;
101 const xmlChar *value;
102
103 content = NULL;
104 http = 0;
105 while (attr != NULL) {
106 if ((attr->children != NULL) &&
107 (attr->children->type == XML_TEXT_NODE) &&
108 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000109 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000110 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
111 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
112 http = 1;
113 else if ((value != NULL)
114 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
115 content = value;
116 if ((http != 0) && (content != NULL))
117 goto found_content;
118 }
119 attr = attr->next;
120 }
121 }
122 }
123 cur = cur->next;
124 }
125 return(NULL);
126
127found_content:
128 encoding = xmlStrstr(content, BAD_CAST"charset=");
129 if (encoding == NULL)
130 encoding = xmlStrstr(content, BAD_CAST"Charset=");
131 if (encoding == NULL)
132 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
133 if (encoding != NULL) {
134 encoding += 8;
135 } else {
136 encoding = xmlStrstr(content, BAD_CAST"charset =");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"Charset =");
139 if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
141 if (encoding != NULL)
142 encoding += 9;
143 }
144 if (encoding != NULL) {
145 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
146 }
147 return(encoding);
148}
149
150/**
151 * htmlSetMetaEncoding:
152 * @doc: the document
153 * @encoding: the encoding string
154 *
155 * Sets the current encoding in the Meta tags
156 * NOTE: this will not change the document content encoding, just
157 * the META flag associated.
158 *
159 * Returns 0 in case of success and -1 in case of error
160 */
161int
162htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
163 htmlNodePtr cur, meta;
164 const xmlChar *content;
165 char newcontent[100];
166
167
168 if (doc == NULL)
169 return(-1);
170
171 if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000172 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
173 encoding);
Owen Taylor3473f882001-02-23 17:55:21 +0000174 newcontent[sizeof(newcontent) - 1] = 0;
175 }
176
177 cur = doc->children;
178
179 /*
180 * Search the html
181 */
182 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000183 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000184 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
185 break;
186 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
187 goto found_head;
188 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
189 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000190 }
191 cur = cur->next;
192 }
193 if (cur == NULL)
194 return(-1);
195 cur = cur->children;
196
197 /*
198 * Search the head
199 */
200 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000201 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000202 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
203 break;
204 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
205 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000206 }
207 cur = cur->next;
208 }
209 if (cur == NULL)
210 return(-1);
211found_head:
212 if (cur->children == NULL) {
213 if (encoding == NULL)
214 return(0);
215 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
216 xmlAddChild(cur, meta);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000217 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Daniel Veillard3a42f3f2002-07-17 17:57:34 +0000218 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Owen Taylor3473f882001-02-23 17:55:21 +0000219 return(0);
220 }
221 cur = cur->children;
222
223found_meta:
224 if (encoding != NULL) {
225 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000226 * Create a new Meta element with the right attributes
Owen Taylor3473f882001-02-23 17:55:21 +0000227 */
228
229 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
230 xmlAddPrevSibling(cur, meta);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000231 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Daniel Veillard3a42f3f2002-07-17 17:57:34 +0000232 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Owen Taylor3473f882001-02-23 17:55:21 +0000233 }
234
235 /*
236 * Search and destroy all the remaining the meta elements carrying
237 * encoding informations
238 */
239 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000240 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000241 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000242 xmlAttrPtr attr = cur->properties;
243 int http;
244 const xmlChar *value;
245
246 content = NULL;
247 http = 0;
248 while (attr != NULL) {
249 if ((attr->children != NULL) &&
250 (attr->children->type == XML_TEXT_NODE) &&
251 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000252 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000253 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
254 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
255 http = 1;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000256 else
257 {
258 if ((value != NULL) &&
259 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
260 content = value;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000261 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000262 if ((http != 0) && (content != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +0000263 break;
264 }
265 attr = attr->next;
266 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000267 if ((http != 0) && (content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000268 meta = cur;
269 cur = cur->next;
270 xmlUnlinkNode(meta);
271 xmlFreeNode(meta);
272 continue;
273 }
274
275 }
276 }
277 cur = cur->next;
278 }
279 return(0);
280}
281
/**
 * htmlBooleanAttrs:
 *
 * These are the HTML attributes which will be output
 * in minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 *
 * The table is terminated by a NULL entry.
 */
static const char* htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL
};
295
296
297/**
298 * htmlIsBooleanAttr:
299 * @name: the name of the attribute to check
300 *
301 * Determine if a given attribute is a boolean attribute.
302 *
303 * returns: false if the attribute is not boolean, true otherwise.
304 */
305int
306htmlIsBooleanAttr(const xmlChar *name)
307{
308 int i = 0;
309
310 while (htmlBooleanAttrs[i] != NULL) {
Daniel Veillardabe01742002-09-26 12:40:03 +0000311 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
Daniel Veillardc084e472002-08-12 13:27:28 +0000312 return 1;
313 i++;
314 }
315 return 0;
316}
317
Owen Taylor3473f882001-02-23 17:55:21 +0000318/************************************************************************
319 * *
320 * Dumping HTML tree content to a simple buffer *
321 * *
322 ************************************************************************/
323
Daniel Veillard8db67d22002-11-27 19:39:27 +0000324static int
Daniel Veillard86fd5a72001-12-13 14:55:21 +0000325htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
326 int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000327
328/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000329 * htmlNodeDumpFormat:
Owen Taylor3473f882001-02-23 17:55:21 +0000330 * @buf: the HTML buffer output
331 * @doc: the document
332 * @cur: the current node
Daniel Veillard95d845f2001-06-13 13:48:46 +0000333 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000334 *
335 * Dump an HTML node, recursive behaviour,children are printed too.
Daniel Veillard8db67d22002-11-27 19:39:27 +0000336 *
337 * Returns the number of byte written or -1 in case of error
Owen Taylor3473f882001-02-23 17:55:21 +0000338 */
Daniel Veillard8db67d22002-11-27 19:39:27 +0000339static int
Daniel Veillard95d845f2001-06-13 13:48:46 +0000340htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
341 int format) {
Daniel Veillard8db67d22002-11-27 19:39:27 +0000342 unsigned int use;
343 int ret;
344 xmlOutputBufferPtr outbuf;
Owen Taylor3473f882001-02-23 17:55:21 +0000345
346 if (cur == NULL) {
Daniel Veillard8db67d22002-11-27 19:39:27 +0000347 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +0000348 }
Daniel Veillard8db67d22002-11-27 19:39:27 +0000349 if (buf == NULL) {
350 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +0000351 }
Daniel Veillard8db67d22002-11-27 19:39:27 +0000352 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
353 if (outbuf == NULL) {
354 xmlGenericError(xmlGenericErrorContext,
355 "htmlNodeDumpFormat: out of memory!\n");
356 return (-1);
357 }
358 memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
359 outbuf->buffer = buf;
360 outbuf->encoder = NULL;
361 outbuf->writecallback = NULL;
362 outbuf->closecallback = NULL;
363 outbuf->context = NULL;
364 outbuf->written = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000365
Daniel Veillard8db67d22002-11-27 19:39:27 +0000366 use = buf->use;
367 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
368 xmlFree(outbuf);
369 ret = buf->use - use;
370 return (ret);
Owen Taylor3473f882001-02-23 17:55:21 +0000371}
372
373/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000374 * htmlNodeDump:
375 * @buf: the HTML buffer output
376 * @doc: the document
377 * @cur: the current node
378 *
379 * Dump an HTML node, recursive behaviour,children are printed too,
380 * and formatting returns are added.
Daniel Veillard8db67d22002-11-27 19:39:27 +0000381 *
382 * Returns the number of byte written or -1 in case of error
Daniel Veillard95d845f2001-06-13 13:48:46 +0000383 */
Daniel Veillard8db67d22002-11-27 19:39:27 +0000384int
Daniel Veillard95d845f2001-06-13 13:48:46 +0000385htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000386 xmlInitParser();
387
Daniel Veillard8db67d22002-11-27 19:39:27 +0000388 return(htmlNodeDumpFormat(buf, doc, cur, 1));
Daniel Veillard95d845f2001-06-13 13:48:46 +0000389}
390
391/**
392 * htmlNodeDumpFileFormat:
393 * @out: the FILE pointer
394 * @doc: the document
395 * @cur: the current node
396 * @encoding: the document encoding
397 * @format: should formatting spaces been added
398 *
399 * Dump an HTML node, recursive behaviour,children are printed too.
400 *
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000401 * TODO: if encoding == NULL try to save in the doc encoding
402 *
403 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard95d845f2001-06-13 13:48:46 +0000404 */
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000405int
406htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
407 xmlNodePtr cur, const char *encoding, int format) {
408 xmlOutputBufferPtr buf;
409 xmlCharEncodingHandlerPtr handler = NULL;
410 int ret;
Daniel Veillard95d845f2001-06-13 13:48:46 +0000411
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000412 xmlInitParser();
413
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000414 if (encoding != NULL) {
415 xmlCharEncoding enc;
416
417 enc = xmlParseCharEncoding(encoding);
418 if (enc != XML_CHAR_ENCODING_UTF8) {
419 handler = xmlFindCharEncodingHandler(encoding);
420 if (handler == NULL)
421 return(-1);
422 }
423 }
424
425 /*
426 * Fallback to HTML or ASCII when the encoding is unspecified
427 */
428 if (handler == NULL)
429 handler = xmlFindCharEncodingHandler("HTML");
430 if (handler == NULL)
431 handler = xmlFindCharEncodingHandler("ascii");
432
433 /*
434 * save the content to a temp buffer.
435 */
436 buf = xmlOutputBufferCreateFile(out, handler);
437 if (buf == NULL) return(0);
438
439 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
440
441 ret = xmlOutputBufferClose(buf);
442 return(ret);
Daniel Veillard95d845f2001-06-13 13:48:46 +0000443}
444
445/**
Owen Taylor3473f882001-02-23 17:55:21 +0000446 * htmlNodeDumpFile:
447 * @out: the FILE pointer
448 * @doc: the document
449 * @cur: the current node
450 *
Daniel Veillard95d845f2001-06-13 13:48:46 +0000451 * Dump an HTML node, recursive behaviour,children are printed too,
452 * and formatting returns are added.
Owen Taylor3473f882001-02-23 17:55:21 +0000453 */
454void
455htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000456 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000457}
458
459/**
Owen Taylor3473f882001-02-23 17:55:21 +0000460 * htmlDocDumpMemory:
461 * @cur: the document
462 * @mem: OUT: the memory pointer
Daniel Veillard2d703722001-05-30 18:32:34 +0000463 * @size: OUT: the memory length
Owen Taylor3473f882001-02-23 17:55:21 +0000464 *
465 * Dump an HTML document in memory and return the xmlChar * and it's size.
466 * It's up to the caller to free the memory.
467 */
468void
469htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard2d703722001-05-30 18:32:34 +0000470 xmlOutputBufferPtr buf;
471 xmlCharEncodingHandlerPtr handler = NULL;
472 const char *encoding;
Owen Taylor3473f882001-02-23 17:55:21 +0000473
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000474 xmlInitParser();
475
Owen Taylor3473f882001-02-23 17:55:21 +0000476 if (cur == NULL) {
477#ifdef DEBUG_TREE
478 xmlGenericError(xmlGenericErrorContext,
Daniel Veillard2d703722001-05-30 18:32:34 +0000479 "htmlDocDumpMemory : document == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000480#endif
481 *mem = NULL;
482 *size = 0;
483 return;
484 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000485
486 encoding = (const char *) htmlGetMetaEncoding(cur);
487
488 if (encoding != NULL) {
489 xmlCharEncoding enc;
490
491 enc = xmlParseCharEncoding(encoding);
492 if (enc != cur->charset) {
493 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
494 /*
495 * Not supported yet
496 */
497 *mem = NULL;
498 *size = 0;
499 return;
500 }
501
502 handler = xmlFindCharEncodingHandler(encoding);
503 if (handler == NULL) {
504 *mem = NULL;
505 *size = 0;
506 return;
507 }
508 }
509 }
510
511 /*
512 * Fallback to HTML or ASCII when the encoding is unspecified
513 */
514 if (handler == NULL)
515 handler = xmlFindCharEncodingHandler("HTML");
516 if (handler == NULL)
517 handler = xmlFindCharEncodingHandler("ascii");
518
519 buf = xmlAllocOutputBuffer(handler);
Owen Taylor3473f882001-02-23 17:55:21 +0000520 if (buf == NULL) {
521 *mem = NULL;
522 *size = 0;
523 return;
524 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000525
526 htmlDocContentDumpOutput(buf, cur, NULL);
527 xmlOutputBufferFlush(buf);
528 if (buf->conv != NULL) {
529 *size = buf->conv->use;
530 *mem = xmlStrndup(buf->conv->content, *size);
531 } else {
532 *size = buf->buffer->use;
533 *mem = xmlStrndup(buf->buffer->content, *size);
534 }
535 (void)xmlOutputBufferClose(buf);
Owen Taylor3473f882001-02-23 17:55:21 +0000536}
537
538
539/************************************************************************
540 * *
541 * Dumping HTML tree content to an I/O output buffer *
542 * *
543 ************************************************************************/
544
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000545void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
Daniel Veillardc084e472002-08-12 13:27:28 +0000546
Owen Taylor3473f882001-02-23 17:55:21 +0000547/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000548 * htmlDtdDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000549 * @buf: the HTML buffer output
550 * @doc: the document
551 * @encoding: the encoding string
552 *
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000553 * TODO: check whether encoding is needed
554 *
Owen Taylor3473f882001-02-23 17:55:21 +0000555 * Dump the HTML document DTD, if any.
556 */
557static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000558htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000559 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000560 xmlDtdPtr cur = doc->intSubset;
561
562 if (cur == NULL) {
563 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000564 "htmlDtdDumpOutput : no internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000565 return;
566 }
567 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
568 xmlOutputBufferWriteString(buf, (const char *)cur->name);
569 if (cur->ExternalID != NULL) {
570 xmlOutputBufferWriteString(buf, " PUBLIC ");
571 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
572 if (cur->SystemID != NULL) {
573 xmlOutputBufferWriteString(buf, " ");
574 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
575 }
576 } else if (cur->SystemID != NULL) {
577 xmlOutputBufferWriteString(buf, " SYSTEM ");
578 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
579 }
580 xmlOutputBufferWriteString(buf, ">\n");
581}
582
583/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000584 * htmlAttrDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000585 * @buf: the HTML buffer output
586 * @doc: the document
587 * @cur: the attribute pointer
588 * @encoding: the encoding string
589 *
590 * Dump an HTML attribute
591 */
592static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000593htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000594 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000595 xmlChar *value;
596
Daniel Veillardeca60d02001-06-13 07:45:41 +0000597 /*
598 * TODO: The html output method should not escape a & character
599 * occurring in an attribute value immediately followed by
600 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
601 */
602
Owen Taylor3473f882001-02-23 17:55:21 +0000603 if (cur == NULL) {
604 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000605 "htmlAttrDumpOutput : property == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000606 return;
607 }
608 xmlOutputBufferWriteString(buf, " ");
609 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillardc084e472002-08-12 13:27:28 +0000610 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000611 value = xmlNodeListGetString(doc, cur->children, 0);
612 if (value) {
613 xmlOutputBufferWriteString(buf, "=");
Daniel Veillardc7e9b192003-03-27 14:08:24 +0000614 if ((cur->ns == NULL) && (cur->parent != NULL) &&
615 (cur->parent->ns == NULL) &&
616 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
617 (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
618 (!xmlStrcasecmp(cur->name, BAD_CAST "src")))) {
Daniel Veillardeb475a32002-04-14 22:00:22 +0000619 xmlChar *escaped;
620 xmlChar *tmp = value;
621
622 while (IS_BLANK(*tmp)) tmp++;
623
Daniel Veillard5f5b7bb2003-05-16 17:19:40 +0000624 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
Daniel Veillardeb475a32002-04-14 22:00:22 +0000625 if (escaped != NULL) {
626 xmlBufferWriteQuotedString(buf->buffer, escaped);
627 xmlFree(escaped);
628 } else {
629 xmlBufferWriteQuotedString(buf->buffer, value);
630 }
631 } else {
632 xmlBufferWriteQuotedString(buf->buffer, value);
633 }
Owen Taylor3473f882001-02-23 17:55:21 +0000634 xmlFree(value);
635 } else {
636 xmlOutputBufferWriteString(buf, "=\"\"");
637 }
638 }
639}
640
641/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000642 * htmlAttrListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000643 * @buf: the HTML buffer output
644 * @doc: the document
645 * @cur: the first attribute pointer
646 * @encoding: the encoding string
647 *
648 * Dump a list of HTML attributes
649 */
650static void
651htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
652 if (cur == NULL) {
653 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000654 "htmlAttrListDumpOutput : property == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000655 return;
656 }
657 while (cur != NULL) {
658 htmlAttrDumpOutput(buf, doc, cur, encoding);
659 cur = cur->next;
660 }
661}
662
663
Owen Taylor3473f882001-02-23 17:55:21 +0000664
665/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000666 * htmlNodeListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000667 * @buf: the HTML buffer output
668 * @doc: the document
669 * @cur: the first node
670 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000671 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000672 *
673 * Dump an HTML node list, recursive behaviour,children are printed too.
674 */
675static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000676htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
677 xmlNodePtr cur, const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000678 if (cur == NULL) {
679 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000680 "htmlNodeListDumpOutput : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000681 return;
682 }
683 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000684 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000685 cur = cur->next;
686 }
687}
688
689/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000690 * htmlNodeDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000691 * @buf: the HTML buffer output
692 * @doc: the document
693 * @cur: the current node
694 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000695 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000696 *
697 * Dump an HTML node, recursive behaviour,children are printed too.
698 */
699void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000700htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
701 xmlNodePtr cur, const char *encoding, int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000702 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000703
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000704 xmlInitParser();
705
Owen Taylor3473f882001-02-23 17:55:21 +0000706 if (cur == NULL) {
707 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000708 "htmlNodeDumpFormatOutput : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000709 return;
710 }
711 /*
712 * Special cases.
713 */
714 if (cur->type == XML_DTD_NODE)
715 return;
716 if (cur->type == XML_HTML_DOCUMENT_NODE) {
717 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
718 return;
719 }
720 if (cur->type == HTML_TEXT_NODE) {
721 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000722 if (((cur->name == (const xmlChar *)xmlStringText) ||
723 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000724 ((cur->parent == NULL) ||
Daniel Veillard44892f72002-10-16 15:23:26 +0000725 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
726 (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000727 xmlChar *buffer;
728
Owen Taylor3473f882001-02-23 17:55:21 +0000729 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000730 if (buffer != NULL) {
731 xmlOutputBufferWriteString(buf, (const char *)buffer);
732 xmlFree(buffer);
733 }
734 } else {
735 xmlOutputBufferWriteString(buf, (const char *)cur->content);
736 }
737 }
738 return;
739 }
740 if (cur->type == HTML_COMMENT_NODE) {
741 if (cur->content != NULL) {
742 xmlOutputBufferWriteString(buf, "<!--");
Owen Taylor3473f882001-02-23 17:55:21 +0000743 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000744 xmlOutputBufferWriteString(buf, "-->");
745 }
746 return;
747 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000748 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000749 if (cur->name == NULL)
750 return;
751 xmlOutputBufferWriteString(buf, "<?");
752 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000753 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000754 xmlOutputBufferWriteString(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000755 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000756 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000757 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000758 return;
759 }
Owen Taylor3473f882001-02-23 17:55:21 +0000760 if (cur->type == HTML_ENTITY_REF_NODE) {
761 xmlOutputBufferWriteString(buf, "&");
762 xmlOutputBufferWriteString(buf, (const char *)cur->name);
763 xmlOutputBufferWriteString(buf, ";");
764 return;
765 }
766 if (cur->type == HTML_PRESERVE_NODE) {
767 if (cur->content != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000768 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000769 }
770 return;
771 }
772
773 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000774 * Get specific HTML info for that node.
Owen Taylor3473f882001-02-23 17:55:21 +0000775 */
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000776 if (cur->ns == NULL)
777 info = htmlTagLookup(cur->name);
778 else
779 info = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000780
781 xmlOutputBufferWriteString(buf, "<");
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000782 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
783 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
784 xmlOutputBufferWriteString(buf, ":");
785 }
Owen Taylor3473f882001-02-23 17:55:21 +0000786 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000787 if (cur->nsDef)
788 xmlNsListDumpOutput(buf, cur->nsDef);
Owen Taylor3473f882001-02-23 17:55:21 +0000789 if (cur->properties != NULL)
790 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
791
792 if ((info != NULL) && (info->empty)) {
793 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000794 if ((format) && (!info->isinline) && (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000795 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +0000796 (cur->next->type != HTML_ENTITY_REF_NODE) &&
797 (cur->parent != NULL) &&
798 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +0000799 xmlOutputBufferWriteString(buf, "\n");
800 }
801 return;
802 }
Daniel Veillard7db37732001-07-12 01:20:08 +0000803 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
804 (cur->children == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000805 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +0000806 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
807 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000808 xmlOutputBufferWriteString(buf, ">");
809 } else {
810 xmlOutputBufferWriteString(buf, "></");
Daniel Veillard645c6902003-04-10 21:40:49 +0000811 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
812 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
813 xmlOutputBufferWriteString(buf, ":");
814 }
Owen Taylor3473f882001-02-23 17:55:21 +0000815 xmlOutputBufferWriteString(buf, (const char *)cur->name);
816 xmlOutputBufferWriteString(buf, ">");
817 }
Daniel Veillard02bb1702001-06-13 21:11:59 +0000818 if ((format) && (cur->next != NULL) &&
819 (info != NULL) && (!info->isinline)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000820 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +0000821 (cur->next->type != HTML_ENTITY_REF_NODE) &&
822 (cur->parent != NULL) &&
823 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +0000824 xmlOutputBufferWriteString(buf, "\n");
825 }
826 return;
827 }
828 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +0000829 if ((cur->type != XML_ELEMENT_NODE) &&
830 (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000831 /*
832 * Uses the OutputBuffer property to automatically convert
833 * invalids to charrefs
834 */
835
Owen Taylor3473f882001-02-23 17:55:21 +0000836 xmlOutputBufferWriteString(buf, (const char *) cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000837 }
838 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +0000839 if ((format) && (info != NULL) && (!info->isinline) &&
840 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000841 (cur->children->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +0000842 (cur->children != cur->last) &&
843 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +0000844 xmlOutputBufferWriteString(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +0000845 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +0000846 if ((format) && (info != NULL) && (!info->isinline) &&
847 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000848 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +0000849 (cur->children != cur->last) &&
850 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +0000851 xmlOutputBufferWriteString(buf, "\n");
852 }
Owen Taylor3473f882001-02-23 17:55:21 +0000853 xmlOutputBufferWriteString(buf, "</");
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000854 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
855 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
856 xmlOutputBufferWriteString(buf, ":");
857 }
Owen Taylor3473f882001-02-23 17:55:21 +0000858 xmlOutputBufferWriteString(buf, (const char *)cur->name);
859 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000860 if ((format) && (info != NULL) && (!info->isinline) &&
861 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000862 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +0000863 (cur->next->type != HTML_ENTITY_REF_NODE) &&
864 (cur->parent != NULL) &&
865 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +0000866 xmlOutputBufferWriteString(buf, "\n");
867 }
868}
869
870/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000871 * htmlNodeDumpOutput:
872 * @buf: the HTML buffer output
873 * @doc: the document
874 * @cur: the current node
875 * @encoding: the encoding string
876 *
877 * Dump an HTML node, recursive behaviour,children are printed too,
878 * and formatting returns/spaces are added.
879 */
880void
881htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
882 xmlNodePtr cur, const char *encoding) {
883 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
884}
885
886/**
887 * htmlDocContentDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000888 * @buf: the HTML buffer output
889 * @cur: the document
890 * @encoding: the encoding string
Daniel Veillard9d06d302002-01-22 18:15:52 +0000891 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000892 *
893 * Dump an HTML document.
894 */
895void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000896htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
897 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000898 int type;
899
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000900 xmlInitParser();
901
Owen Taylor3473f882001-02-23 17:55:21 +0000902 /*
903 * force to output the stuff as HTML, especially for entities
904 */
905 type = cur->type;
906 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard4dd93462001-04-02 15:16:19 +0000907 if (cur->intSubset != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000908 htmlDtdDumpOutput(buf, cur, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000909 }
910 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000911 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000912 }
913 xmlOutputBufferWriteString(buf, "\n");
914 cur->type = (xmlElementType) type;
915}
916
Daniel Veillard95d845f2001-06-13 13:48:46 +0000917/**
918 * htmlDocContentDumpOutput:
919 * @buf: the HTML buffer output
920 * @cur: the document
921 * @encoding: the encoding string
922 *
923 * Dump an HTML document. Formating return/spaces are added.
924 */
925void
926htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
927 const char *encoding) {
928 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
929}
930
Owen Taylor3473f882001-02-23 17:55:21 +0000931/************************************************************************
932 * *
933 * Saving functions front-ends *
934 * *
935 ************************************************************************/
936
937/**
938 * htmlDocDump:
939 * @f: the FILE*
940 * @cur: the document
941 *
942 * Dump an HTML document to an open FILE.
943 *
944 * returns: the number of byte written or -1 in case of failure.
945 */
946int
947htmlDocDump(FILE *f, xmlDocPtr cur) {
948 xmlOutputBufferPtr buf;
949 xmlCharEncodingHandlerPtr handler = NULL;
950 const char *encoding;
951 int ret;
952
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000953 xmlInitParser();
954
Owen Taylor3473f882001-02-23 17:55:21 +0000955 if (cur == NULL) {
956#ifdef DEBUG_TREE
957 xmlGenericError(xmlGenericErrorContext,
958 "htmlDocDump : document == NULL\n");
959#endif
960 return(-1);
961 }
962
963 encoding = (const char *) htmlGetMetaEncoding(cur);
964
965 if (encoding != NULL) {
966 xmlCharEncoding enc;
967
968 enc = xmlParseCharEncoding(encoding);
969 if (enc != cur->charset) {
970 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
971 /*
972 * Not supported yet
973 */
974 return(-1);
975 }
976
977 handler = xmlFindCharEncodingHandler(encoding);
978 if (handler == NULL)
979 return(-1);
980 }
981 }
982
983 /*
984 * Fallback to HTML or ASCII when the encoding is unspecified
985 */
986 if (handler == NULL)
987 handler = xmlFindCharEncodingHandler("HTML");
988 if (handler == NULL)
989 handler = xmlFindCharEncodingHandler("ascii");
990
991 buf = xmlOutputBufferCreateFile(f, handler);
992 if (buf == NULL) return(-1);
993 htmlDocContentDumpOutput(buf, cur, NULL);
994
995 ret = xmlOutputBufferClose(buf);
996 return(ret);
997}
998
999/**
1000 * htmlSaveFile:
1001 * @filename: the filename (or URL)
1002 * @cur: the document
1003 *
1004 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1005 * used.
1006 * returns: the number of byte written or -1 in case of failure.
1007 */
1008int
1009htmlSaveFile(const char *filename, xmlDocPtr cur) {
1010 xmlOutputBufferPtr buf;
1011 xmlCharEncodingHandlerPtr handler = NULL;
1012 const char *encoding;
1013 int ret;
1014
Daniel Veillard70bcb0e2003-08-08 14:00:28 +00001015 xmlInitParser();
1016
Owen Taylor3473f882001-02-23 17:55:21 +00001017 encoding = (const char *) htmlGetMetaEncoding(cur);
1018
1019 if (encoding != NULL) {
1020 xmlCharEncoding enc;
1021
1022 enc = xmlParseCharEncoding(encoding);
1023 if (enc != cur->charset) {
1024 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1025 /*
1026 * Not supported yet
1027 */
1028 return(-1);
1029 }
1030
1031 handler = xmlFindCharEncodingHandler(encoding);
1032 if (handler == NULL)
1033 return(-1);
1034 }
1035 }
1036
1037 /*
1038 * Fallback to HTML or ASCII when the encoding is unspecified
1039 */
1040 if (handler == NULL)
1041 handler = xmlFindCharEncodingHandler("HTML");
1042 if (handler == NULL)
1043 handler = xmlFindCharEncodingHandler("ascii");
1044
1045 /*
1046 * save the content to a temp buffer.
1047 */
1048 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1049 if (buf == NULL) return(0);
1050
1051 htmlDocContentDumpOutput(buf, cur, NULL);
1052
1053 ret = xmlOutputBufferClose(buf);
1054 return(ret);
1055}
1056
1057/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001058 * htmlSaveFileFormat:
Owen Taylor3473f882001-02-23 17:55:21 +00001059 * @filename: the filename
1060 * @cur: the document
Daniel Veillard95d845f2001-06-13 13:48:46 +00001061 * @format: should formatting spaces been added
1062 * @encoding: the document encoding
Owen Taylor3473f882001-02-23 17:55:21 +00001063 *
1064 * Dump an HTML document to a file using a given encoding.
1065 *
1066 * returns: the number of byte written or -1 in case of failure.
1067 */
1068int
Daniel Veillard95d845f2001-06-13 13:48:46 +00001069htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1070 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001071 xmlOutputBufferPtr buf;
1072 xmlCharEncodingHandlerPtr handler = NULL;
1073 int ret;
1074
Daniel Veillard70bcb0e2003-08-08 14:00:28 +00001075 xmlInitParser();
1076
Owen Taylor3473f882001-02-23 17:55:21 +00001077 if (encoding != NULL) {
1078 xmlCharEncoding enc;
1079
1080 enc = xmlParseCharEncoding(encoding);
1081 if (enc != cur->charset) {
1082 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1083 /*
1084 * Not supported yet
1085 */
1086 return(-1);
1087 }
1088
1089 handler = xmlFindCharEncodingHandler(encoding);
1090 if (handler == NULL)
1091 return(-1);
1092 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1093 }
Daniel Veillard4dd93462001-04-02 15:16:19 +00001094 } else {
1095 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
Owen Taylor3473f882001-02-23 17:55:21 +00001096 }
1097
1098 /*
1099 * Fallback to HTML or ASCII when the encoding is unspecified
1100 */
1101 if (handler == NULL)
1102 handler = xmlFindCharEncodingHandler("HTML");
1103 if (handler == NULL)
1104 handler = xmlFindCharEncodingHandler("ascii");
1105
1106 /*
1107 * save the content to a temp buffer.
1108 */
1109 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1110 if (buf == NULL) return(0);
1111
Daniel Veillard95d845f2001-06-13 13:48:46 +00001112 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001113
1114 ret = xmlOutputBufferClose(buf);
1115 return(ret);
1116}
Daniel Veillard95d845f2001-06-13 13:48:46 +00001117
1118/**
1119 * htmlSaveFileEnc:
1120 * @filename: the filename
1121 * @cur: the document
1122 * @encoding: the document encoding
1123 *
1124 * Dump an HTML document to a file using a given encoding
1125 * and formatting returns/spaces are added.
1126 *
1127 * returns: the number of byte written or -1 in case of failure.
1128 */
1129int
1130htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1131 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1132}
1133
Daniel Veillardc084e472002-08-12 13:27:28 +00001134
1135
Owen Taylor3473f882001-02-23 17:55:21 +00001136#endif /* LIBXML_HTML_ENABLED */