blob: 41ce1eefa055e0a3cd3a3cfc68d629785652afc7 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002 * HTMLtree.c : implementation of access function for an HTML tree.
Owen Taylor3473f882001-02-23 17:55:21 +00003 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00006 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00007 */
8
9
Daniel Veillard34ce8be2002-03-18 19:37:11 +000010#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000012#ifdef LIBXML_HTML_ENABLED
13
Daniel Veillard8db67d22002-11-27 19:39:27 +000014#include <string.h> /* for memset() only ! */
15
Owen Taylor3473f882001-02-23 17:55:21 +000016#ifdef HAVE_CTYPE_H
17#include <ctype.h>
18#endif
19#ifdef HAVE_STDLIB_H
20#include <stdlib.h>
21#endif
22
23#include <libxml/xmlmemory.h>
24#include <libxml/HTMLparser.h>
25#include <libxml/HTMLtree.h>
26#include <libxml/entities.h>
27#include <libxml/valid.h>
28#include <libxml/xmlerror.h>
29#include <libxml/parserInternals.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000030#include <libxml/globals.h>
Daniel Veillardeb475a32002-04-14 22:00:22 +000031#include <libxml/uri.h>
Owen Taylor3473f882001-02-23 17:55:21 +000032
33/************************************************************************
34 * *
35 * Getting/Setting encoding meta tags *
36 * *
37 ************************************************************************/
38
39/**
40 * htmlGetMetaEncoding:
41 * @doc: the document
42 *
43 * Encoding definition lookup in the Meta tags
44 *
45 * Returns the current encoding as flagged in the HTML source
46 */
47const xmlChar *
48htmlGetMetaEncoding(htmlDocPtr doc) {
49 htmlNodePtr cur;
50 const xmlChar *content;
51 const xmlChar *encoding;
52
53 if (doc == NULL)
54 return(NULL);
55 cur = doc->children;
56
57 /*
58 * Search the html
59 */
60 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000061 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000062 if (xmlStrEqual(cur->name, BAD_CAST"html"))
63 break;
64 if (xmlStrEqual(cur->name, BAD_CAST"head"))
65 goto found_head;
66 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
67 goto found_meta;
68 }
69 cur = cur->next;
70 }
71 if (cur == NULL)
72 return(NULL);
73 cur = cur->children;
74
75 /*
76 * Search the head
77 */
78 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000079 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000080 if (xmlStrEqual(cur->name, BAD_CAST"head"))
81 break;
82 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
83 goto found_meta;
84 }
85 cur = cur->next;
86 }
87 if (cur == NULL)
88 return(NULL);
89found_head:
90 cur = cur->children;
91
92 /*
93 * Search the meta elements
94 */
95found_meta:
96 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000097 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000098 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
99 xmlAttrPtr attr = cur->properties;
100 int http;
101 const xmlChar *value;
102
103 content = NULL;
104 http = 0;
105 while (attr != NULL) {
106 if ((attr->children != NULL) &&
107 (attr->children->type == XML_TEXT_NODE) &&
108 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000109 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000110 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
111 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
112 http = 1;
113 else if ((value != NULL)
114 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
115 content = value;
116 if ((http != 0) && (content != NULL))
117 goto found_content;
118 }
119 attr = attr->next;
120 }
121 }
122 }
123 cur = cur->next;
124 }
125 return(NULL);
126
127found_content:
128 encoding = xmlStrstr(content, BAD_CAST"charset=");
129 if (encoding == NULL)
130 encoding = xmlStrstr(content, BAD_CAST"Charset=");
131 if (encoding == NULL)
132 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
133 if (encoding != NULL) {
134 encoding += 8;
135 } else {
136 encoding = xmlStrstr(content, BAD_CAST"charset =");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"Charset =");
139 if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
141 if (encoding != NULL)
142 encoding += 9;
143 }
144 if (encoding != NULL) {
145 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
146 }
147 return(encoding);
148}
149
150/**
151 * htmlSetMetaEncoding:
152 * @doc: the document
153 * @encoding: the encoding string
154 *
155 * Sets the current encoding in the Meta tags
156 * NOTE: this will not change the document content encoding, just
157 * the META flag associated.
158 *
159 * Returns 0 in case of success and -1 in case of error
160 */
161int
162htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
163 htmlNodePtr cur, meta;
164 const xmlChar *content;
165 char newcontent[100];
166
167
168 if (doc == NULL)
169 return(-1);
170
171 if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000172 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
173 encoding);
Owen Taylor3473f882001-02-23 17:55:21 +0000174 newcontent[sizeof(newcontent) - 1] = 0;
175 }
176
177 cur = doc->children;
178
179 /*
180 * Search the html
181 */
182 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000183 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000184 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
185 break;
186 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
187 goto found_head;
188 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
189 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000190 }
191 cur = cur->next;
192 }
193 if (cur == NULL)
194 return(-1);
195 cur = cur->children;
196
197 /*
198 * Search the head
199 */
200 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000201 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000202 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
203 break;
204 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
205 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000206 }
207 cur = cur->next;
208 }
209 if (cur == NULL)
210 return(-1);
211found_head:
212 if (cur->children == NULL) {
213 if (encoding == NULL)
214 return(0);
215 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
216 xmlAddChild(cur, meta);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000217 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Daniel Veillard3a42f3f2002-07-17 17:57:34 +0000218 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Owen Taylor3473f882001-02-23 17:55:21 +0000219 return(0);
220 }
221 cur = cur->children;
222
223found_meta:
224 if (encoding != NULL) {
225 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000226 * Create a new Meta element with the right attributes
Owen Taylor3473f882001-02-23 17:55:21 +0000227 */
228
229 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
230 xmlAddPrevSibling(cur, meta);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000231 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Daniel Veillard3a42f3f2002-07-17 17:57:34 +0000232 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Owen Taylor3473f882001-02-23 17:55:21 +0000233 }
234
235 /*
236 * Search and destroy all the remaining the meta elements carrying
237 * encoding informations
238 */
239 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000240 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000241 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000242 xmlAttrPtr attr = cur->properties;
243 int http;
244 const xmlChar *value;
245
246 content = NULL;
247 http = 0;
248 while (attr != NULL) {
249 if ((attr->children != NULL) &&
250 (attr->children->type == XML_TEXT_NODE) &&
251 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000252 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000253 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
254 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
255 http = 1;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000256 else
257 {
258 if ((value != NULL) &&
259 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
260 content = value;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000261 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000262 if ((http != 0) && (content != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +0000263 break;
264 }
265 attr = attr->next;
266 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000267 if ((http != 0) && (content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000268 meta = cur;
269 cur = cur->next;
270 xmlUnlinkNode(meta);
271 xmlFreeNode(meta);
272 continue;
273 }
274
275 }
276 }
277 cur = cur->next;
278 }
279 return(0);
280}
281
/**
 * htmlBooleanAttrs:
 *
 * These are the HTML attributes which will be output
 * in minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 *
 * The list is terminated by a NULL entry; both the strings and the
 * table itself are read-only.
 */
static const char *const htmlBooleanAttrs[] = {
    "checked", "compact", "declare", "defer", "disabled", "ismap",
    "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
    "selected", NULL
};
295
296
297/**
298 * htmlIsBooleanAttr:
299 * @name: the name of the attribute to check
300 *
301 * Determine if a given attribute is a boolean attribute.
302 *
303 * returns: false if the attribute is not boolean, true otherwise.
304 */
305int
306htmlIsBooleanAttr(const xmlChar *name)
307{
308 int i = 0;
309
310 while (htmlBooleanAttrs[i] != NULL) {
Daniel Veillardabe01742002-09-26 12:40:03 +0000311 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
Daniel Veillardc084e472002-08-12 13:27:28 +0000312 return 1;
313 i++;
314 }
315 return 0;
316}
317
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000318#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +0000319/************************************************************************
320 * *
321 * Dumping HTML tree content to a simple buffer *
322 * *
323 ************************************************************************/
324
Daniel Veillard8db67d22002-11-27 19:39:27 +0000325static int
Daniel Veillard86fd5a72001-12-13 14:55:21 +0000326htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
327 int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000328
329/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000330 * htmlNodeDumpFormat:
Owen Taylor3473f882001-02-23 17:55:21 +0000331 * @buf: the HTML buffer output
332 * @doc: the document
333 * @cur: the current node
Daniel Veillard95d845f2001-06-13 13:48:46 +0000334 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000335 *
336 * Dump an HTML node, recursive behaviour,children are printed too.
Daniel Veillard8db67d22002-11-27 19:39:27 +0000337 *
338 * Returns the number of byte written or -1 in case of error
Owen Taylor3473f882001-02-23 17:55:21 +0000339 */
Daniel Veillard8db67d22002-11-27 19:39:27 +0000340static int
Daniel Veillard95d845f2001-06-13 13:48:46 +0000341htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
342 int format) {
Daniel Veillard8db67d22002-11-27 19:39:27 +0000343 unsigned int use;
344 int ret;
345 xmlOutputBufferPtr outbuf;
Owen Taylor3473f882001-02-23 17:55:21 +0000346
347 if (cur == NULL) {
Daniel Veillard8db67d22002-11-27 19:39:27 +0000348 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +0000349 }
Daniel Veillard8db67d22002-11-27 19:39:27 +0000350 if (buf == NULL) {
351 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +0000352 }
Daniel Veillard8db67d22002-11-27 19:39:27 +0000353 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
354 if (outbuf == NULL) {
355 xmlGenericError(xmlGenericErrorContext,
356 "htmlNodeDumpFormat: out of memory!\n");
357 return (-1);
358 }
359 memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
360 outbuf->buffer = buf;
361 outbuf->encoder = NULL;
362 outbuf->writecallback = NULL;
363 outbuf->closecallback = NULL;
364 outbuf->context = NULL;
365 outbuf->written = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000366
Daniel Veillard8db67d22002-11-27 19:39:27 +0000367 use = buf->use;
368 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
369 xmlFree(outbuf);
370 ret = buf->use - use;
371 return (ret);
Owen Taylor3473f882001-02-23 17:55:21 +0000372}
373
374/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000375 * htmlNodeDump:
376 * @buf: the HTML buffer output
377 * @doc: the document
378 * @cur: the current node
379 *
380 * Dump an HTML node, recursive behaviour,children are printed too,
381 * and formatting returns are added.
Daniel Veillard8db67d22002-11-27 19:39:27 +0000382 *
383 * Returns the number of byte written or -1 in case of error
Daniel Veillard95d845f2001-06-13 13:48:46 +0000384 */
Daniel Veillard8db67d22002-11-27 19:39:27 +0000385int
Daniel Veillard95d845f2001-06-13 13:48:46 +0000386htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000387 xmlInitParser();
388
Daniel Veillard8db67d22002-11-27 19:39:27 +0000389 return(htmlNodeDumpFormat(buf, doc, cur, 1));
Daniel Veillard95d845f2001-06-13 13:48:46 +0000390}
391
392/**
393 * htmlNodeDumpFileFormat:
394 * @out: the FILE pointer
395 * @doc: the document
396 * @cur: the current node
397 * @encoding: the document encoding
398 * @format: should formatting spaces been added
399 *
400 * Dump an HTML node, recursive behaviour,children are printed too.
401 *
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000402 * TODO: if encoding == NULL try to save in the doc encoding
403 *
404 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard95d845f2001-06-13 13:48:46 +0000405 */
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000406int
407htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
408 xmlNodePtr cur, const char *encoding, int format) {
409 xmlOutputBufferPtr buf;
410 xmlCharEncodingHandlerPtr handler = NULL;
411 int ret;
Daniel Veillard95d845f2001-06-13 13:48:46 +0000412
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000413 xmlInitParser();
414
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000415 if (encoding != NULL) {
416 xmlCharEncoding enc;
417
418 enc = xmlParseCharEncoding(encoding);
419 if (enc != XML_CHAR_ENCODING_UTF8) {
420 handler = xmlFindCharEncodingHandler(encoding);
421 if (handler == NULL)
422 return(-1);
423 }
424 }
425
426 /*
427 * Fallback to HTML or ASCII when the encoding is unspecified
428 */
429 if (handler == NULL)
430 handler = xmlFindCharEncodingHandler("HTML");
431 if (handler == NULL)
432 handler = xmlFindCharEncodingHandler("ascii");
433
434 /*
435 * save the content to a temp buffer.
436 */
437 buf = xmlOutputBufferCreateFile(out, handler);
438 if (buf == NULL) return(0);
439
440 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
441
442 ret = xmlOutputBufferClose(buf);
443 return(ret);
Daniel Veillard95d845f2001-06-13 13:48:46 +0000444}
445
446/**
Owen Taylor3473f882001-02-23 17:55:21 +0000447 * htmlNodeDumpFile:
448 * @out: the FILE pointer
449 * @doc: the document
450 * @cur: the current node
451 *
Daniel Veillard95d845f2001-06-13 13:48:46 +0000452 * Dump an HTML node, recursive behaviour,children are printed too,
453 * and formatting returns are added.
Owen Taylor3473f882001-02-23 17:55:21 +0000454 */
455void
456htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000457 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000458}
459
460/**
Owen Taylor3473f882001-02-23 17:55:21 +0000461 * htmlDocDumpMemory:
462 * @cur: the document
463 * @mem: OUT: the memory pointer
Daniel Veillard2d703722001-05-30 18:32:34 +0000464 * @size: OUT: the memory length
Owen Taylor3473f882001-02-23 17:55:21 +0000465 *
466 * Dump an HTML document in memory and return the xmlChar * and it's size.
467 * It's up to the caller to free the memory.
468 */
469void
470htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard2d703722001-05-30 18:32:34 +0000471 xmlOutputBufferPtr buf;
472 xmlCharEncodingHandlerPtr handler = NULL;
473 const char *encoding;
Owen Taylor3473f882001-02-23 17:55:21 +0000474
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000475 xmlInitParser();
476
Owen Taylor3473f882001-02-23 17:55:21 +0000477 if (cur == NULL) {
478#ifdef DEBUG_TREE
479 xmlGenericError(xmlGenericErrorContext,
Daniel Veillard2d703722001-05-30 18:32:34 +0000480 "htmlDocDumpMemory : document == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000481#endif
482 *mem = NULL;
483 *size = 0;
484 return;
485 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000486
487 encoding = (const char *) htmlGetMetaEncoding(cur);
488
489 if (encoding != NULL) {
490 xmlCharEncoding enc;
491
492 enc = xmlParseCharEncoding(encoding);
493 if (enc != cur->charset) {
494 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
495 /*
496 * Not supported yet
497 */
498 *mem = NULL;
499 *size = 0;
500 return;
501 }
502
503 handler = xmlFindCharEncodingHandler(encoding);
504 if (handler == NULL) {
505 *mem = NULL;
506 *size = 0;
507 return;
508 }
509 }
510 }
511
512 /*
513 * Fallback to HTML or ASCII when the encoding is unspecified
514 */
515 if (handler == NULL)
516 handler = xmlFindCharEncodingHandler("HTML");
517 if (handler == NULL)
518 handler = xmlFindCharEncodingHandler("ascii");
519
520 buf = xmlAllocOutputBuffer(handler);
Owen Taylor3473f882001-02-23 17:55:21 +0000521 if (buf == NULL) {
522 *mem = NULL;
523 *size = 0;
524 return;
525 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000526
527 htmlDocContentDumpOutput(buf, cur, NULL);
528 xmlOutputBufferFlush(buf);
529 if (buf->conv != NULL) {
530 *size = buf->conv->use;
531 *mem = xmlStrndup(buf->conv->content, *size);
532 } else {
533 *size = buf->buffer->use;
534 *mem = xmlStrndup(buf->buffer->content, *size);
535 }
536 (void)xmlOutputBufferClose(buf);
Owen Taylor3473f882001-02-23 17:55:21 +0000537}
538
539
540/************************************************************************
541 * *
542 * Dumping HTML tree content to an I/O output buffer *
543 * *
544 ************************************************************************/
545
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000546void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
Daniel Veillardc084e472002-08-12 13:27:28 +0000547
Owen Taylor3473f882001-02-23 17:55:21 +0000548/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000549 * htmlDtdDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000550 * @buf: the HTML buffer output
551 * @doc: the document
552 * @encoding: the encoding string
553 *
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000554 * TODO: check whether encoding is needed
555 *
Owen Taylor3473f882001-02-23 17:55:21 +0000556 * Dump the HTML document DTD, if any.
557 */
558static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000559htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000560 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000561 xmlDtdPtr cur = doc->intSubset;
562
563 if (cur == NULL) {
564 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000565 "htmlDtdDumpOutput : no internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000566 return;
567 }
568 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
569 xmlOutputBufferWriteString(buf, (const char *)cur->name);
570 if (cur->ExternalID != NULL) {
571 xmlOutputBufferWriteString(buf, " PUBLIC ");
572 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
573 if (cur->SystemID != NULL) {
574 xmlOutputBufferWriteString(buf, " ");
575 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
576 }
577 } else if (cur->SystemID != NULL) {
578 xmlOutputBufferWriteString(buf, " SYSTEM ");
579 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
580 }
581 xmlOutputBufferWriteString(buf, ">\n");
582}
583
584/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000585 * htmlAttrDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000586 * @buf: the HTML buffer output
587 * @doc: the document
588 * @cur: the attribute pointer
589 * @encoding: the encoding string
590 *
591 * Dump an HTML attribute
592 */
593static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000594htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000595 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000596 xmlChar *value;
597
Daniel Veillardeca60d02001-06-13 07:45:41 +0000598 /*
599 * TODO: The html output method should not escape a & character
600 * occurring in an attribute value immediately followed by
601 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
602 */
603
Owen Taylor3473f882001-02-23 17:55:21 +0000604 if (cur == NULL) {
605 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000606 "htmlAttrDumpOutput : property == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000607 return;
608 }
609 xmlOutputBufferWriteString(buf, " ");
William M. Brack3a6da762003-09-15 04:58:14 +0000610 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
611 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
612 xmlOutputBufferWriteString(buf, ":");
613 }
Owen Taylor3473f882001-02-23 17:55:21 +0000614 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillardc084e472002-08-12 13:27:28 +0000615 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000616 value = xmlNodeListGetString(doc, cur->children, 0);
617 if (value) {
618 xmlOutputBufferWriteString(buf, "=");
Daniel Veillardc7e9b192003-03-27 14:08:24 +0000619 if ((cur->ns == NULL) && (cur->parent != NULL) &&
620 (cur->parent->ns == NULL) &&
621 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
622 (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
623 (!xmlStrcasecmp(cur->name, BAD_CAST "src")))) {
Daniel Veillardeb475a32002-04-14 22:00:22 +0000624 xmlChar *escaped;
625 xmlChar *tmp = value;
626
627 while (IS_BLANK(*tmp)) tmp++;
628
Daniel Veillard5f5b7bb2003-05-16 17:19:40 +0000629 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
Daniel Veillardeb475a32002-04-14 22:00:22 +0000630 if (escaped != NULL) {
631 xmlBufferWriteQuotedString(buf->buffer, escaped);
632 xmlFree(escaped);
633 } else {
634 xmlBufferWriteQuotedString(buf->buffer, value);
635 }
636 } else {
637 xmlBufferWriteQuotedString(buf->buffer, value);
638 }
Owen Taylor3473f882001-02-23 17:55:21 +0000639 xmlFree(value);
640 } else {
641 xmlOutputBufferWriteString(buf, "=\"\"");
642 }
643 }
644}
645
646/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000647 * htmlAttrListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000648 * @buf: the HTML buffer output
649 * @doc: the document
650 * @cur: the first attribute pointer
651 * @encoding: the encoding string
652 *
653 * Dump a list of HTML attributes
654 */
655static void
656htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
657 if (cur == NULL) {
658 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000659 "htmlAttrListDumpOutput : property == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000660 return;
661 }
662 while (cur != NULL) {
663 htmlAttrDumpOutput(buf, doc, cur, encoding);
664 cur = cur->next;
665 }
666}
667
668
Owen Taylor3473f882001-02-23 17:55:21 +0000669
670/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000671 * htmlNodeListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000672 * @buf: the HTML buffer output
673 * @doc: the document
674 * @cur: the first node
675 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000676 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000677 *
678 * Dump an HTML node list, recursive behaviour,children are printed too.
679 */
680static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000681htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
682 xmlNodePtr cur, const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000683 if (cur == NULL) {
684 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000685 "htmlNodeListDumpOutput : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000686 return;
687 }
688 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000689 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000690 cur = cur->next;
691 }
692}
693
694/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000695 * htmlNodeDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000696 * @buf: the HTML buffer output
697 * @doc: the document
698 * @cur: the current node
699 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000700 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000701 *
702 * Dump an HTML node, recursive behaviour,children are printed too.
703 */
704void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000705htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
706 xmlNodePtr cur, const char *encoding, int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000707 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000708
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000709 xmlInitParser();
710
Owen Taylor3473f882001-02-23 17:55:21 +0000711 if (cur == NULL) {
712 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000713 "htmlNodeDumpFormatOutput : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000714 return;
715 }
716 /*
717 * Special cases.
718 */
719 if (cur->type == XML_DTD_NODE)
720 return;
721 if (cur->type == XML_HTML_DOCUMENT_NODE) {
722 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
723 return;
724 }
725 if (cur->type == HTML_TEXT_NODE) {
726 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000727 if (((cur->name == (const xmlChar *)xmlStringText) ||
728 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000729 ((cur->parent == NULL) ||
Daniel Veillard44892f72002-10-16 15:23:26 +0000730 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
731 (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000732 xmlChar *buffer;
733
Owen Taylor3473f882001-02-23 17:55:21 +0000734 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000735 if (buffer != NULL) {
736 xmlOutputBufferWriteString(buf, (const char *)buffer);
737 xmlFree(buffer);
738 }
739 } else {
740 xmlOutputBufferWriteString(buf, (const char *)cur->content);
741 }
742 }
743 return;
744 }
745 if (cur->type == HTML_COMMENT_NODE) {
746 if (cur->content != NULL) {
747 xmlOutputBufferWriteString(buf, "<!--");
Owen Taylor3473f882001-02-23 17:55:21 +0000748 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000749 xmlOutputBufferWriteString(buf, "-->");
750 }
751 return;
752 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000753 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000754 if (cur->name == NULL)
755 return;
756 xmlOutputBufferWriteString(buf, "<?");
757 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000758 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000759 xmlOutputBufferWriteString(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000760 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000761 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000762 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000763 return;
764 }
Owen Taylor3473f882001-02-23 17:55:21 +0000765 if (cur->type == HTML_ENTITY_REF_NODE) {
766 xmlOutputBufferWriteString(buf, "&");
767 xmlOutputBufferWriteString(buf, (const char *)cur->name);
768 xmlOutputBufferWriteString(buf, ";");
769 return;
770 }
771 if (cur->type == HTML_PRESERVE_NODE) {
772 if (cur->content != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000773 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000774 }
775 return;
776 }
777
778 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000779 * Get specific HTML info for that node.
Owen Taylor3473f882001-02-23 17:55:21 +0000780 */
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000781 if (cur->ns == NULL)
782 info = htmlTagLookup(cur->name);
783 else
784 info = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000785
786 xmlOutputBufferWriteString(buf, "<");
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000787 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
788 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
789 xmlOutputBufferWriteString(buf, ":");
790 }
Owen Taylor3473f882001-02-23 17:55:21 +0000791 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000792 if (cur->nsDef)
793 xmlNsListDumpOutput(buf, cur->nsDef);
Owen Taylor3473f882001-02-23 17:55:21 +0000794 if (cur->properties != NULL)
795 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
796
797 if ((info != NULL) && (info->empty)) {
798 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000799 if ((format) && (!info->isinline) && (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000800 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +0000801 (cur->next->type != HTML_ENTITY_REF_NODE) &&
802 (cur->parent != NULL) &&
803 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +0000804 xmlOutputBufferWriteString(buf, "\n");
805 }
806 return;
807 }
Daniel Veillard7db37732001-07-12 01:20:08 +0000808 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
809 (cur->children == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000810 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +0000811 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
812 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000813 xmlOutputBufferWriteString(buf, ">");
814 } else {
815 xmlOutputBufferWriteString(buf, "></");
Daniel Veillard645c6902003-04-10 21:40:49 +0000816 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
817 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
818 xmlOutputBufferWriteString(buf, ":");
819 }
Owen Taylor3473f882001-02-23 17:55:21 +0000820 xmlOutputBufferWriteString(buf, (const char *)cur->name);
821 xmlOutputBufferWriteString(buf, ">");
822 }
Daniel Veillard02bb1702001-06-13 21:11:59 +0000823 if ((format) && (cur->next != NULL) &&
824 (info != NULL) && (!info->isinline)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000825 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +0000826 (cur->next->type != HTML_ENTITY_REF_NODE) &&
827 (cur->parent != NULL) &&
828 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +0000829 xmlOutputBufferWriteString(buf, "\n");
830 }
831 return;
832 }
833 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +0000834 if ((cur->type != XML_ELEMENT_NODE) &&
835 (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000836 /*
837 * Uses the OutputBuffer property to automatically convert
838 * invalids to charrefs
839 */
840
Owen Taylor3473f882001-02-23 17:55:21 +0000841 xmlOutputBufferWriteString(buf, (const char *) cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000842 }
843 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +0000844 if ((format) && (info != NULL) && (!info->isinline) &&
845 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000846 (cur->children->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +0000847 (cur->children != cur->last) &&
848 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +0000849 xmlOutputBufferWriteString(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +0000850 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +0000851 if ((format) && (info != NULL) && (!info->isinline) &&
852 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000853 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +0000854 (cur->children != cur->last) &&
855 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +0000856 xmlOutputBufferWriteString(buf, "\n");
857 }
Owen Taylor3473f882001-02-23 17:55:21 +0000858 xmlOutputBufferWriteString(buf, "</");
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000859 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
860 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
861 xmlOutputBufferWriteString(buf, ":");
862 }
Owen Taylor3473f882001-02-23 17:55:21 +0000863 xmlOutputBufferWriteString(buf, (const char *)cur->name);
864 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000865 if ((format) && (info != NULL) && (!info->isinline) &&
866 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000867 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +0000868 (cur->next->type != HTML_ENTITY_REF_NODE) &&
869 (cur->parent != NULL) &&
870 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +0000871 xmlOutputBufferWriteString(buf, "\n");
872 }
873}
874
875/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000876 * htmlNodeDumpOutput:
877 * @buf: the HTML buffer output
878 * @doc: the document
879 * @cur: the current node
880 * @encoding: the encoding string
881 *
882 * Dump an HTML node, recursive behaviour,children are printed too,
883 * and formatting returns/spaces are added.
884 */
885void
886htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
887 xmlNodePtr cur, const char *encoding) {
888 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
889}
890
891/**
892 * htmlDocContentDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000893 * @buf: the HTML buffer output
894 * @cur: the document
895 * @encoding: the encoding string
Daniel Veillard9d06d302002-01-22 18:15:52 +0000896 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000897 *
898 * Dump an HTML document.
899 */
900void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000901htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
902 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000903 int type;
904
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000905 xmlInitParser();
906
Owen Taylor3473f882001-02-23 17:55:21 +0000907 /*
908 * force to output the stuff as HTML, especially for entities
909 */
910 type = cur->type;
911 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard4dd93462001-04-02 15:16:19 +0000912 if (cur->intSubset != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000913 htmlDtdDumpOutput(buf, cur, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000914 }
915 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000916 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000917 }
918 xmlOutputBufferWriteString(buf, "\n");
919 cur->type = (xmlElementType) type;
920}
921
Daniel Veillard95d845f2001-06-13 13:48:46 +0000922/**
923 * htmlDocContentDumpOutput:
924 * @buf: the HTML buffer output
925 * @cur: the document
926 * @encoding: the encoding string
927 *
928 * Dump an HTML document. Formating return/spaces are added.
929 */
930void
931htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
932 const char *encoding) {
933 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
934}
935
Owen Taylor3473f882001-02-23 17:55:21 +0000936/************************************************************************
937 * *
938 * Saving functions front-ends *
939 * *
940 ************************************************************************/
941
942/**
943 * htmlDocDump:
944 * @f: the FILE*
945 * @cur: the document
946 *
947 * Dump an HTML document to an open FILE.
948 *
949 * returns: the number of byte written or -1 in case of failure.
950 */
951int
952htmlDocDump(FILE *f, xmlDocPtr cur) {
953 xmlOutputBufferPtr buf;
954 xmlCharEncodingHandlerPtr handler = NULL;
955 const char *encoding;
956 int ret;
957
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000958 xmlInitParser();
959
Owen Taylor3473f882001-02-23 17:55:21 +0000960 if (cur == NULL) {
961#ifdef DEBUG_TREE
962 xmlGenericError(xmlGenericErrorContext,
963 "htmlDocDump : document == NULL\n");
964#endif
965 return(-1);
966 }
967
968 encoding = (const char *) htmlGetMetaEncoding(cur);
969
970 if (encoding != NULL) {
971 xmlCharEncoding enc;
972
973 enc = xmlParseCharEncoding(encoding);
974 if (enc != cur->charset) {
975 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
976 /*
977 * Not supported yet
978 */
979 return(-1);
980 }
981
982 handler = xmlFindCharEncodingHandler(encoding);
983 if (handler == NULL)
984 return(-1);
985 }
986 }
987
988 /*
989 * Fallback to HTML or ASCII when the encoding is unspecified
990 */
991 if (handler == NULL)
992 handler = xmlFindCharEncodingHandler("HTML");
993 if (handler == NULL)
994 handler = xmlFindCharEncodingHandler("ascii");
995
996 buf = xmlOutputBufferCreateFile(f, handler);
997 if (buf == NULL) return(-1);
998 htmlDocContentDumpOutput(buf, cur, NULL);
999
1000 ret = xmlOutputBufferClose(buf);
1001 return(ret);
1002}
1003
1004/**
1005 * htmlSaveFile:
1006 * @filename: the filename (or URL)
1007 * @cur: the document
1008 *
1009 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1010 * used.
1011 * returns: the number of byte written or -1 in case of failure.
1012 */
1013int
1014htmlSaveFile(const char *filename, xmlDocPtr cur) {
1015 xmlOutputBufferPtr buf;
1016 xmlCharEncodingHandlerPtr handler = NULL;
1017 const char *encoding;
1018 int ret;
1019
Daniel Veillard70bcb0e2003-08-08 14:00:28 +00001020 xmlInitParser();
1021
Owen Taylor3473f882001-02-23 17:55:21 +00001022 encoding = (const char *) htmlGetMetaEncoding(cur);
1023
1024 if (encoding != NULL) {
1025 xmlCharEncoding enc;
1026
1027 enc = xmlParseCharEncoding(encoding);
1028 if (enc != cur->charset) {
1029 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1030 /*
1031 * Not supported yet
1032 */
1033 return(-1);
1034 }
1035
1036 handler = xmlFindCharEncodingHandler(encoding);
1037 if (handler == NULL)
1038 return(-1);
1039 }
1040 }
1041
1042 /*
1043 * Fallback to HTML or ASCII when the encoding is unspecified
1044 */
1045 if (handler == NULL)
1046 handler = xmlFindCharEncodingHandler("HTML");
1047 if (handler == NULL)
1048 handler = xmlFindCharEncodingHandler("ascii");
1049
1050 /*
1051 * save the content to a temp buffer.
1052 */
1053 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1054 if (buf == NULL) return(0);
1055
1056 htmlDocContentDumpOutput(buf, cur, NULL);
1057
1058 ret = xmlOutputBufferClose(buf);
1059 return(ret);
1060}
1061
1062/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001063 * htmlSaveFileFormat:
Owen Taylor3473f882001-02-23 17:55:21 +00001064 * @filename: the filename
1065 * @cur: the document
Daniel Veillard95d845f2001-06-13 13:48:46 +00001066 * @format: should formatting spaces been added
1067 * @encoding: the document encoding
Owen Taylor3473f882001-02-23 17:55:21 +00001068 *
1069 * Dump an HTML document to a file using a given encoding.
1070 *
1071 * returns: the number of byte written or -1 in case of failure.
1072 */
1073int
Daniel Veillard95d845f2001-06-13 13:48:46 +00001074htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1075 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001076 xmlOutputBufferPtr buf;
1077 xmlCharEncodingHandlerPtr handler = NULL;
1078 int ret;
1079
Daniel Veillard70bcb0e2003-08-08 14:00:28 +00001080 xmlInitParser();
1081
Owen Taylor3473f882001-02-23 17:55:21 +00001082 if (encoding != NULL) {
1083 xmlCharEncoding enc;
1084
1085 enc = xmlParseCharEncoding(encoding);
1086 if (enc != cur->charset) {
1087 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1088 /*
1089 * Not supported yet
1090 */
1091 return(-1);
1092 }
1093
1094 handler = xmlFindCharEncodingHandler(encoding);
1095 if (handler == NULL)
1096 return(-1);
1097 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1098 }
Daniel Veillard4dd93462001-04-02 15:16:19 +00001099 } else {
1100 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
Owen Taylor3473f882001-02-23 17:55:21 +00001101 }
1102
1103 /*
1104 * Fallback to HTML or ASCII when the encoding is unspecified
1105 */
1106 if (handler == NULL)
1107 handler = xmlFindCharEncodingHandler("HTML");
1108 if (handler == NULL)
1109 handler = xmlFindCharEncodingHandler("ascii");
1110
1111 /*
1112 * save the content to a temp buffer.
1113 */
1114 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1115 if (buf == NULL) return(0);
1116
Daniel Veillard95d845f2001-06-13 13:48:46 +00001117 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001118
1119 ret = xmlOutputBufferClose(buf);
1120 return(ret);
1121}
Daniel Veillard95d845f2001-06-13 13:48:46 +00001122
1123/**
1124 * htmlSaveFileEnc:
1125 * @filename: the filename
1126 * @cur: the document
1127 * @encoding: the document encoding
1128 *
1129 * Dump an HTML document to a file using a given encoding
1130 * and formatting returns/spaces are added.
1131 *
1132 * returns: the number of byte written or -1 in case of failure.
1133 */
1134int
1135htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1136 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1137}
1138
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001139#endif /* LIBXML_OUTPUT_ENABLED */
Daniel Veillardc084e472002-08-12 13:27:28 +00001140
Owen Taylor3473f882001-02-23 17:55:21 +00001141#endif /* LIBXML_HTML_ENABLED */