blob: e77ee65f117db0ddc56c5a233fc8212671745c5b [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002 * HTMLtree.c : implementation of access function for an HTML tree.
Owen Taylor3473f882001-02-23 17:55:21 +00003 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00006 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00007 */
8
9
Daniel Veillard34ce8be2002-03-18 19:37:11 +000010#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000012#ifdef LIBXML_HTML_ENABLED
13
Daniel Veillard8db67d22002-11-27 19:39:27 +000014#include <string.h> /* for memset() only ! */
15
Owen Taylor3473f882001-02-23 17:55:21 +000016#ifdef HAVE_CTYPE_H
17#include <ctype.h>
18#endif
19#ifdef HAVE_STDLIB_H
20#include <stdlib.h>
21#endif
22
23#include <libxml/xmlmemory.h>
24#include <libxml/HTMLparser.h>
25#include <libxml/HTMLtree.h>
26#include <libxml/entities.h>
27#include <libxml/valid.h>
28#include <libxml/xmlerror.h>
29#include <libxml/parserInternals.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000030#include <libxml/globals.h>
Daniel Veillardeb475a32002-04-14 22:00:22 +000031#include <libxml/uri.h>
Owen Taylor3473f882001-02-23 17:55:21 +000032
33/************************************************************************
34 * *
35 * Getting/Setting encoding meta tags *
36 * *
37 ************************************************************************/
38
39/**
40 * htmlGetMetaEncoding:
41 * @doc: the document
42 *
43 * Encoding definition lookup in the Meta tags
44 *
45 * Returns the current encoding as flagged in the HTML source
46 */
47const xmlChar *
48htmlGetMetaEncoding(htmlDocPtr doc) {
49 htmlNodePtr cur;
50 const xmlChar *content;
51 const xmlChar *encoding;
52
53 if (doc == NULL)
54 return(NULL);
55 cur = doc->children;
56
57 /*
58 * Search the html
59 */
60 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000061 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000062 if (xmlStrEqual(cur->name, BAD_CAST"html"))
63 break;
64 if (xmlStrEqual(cur->name, BAD_CAST"head"))
65 goto found_head;
66 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
67 goto found_meta;
68 }
69 cur = cur->next;
70 }
71 if (cur == NULL)
72 return(NULL);
73 cur = cur->children;
74
75 /*
76 * Search the head
77 */
78 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000079 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000080 if (xmlStrEqual(cur->name, BAD_CAST"head"))
81 break;
82 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
83 goto found_meta;
84 }
85 cur = cur->next;
86 }
87 if (cur == NULL)
88 return(NULL);
89found_head:
90 cur = cur->children;
91
92 /*
93 * Search the meta elements
94 */
95found_meta:
96 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000097 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000098 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
99 xmlAttrPtr attr = cur->properties;
100 int http;
101 const xmlChar *value;
102
103 content = NULL;
104 http = 0;
105 while (attr != NULL) {
106 if ((attr->children != NULL) &&
107 (attr->children->type == XML_TEXT_NODE) &&
108 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000109 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000110 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
111 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
112 http = 1;
113 else if ((value != NULL)
114 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
115 content = value;
116 if ((http != 0) && (content != NULL))
117 goto found_content;
118 }
119 attr = attr->next;
120 }
121 }
122 }
123 cur = cur->next;
124 }
125 return(NULL);
126
127found_content:
128 encoding = xmlStrstr(content, BAD_CAST"charset=");
129 if (encoding == NULL)
130 encoding = xmlStrstr(content, BAD_CAST"Charset=");
131 if (encoding == NULL)
132 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
133 if (encoding != NULL) {
134 encoding += 8;
135 } else {
136 encoding = xmlStrstr(content, BAD_CAST"charset =");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"Charset =");
139 if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
141 if (encoding != NULL)
142 encoding += 9;
143 }
144 if (encoding != NULL) {
145 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
146 }
147 return(encoding);
148}
149
150/**
151 * htmlSetMetaEncoding:
152 * @doc: the document
153 * @encoding: the encoding string
154 *
155 * Sets the current encoding in the Meta tags
156 * NOTE: this will not change the document content encoding, just
157 * the META flag associated.
158 *
159 * Returns 0 in case of success and -1 in case of error
160 */
161int
162htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
163 htmlNodePtr cur, meta;
164 const xmlChar *content;
165 char newcontent[100];
166
167
168 if (doc == NULL)
169 return(-1);
170
171 if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000172 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
William M. Brack13dfa872004-09-18 04:52:08 +0000173 (char *)encoding);
Owen Taylor3473f882001-02-23 17:55:21 +0000174 newcontent[sizeof(newcontent) - 1] = 0;
175 }
176
177 cur = doc->children;
178
179 /*
180 * Search the html
181 */
182 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000183 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000184 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
185 break;
186 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
187 goto found_head;
188 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
189 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000190 }
191 cur = cur->next;
192 }
193 if (cur == NULL)
194 return(-1);
195 cur = cur->children;
196
197 /*
198 * Search the head
199 */
200 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000201 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000202 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
203 break;
204 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
205 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000206 }
207 cur = cur->next;
208 }
209 if (cur == NULL)
210 return(-1);
211found_head:
212 if (cur->children == NULL) {
213 if (encoding == NULL)
214 return(0);
215 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
216 xmlAddChild(cur, meta);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000217 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Daniel Veillard3a42f3f2002-07-17 17:57:34 +0000218 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Owen Taylor3473f882001-02-23 17:55:21 +0000219 return(0);
220 }
221 cur = cur->children;
222
223found_meta:
224 if (encoding != NULL) {
225 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000226 * Create a new Meta element with the right attributes
Owen Taylor3473f882001-02-23 17:55:21 +0000227 */
228
229 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
230 xmlAddPrevSibling(cur, meta);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000231 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Daniel Veillard3a42f3f2002-07-17 17:57:34 +0000232 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Owen Taylor3473f882001-02-23 17:55:21 +0000233 }
234
235 /*
236 * Search and destroy all the remaining the meta elements carrying
237 * encoding informations
238 */
239 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000240 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000241 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000242 xmlAttrPtr attr = cur->properties;
243 int http;
244 const xmlChar *value;
245
246 content = NULL;
247 http = 0;
248 while (attr != NULL) {
249 if ((attr->children != NULL) &&
250 (attr->children->type == XML_TEXT_NODE) &&
251 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000252 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000253 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
254 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
255 http = 1;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000256 else
257 {
258 if ((value != NULL) &&
259 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
260 content = value;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000261 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000262 if ((http != 0) && (content != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +0000263 break;
264 }
265 attr = attr->next;
266 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000267 if ((http != 0) && (content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000268 meta = cur;
269 cur = cur->next;
270 xmlUnlinkNode(meta);
271 xmlFreeNode(meta);
272 continue;
273 }
274
275 }
276 }
277 cur = cur->next;
278 }
279 return(0);
280}
281
/**
 * htmlBooleanAttrs:
 *
 * NULL-terminated list of the HTML attributes which are output in
 * minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 *
 */
static const char* htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL
};
295
296
297/**
298 * htmlIsBooleanAttr:
299 * @name: the name of the attribute to check
300 *
301 * Determine if a given attribute is a boolean attribute.
302 *
303 * returns: false if the attribute is not boolean, true otherwise.
304 */
305int
306htmlIsBooleanAttr(const xmlChar *name)
307{
308 int i = 0;
309
310 while (htmlBooleanAttrs[i] != NULL) {
Daniel Veillardabe01742002-09-26 12:40:03 +0000311 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
Daniel Veillardc084e472002-08-12 13:27:28 +0000312 return 1;
313 i++;
314 }
315 return 0;
316}
317
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000318#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +0000319/************************************************************************
320 * *
Daniel Veillarde2238d52003-10-09 13:14:55 +0000321 * Output error handlers *
322 * *
323 ************************************************************************/
324/**
325 * htmlSaveErrMemory:
326 * @extra: extra informations
327 *
328 * Handle an out of memory condition
329 */
330static void
331htmlSaveErrMemory(const char *extra)
332{
333 __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
334}
335
336/**
337 * htmlSaveErr:
338 * @code: the error number
339 * @node: the location of the error.
340 * @extra: extra informations
341 *
342 * Handle an out of memory condition
343 */
344static void
345htmlSaveErr(int code, xmlNodePtr node, const char *extra)
346{
347 const char *msg = NULL;
348
349 switch(code) {
350 case XML_SAVE_NOT_UTF8:
351 msg = "string is not in UTF-8";
352 break;
353 case XML_SAVE_CHAR_INVALID:
354 msg = "invalid character value";
355 break;
356 case XML_SAVE_UNKNOWN_ENCODING:
357 msg = "unknown encoding %s";
358 break;
359 case XML_SAVE_NO_DOCTYPE:
360 msg = "HTML has no DOCTYPE";
361 break;
362 default:
363 msg = "unexpected error number";
364 }
365 __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
366}
367
368/************************************************************************
369 * *
Owen Taylor3473f882001-02-23 17:55:21 +0000370 * Dumping HTML tree content to a simple buffer *
371 * *
372 ************************************************************************/
373
Daniel Veillard8db67d22002-11-27 19:39:27 +0000374static int
Daniel Veillard86fd5a72001-12-13 14:55:21 +0000375htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
376 int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000377
378/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000379 * htmlNodeDumpFormat:
Owen Taylor3473f882001-02-23 17:55:21 +0000380 * @buf: the HTML buffer output
381 * @doc: the document
382 * @cur: the current node
Daniel Veillard95d845f2001-06-13 13:48:46 +0000383 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000384 *
385 * Dump an HTML node, recursive behaviour,children are printed too.
Daniel Veillard8db67d22002-11-27 19:39:27 +0000386 *
387 * Returns the number of byte written or -1 in case of error
Owen Taylor3473f882001-02-23 17:55:21 +0000388 */
Daniel Veillard8db67d22002-11-27 19:39:27 +0000389static int
Daniel Veillard95d845f2001-06-13 13:48:46 +0000390htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
391 int format) {
Daniel Veillard8db67d22002-11-27 19:39:27 +0000392 unsigned int use;
393 int ret;
394 xmlOutputBufferPtr outbuf;
Owen Taylor3473f882001-02-23 17:55:21 +0000395
396 if (cur == NULL) {
Daniel Veillard8db67d22002-11-27 19:39:27 +0000397 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +0000398 }
Daniel Veillard8db67d22002-11-27 19:39:27 +0000399 if (buf == NULL) {
400 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +0000401 }
Daniel Veillard8db67d22002-11-27 19:39:27 +0000402 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
403 if (outbuf == NULL) {
Daniel Veillarde2238d52003-10-09 13:14:55 +0000404 htmlSaveErrMemory("allocating HTML output buffer");
Daniel Veillard8db67d22002-11-27 19:39:27 +0000405 return (-1);
406 }
407 memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
408 outbuf->buffer = buf;
409 outbuf->encoder = NULL;
410 outbuf->writecallback = NULL;
411 outbuf->closecallback = NULL;
412 outbuf->context = NULL;
413 outbuf->written = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000414
Daniel Veillard8db67d22002-11-27 19:39:27 +0000415 use = buf->use;
416 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
417 xmlFree(outbuf);
418 ret = buf->use - use;
419 return (ret);
Owen Taylor3473f882001-02-23 17:55:21 +0000420}
421
422/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000423 * htmlNodeDump:
424 * @buf: the HTML buffer output
425 * @doc: the document
426 * @cur: the current node
427 *
428 * Dump an HTML node, recursive behaviour,children are printed too,
429 * and formatting returns are added.
Daniel Veillard8db67d22002-11-27 19:39:27 +0000430 *
431 * Returns the number of byte written or -1 in case of error
Daniel Veillard95d845f2001-06-13 13:48:46 +0000432 */
Daniel Veillard8db67d22002-11-27 19:39:27 +0000433int
Daniel Veillard95d845f2001-06-13 13:48:46 +0000434htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000435 xmlInitParser();
436
Daniel Veillard8db67d22002-11-27 19:39:27 +0000437 return(htmlNodeDumpFormat(buf, doc, cur, 1));
Daniel Veillard95d845f2001-06-13 13:48:46 +0000438}
439
440/**
441 * htmlNodeDumpFileFormat:
442 * @out: the FILE pointer
443 * @doc: the document
444 * @cur: the current node
445 * @encoding: the document encoding
446 * @format: should formatting spaces been added
447 *
448 * Dump an HTML node, recursive behaviour,children are printed too.
449 *
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000450 * TODO: if encoding == NULL try to save in the doc encoding
451 *
452 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard95d845f2001-06-13 13:48:46 +0000453 */
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000454int
455htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
456 xmlNodePtr cur, const char *encoding, int format) {
457 xmlOutputBufferPtr buf;
458 xmlCharEncodingHandlerPtr handler = NULL;
459 int ret;
Daniel Veillard95d845f2001-06-13 13:48:46 +0000460
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000461 xmlInitParser();
462
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000463 if (encoding != NULL) {
464 xmlCharEncoding enc;
465
466 enc = xmlParseCharEncoding(encoding);
467 if (enc != XML_CHAR_ENCODING_UTF8) {
468 handler = xmlFindCharEncodingHandler(encoding);
469 if (handler == NULL)
470 return(-1);
471 }
472 }
473
474 /*
475 * Fallback to HTML or ASCII when the encoding is unspecified
476 */
477 if (handler == NULL)
478 handler = xmlFindCharEncodingHandler("HTML");
479 if (handler == NULL)
480 handler = xmlFindCharEncodingHandler("ascii");
481
482 /*
483 * save the content to a temp buffer.
484 */
485 buf = xmlOutputBufferCreateFile(out, handler);
486 if (buf == NULL) return(0);
487
488 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
489
490 ret = xmlOutputBufferClose(buf);
491 return(ret);
Daniel Veillard95d845f2001-06-13 13:48:46 +0000492}
493
494/**
Owen Taylor3473f882001-02-23 17:55:21 +0000495 * htmlNodeDumpFile:
496 * @out: the FILE pointer
497 * @doc: the document
498 * @cur: the current node
499 *
Daniel Veillard95d845f2001-06-13 13:48:46 +0000500 * Dump an HTML node, recursive behaviour,children are printed too,
501 * and formatting returns are added.
Owen Taylor3473f882001-02-23 17:55:21 +0000502 */
503void
504htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000505 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000506}
507
508/**
Owen Taylor3473f882001-02-23 17:55:21 +0000509 * htmlDocDumpMemory:
510 * @cur: the document
511 * @mem: OUT: the memory pointer
Daniel Veillard2d703722001-05-30 18:32:34 +0000512 * @size: OUT: the memory length
Owen Taylor3473f882001-02-23 17:55:21 +0000513 *
514 * Dump an HTML document in memory and return the xmlChar * and it's size.
515 * It's up to the caller to free the memory.
516 */
517void
518htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard2d703722001-05-30 18:32:34 +0000519 xmlOutputBufferPtr buf;
520 xmlCharEncodingHandlerPtr handler = NULL;
521 const char *encoding;
Owen Taylor3473f882001-02-23 17:55:21 +0000522
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000523 xmlInitParser();
524
Daniel Veillardd5cc0f72004-11-06 19:24:28 +0000525 if ((mem == NULL) || (size == NULL))
526 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000527 if (cur == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000528 *mem = NULL;
529 *size = 0;
530 return;
531 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000532
533 encoding = (const char *) htmlGetMetaEncoding(cur);
534
535 if (encoding != NULL) {
536 xmlCharEncoding enc;
537
538 enc = xmlParseCharEncoding(encoding);
539 if (enc != cur->charset) {
540 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
541 /*
542 * Not supported yet
543 */
544 *mem = NULL;
545 *size = 0;
546 return;
547 }
548
549 handler = xmlFindCharEncodingHandler(encoding);
550 if (handler == NULL) {
551 *mem = NULL;
552 *size = 0;
553 return;
554 }
Daniel Veillardb8c80162005-08-08 13:46:45 +0000555 } else {
556 handler = xmlFindCharEncodingHandler(encoding);
Daniel Veillard2d703722001-05-30 18:32:34 +0000557 }
558 }
559
560 /*
561 * Fallback to HTML or ASCII when the encoding is unspecified
562 */
563 if (handler == NULL)
564 handler = xmlFindCharEncodingHandler("HTML");
565 if (handler == NULL)
566 handler = xmlFindCharEncodingHandler("ascii");
567
568 buf = xmlAllocOutputBuffer(handler);
Owen Taylor3473f882001-02-23 17:55:21 +0000569 if (buf == NULL) {
570 *mem = NULL;
571 *size = 0;
572 return;
573 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000574
575 htmlDocContentDumpOutput(buf, cur, NULL);
576 xmlOutputBufferFlush(buf);
577 if (buf->conv != NULL) {
578 *size = buf->conv->use;
579 *mem = xmlStrndup(buf->conv->content, *size);
580 } else {
581 *size = buf->buffer->use;
582 *mem = xmlStrndup(buf->buffer->content, *size);
583 }
584 (void)xmlOutputBufferClose(buf);
Owen Taylor3473f882001-02-23 17:55:21 +0000585}
586
587
588/************************************************************************
589 * *
590 * Dumping HTML tree content to an I/O output buffer *
591 * *
592 ************************************************************************/
593
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000594void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
Daniel Veillardc084e472002-08-12 13:27:28 +0000595
Owen Taylor3473f882001-02-23 17:55:21 +0000596/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000597 * htmlDtdDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000598 * @buf: the HTML buffer output
599 * @doc: the document
600 * @encoding: the encoding string
601 *
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000602 * TODO: check whether encoding is needed
603 *
Owen Taylor3473f882001-02-23 17:55:21 +0000604 * Dump the HTML document DTD, if any.
605 */
606static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000607htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000608 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000609 xmlDtdPtr cur = doc->intSubset;
610
611 if (cur == NULL) {
Daniel Veillarde2238d52003-10-09 13:14:55 +0000612 htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000613 return;
614 }
615 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
616 xmlOutputBufferWriteString(buf, (const char *)cur->name);
617 if (cur->ExternalID != NULL) {
618 xmlOutputBufferWriteString(buf, " PUBLIC ");
619 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
620 if (cur->SystemID != NULL) {
621 xmlOutputBufferWriteString(buf, " ");
622 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
623 }
624 } else if (cur->SystemID != NULL) {
625 xmlOutputBufferWriteString(buf, " SYSTEM ");
626 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
627 }
628 xmlOutputBufferWriteString(buf, ">\n");
629}
630
631/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000632 * htmlAttrDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000633 * @buf: the HTML buffer output
634 * @doc: the document
635 * @cur: the attribute pointer
636 * @encoding: the encoding string
637 *
638 * Dump an HTML attribute
639 */
640static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000641htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000642 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000643 xmlChar *value;
644
Daniel Veillardeca60d02001-06-13 07:45:41 +0000645 /*
646 * TODO: The html output method should not escape a & character
647 * occurring in an attribute value immediately followed by
648 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
649 */
650
Owen Taylor3473f882001-02-23 17:55:21 +0000651 if (cur == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000652 return;
653 }
654 xmlOutputBufferWriteString(buf, " ");
William M. Brack3a6da762003-09-15 04:58:14 +0000655 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
656 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
657 xmlOutputBufferWriteString(buf, ":");
658 }
Owen Taylor3473f882001-02-23 17:55:21 +0000659 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillardc084e472002-08-12 13:27:28 +0000660 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000661 value = xmlNodeListGetString(doc, cur->children, 0);
662 if (value) {
663 xmlOutputBufferWriteString(buf, "=");
Daniel Veillardc7e9b192003-03-27 14:08:24 +0000664 if ((cur->ns == NULL) && (cur->parent != NULL) &&
665 (cur->parent->ns == NULL) &&
666 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
667 (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
Daniel Veillardaa9a9832005-03-29 20:30:17 +0000668 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
669 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
670 (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
Daniel Veillardeb475a32002-04-14 22:00:22 +0000671 xmlChar *escaped;
672 xmlChar *tmp = value;
673
William M. Brack76e95df2003-10-18 16:20:14 +0000674 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillardeb475a32002-04-14 22:00:22 +0000675
Daniel Veillard5f5b7bb2003-05-16 17:19:40 +0000676 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
Daniel Veillardeb475a32002-04-14 22:00:22 +0000677 if (escaped != NULL) {
678 xmlBufferWriteQuotedString(buf->buffer, escaped);
679 xmlFree(escaped);
680 } else {
681 xmlBufferWriteQuotedString(buf->buffer, value);
682 }
683 } else {
684 xmlBufferWriteQuotedString(buf->buffer, value);
685 }
Owen Taylor3473f882001-02-23 17:55:21 +0000686 xmlFree(value);
687 } else {
688 xmlOutputBufferWriteString(buf, "=\"\"");
689 }
690 }
691}
692
693/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000694 * htmlAttrListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000695 * @buf: the HTML buffer output
696 * @doc: the document
697 * @cur: the first attribute pointer
698 * @encoding: the encoding string
699 *
700 * Dump a list of HTML attributes
701 */
702static void
703htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
704 if (cur == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000705 return;
706 }
707 while (cur != NULL) {
708 htmlAttrDumpOutput(buf, doc, cur, encoding);
709 cur = cur->next;
710 }
711}
712
713
Owen Taylor3473f882001-02-23 17:55:21 +0000714
715/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000716 * htmlNodeListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000717 * @buf: the HTML buffer output
718 * @doc: the document
719 * @cur: the first node
720 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000721 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000722 *
723 * Dump an HTML node list, recursive behaviour,children are printed too.
724 */
725static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000726htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
727 xmlNodePtr cur, const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000728 if (cur == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000729 return;
730 }
731 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000732 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000733 cur = cur->next;
734 }
735}
736
737/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000738 * htmlNodeDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000739 * @buf: the HTML buffer output
740 * @doc: the document
741 * @cur: the current node
742 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000743 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000744 *
745 * Dump an HTML node, recursive behaviour,children are printed too.
746 */
747void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000748htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
749 xmlNodePtr cur, const char *encoding, int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000750 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000751
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000752 xmlInitParser();
753
Daniel Veillardce244ad2004-11-05 10:03:46 +0000754 if ((cur == NULL) || (buf == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000755 return;
756 }
757 /*
758 * Special cases.
759 */
760 if (cur->type == XML_DTD_NODE)
761 return;
Daniel Veillardce244ad2004-11-05 10:03:46 +0000762 if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
763 (cur->type == XML_DOCUMENT_NODE)){
Owen Taylor3473f882001-02-23 17:55:21 +0000764 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
765 return;
766 }
767 if (cur->type == HTML_TEXT_NODE) {
768 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000769 if (((cur->name == (const xmlChar *)xmlStringText) ||
770 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000771 ((cur->parent == NULL) ||
Daniel Veillard44892f72002-10-16 15:23:26 +0000772 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
773 (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000774 xmlChar *buffer;
775
Owen Taylor3473f882001-02-23 17:55:21 +0000776 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000777 if (buffer != NULL) {
778 xmlOutputBufferWriteString(buf, (const char *)buffer);
779 xmlFree(buffer);
780 }
781 } else {
782 xmlOutputBufferWriteString(buf, (const char *)cur->content);
783 }
784 }
785 return;
786 }
787 if (cur->type == HTML_COMMENT_NODE) {
788 if (cur->content != NULL) {
789 xmlOutputBufferWriteString(buf, "<!--");
Owen Taylor3473f882001-02-23 17:55:21 +0000790 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000791 xmlOutputBufferWriteString(buf, "-->");
792 }
793 return;
794 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000795 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000796 if (cur->name == NULL)
797 return;
798 xmlOutputBufferWriteString(buf, "<?");
799 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000800 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000801 xmlOutputBufferWriteString(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000802 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000803 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000804 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000805 return;
806 }
Owen Taylor3473f882001-02-23 17:55:21 +0000807 if (cur->type == HTML_ENTITY_REF_NODE) {
808 xmlOutputBufferWriteString(buf, "&");
809 xmlOutputBufferWriteString(buf, (const char *)cur->name);
810 xmlOutputBufferWriteString(buf, ";");
811 return;
812 }
813 if (cur->type == HTML_PRESERVE_NODE) {
814 if (cur->content != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000815 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000816 }
817 return;
818 }
819
820 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000821 * Get specific HTML info for that node.
Owen Taylor3473f882001-02-23 17:55:21 +0000822 */
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000823 if (cur->ns == NULL)
824 info = htmlTagLookup(cur->name);
825 else
826 info = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000827
828 xmlOutputBufferWriteString(buf, "<");
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000829 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
830 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
831 xmlOutputBufferWriteString(buf, ":");
832 }
Owen Taylor3473f882001-02-23 17:55:21 +0000833 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000834 if (cur->nsDef)
835 xmlNsListDumpOutput(buf, cur->nsDef);
Owen Taylor3473f882001-02-23 17:55:21 +0000836 if (cur->properties != NULL)
837 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
838
839 if ((info != NULL) && (info->empty)) {
840 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000841 if ((format) && (!info->isinline) && (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000842 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +0000843 (cur->next->type != HTML_ENTITY_REF_NODE) &&
844 (cur->parent != NULL) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000845 (cur->parent->name != NULL) &&
846 (cur->parent->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000847 xmlOutputBufferWriteString(buf, "\n");
848 }
849 return;
850 }
Daniel Veillard7db37732001-07-12 01:20:08 +0000851 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
852 (cur->children == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000853 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +0000854 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
855 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000856 xmlOutputBufferWriteString(buf, ">");
857 } else {
858 xmlOutputBufferWriteString(buf, "></");
Daniel Veillard645c6902003-04-10 21:40:49 +0000859 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
860 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
861 xmlOutputBufferWriteString(buf, ":");
862 }
Owen Taylor3473f882001-02-23 17:55:21 +0000863 xmlOutputBufferWriteString(buf, (const char *)cur->name);
864 xmlOutputBufferWriteString(buf, ">");
865 }
Daniel Veillard02bb1702001-06-13 21:11:59 +0000866 if ((format) && (cur->next != NULL) &&
867 (info != NULL) && (!info->isinline)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000868 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +0000869 (cur->next->type != HTML_ENTITY_REF_NODE) &&
870 (cur->parent != NULL) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000871 (cur->parent->name != NULL) &&
872 (cur->parent->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000873 xmlOutputBufferWriteString(buf, "\n");
874 }
875 return;
876 }
877 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +0000878 if ((cur->type != XML_ELEMENT_NODE) &&
879 (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000880 /*
881 * Uses the OutputBuffer property to automatically convert
882 * invalids to charrefs
883 */
884
Owen Taylor3473f882001-02-23 17:55:21 +0000885 xmlOutputBufferWriteString(buf, (const char *) cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000886 }
887 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +0000888 if ((format) && (info != NULL) && (!info->isinline) &&
889 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000890 (cur->children->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +0000891 (cur->children != cur->last) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000892 (cur->name != NULL) &&
893 (cur->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000894 xmlOutputBufferWriteString(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +0000895 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +0000896 if ((format) && (info != NULL) && (!info->isinline) &&
897 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000898 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +0000899 (cur->children != cur->last) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000900 (cur->name != NULL) &&
901 (cur->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000902 xmlOutputBufferWriteString(buf, "\n");
903 }
Owen Taylor3473f882001-02-23 17:55:21 +0000904 xmlOutputBufferWriteString(buf, "</");
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000905 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
906 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
907 xmlOutputBufferWriteString(buf, ":");
908 }
Owen Taylor3473f882001-02-23 17:55:21 +0000909 xmlOutputBufferWriteString(buf, (const char *)cur->name);
910 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000911 if ((format) && (info != NULL) && (!info->isinline) &&
912 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000913 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +0000914 (cur->next->type != HTML_ENTITY_REF_NODE) &&
915 (cur->parent != NULL) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000916 (cur->parent->name != NULL) &&
917 (cur->parent->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000918 xmlOutputBufferWriteString(buf, "\n");
919 }
920}
921
922/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000923 * htmlNodeDumpOutput:
924 * @buf: the HTML buffer output
925 * @doc: the document
926 * @cur: the current node
927 * @encoding: the encoding string
928 *
929 * Dump an HTML node, recursive behaviour,children are printed too,
930 * and formatting returns/spaces are added.
931 */
932void
933htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
934 xmlNodePtr cur, const char *encoding) {
935 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
936}
937
938/**
939 * htmlDocContentDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000940 * @buf: the HTML buffer output
941 * @cur: the document
942 * @encoding: the encoding string
Daniel Veillard9d06d302002-01-22 18:15:52 +0000943 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000944 *
945 * Dump an HTML document.
946 */
947void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000948htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
949 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000950 int type;
951
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000952 xmlInitParser();
953
Daniel Veillard3d97e662004-11-04 10:49:00 +0000954 if ((buf == NULL) || (cur == NULL))
955 return;
956
Owen Taylor3473f882001-02-23 17:55:21 +0000957 /*
958 * force to output the stuff as HTML, especially for entities
959 */
960 type = cur->type;
961 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard4dd93462001-04-02 15:16:19 +0000962 if (cur->intSubset != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000963 htmlDtdDumpOutput(buf, cur, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000964 }
965 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000966 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000967 }
968 xmlOutputBufferWriteString(buf, "\n");
969 cur->type = (xmlElementType) type;
970}
971
Daniel Veillard95d845f2001-06-13 13:48:46 +0000972/**
973 * htmlDocContentDumpOutput:
974 * @buf: the HTML buffer output
975 * @cur: the document
976 * @encoding: the encoding string
977 *
978 * Dump an HTML document. Formating return/spaces are added.
979 */
980void
981htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
982 const char *encoding) {
983 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
984}
985
Owen Taylor3473f882001-02-23 17:55:21 +0000986/************************************************************************
987 * *
988 * Saving functions front-ends *
989 * *
990 ************************************************************************/
991
992/**
993 * htmlDocDump:
994 * @f: the FILE*
995 * @cur: the document
996 *
997 * Dump an HTML document to an open FILE.
998 *
999 * returns: the number of byte written or -1 in case of failure.
1000 */
1001int
1002htmlDocDump(FILE *f, xmlDocPtr cur) {
1003 xmlOutputBufferPtr buf;
1004 xmlCharEncodingHandlerPtr handler = NULL;
1005 const char *encoding;
1006 int ret;
1007
Daniel Veillard70bcb0e2003-08-08 14:00:28 +00001008 xmlInitParser();
1009
Daniel Veillard3d97e662004-11-04 10:49:00 +00001010 if ((cur == NULL) || (f == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001011 return(-1);
1012 }
1013
1014 encoding = (const char *) htmlGetMetaEncoding(cur);
1015
1016 if (encoding != NULL) {
1017 xmlCharEncoding enc;
1018
1019 enc = xmlParseCharEncoding(encoding);
1020 if (enc != cur->charset) {
1021 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1022 /*
1023 * Not supported yet
1024 */
1025 return(-1);
1026 }
1027
1028 handler = xmlFindCharEncodingHandler(encoding);
1029 if (handler == NULL)
1030 return(-1);
Daniel Veillardb8c80162005-08-08 13:46:45 +00001031 } else {
1032 handler = xmlFindCharEncodingHandler(encoding);
Owen Taylor3473f882001-02-23 17:55:21 +00001033 }
1034 }
1035
1036 /*
1037 * Fallback to HTML or ASCII when the encoding is unspecified
1038 */
1039 if (handler == NULL)
1040 handler = xmlFindCharEncodingHandler("HTML");
1041 if (handler == NULL)
1042 handler = xmlFindCharEncodingHandler("ascii");
1043
1044 buf = xmlOutputBufferCreateFile(f, handler);
1045 if (buf == NULL) return(-1);
1046 htmlDocContentDumpOutput(buf, cur, NULL);
1047
1048 ret = xmlOutputBufferClose(buf);
1049 return(ret);
1050}
1051
1052/**
1053 * htmlSaveFile:
1054 * @filename: the filename (or URL)
1055 * @cur: the document
1056 *
1057 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1058 * used.
1059 * returns: the number of byte written or -1 in case of failure.
1060 */
1061int
1062htmlSaveFile(const char *filename, xmlDocPtr cur) {
1063 xmlOutputBufferPtr buf;
1064 xmlCharEncodingHandlerPtr handler = NULL;
1065 const char *encoding;
1066 int ret;
1067
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001068 if ((cur == NULL) || (filename == NULL))
1069 return(-1);
1070
Daniel Veillard70bcb0e2003-08-08 14:00:28 +00001071 xmlInitParser();
1072
Owen Taylor3473f882001-02-23 17:55:21 +00001073 encoding = (const char *) htmlGetMetaEncoding(cur);
1074
1075 if (encoding != NULL) {
1076 xmlCharEncoding enc;
1077
1078 enc = xmlParseCharEncoding(encoding);
1079 if (enc != cur->charset) {
1080 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1081 /*
1082 * Not supported yet
1083 */
1084 return(-1);
1085 }
1086
1087 handler = xmlFindCharEncodingHandler(encoding);
1088 if (handler == NULL)
1089 return(-1);
1090 }
1091 }
1092
1093 /*
1094 * Fallback to HTML or ASCII when the encoding is unspecified
1095 */
1096 if (handler == NULL)
1097 handler = xmlFindCharEncodingHandler("HTML");
1098 if (handler == NULL)
1099 handler = xmlFindCharEncodingHandler("ascii");
1100
1101 /*
1102 * save the content to a temp buffer.
1103 */
1104 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1105 if (buf == NULL) return(0);
1106
1107 htmlDocContentDumpOutput(buf, cur, NULL);
1108
1109 ret = xmlOutputBufferClose(buf);
1110 return(ret);
1111}
1112
1113/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001114 * htmlSaveFileFormat:
Owen Taylor3473f882001-02-23 17:55:21 +00001115 * @filename: the filename
1116 * @cur: the document
Daniel Veillard95d845f2001-06-13 13:48:46 +00001117 * @format: should formatting spaces been added
1118 * @encoding: the document encoding
Owen Taylor3473f882001-02-23 17:55:21 +00001119 *
1120 * Dump an HTML document to a file using a given encoding.
1121 *
1122 * returns: the number of byte written or -1 in case of failure.
1123 */
1124int
Daniel Veillard95d845f2001-06-13 13:48:46 +00001125htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1126 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001127 xmlOutputBufferPtr buf;
1128 xmlCharEncodingHandlerPtr handler = NULL;
1129 int ret;
1130
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001131 if ((cur == NULL) || (filename == NULL))
1132 return(-1);
1133
Daniel Veillard70bcb0e2003-08-08 14:00:28 +00001134 xmlInitParser();
1135
Owen Taylor3473f882001-02-23 17:55:21 +00001136 if (encoding != NULL) {
1137 xmlCharEncoding enc;
1138
1139 enc = xmlParseCharEncoding(encoding);
1140 if (enc != cur->charset) {
1141 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1142 /*
1143 * Not supported yet
1144 */
1145 return(-1);
1146 }
1147
1148 handler = xmlFindCharEncodingHandler(encoding);
1149 if (handler == NULL)
1150 return(-1);
1151 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1152 }
Daniel Veillard4dd93462001-04-02 15:16:19 +00001153 } else {
1154 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
Owen Taylor3473f882001-02-23 17:55:21 +00001155 }
1156
1157 /*
1158 * Fallback to HTML or ASCII when the encoding is unspecified
1159 */
1160 if (handler == NULL)
1161 handler = xmlFindCharEncodingHandler("HTML");
1162 if (handler == NULL)
1163 handler = xmlFindCharEncodingHandler("ascii");
1164
1165 /*
1166 * save the content to a temp buffer.
1167 */
1168 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1169 if (buf == NULL) return(0);
1170
Daniel Veillard95d845f2001-06-13 13:48:46 +00001171 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001172
1173 ret = xmlOutputBufferClose(buf);
1174 return(ret);
1175}
Daniel Veillard95d845f2001-06-13 13:48:46 +00001176
1177/**
1178 * htmlSaveFileEnc:
1179 * @filename: the filename
1180 * @cur: the document
1181 * @encoding: the document encoding
1182 *
1183 * Dump an HTML document to a file using a given encoding
1184 * and formatting returns/spaces are added.
1185 *
1186 * returns: the number of byte written or -1 in case of failure.
1187 */
1188int
1189htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1190 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1191}
1192
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001193#endif /* LIBXML_OUTPUT_ENABLED */
Daniel Veillardc084e472002-08-12 13:27:28 +00001194
Daniel Veillard5d4644e2005-04-01 13:11:58 +00001195#define bottom_HTMLtree
1196#include "elfgcchack.h"
Owen Taylor3473f882001-02-23 17:55:21 +00001197#endif /* LIBXML_HTML_ENABLED */