blob: 37999f7927362dfd78c2b371e2066e59a663ffdb [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002 * HTMLtree.c : implementation of access function for an HTML tree.
Owen Taylor3473f882001-02-23 17:55:21 +00003 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00006 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00007 */
8
9
Daniel Veillard34ce8be2002-03-18 19:37:11 +000010#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000012#ifdef LIBXML_HTML_ENABLED
13
Daniel Veillard8db67d22002-11-27 19:39:27 +000014#include <string.h> /* for memset() only ! */
15
Owen Taylor3473f882001-02-23 17:55:21 +000016#ifdef HAVE_CTYPE_H
17#include <ctype.h>
18#endif
19#ifdef HAVE_STDLIB_H
20#include <stdlib.h>
21#endif
22
23#include <libxml/xmlmemory.h>
24#include <libxml/HTMLparser.h>
25#include <libxml/HTMLtree.h>
26#include <libxml/entities.h>
27#include <libxml/valid.h>
28#include <libxml/xmlerror.h>
29#include <libxml/parserInternals.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000030#include <libxml/globals.h>
Daniel Veillardeb475a32002-04-14 22:00:22 +000031#include <libxml/uri.h>
Owen Taylor3473f882001-02-23 17:55:21 +000032
33/************************************************************************
34 * *
35 * Getting/Setting encoding meta tags *
36 * *
37 ************************************************************************/
38
39/**
40 * htmlGetMetaEncoding:
41 * @doc: the document
42 *
43 * Encoding definition lookup in the Meta tags
44 *
45 * Returns the current encoding as flagged in the HTML source
46 */
47const xmlChar *
48htmlGetMetaEncoding(htmlDocPtr doc) {
49 htmlNodePtr cur;
50 const xmlChar *content;
51 const xmlChar *encoding;
52
53 if (doc == NULL)
54 return(NULL);
55 cur = doc->children;
56
57 /*
58 * Search the html
59 */
60 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000061 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000062 if (xmlStrEqual(cur->name, BAD_CAST"html"))
63 break;
64 if (xmlStrEqual(cur->name, BAD_CAST"head"))
65 goto found_head;
66 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
67 goto found_meta;
68 }
69 cur = cur->next;
70 }
71 if (cur == NULL)
72 return(NULL);
73 cur = cur->children;
74
75 /*
76 * Search the head
77 */
78 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000079 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000080 if (xmlStrEqual(cur->name, BAD_CAST"head"))
81 break;
82 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
83 goto found_meta;
84 }
85 cur = cur->next;
86 }
87 if (cur == NULL)
88 return(NULL);
89found_head:
90 cur = cur->children;
91
92 /*
93 * Search the meta elements
94 */
95found_meta:
96 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000097 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000098 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
99 xmlAttrPtr attr = cur->properties;
100 int http;
101 const xmlChar *value;
102
103 content = NULL;
104 http = 0;
105 while (attr != NULL) {
106 if ((attr->children != NULL) &&
107 (attr->children->type == XML_TEXT_NODE) &&
108 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000109 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000110 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
111 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
112 http = 1;
113 else if ((value != NULL)
114 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
115 content = value;
116 if ((http != 0) && (content != NULL))
117 goto found_content;
118 }
119 attr = attr->next;
120 }
121 }
122 }
123 cur = cur->next;
124 }
125 return(NULL);
126
127found_content:
128 encoding = xmlStrstr(content, BAD_CAST"charset=");
129 if (encoding == NULL)
130 encoding = xmlStrstr(content, BAD_CAST"Charset=");
131 if (encoding == NULL)
132 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
133 if (encoding != NULL) {
134 encoding += 8;
135 } else {
136 encoding = xmlStrstr(content, BAD_CAST"charset =");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"Charset =");
139 if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
141 if (encoding != NULL)
142 encoding += 9;
143 }
144 if (encoding != NULL) {
145 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
146 }
147 return(encoding);
148}
149
150/**
151 * htmlSetMetaEncoding:
152 * @doc: the document
153 * @encoding: the encoding string
154 *
155 * Sets the current encoding in the Meta tags
156 * NOTE: this will not change the document content encoding, just
157 * the META flag associated.
158 *
159 * Returns 0 in case of success and -1 in case of error
160 */
161int
162htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
163 htmlNodePtr cur, meta;
164 const xmlChar *content;
165 char newcontent[100];
166
167
168 if (doc == NULL)
169 return(-1);
170
171 if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000172 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
William M. Brack13dfa872004-09-18 04:52:08 +0000173 (char *)encoding);
Owen Taylor3473f882001-02-23 17:55:21 +0000174 newcontent[sizeof(newcontent) - 1] = 0;
175 }
176
177 cur = doc->children;
178
179 /*
180 * Search the html
181 */
182 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000183 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000184 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
185 break;
186 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
187 goto found_head;
188 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
189 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000190 }
191 cur = cur->next;
192 }
193 if (cur == NULL)
194 return(-1);
195 cur = cur->children;
196
197 /*
198 * Search the head
199 */
200 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000201 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000202 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
203 break;
204 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
205 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000206 }
207 cur = cur->next;
208 }
209 if (cur == NULL)
210 return(-1);
211found_head:
212 if (cur->children == NULL) {
213 if (encoding == NULL)
214 return(0);
215 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
216 xmlAddChild(cur, meta);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000217 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Daniel Veillard3a42f3f2002-07-17 17:57:34 +0000218 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Owen Taylor3473f882001-02-23 17:55:21 +0000219 return(0);
220 }
221 cur = cur->children;
222
223found_meta:
224 if (encoding != NULL) {
225 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000226 * Create a new Meta element with the right attributes
Owen Taylor3473f882001-02-23 17:55:21 +0000227 */
228
229 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
230 xmlAddPrevSibling(cur, meta);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000231 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Daniel Veillard3a42f3f2002-07-17 17:57:34 +0000232 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Owen Taylor3473f882001-02-23 17:55:21 +0000233 }
234
235 /*
236 * Search and destroy all the remaining the meta elements carrying
237 * encoding informations
238 */
239 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000240 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000241 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000242 xmlAttrPtr attr = cur->properties;
243 int http;
244 const xmlChar *value;
245
246 content = NULL;
247 http = 0;
248 while (attr != NULL) {
249 if ((attr->children != NULL) &&
250 (attr->children->type == XML_TEXT_NODE) &&
251 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000252 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000253 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
254 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
255 http = 1;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000256 else
257 {
258 if ((value != NULL) &&
259 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
260 content = value;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000261 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000262 if ((http != 0) && (content != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +0000263 break;
264 }
265 attr = attr->next;
266 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000267 if ((http != 0) && (content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000268 meta = cur;
269 cur = cur->next;
270 xmlUnlinkNode(meta);
271 xmlFreeNode(meta);
272 continue;
273 }
274
275 }
276 }
277 cur = cur->next;
278 }
279 return(0);
280}
281
/**
 * htmlBooleanAttrs:
 *
 * NULL-terminated list of the HTML attributes which are output in
 * minimized form, i.e. <option selected="selected"> is serialized as
 * <option selected>, as per XSLT 1.0 16.2 "HTML Output Method".
 */
static const char* htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL
};
295
296
297/**
298 * htmlIsBooleanAttr:
299 * @name: the name of the attribute to check
300 *
301 * Determine if a given attribute is a boolean attribute.
302 *
303 * returns: false if the attribute is not boolean, true otherwise.
304 */
305int
306htmlIsBooleanAttr(const xmlChar *name)
307{
308 int i = 0;
309
310 while (htmlBooleanAttrs[i] != NULL) {
Daniel Veillardabe01742002-09-26 12:40:03 +0000311 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
Daniel Veillardc084e472002-08-12 13:27:28 +0000312 return 1;
313 i++;
314 }
315 return 0;
316}
317
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000318#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardda3fee42008-09-01 13:08:57 +0000319/*
320 * private routine exported from xmlIO.c
321 */
322xmlOutputBufferPtr
323xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
Owen Taylor3473f882001-02-23 17:55:21 +0000324/************************************************************************
325 * *
Daniel Veillarde2238d52003-10-09 13:14:55 +0000326 * Output error handlers *
327 * *
328 ************************************************************************/
329/**
330 * htmlSaveErrMemory:
331 * @extra: extra informations
332 *
333 * Handle an out of memory condition
334 */
335static void
336htmlSaveErrMemory(const char *extra)
337{
338 __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
339}
340
341/**
342 * htmlSaveErr:
343 * @code: the error number
344 * @node: the location of the error.
345 * @extra: extra informations
346 *
347 * Handle an out of memory condition
348 */
349static void
350htmlSaveErr(int code, xmlNodePtr node, const char *extra)
351{
352 const char *msg = NULL;
353
354 switch(code) {
355 case XML_SAVE_NOT_UTF8:
Rob Richards417b74d2006-08-15 23:14:24 +0000356 msg = "string is not in UTF-8\n";
Daniel Veillarde2238d52003-10-09 13:14:55 +0000357 break;
358 case XML_SAVE_CHAR_INVALID:
Rob Richards417b74d2006-08-15 23:14:24 +0000359 msg = "invalid character value\n";
Daniel Veillarde2238d52003-10-09 13:14:55 +0000360 break;
361 case XML_SAVE_UNKNOWN_ENCODING:
Rob Richards417b74d2006-08-15 23:14:24 +0000362 msg = "unknown encoding %s\n";
Daniel Veillarde2238d52003-10-09 13:14:55 +0000363 break;
364 case XML_SAVE_NO_DOCTYPE:
Rob Richards417b74d2006-08-15 23:14:24 +0000365 msg = "HTML has no DOCTYPE\n";
Daniel Veillarde2238d52003-10-09 13:14:55 +0000366 break;
367 default:
Rob Richards417b74d2006-08-15 23:14:24 +0000368 msg = "unexpected error number\n";
Daniel Veillarde2238d52003-10-09 13:14:55 +0000369 }
370 __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
371}
372
373/************************************************************************
374 * *
Owen Taylor3473f882001-02-23 17:55:21 +0000375 * Dumping HTML tree content to a simple buffer *
376 * *
377 ************************************************************************/
378
Daniel Veillard8db67d22002-11-27 19:39:27 +0000379static int
Daniel Veillard86fd5a72001-12-13 14:55:21 +0000380htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
381 int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000382
383/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000384 * htmlNodeDumpFormat:
Owen Taylor3473f882001-02-23 17:55:21 +0000385 * @buf: the HTML buffer output
386 * @doc: the document
387 * @cur: the current node
Daniel Veillard95d845f2001-06-13 13:48:46 +0000388 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000389 *
390 * Dump an HTML node, recursive behaviour,children are printed too.
Daniel Veillard8db67d22002-11-27 19:39:27 +0000391 *
392 * Returns the number of byte written or -1 in case of error
Owen Taylor3473f882001-02-23 17:55:21 +0000393 */
Daniel Veillard8db67d22002-11-27 19:39:27 +0000394static int
Daniel Veillard95d845f2001-06-13 13:48:46 +0000395htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
396 int format) {
Daniel Veillard8db67d22002-11-27 19:39:27 +0000397 unsigned int use;
398 int ret;
399 xmlOutputBufferPtr outbuf;
Owen Taylor3473f882001-02-23 17:55:21 +0000400
401 if (cur == NULL) {
Daniel Veillard8db67d22002-11-27 19:39:27 +0000402 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +0000403 }
Daniel Veillard8db67d22002-11-27 19:39:27 +0000404 if (buf == NULL) {
405 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +0000406 }
Daniel Veillard8db67d22002-11-27 19:39:27 +0000407 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
408 if (outbuf == NULL) {
Daniel Veillarde2238d52003-10-09 13:14:55 +0000409 htmlSaveErrMemory("allocating HTML output buffer");
Daniel Veillard8db67d22002-11-27 19:39:27 +0000410 return (-1);
411 }
412 memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
413 outbuf->buffer = buf;
414 outbuf->encoder = NULL;
415 outbuf->writecallback = NULL;
416 outbuf->closecallback = NULL;
417 outbuf->context = NULL;
418 outbuf->written = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000419
Daniel Veillard8db67d22002-11-27 19:39:27 +0000420 use = buf->use;
421 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
422 xmlFree(outbuf);
423 ret = buf->use - use;
424 return (ret);
Owen Taylor3473f882001-02-23 17:55:21 +0000425}
426
427/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000428 * htmlNodeDump:
429 * @buf: the HTML buffer output
430 * @doc: the document
431 * @cur: the current node
432 *
433 * Dump an HTML node, recursive behaviour,children are printed too,
434 * and formatting returns are added.
Daniel Veillard8db67d22002-11-27 19:39:27 +0000435 *
436 * Returns the number of byte written or -1 in case of error
Daniel Veillard95d845f2001-06-13 13:48:46 +0000437 */
Daniel Veillard8db67d22002-11-27 19:39:27 +0000438int
Daniel Veillard95d845f2001-06-13 13:48:46 +0000439htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000440 xmlInitParser();
441
Daniel Veillard8db67d22002-11-27 19:39:27 +0000442 return(htmlNodeDumpFormat(buf, doc, cur, 1));
Daniel Veillard95d845f2001-06-13 13:48:46 +0000443}
444
445/**
446 * htmlNodeDumpFileFormat:
447 * @out: the FILE pointer
448 * @doc: the document
449 * @cur: the current node
450 * @encoding: the document encoding
451 * @format: should formatting spaces been added
452 *
453 * Dump an HTML node, recursive behaviour,children are printed too.
454 *
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000455 * TODO: if encoding == NULL try to save in the doc encoding
456 *
457 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard95d845f2001-06-13 13:48:46 +0000458 */
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000459int
460htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
461 xmlNodePtr cur, const char *encoding, int format) {
462 xmlOutputBufferPtr buf;
463 xmlCharEncodingHandlerPtr handler = NULL;
464 int ret;
Daniel Veillard95d845f2001-06-13 13:48:46 +0000465
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000466 xmlInitParser();
467
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000468 if (encoding != NULL) {
469 xmlCharEncoding enc;
470
471 enc = xmlParseCharEncoding(encoding);
472 if (enc != XML_CHAR_ENCODING_UTF8) {
473 handler = xmlFindCharEncodingHandler(encoding);
474 if (handler == NULL)
475 return(-1);
476 }
477 }
478
479 /*
480 * Fallback to HTML or ASCII when the encoding is unspecified
481 */
482 if (handler == NULL)
483 handler = xmlFindCharEncodingHandler("HTML");
484 if (handler == NULL)
485 handler = xmlFindCharEncodingHandler("ascii");
486
487 /*
488 * save the content to a temp buffer.
489 */
490 buf = xmlOutputBufferCreateFile(out, handler);
491 if (buf == NULL) return(0);
492
493 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
494
495 ret = xmlOutputBufferClose(buf);
496 return(ret);
Daniel Veillard95d845f2001-06-13 13:48:46 +0000497}
498
499/**
Owen Taylor3473f882001-02-23 17:55:21 +0000500 * htmlNodeDumpFile:
501 * @out: the FILE pointer
502 * @doc: the document
503 * @cur: the current node
504 *
Daniel Veillard95d845f2001-06-13 13:48:46 +0000505 * Dump an HTML node, recursive behaviour,children are printed too,
506 * and formatting returns are added.
Owen Taylor3473f882001-02-23 17:55:21 +0000507 */
508void
509htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000510 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000511}
512
513/**
Rob Richards77b92ff2005-12-20 15:55:14 +0000514 * htmlDocDumpMemoryFormat:
Owen Taylor3473f882001-02-23 17:55:21 +0000515 * @cur: the document
516 * @mem: OUT: the memory pointer
Daniel Veillard2d703722001-05-30 18:32:34 +0000517 * @size: OUT: the memory length
Rob Richards77b92ff2005-12-20 15:55:14 +0000518 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000519 *
520 * Dump an HTML document in memory and return the xmlChar * and it's size.
521 * It's up to the caller to free the memory.
522 */
523void
Rob Richards77b92ff2005-12-20 15:55:14 +0000524htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
Daniel Veillard2d703722001-05-30 18:32:34 +0000525 xmlOutputBufferPtr buf;
526 xmlCharEncodingHandlerPtr handler = NULL;
527 const char *encoding;
Owen Taylor3473f882001-02-23 17:55:21 +0000528
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000529 xmlInitParser();
530
Daniel Veillardd5cc0f72004-11-06 19:24:28 +0000531 if ((mem == NULL) || (size == NULL))
532 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000533 if (cur == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000534 *mem = NULL;
535 *size = 0;
536 return;
537 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000538
539 encoding = (const char *) htmlGetMetaEncoding(cur);
540
541 if (encoding != NULL) {
542 xmlCharEncoding enc;
543
544 enc = xmlParseCharEncoding(encoding);
545 if (enc != cur->charset) {
546 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
547 /*
548 * Not supported yet
549 */
550 *mem = NULL;
551 *size = 0;
552 return;
553 }
554
555 handler = xmlFindCharEncodingHandler(encoding);
556 if (handler == NULL) {
557 *mem = NULL;
558 *size = 0;
559 return;
560 }
Daniel Veillardb8c80162005-08-08 13:46:45 +0000561 } else {
562 handler = xmlFindCharEncodingHandler(encoding);
Daniel Veillard2d703722001-05-30 18:32:34 +0000563 }
564 }
565
566 /*
567 * Fallback to HTML or ASCII when the encoding is unspecified
568 */
569 if (handler == NULL)
570 handler = xmlFindCharEncodingHandler("HTML");
571 if (handler == NULL)
572 handler = xmlFindCharEncodingHandler("ascii");
573
Daniel Veillardda3fee42008-09-01 13:08:57 +0000574 buf = xmlAllocOutputBufferInternal(handler);
Owen Taylor3473f882001-02-23 17:55:21 +0000575 if (buf == NULL) {
576 *mem = NULL;
577 *size = 0;
578 return;
579 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000580
Rob Richards77b92ff2005-12-20 15:55:14 +0000581 htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
582
Daniel Veillard2d703722001-05-30 18:32:34 +0000583 xmlOutputBufferFlush(buf);
584 if (buf->conv != NULL) {
585 *size = buf->conv->use;
586 *mem = xmlStrndup(buf->conv->content, *size);
587 } else {
588 *size = buf->buffer->use;
589 *mem = xmlStrndup(buf->buffer->content, *size);
590 }
591 (void)xmlOutputBufferClose(buf);
Owen Taylor3473f882001-02-23 17:55:21 +0000592}
593
Rob Richards77b92ff2005-12-20 15:55:14 +0000594/**
595 * htmlDocDumpMemory:
596 * @cur: the document
597 * @mem: OUT: the memory pointer
598 * @size: OUT: the memory length
599 *
600 * Dump an HTML document in memory and return the xmlChar * and it's size.
601 * It's up to the caller to free the memory.
602 */
603void
604htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
605 htmlDocDumpMemoryFormat(cur, mem, size, 1);
606}
607
Owen Taylor3473f882001-02-23 17:55:21 +0000608
609/************************************************************************
610 * *
611 * Dumping HTML tree content to an I/O output buffer *
612 * *
613 ************************************************************************/
614
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000615void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
Daniel Veillardc084e472002-08-12 13:27:28 +0000616
Owen Taylor3473f882001-02-23 17:55:21 +0000617/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000618 * htmlDtdDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000619 * @buf: the HTML buffer output
620 * @doc: the document
621 * @encoding: the encoding string
622 *
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000623 * TODO: check whether encoding is needed
624 *
Owen Taylor3473f882001-02-23 17:55:21 +0000625 * Dump the HTML document DTD, if any.
626 */
627static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000628htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000629 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000630 xmlDtdPtr cur = doc->intSubset;
631
632 if (cur == NULL) {
Daniel Veillarde2238d52003-10-09 13:14:55 +0000633 htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000634 return;
635 }
636 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
637 xmlOutputBufferWriteString(buf, (const char *)cur->name);
638 if (cur->ExternalID != NULL) {
639 xmlOutputBufferWriteString(buf, " PUBLIC ");
640 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
641 if (cur->SystemID != NULL) {
642 xmlOutputBufferWriteString(buf, " ");
643 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
644 }
645 } else if (cur->SystemID != NULL) {
646 xmlOutputBufferWriteString(buf, " SYSTEM ");
647 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
648 }
649 xmlOutputBufferWriteString(buf, ">\n");
650}
651
652/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000653 * htmlAttrDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000654 * @buf: the HTML buffer output
655 * @doc: the document
656 * @cur: the attribute pointer
657 * @encoding: the encoding string
658 *
659 * Dump an HTML attribute
660 */
661static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000662htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000663 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000664 xmlChar *value;
665
Daniel Veillardeca60d02001-06-13 07:45:41 +0000666 /*
667 * TODO: The html output method should not escape a & character
668 * occurring in an attribute value immediately followed by
669 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
670 */
671
Owen Taylor3473f882001-02-23 17:55:21 +0000672 if (cur == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000673 return;
674 }
675 xmlOutputBufferWriteString(buf, " ");
William M. Brack3a6da762003-09-15 04:58:14 +0000676 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
677 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
678 xmlOutputBufferWriteString(buf, ":");
679 }
Owen Taylor3473f882001-02-23 17:55:21 +0000680 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillardc084e472002-08-12 13:27:28 +0000681 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000682 value = xmlNodeListGetString(doc, cur->children, 0);
683 if (value) {
684 xmlOutputBufferWriteString(buf, "=");
Daniel Veillardc7e9b192003-03-27 14:08:24 +0000685 if ((cur->ns == NULL) && (cur->parent != NULL) &&
686 (cur->parent->ns == NULL) &&
687 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
688 (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
Daniel Veillardaa9a9832005-03-29 20:30:17 +0000689 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
690 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
691 (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
Daniel Veillardeb475a32002-04-14 22:00:22 +0000692 xmlChar *escaped;
693 xmlChar *tmp = value;
694
William M. Brack76e95df2003-10-18 16:20:14 +0000695 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillardeb475a32002-04-14 22:00:22 +0000696
Daniel Veillard5f5b7bb2003-05-16 17:19:40 +0000697 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
Daniel Veillardeb475a32002-04-14 22:00:22 +0000698 if (escaped != NULL) {
699 xmlBufferWriteQuotedString(buf->buffer, escaped);
700 xmlFree(escaped);
701 } else {
702 xmlBufferWriteQuotedString(buf->buffer, value);
703 }
704 } else {
705 xmlBufferWriteQuotedString(buf->buffer, value);
706 }
Owen Taylor3473f882001-02-23 17:55:21 +0000707 xmlFree(value);
708 } else {
709 xmlOutputBufferWriteString(buf, "=\"\"");
710 }
711 }
712}
713
714/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000715 * htmlAttrListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000716 * @buf: the HTML buffer output
717 * @doc: the document
718 * @cur: the first attribute pointer
719 * @encoding: the encoding string
720 *
721 * Dump a list of HTML attributes
722 */
723static void
724htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
725 if (cur == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000726 return;
727 }
728 while (cur != NULL) {
729 htmlAttrDumpOutput(buf, doc, cur, encoding);
730 cur = cur->next;
731 }
732}
733
734
Owen Taylor3473f882001-02-23 17:55:21 +0000735
736/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000737 * htmlNodeListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000738 * @buf: the HTML buffer output
739 * @doc: the document
740 * @cur: the first node
741 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000742 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000743 *
744 * Dump an HTML node list, recursive behaviour,children are printed too.
745 */
746static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000747htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
748 xmlNodePtr cur, const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000749 if (cur == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000750 return;
751 }
752 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000753 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000754 cur = cur->next;
755 }
756}
757
758/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000759 * htmlNodeDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000760 * @buf: the HTML buffer output
761 * @doc: the document
762 * @cur: the current node
763 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000764 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000765 *
766 * Dump an HTML node, recursive behaviour,children are printed too.
767 */
768void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000769htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
770 xmlNodePtr cur, const char *encoding, int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000771 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000772
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000773 xmlInitParser();
774
Daniel Veillardce244ad2004-11-05 10:03:46 +0000775 if ((cur == NULL) || (buf == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000776 return;
777 }
778 /*
779 * Special cases.
780 */
781 if (cur->type == XML_DTD_NODE)
782 return;
Daniel Veillardce244ad2004-11-05 10:03:46 +0000783 if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
784 (cur->type == XML_DOCUMENT_NODE)){
Owen Taylor3473f882001-02-23 17:55:21 +0000785 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
786 return;
787 }
Daniel Veillardfcd02ad2007-06-12 09:49:40 +0000788 if (cur->type == XML_ATTRIBUTE_NODE) {
789 htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
790 return;
791 }
Owen Taylor3473f882001-02-23 17:55:21 +0000792 if (cur->type == HTML_TEXT_NODE) {
793 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000794 if (((cur->name == (const xmlChar *)xmlStringText) ||
795 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000796 ((cur->parent == NULL) ||
Daniel Veillard44892f72002-10-16 15:23:26 +0000797 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
798 (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000799 xmlChar *buffer;
800
Owen Taylor3473f882001-02-23 17:55:21 +0000801 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000802 if (buffer != NULL) {
803 xmlOutputBufferWriteString(buf, (const char *)buffer);
804 xmlFree(buffer);
805 }
806 } else {
807 xmlOutputBufferWriteString(buf, (const char *)cur->content);
808 }
809 }
810 return;
811 }
812 if (cur->type == HTML_COMMENT_NODE) {
813 if (cur->content != NULL) {
814 xmlOutputBufferWriteString(buf, "<!--");
Owen Taylor3473f882001-02-23 17:55:21 +0000815 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000816 xmlOutputBufferWriteString(buf, "-->");
817 }
818 return;
819 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000820 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000821 if (cur->name == NULL)
822 return;
823 xmlOutputBufferWriteString(buf, "<?");
824 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000825 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000826 xmlOutputBufferWriteString(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000827 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000828 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000829 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000830 return;
831 }
Owen Taylor3473f882001-02-23 17:55:21 +0000832 if (cur->type == HTML_ENTITY_REF_NODE) {
833 xmlOutputBufferWriteString(buf, "&");
834 xmlOutputBufferWriteString(buf, (const char *)cur->name);
835 xmlOutputBufferWriteString(buf, ";");
836 return;
837 }
838 if (cur->type == HTML_PRESERVE_NODE) {
839 if (cur->content != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000840 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000841 }
842 return;
843 }
844
845 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000846 * Get specific HTML info for that node.
Owen Taylor3473f882001-02-23 17:55:21 +0000847 */
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000848 if (cur->ns == NULL)
849 info = htmlTagLookup(cur->name);
850 else
851 info = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000852
853 xmlOutputBufferWriteString(buf, "<");
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000854 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
855 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
856 xmlOutputBufferWriteString(buf, ":");
857 }
Owen Taylor3473f882001-02-23 17:55:21 +0000858 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000859 if (cur->nsDef)
860 xmlNsListDumpOutput(buf, cur->nsDef);
Owen Taylor3473f882001-02-23 17:55:21 +0000861 if (cur->properties != NULL)
862 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
863
864 if ((info != NULL) && (info->empty)) {
865 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000866 if ((format) && (!info->isinline) && (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000867 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +0000868 (cur->next->type != HTML_ENTITY_REF_NODE) &&
869 (cur->parent != NULL) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000870 (cur->parent->name != NULL) &&
871 (cur->parent->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000872 xmlOutputBufferWriteString(buf, "\n");
873 }
874 return;
875 }
Daniel Veillard7db37732001-07-12 01:20:08 +0000876 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
877 (cur->children == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000878 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +0000879 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
880 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000881 xmlOutputBufferWriteString(buf, ">");
882 } else {
883 xmlOutputBufferWriteString(buf, "></");
Daniel Veillard645c6902003-04-10 21:40:49 +0000884 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
885 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
886 xmlOutputBufferWriteString(buf, ":");
887 }
Owen Taylor3473f882001-02-23 17:55:21 +0000888 xmlOutputBufferWriteString(buf, (const char *)cur->name);
889 xmlOutputBufferWriteString(buf, ">");
890 }
Daniel Veillard02bb1702001-06-13 21:11:59 +0000891 if ((format) && (cur->next != NULL) &&
892 (info != NULL) && (!info->isinline)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000893 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +0000894 (cur->next->type != HTML_ENTITY_REF_NODE) &&
895 (cur->parent != NULL) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000896 (cur->parent->name != NULL) &&
897 (cur->parent->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000898 xmlOutputBufferWriteString(buf, "\n");
899 }
900 return;
901 }
902 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +0000903 if ((cur->type != XML_ELEMENT_NODE) &&
904 (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000905 /*
906 * Uses the OutputBuffer property to automatically convert
907 * invalids to charrefs
908 */
909
Owen Taylor3473f882001-02-23 17:55:21 +0000910 xmlOutputBufferWriteString(buf, (const char *) cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000911 }
912 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +0000913 if ((format) && (info != NULL) && (!info->isinline) &&
914 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000915 (cur->children->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +0000916 (cur->children != cur->last) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000917 (cur->name != NULL) &&
918 (cur->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000919 xmlOutputBufferWriteString(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +0000920 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +0000921 if ((format) && (info != NULL) && (!info->isinline) &&
922 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000923 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +0000924 (cur->children != cur->last) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000925 (cur->name != NULL) &&
926 (cur->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000927 xmlOutputBufferWriteString(buf, "\n");
928 }
Owen Taylor3473f882001-02-23 17:55:21 +0000929 xmlOutputBufferWriteString(buf, "</");
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000930 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
931 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
932 xmlOutputBufferWriteString(buf, ":");
933 }
Owen Taylor3473f882001-02-23 17:55:21 +0000934 xmlOutputBufferWriteString(buf, (const char *)cur->name);
935 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000936 if ((format) && (info != NULL) && (!info->isinline) &&
937 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000938 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +0000939 (cur->next->type != HTML_ENTITY_REF_NODE) &&
940 (cur->parent != NULL) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000941 (cur->parent->name != NULL) &&
942 (cur->parent->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000943 xmlOutputBufferWriteString(buf, "\n");
944 }
945}
946
947/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000948 * htmlNodeDumpOutput:
949 * @buf: the HTML buffer output
950 * @doc: the document
951 * @cur: the current node
952 * @encoding: the encoding string
953 *
954 * Dump an HTML node, recursive behaviour,children are printed too,
955 * and formatting returns/spaces are added.
956 */
957void
958htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
959 xmlNodePtr cur, const char *encoding) {
960 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
961}
962
963/**
964 * htmlDocContentDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000965 * @buf: the HTML buffer output
966 * @cur: the document
967 * @encoding: the encoding string
Daniel Veillard9d06d302002-01-22 18:15:52 +0000968 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000969 *
970 * Dump an HTML document.
971 */
972void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000973htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
974 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000975 int type;
976
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000977 xmlInitParser();
978
Daniel Veillard3d97e662004-11-04 10:49:00 +0000979 if ((buf == NULL) || (cur == NULL))
980 return;
981
Owen Taylor3473f882001-02-23 17:55:21 +0000982 /*
983 * force to output the stuff as HTML, especially for entities
984 */
985 type = cur->type;
986 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard4dd93462001-04-02 15:16:19 +0000987 if (cur->intSubset != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000988 htmlDtdDumpOutput(buf, cur, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000989 }
990 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000991 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000992 }
993 xmlOutputBufferWriteString(buf, "\n");
994 cur->type = (xmlElementType) type;
995}
996
Daniel Veillard95d845f2001-06-13 13:48:46 +0000997/**
998 * htmlDocContentDumpOutput:
999 * @buf: the HTML buffer output
1000 * @cur: the document
1001 * @encoding: the encoding string
1002 *
1003 * Dump an HTML document. Formating return/spaces are added.
1004 */
1005void
1006htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1007 const char *encoding) {
1008 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1009}
1010
Owen Taylor3473f882001-02-23 17:55:21 +00001011/************************************************************************
1012 * *
1013 * Saving functions front-ends *
1014 * *
1015 ************************************************************************/
1016
1017/**
1018 * htmlDocDump:
1019 * @f: the FILE*
1020 * @cur: the document
1021 *
1022 * Dump an HTML document to an open FILE.
1023 *
1024 * returns: the number of byte written or -1 in case of failure.
1025 */
1026int
1027htmlDocDump(FILE *f, xmlDocPtr cur) {
1028 xmlOutputBufferPtr buf;
1029 xmlCharEncodingHandlerPtr handler = NULL;
1030 const char *encoding;
1031 int ret;
1032
Daniel Veillard70bcb0e2003-08-08 14:00:28 +00001033 xmlInitParser();
1034
Daniel Veillard3d97e662004-11-04 10:49:00 +00001035 if ((cur == NULL) || (f == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001036 return(-1);
1037 }
1038
1039 encoding = (const char *) htmlGetMetaEncoding(cur);
1040
1041 if (encoding != NULL) {
1042 xmlCharEncoding enc;
1043
1044 enc = xmlParseCharEncoding(encoding);
1045 if (enc != cur->charset) {
1046 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1047 /*
1048 * Not supported yet
1049 */
1050 return(-1);
1051 }
1052
1053 handler = xmlFindCharEncodingHandler(encoding);
1054 if (handler == NULL)
1055 return(-1);
Daniel Veillardb8c80162005-08-08 13:46:45 +00001056 } else {
1057 handler = xmlFindCharEncodingHandler(encoding);
Owen Taylor3473f882001-02-23 17:55:21 +00001058 }
1059 }
1060
1061 /*
1062 * Fallback to HTML or ASCII when the encoding is unspecified
1063 */
1064 if (handler == NULL)
1065 handler = xmlFindCharEncodingHandler("HTML");
1066 if (handler == NULL)
1067 handler = xmlFindCharEncodingHandler("ascii");
1068
1069 buf = xmlOutputBufferCreateFile(f, handler);
1070 if (buf == NULL) return(-1);
1071 htmlDocContentDumpOutput(buf, cur, NULL);
1072
1073 ret = xmlOutputBufferClose(buf);
1074 return(ret);
1075}
1076
1077/**
1078 * htmlSaveFile:
1079 * @filename: the filename (or URL)
1080 * @cur: the document
1081 *
1082 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1083 * used.
1084 * returns: the number of byte written or -1 in case of failure.
1085 */
1086int
1087htmlSaveFile(const char *filename, xmlDocPtr cur) {
1088 xmlOutputBufferPtr buf;
1089 xmlCharEncodingHandlerPtr handler = NULL;
1090 const char *encoding;
1091 int ret;
1092
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001093 if ((cur == NULL) || (filename == NULL))
1094 return(-1);
1095
Daniel Veillard70bcb0e2003-08-08 14:00:28 +00001096 xmlInitParser();
1097
Owen Taylor3473f882001-02-23 17:55:21 +00001098 encoding = (const char *) htmlGetMetaEncoding(cur);
1099
1100 if (encoding != NULL) {
1101 xmlCharEncoding enc;
1102
1103 enc = xmlParseCharEncoding(encoding);
1104 if (enc != cur->charset) {
1105 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1106 /*
1107 * Not supported yet
1108 */
1109 return(-1);
1110 }
1111
1112 handler = xmlFindCharEncodingHandler(encoding);
1113 if (handler == NULL)
1114 return(-1);
1115 }
1116 }
1117
1118 /*
1119 * Fallback to HTML or ASCII when the encoding is unspecified
1120 */
1121 if (handler == NULL)
1122 handler = xmlFindCharEncodingHandler("HTML");
1123 if (handler == NULL)
1124 handler = xmlFindCharEncodingHandler("ascii");
1125
1126 /*
1127 * save the content to a temp buffer.
1128 */
1129 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1130 if (buf == NULL) return(0);
1131
1132 htmlDocContentDumpOutput(buf, cur, NULL);
1133
1134 ret = xmlOutputBufferClose(buf);
1135 return(ret);
1136}
1137
1138/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001139 * htmlSaveFileFormat:
Owen Taylor3473f882001-02-23 17:55:21 +00001140 * @filename: the filename
1141 * @cur: the document
Daniel Veillard95d845f2001-06-13 13:48:46 +00001142 * @format: should formatting spaces been added
1143 * @encoding: the document encoding
Owen Taylor3473f882001-02-23 17:55:21 +00001144 *
1145 * Dump an HTML document to a file using a given encoding.
1146 *
1147 * returns: the number of byte written or -1 in case of failure.
1148 */
1149int
Daniel Veillard95d845f2001-06-13 13:48:46 +00001150htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1151 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001152 xmlOutputBufferPtr buf;
1153 xmlCharEncodingHandlerPtr handler = NULL;
1154 int ret;
1155
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001156 if ((cur == NULL) || (filename == NULL))
1157 return(-1);
1158
Daniel Veillard70bcb0e2003-08-08 14:00:28 +00001159 xmlInitParser();
1160
Owen Taylor3473f882001-02-23 17:55:21 +00001161 if (encoding != NULL) {
1162 xmlCharEncoding enc;
1163
1164 enc = xmlParseCharEncoding(encoding);
1165 if (enc != cur->charset) {
1166 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1167 /*
1168 * Not supported yet
1169 */
1170 return(-1);
1171 }
1172
1173 handler = xmlFindCharEncodingHandler(encoding);
1174 if (handler == NULL)
1175 return(-1);
1176 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1177 }
Daniel Veillard4dd93462001-04-02 15:16:19 +00001178 } else {
1179 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
Owen Taylor3473f882001-02-23 17:55:21 +00001180 }
1181
1182 /*
1183 * Fallback to HTML or ASCII when the encoding is unspecified
1184 */
1185 if (handler == NULL)
1186 handler = xmlFindCharEncodingHandler("HTML");
1187 if (handler == NULL)
1188 handler = xmlFindCharEncodingHandler("ascii");
1189
1190 /*
1191 * save the content to a temp buffer.
1192 */
1193 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1194 if (buf == NULL) return(0);
1195
Daniel Veillard95d845f2001-06-13 13:48:46 +00001196 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001197
1198 ret = xmlOutputBufferClose(buf);
1199 return(ret);
1200}
Daniel Veillard95d845f2001-06-13 13:48:46 +00001201
1202/**
1203 * htmlSaveFileEnc:
1204 * @filename: the filename
1205 * @cur: the document
1206 * @encoding: the document encoding
1207 *
1208 * Dump an HTML document to a file using a given encoding
1209 * and formatting returns/spaces are added.
1210 *
1211 * returns: the number of byte written or -1 in case of failure.
1212 */
1213int
1214htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1215 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1216}
1217
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001218#endif /* LIBXML_OUTPUT_ENABLED */
Daniel Veillardc084e472002-08-12 13:27:28 +00001219
Daniel Veillard5d4644e2005-04-01 13:11:58 +00001220#define bottom_HTMLtree
1221#include "elfgcchack.h"
Owen Taylor3473f882001-02-23 17:55:21 +00001222#endif /* LIBXML_HTML_ENABLED */