blob: f23ae023f6f63b9658186ce80db35df8c0be2049 [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
8
9
Daniel Veillard34ce8be2002-03-18 19:37:11 +000010#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000012#ifdef LIBXML_HTML_ENABLED
13
Daniel Veillard8db67d22002-11-27 19:39:27 +000014#include <string.h> /* for memset() only ! */
15
Owen Taylor3473f882001-02-23 17:55:21 +000016#ifdef HAVE_CTYPE_H
17#include <ctype.h>
18#endif
19#ifdef HAVE_STDLIB_H
20#include <stdlib.h>
21#endif
22
23#include <libxml/xmlmemory.h>
24#include <libxml/HTMLparser.h>
25#include <libxml/HTMLtree.h>
26#include <libxml/entities.h>
27#include <libxml/valid.h>
28#include <libxml/xmlerror.h>
29#include <libxml/parserInternals.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000030#include <libxml/globals.h>
Daniel Veillardeb475a32002-04-14 22:00:22 +000031#include <libxml/uri.h>
Owen Taylor3473f882001-02-23 17:55:21 +000032
/************************************************************************
 *                                                                      *
 *              Getting/Setting encoding meta tags                      *
 *                                                                      *
 ************************************************************************/
38
39/**
40 * htmlGetMetaEncoding:
41 * @doc: the document
42 *
43 * Encoding definition lookup in the Meta tags
44 *
45 * Returns the current encoding as flagged in the HTML source
46 */
47const xmlChar *
48htmlGetMetaEncoding(htmlDocPtr doc) {
49 htmlNodePtr cur;
50 const xmlChar *content;
51 const xmlChar *encoding;
52
53 if (doc == NULL)
54 return(NULL);
55 cur = doc->children;
56
57 /*
58 * Search the html
59 */
60 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000061 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000062 if (xmlStrEqual(cur->name, BAD_CAST"html"))
63 break;
64 if (xmlStrEqual(cur->name, BAD_CAST"head"))
65 goto found_head;
66 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
67 goto found_meta;
68 }
69 cur = cur->next;
70 }
71 if (cur == NULL)
72 return(NULL);
73 cur = cur->children;
74
75 /*
76 * Search the head
77 */
78 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000079 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000080 if (xmlStrEqual(cur->name, BAD_CAST"head"))
81 break;
82 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
83 goto found_meta;
84 }
85 cur = cur->next;
86 }
87 if (cur == NULL)
88 return(NULL);
89found_head:
90 cur = cur->children;
91
92 /*
93 * Search the meta elements
94 */
95found_meta:
96 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000097 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000098 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
99 xmlAttrPtr attr = cur->properties;
100 int http;
101 const xmlChar *value;
102
103 content = NULL;
104 http = 0;
105 while (attr != NULL) {
106 if ((attr->children != NULL) &&
107 (attr->children->type == XML_TEXT_NODE) &&
108 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000109 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000110 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
111 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
112 http = 1;
113 else if ((value != NULL)
114 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
115 content = value;
116 if ((http != 0) && (content != NULL))
117 goto found_content;
118 }
119 attr = attr->next;
120 }
121 }
122 }
123 cur = cur->next;
124 }
125 return(NULL);
126
127found_content:
128 encoding = xmlStrstr(content, BAD_CAST"charset=");
129 if (encoding == NULL)
130 encoding = xmlStrstr(content, BAD_CAST"Charset=");
131 if (encoding == NULL)
132 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
133 if (encoding != NULL) {
134 encoding += 8;
135 } else {
136 encoding = xmlStrstr(content, BAD_CAST"charset =");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"Charset =");
139 if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
141 if (encoding != NULL)
142 encoding += 9;
143 }
144 if (encoding != NULL) {
145 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
146 }
147 return(encoding);
148}
149
150/**
151 * htmlSetMetaEncoding:
152 * @doc: the document
153 * @encoding: the encoding string
154 *
155 * Sets the current encoding in the Meta tags
156 * NOTE: this will not change the document content encoding, just
157 * the META flag associated.
158 *
159 * Returns 0 in case of success and -1 in case of error
160 */
161int
162htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200163 htmlNodePtr cur, meta = NULL, head = NULL;
164 const xmlChar *content = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000165 char newcontent[100];
166
167
168 if (doc == NULL)
169 return(-1);
170
Daniel Veillard74eb54b2009-08-12 15:59:01 +0200171 /* html isn't a real encoding it's just libxml2 way to get entities */
172 if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
173 return(-1);
174
Owen Taylor3473f882001-02-23 17:55:21 +0000175 if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000176 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
William M. Brack13dfa872004-09-18 04:52:08 +0000177 (char *)encoding);
Owen Taylor3473f882001-02-23 17:55:21 +0000178 newcontent[sizeof(newcontent) - 1] = 0;
179 }
180
181 cur = doc->children;
182
183 /*
184 * Search the html
185 */
186 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000187 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000188 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
189 break;
190 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
191 goto found_head;
192 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
193 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000194 }
195 cur = cur->next;
196 }
197 if (cur == NULL)
198 return(-1);
199 cur = cur->children;
200
201 /*
202 * Search the head
203 */
204 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000205 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000206 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
207 break;
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200208 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
209 head = cur->parent;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000210 goto found_meta;
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200211 }
Owen Taylor3473f882001-02-23 17:55:21 +0000212 }
213 cur = cur->next;
214 }
215 if (cur == NULL)
216 return(-1);
217found_head:
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200218 head = cur;
219 if (cur->children == NULL)
220 goto create;
Owen Taylor3473f882001-02-23 17:55:21 +0000221 cur = cur->children;
222
223found_meta:
Owen Taylor3473f882001-02-23 17:55:21 +0000224 /*
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200225 * Search and update all the remaining the meta elements carrying
Owen Taylor3473f882001-02-23 17:55:21 +0000226 * encoding informations
227 */
228 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000229 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000230 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000231 xmlAttrPtr attr = cur->properties;
232 int http;
233 const xmlChar *value;
234
235 content = NULL;
236 http = 0;
237 while (attr != NULL) {
238 if ((attr->children != NULL) &&
239 (attr->children->type == XML_TEXT_NODE) &&
240 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000241 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000242 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
243 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
244 http = 1;
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200245 else
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000246 {
247 if ((value != NULL) &&
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200248 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
249 content = value;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000250 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000251 if ((http != 0) && (content != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +0000252 break;
253 }
254 attr = attr->next;
255 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000256 if ((http != 0) && (content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000257 meta = cur;
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200258 break;
Owen Taylor3473f882001-02-23 17:55:21 +0000259 }
260
261 }
262 }
263 cur = cur->next;
264 }
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200265create:
266 if (meta == NULL) {
267 if ((encoding != NULL) && (head != NULL)) {
268 /*
269 * Create a new Meta element with the right attributes
270 */
271
272 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
273 if (head->children == NULL)
274 xmlAddChild(head, meta);
275 else
276 xmlAddPrevSibling(head->children, meta);
277 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
278 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
279 }
280 } else {
281 /* change the document only if there is a real encoding change */
282 if (xmlStrcasestr(content, encoding) == NULL) {
283 xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
284 }
285 }
286
287
Owen Taylor3473f882001-02-23 17:55:21 +0000288 return(0);
289}
290
/**
 * htmlBooleanAttrs:
 *
 * NULL-terminated list of the HTML attributes which will be output
 * in minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 */
static const char* htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL    /* end of list marker */
};
304
305
306/**
307 * htmlIsBooleanAttr:
308 * @name: the name of the attribute to check
309 *
310 * Determine if a given attribute is a boolean attribute.
311 *
312 * returns: false if the attribute is not boolean, true otherwise.
313 */
314int
315htmlIsBooleanAttr(const xmlChar *name)
316{
317 int i = 0;
318
319 while (htmlBooleanAttrs[i] != NULL) {
Daniel Veillardabe01742002-09-26 12:40:03 +0000320 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
Daniel Veillardc084e472002-08-12 13:27:28 +0000321 return 1;
322 i++;
323 }
324 return 0;
325}
326
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000327#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardda3fee42008-09-01 13:08:57 +0000328/*
329 * private routine exported from xmlIO.c
330 */
331xmlOutputBufferPtr
332xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
/************************************************************************
 *                                                                      *
 *                      Output error handlers                           *
 *                                                                      *
 ************************************************************************/
338/**
339 * htmlSaveErrMemory:
340 * @extra: extra informations
341 *
342 * Handle an out of memory condition
343 */
344static void
345htmlSaveErrMemory(const char *extra)
346{
347 __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
348}
349
350/**
351 * htmlSaveErr:
352 * @code: the error number
353 * @node: the location of the error.
354 * @extra: extra informations
355 *
356 * Handle an out of memory condition
357 */
358static void
359htmlSaveErr(int code, xmlNodePtr node, const char *extra)
360{
361 const char *msg = NULL;
362
363 switch(code) {
364 case XML_SAVE_NOT_UTF8:
Rob Richards417b74d2006-08-15 23:14:24 +0000365 msg = "string is not in UTF-8\n";
Daniel Veillarde2238d52003-10-09 13:14:55 +0000366 break;
367 case XML_SAVE_CHAR_INVALID:
Rob Richards417b74d2006-08-15 23:14:24 +0000368 msg = "invalid character value\n";
Daniel Veillarde2238d52003-10-09 13:14:55 +0000369 break;
370 case XML_SAVE_UNKNOWN_ENCODING:
Rob Richards417b74d2006-08-15 23:14:24 +0000371 msg = "unknown encoding %s\n";
Daniel Veillarde2238d52003-10-09 13:14:55 +0000372 break;
373 case XML_SAVE_NO_DOCTYPE:
Rob Richards417b74d2006-08-15 23:14:24 +0000374 msg = "HTML has no DOCTYPE\n";
Daniel Veillarde2238d52003-10-09 13:14:55 +0000375 break;
376 default:
Rob Richards417b74d2006-08-15 23:14:24 +0000377 msg = "unexpected error number\n";
Daniel Veillarde2238d52003-10-09 13:14:55 +0000378 }
379 __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
380}
381
/************************************************************************
 *                                                                      *
 *              Dumping HTML tree content to a simple buffer            *
 *                                                                      *
 ************************************************************************/
387
Daniel Veillard8db67d22002-11-27 19:39:27 +0000388static int
Daniel Veillard86fd5a72001-12-13 14:55:21 +0000389htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
390 int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000391
392/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000393 * htmlNodeDumpFormat:
Owen Taylor3473f882001-02-23 17:55:21 +0000394 * @buf: the HTML buffer output
395 * @doc: the document
396 * @cur: the current node
Daniel Veillard95d845f2001-06-13 13:48:46 +0000397 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000398 *
399 * Dump an HTML node, recursive behaviour,children are printed too.
Daniel Veillard8db67d22002-11-27 19:39:27 +0000400 *
401 * Returns the number of byte written or -1 in case of error
Owen Taylor3473f882001-02-23 17:55:21 +0000402 */
Daniel Veillard8db67d22002-11-27 19:39:27 +0000403static int
Daniel Veillard95d845f2001-06-13 13:48:46 +0000404htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
405 int format) {
Daniel Veillard8db67d22002-11-27 19:39:27 +0000406 unsigned int use;
407 int ret;
408 xmlOutputBufferPtr outbuf;
Owen Taylor3473f882001-02-23 17:55:21 +0000409
410 if (cur == NULL) {
Daniel Veillard8db67d22002-11-27 19:39:27 +0000411 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +0000412 }
Daniel Veillard8db67d22002-11-27 19:39:27 +0000413 if (buf == NULL) {
414 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +0000415 }
Daniel Veillard8db67d22002-11-27 19:39:27 +0000416 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
417 if (outbuf == NULL) {
Daniel Veillarde2238d52003-10-09 13:14:55 +0000418 htmlSaveErrMemory("allocating HTML output buffer");
Daniel Veillard8db67d22002-11-27 19:39:27 +0000419 return (-1);
420 }
421 memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
422 outbuf->buffer = buf;
423 outbuf->encoder = NULL;
424 outbuf->writecallback = NULL;
425 outbuf->closecallback = NULL;
426 outbuf->context = NULL;
427 outbuf->written = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000428
Daniel Veillard8db67d22002-11-27 19:39:27 +0000429 use = buf->use;
430 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
431 xmlFree(outbuf);
432 ret = buf->use - use;
433 return (ret);
Owen Taylor3473f882001-02-23 17:55:21 +0000434}
435
436/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000437 * htmlNodeDump:
438 * @buf: the HTML buffer output
439 * @doc: the document
440 * @cur: the current node
441 *
442 * Dump an HTML node, recursive behaviour,children are printed too,
443 * and formatting returns are added.
Daniel Veillard8db67d22002-11-27 19:39:27 +0000444 *
445 * Returns the number of byte written or -1 in case of error
Daniel Veillard95d845f2001-06-13 13:48:46 +0000446 */
Daniel Veillard8db67d22002-11-27 19:39:27 +0000447int
Daniel Veillard95d845f2001-06-13 13:48:46 +0000448htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000449 xmlInitParser();
450
Daniel Veillard8db67d22002-11-27 19:39:27 +0000451 return(htmlNodeDumpFormat(buf, doc, cur, 1));
Daniel Veillard95d845f2001-06-13 13:48:46 +0000452}
453
454/**
455 * htmlNodeDumpFileFormat:
456 * @out: the FILE pointer
457 * @doc: the document
458 * @cur: the current node
459 * @encoding: the document encoding
460 * @format: should formatting spaces been added
461 *
462 * Dump an HTML node, recursive behaviour,children are printed too.
463 *
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000464 * TODO: if encoding == NULL try to save in the doc encoding
465 *
466 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard95d845f2001-06-13 13:48:46 +0000467 */
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000468int
469htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
470 xmlNodePtr cur, const char *encoding, int format) {
471 xmlOutputBufferPtr buf;
472 xmlCharEncodingHandlerPtr handler = NULL;
473 int ret;
Daniel Veillard95d845f2001-06-13 13:48:46 +0000474
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000475 xmlInitParser();
476
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000477 if (encoding != NULL) {
478 xmlCharEncoding enc;
479
480 enc = xmlParseCharEncoding(encoding);
481 if (enc != XML_CHAR_ENCODING_UTF8) {
482 handler = xmlFindCharEncodingHandler(encoding);
483 if (handler == NULL)
Daniel Veillardc62efc82011-05-16 16:03:50 +0800484 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000485 }
486 }
487
488 /*
489 * Fallback to HTML or ASCII when the encoding is unspecified
490 */
491 if (handler == NULL)
492 handler = xmlFindCharEncodingHandler("HTML");
493 if (handler == NULL)
494 handler = xmlFindCharEncodingHandler("ascii");
495
496 /*
497 * save the content to a temp buffer.
498 */
499 buf = xmlOutputBufferCreateFile(out, handler);
500 if (buf == NULL) return(0);
501
502 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
503
504 ret = xmlOutputBufferClose(buf);
505 return(ret);
Daniel Veillard95d845f2001-06-13 13:48:46 +0000506}
507
508/**
Owen Taylor3473f882001-02-23 17:55:21 +0000509 * htmlNodeDumpFile:
510 * @out: the FILE pointer
511 * @doc: the document
512 * @cur: the current node
513 *
Daniel Veillard95d845f2001-06-13 13:48:46 +0000514 * Dump an HTML node, recursive behaviour,children are printed too,
515 * and formatting returns are added.
Owen Taylor3473f882001-02-23 17:55:21 +0000516 */
517void
518htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000519 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000520}
521
522/**
Rob Richards77b92ff2005-12-20 15:55:14 +0000523 * htmlDocDumpMemoryFormat:
Owen Taylor3473f882001-02-23 17:55:21 +0000524 * @cur: the document
525 * @mem: OUT: the memory pointer
Daniel Veillard2d703722001-05-30 18:32:34 +0000526 * @size: OUT: the memory length
Rob Richards77b92ff2005-12-20 15:55:14 +0000527 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000528 *
529 * Dump an HTML document in memory and return the xmlChar * and it's size.
530 * It's up to the caller to free the memory.
531 */
532void
Rob Richards77b92ff2005-12-20 15:55:14 +0000533htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
Daniel Veillard2d703722001-05-30 18:32:34 +0000534 xmlOutputBufferPtr buf;
535 xmlCharEncodingHandlerPtr handler = NULL;
536 const char *encoding;
Owen Taylor3473f882001-02-23 17:55:21 +0000537
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000538 xmlInitParser();
539
Daniel Veillardd5cc0f72004-11-06 19:24:28 +0000540 if ((mem == NULL) || (size == NULL))
541 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000542 if (cur == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000543 *mem = NULL;
544 *size = 0;
545 return;
546 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000547
548 encoding = (const char *) htmlGetMetaEncoding(cur);
549
550 if (encoding != NULL) {
551 xmlCharEncoding enc;
552
553 enc = xmlParseCharEncoding(encoding);
554 if (enc != cur->charset) {
555 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
556 /*
557 * Not supported yet
558 */
559 *mem = NULL;
560 *size = 0;
561 return;
562 }
563
564 handler = xmlFindCharEncodingHandler(encoding);
Daniel Veillardc62efc82011-05-16 16:03:50 +0800565 if (handler == NULL)
566 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
567
Daniel Veillardb8c80162005-08-08 13:46:45 +0000568 } else {
569 handler = xmlFindCharEncodingHandler(encoding);
Daniel Veillard2d703722001-05-30 18:32:34 +0000570 }
571 }
572
573 /*
574 * Fallback to HTML or ASCII when the encoding is unspecified
575 */
576 if (handler == NULL)
577 handler = xmlFindCharEncodingHandler("HTML");
578 if (handler == NULL)
579 handler = xmlFindCharEncodingHandler("ascii");
580
Daniel Veillardda3fee42008-09-01 13:08:57 +0000581 buf = xmlAllocOutputBufferInternal(handler);
Owen Taylor3473f882001-02-23 17:55:21 +0000582 if (buf == NULL) {
583 *mem = NULL;
584 *size = 0;
585 return;
586 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000587
Daniel Veillardc62efc82011-05-16 16:03:50 +0800588 htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
Rob Richards77b92ff2005-12-20 15:55:14 +0000589
Daniel Veillard2d703722001-05-30 18:32:34 +0000590 xmlOutputBufferFlush(buf);
591 if (buf->conv != NULL) {
592 *size = buf->conv->use;
593 *mem = xmlStrndup(buf->conv->content, *size);
594 } else {
595 *size = buf->buffer->use;
596 *mem = xmlStrndup(buf->buffer->content, *size);
597 }
598 (void)xmlOutputBufferClose(buf);
Owen Taylor3473f882001-02-23 17:55:21 +0000599}
600
Rob Richards77b92ff2005-12-20 15:55:14 +0000601/**
602 * htmlDocDumpMemory:
603 * @cur: the document
604 * @mem: OUT: the memory pointer
605 * @size: OUT: the memory length
606 *
607 * Dump an HTML document in memory and return the xmlChar * and it's size.
608 * It's up to the caller to free the memory.
609 */
610void
611htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
612 htmlDocDumpMemoryFormat(cur, mem, size, 1);
613}
614
Owen Taylor3473f882001-02-23 17:55:21 +0000615
/************************************************************************
 *                                                                      *
 *              Dumping HTML tree content to an I/O output buffer       *
 *                                                                      *
 ************************************************************************/
621
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000622void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
Daniel Veillardc084e472002-08-12 13:27:28 +0000623
Owen Taylor3473f882001-02-23 17:55:21 +0000624/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000625 * htmlDtdDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000626 * @buf: the HTML buffer output
627 * @doc: the document
628 * @encoding: the encoding string
629 *
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000630 * TODO: check whether encoding is needed
631 *
Owen Taylor3473f882001-02-23 17:55:21 +0000632 * Dump the HTML document DTD, if any.
633 */
634static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000635htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000636 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000637 xmlDtdPtr cur = doc->intSubset;
638
639 if (cur == NULL) {
Daniel Veillarde2238d52003-10-09 13:14:55 +0000640 htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000641 return;
642 }
643 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
644 xmlOutputBufferWriteString(buf, (const char *)cur->name);
645 if (cur->ExternalID != NULL) {
646 xmlOutputBufferWriteString(buf, " PUBLIC ");
647 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
648 if (cur->SystemID != NULL) {
649 xmlOutputBufferWriteString(buf, " ");
650 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
651 }
652 } else if (cur->SystemID != NULL) {
653 xmlOutputBufferWriteString(buf, " SYSTEM ");
654 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
655 }
656 xmlOutputBufferWriteString(buf, ">\n");
657}
658
659/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000660 * htmlAttrDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000661 * @buf: the HTML buffer output
662 * @doc: the document
663 * @cur: the attribute pointer
664 * @encoding: the encoding string
665 *
666 * Dump an HTML attribute
667 */
668static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000669htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000670 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000671 xmlChar *value;
672
Daniel Veillardeca60d02001-06-13 07:45:41 +0000673 /*
674 * TODO: The html output method should not escape a & character
675 * occurring in an attribute value immediately followed by
676 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
677 */
678
Owen Taylor3473f882001-02-23 17:55:21 +0000679 if (cur == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000680 return;
681 }
682 xmlOutputBufferWriteString(buf, " ");
William M. Brack3a6da762003-09-15 04:58:14 +0000683 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
684 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
685 xmlOutputBufferWriteString(buf, ":");
686 }
Owen Taylor3473f882001-02-23 17:55:21 +0000687 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillardc084e472002-08-12 13:27:28 +0000688 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000689 value = xmlNodeListGetString(doc, cur->children, 0);
690 if (value) {
691 xmlOutputBufferWriteString(buf, "=");
Daniel Veillardc7e9b192003-03-27 14:08:24 +0000692 if ((cur->ns == NULL) && (cur->parent != NULL) &&
693 (cur->parent->ns == NULL) &&
694 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
695 (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
Daniel Veillardaa9a9832005-03-29 20:30:17 +0000696 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
697 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
698 (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
Daniel Veillardeb475a32002-04-14 22:00:22 +0000699 xmlChar *escaped;
700 xmlChar *tmp = value;
701
William M. Brack76e95df2003-10-18 16:20:14 +0000702 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillardeb475a32002-04-14 22:00:22 +0000703
Daniel Veillard5f5b7bb2003-05-16 17:19:40 +0000704 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
Daniel Veillardeb475a32002-04-14 22:00:22 +0000705 if (escaped != NULL) {
706 xmlBufferWriteQuotedString(buf->buffer, escaped);
707 xmlFree(escaped);
708 } else {
709 xmlBufferWriteQuotedString(buf->buffer, value);
710 }
711 } else {
712 xmlBufferWriteQuotedString(buf->buffer, value);
713 }
Owen Taylor3473f882001-02-23 17:55:21 +0000714 xmlFree(value);
715 } else {
716 xmlOutputBufferWriteString(buf, "=\"\"");
717 }
718 }
719}
720
721/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000722 * htmlAttrListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000723 * @buf: the HTML buffer output
724 * @doc: the document
725 * @cur: the first attribute pointer
726 * @encoding: the encoding string
727 *
728 * Dump a list of HTML attributes
729 */
730static void
731htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
732 if (cur == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000733 return;
734 }
735 while (cur != NULL) {
736 htmlAttrDumpOutput(buf, doc, cur, encoding);
737 cur = cur->next;
738 }
739}
740
741
Owen Taylor3473f882001-02-23 17:55:21 +0000742
743/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000744 * htmlNodeListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000745 * @buf: the HTML buffer output
746 * @doc: the document
747 * @cur: the first node
748 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000749 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000750 *
751 * Dump an HTML node list, recursive behaviour,children are printed too.
752 */
753static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000754htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
755 xmlNodePtr cur, const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000756 if (cur == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000757 return;
758 }
759 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000760 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000761 cur = cur->next;
762 }
763}
764
765/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000766 * htmlNodeDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000767 * @buf: the HTML buffer output
768 * @doc: the document
769 * @cur: the current node
770 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000771 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000772 *
773 * Dump an HTML node, recursive behaviour,children are printed too.
774 */
775void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000776htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
777 xmlNodePtr cur, const char *encoding, int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000778 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000779
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000780 xmlInitParser();
781
Daniel Veillardce244ad2004-11-05 10:03:46 +0000782 if ((cur == NULL) || (buf == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000783 return;
784 }
785 /*
786 * Special cases.
787 */
788 if (cur->type == XML_DTD_NODE)
789 return;
Daniel Veillardce244ad2004-11-05 10:03:46 +0000790 if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
791 (cur->type == XML_DOCUMENT_NODE)){
Owen Taylor3473f882001-02-23 17:55:21 +0000792 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
793 return;
794 }
Daniel Veillardfcd02ad2007-06-12 09:49:40 +0000795 if (cur->type == XML_ATTRIBUTE_NODE) {
796 htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
797 return;
798 }
Owen Taylor3473f882001-02-23 17:55:21 +0000799 if (cur->type == HTML_TEXT_NODE) {
800 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000801 if (((cur->name == (const xmlChar *)xmlStringText) ||
802 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000803 ((cur->parent == NULL) ||
Daniel Veillard44892f72002-10-16 15:23:26 +0000804 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
805 (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000806 xmlChar *buffer;
807
Owen Taylor3473f882001-02-23 17:55:21 +0000808 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000809 if (buffer != NULL) {
810 xmlOutputBufferWriteString(buf, (const char *)buffer);
811 xmlFree(buffer);
812 }
813 } else {
814 xmlOutputBufferWriteString(buf, (const char *)cur->content);
815 }
816 }
817 return;
818 }
819 if (cur->type == HTML_COMMENT_NODE) {
820 if (cur->content != NULL) {
821 xmlOutputBufferWriteString(buf, "<!--");
Owen Taylor3473f882001-02-23 17:55:21 +0000822 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000823 xmlOutputBufferWriteString(buf, "-->");
824 }
825 return;
826 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000827 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000828 if (cur->name == NULL)
829 return;
830 xmlOutputBufferWriteString(buf, "<?");
831 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000832 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000833 xmlOutputBufferWriteString(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000834 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000835 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000836 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000837 return;
838 }
Owen Taylor3473f882001-02-23 17:55:21 +0000839 if (cur->type == HTML_ENTITY_REF_NODE) {
840 xmlOutputBufferWriteString(buf, "&");
841 xmlOutputBufferWriteString(buf, (const char *)cur->name);
842 xmlOutputBufferWriteString(buf, ";");
843 return;
844 }
845 if (cur->type == HTML_PRESERVE_NODE) {
846 if (cur->content != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000847 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000848 }
849 return;
850 }
851
852 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000853 * Get specific HTML info for that node.
Owen Taylor3473f882001-02-23 17:55:21 +0000854 */
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000855 if (cur->ns == NULL)
856 info = htmlTagLookup(cur->name);
857 else
858 info = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000859
860 xmlOutputBufferWriteString(buf, "<");
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000861 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
862 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
863 xmlOutputBufferWriteString(buf, ":");
864 }
Owen Taylor3473f882001-02-23 17:55:21 +0000865 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000866 if (cur->nsDef)
867 xmlNsListDumpOutput(buf, cur->nsDef);
Owen Taylor3473f882001-02-23 17:55:21 +0000868 if (cur->properties != NULL)
869 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
870
871 if ((info != NULL) && (info->empty)) {
872 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000873 if ((format) && (!info->isinline) && (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000874 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +0000875 (cur->next->type != HTML_ENTITY_REF_NODE) &&
876 (cur->parent != NULL) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000877 (cur->parent->name != NULL) &&
878 (cur->parent->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000879 xmlOutputBufferWriteString(buf, "\n");
880 }
881 return;
882 }
Daniel Veillard7db37732001-07-12 01:20:08 +0000883 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
884 (cur->children == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000885 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +0000886 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
887 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000888 xmlOutputBufferWriteString(buf, ">");
889 } else {
890 xmlOutputBufferWriteString(buf, "></");
Daniel Veillard645c6902003-04-10 21:40:49 +0000891 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
892 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
893 xmlOutputBufferWriteString(buf, ":");
894 }
Owen Taylor3473f882001-02-23 17:55:21 +0000895 xmlOutputBufferWriteString(buf, (const char *)cur->name);
896 xmlOutputBufferWriteString(buf, ">");
897 }
Daniel Veillard02bb1702001-06-13 21:11:59 +0000898 if ((format) && (cur->next != NULL) &&
899 (info != NULL) && (!info->isinline)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000900 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +0000901 (cur->next->type != HTML_ENTITY_REF_NODE) &&
902 (cur->parent != NULL) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000903 (cur->parent->name != NULL) &&
904 (cur->parent->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000905 xmlOutputBufferWriteString(buf, "\n");
906 }
907 return;
908 }
909 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +0000910 if ((cur->type != XML_ELEMENT_NODE) &&
911 (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000912 /*
913 * Uses the OutputBuffer property to automatically convert
914 * invalids to charrefs
915 */
916
Owen Taylor3473f882001-02-23 17:55:21 +0000917 xmlOutputBufferWriteString(buf, (const char *) cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000918 }
919 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +0000920 if ((format) && (info != NULL) && (!info->isinline) &&
921 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000922 (cur->children->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +0000923 (cur->children != cur->last) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000924 (cur->name != NULL) &&
925 (cur->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000926 xmlOutputBufferWriteString(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +0000927 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +0000928 if ((format) && (info != NULL) && (!info->isinline) &&
929 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000930 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +0000931 (cur->children != cur->last) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000932 (cur->name != NULL) &&
933 (cur->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000934 xmlOutputBufferWriteString(buf, "\n");
935 }
Owen Taylor3473f882001-02-23 17:55:21 +0000936 xmlOutputBufferWriteString(buf, "</");
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000937 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
938 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
939 xmlOutputBufferWriteString(buf, ":");
940 }
Owen Taylor3473f882001-02-23 17:55:21 +0000941 xmlOutputBufferWriteString(buf, (const char *)cur->name);
942 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000943 if ((format) && (info != NULL) && (!info->isinline) &&
944 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000945 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +0000946 (cur->next->type != HTML_ENTITY_REF_NODE) &&
947 (cur->parent != NULL) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000948 (cur->parent->name != NULL) &&
949 (cur->parent->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000950 xmlOutputBufferWriteString(buf, "\n");
951 }
952}
953
954/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000955 * htmlNodeDumpOutput:
956 * @buf: the HTML buffer output
957 * @doc: the document
958 * @cur: the current node
959 * @encoding: the encoding string
960 *
961 * Dump an HTML node, recursive behaviour,children are printed too,
962 * and formatting returns/spaces are added.
963 */
964void
965htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
966 xmlNodePtr cur, const char *encoding) {
967 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
968}
969
970/**
971 * htmlDocContentDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000972 * @buf: the HTML buffer output
973 * @cur: the document
974 * @encoding: the encoding string
Daniel Veillard9d06d302002-01-22 18:15:52 +0000975 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000976 *
977 * Dump an HTML document.
978 */
979void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000980htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
981 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000982 int type;
983
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000984 xmlInitParser();
985
Daniel Veillard3d97e662004-11-04 10:49:00 +0000986 if ((buf == NULL) || (cur == NULL))
987 return;
988
Owen Taylor3473f882001-02-23 17:55:21 +0000989 /*
990 * force to output the stuff as HTML, especially for entities
991 */
992 type = cur->type;
993 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard4dd93462001-04-02 15:16:19 +0000994 if (cur->intSubset != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000995 htmlDtdDumpOutput(buf, cur, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000996 }
997 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000998 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000999 }
1000 xmlOutputBufferWriteString(buf, "\n");
1001 cur->type = (xmlElementType) type;
1002}
1003
Daniel Veillard95d845f2001-06-13 13:48:46 +00001004/**
1005 * htmlDocContentDumpOutput:
1006 * @buf: the HTML buffer output
1007 * @cur: the document
1008 * @encoding: the encoding string
1009 *
1010 * Dump an HTML document. Formating return/spaces are added.
1011 */
1012void
1013htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1014 const char *encoding) {
1015 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1016}
1017
Owen Taylor3473f882001-02-23 17:55:21 +00001018/************************************************************************
1019 * *
1020 * Saving functions front-ends *
1021 * *
1022 ************************************************************************/
1023
1024/**
1025 * htmlDocDump:
1026 * @f: the FILE*
1027 * @cur: the document
1028 *
1029 * Dump an HTML document to an open FILE.
1030 *
1031 * returns: the number of byte written or -1 in case of failure.
1032 */
1033int
1034htmlDocDump(FILE *f, xmlDocPtr cur) {
1035 xmlOutputBufferPtr buf;
1036 xmlCharEncodingHandlerPtr handler = NULL;
1037 const char *encoding;
1038 int ret;
1039
Daniel Veillard70bcb0e2003-08-08 14:00:28 +00001040 xmlInitParser();
1041
Daniel Veillard3d97e662004-11-04 10:49:00 +00001042 if ((cur == NULL) || (f == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001043 return(-1);
1044 }
1045
1046 encoding = (const char *) htmlGetMetaEncoding(cur);
1047
1048 if (encoding != NULL) {
1049 xmlCharEncoding enc;
1050
1051 enc = xmlParseCharEncoding(encoding);
1052 if (enc != cur->charset) {
1053 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1054 /*
1055 * Not supported yet
1056 */
1057 return(-1);
1058 }
1059
1060 handler = xmlFindCharEncodingHandler(encoding);
1061 if (handler == NULL)
Daniel Veillardc62efc82011-05-16 16:03:50 +08001062 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
Daniel Veillardb8c80162005-08-08 13:46:45 +00001063 } else {
1064 handler = xmlFindCharEncodingHandler(encoding);
Owen Taylor3473f882001-02-23 17:55:21 +00001065 }
1066 }
1067
1068 /*
1069 * Fallback to HTML or ASCII when the encoding is unspecified
1070 */
1071 if (handler == NULL)
1072 handler = xmlFindCharEncodingHandler("HTML");
1073 if (handler == NULL)
1074 handler = xmlFindCharEncodingHandler("ascii");
1075
1076 buf = xmlOutputBufferCreateFile(f, handler);
1077 if (buf == NULL) return(-1);
1078 htmlDocContentDumpOutput(buf, cur, NULL);
1079
1080 ret = xmlOutputBufferClose(buf);
1081 return(ret);
1082}
1083
1084/**
1085 * htmlSaveFile:
1086 * @filename: the filename (or URL)
1087 * @cur: the document
1088 *
1089 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1090 * used.
1091 * returns: the number of byte written or -1 in case of failure.
1092 */
1093int
1094htmlSaveFile(const char *filename, xmlDocPtr cur) {
1095 xmlOutputBufferPtr buf;
1096 xmlCharEncodingHandlerPtr handler = NULL;
1097 const char *encoding;
1098 int ret;
1099
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001100 if ((cur == NULL) || (filename == NULL))
1101 return(-1);
1102
Daniel Veillard70bcb0e2003-08-08 14:00:28 +00001103 xmlInitParser();
1104
Owen Taylor3473f882001-02-23 17:55:21 +00001105 encoding = (const char *) htmlGetMetaEncoding(cur);
1106
1107 if (encoding != NULL) {
1108 xmlCharEncoding enc;
1109
1110 enc = xmlParseCharEncoding(encoding);
1111 if (enc != cur->charset) {
1112 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1113 /*
1114 * Not supported yet
1115 */
1116 return(-1);
1117 }
1118
1119 handler = xmlFindCharEncodingHandler(encoding);
1120 if (handler == NULL)
Daniel Veillardc62efc82011-05-16 16:03:50 +08001121 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
Owen Taylor3473f882001-02-23 17:55:21 +00001122 }
1123 }
1124
1125 /*
1126 * Fallback to HTML or ASCII when the encoding is unspecified
1127 */
1128 if (handler == NULL)
1129 handler = xmlFindCharEncodingHandler("HTML");
1130 if (handler == NULL)
1131 handler = xmlFindCharEncodingHandler("ascii");
1132
1133 /*
1134 * save the content to a temp buffer.
1135 */
1136 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1137 if (buf == NULL) return(0);
1138
1139 htmlDocContentDumpOutput(buf, cur, NULL);
1140
1141 ret = xmlOutputBufferClose(buf);
1142 return(ret);
1143}
1144
1145/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001146 * htmlSaveFileFormat:
Owen Taylor3473f882001-02-23 17:55:21 +00001147 * @filename: the filename
1148 * @cur: the document
Daniel Veillard95d845f2001-06-13 13:48:46 +00001149 * @format: should formatting spaces been added
1150 * @encoding: the document encoding
Owen Taylor3473f882001-02-23 17:55:21 +00001151 *
1152 * Dump an HTML document to a file using a given encoding.
1153 *
1154 * returns: the number of byte written or -1 in case of failure.
1155 */
1156int
Daniel Veillard95d845f2001-06-13 13:48:46 +00001157htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1158 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001159 xmlOutputBufferPtr buf;
1160 xmlCharEncodingHandlerPtr handler = NULL;
1161 int ret;
1162
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001163 if ((cur == NULL) || (filename == NULL))
1164 return(-1);
Daniel Veillard8d7c1b72009-08-12 23:03:23 +02001165
Daniel Veillard70bcb0e2003-08-08 14:00:28 +00001166 xmlInitParser();
1167
Owen Taylor3473f882001-02-23 17:55:21 +00001168 if (encoding != NULL) {
1169 xmlCharEncoding enc;
1170
1171 enc = xmlParseCharEncoding(encoding);
1172 if (enc != cur->charset) {
1173 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1174 /*
1175 * Not supported yet
1176 */
1177 return(-1);
1178 }
1179
1180 handler = xmlFindCharEncodingHandler(encoding);
1181 if (handler == NULL)
Daniel Veillardc62efc82011-05-16 16:03:50 +08001182 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
Owen Taylor3473f882001-02-23 17:55:21 +00001183 }
Daniel Veillard8d7c1b72009-08-12 23:03:23 +02001184 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
Daniel Veillard4dd93462001-04-02 15:16:19 +00001185 } else {
1186 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
Owen Taylor3473f882001-02-23 17:55:21 +00001187 }
1188
1189 /*
1190 * Fallback to HTML or ASCII when the encoding is unspecified
1191 */
1192 if (handler == NULL)
1193 handler = xmlFindCharEncodingHandler("HTML");
1194 if (handler == NULL)
1195 handler = xmlFindCharEncodingHandler("ascii");
1196
1197 /*
1198 * save the content to a temp buffer.
1199 */
1200 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1201 if (buf == NULL) return(0);
1202
Daniel Veillard95d845f2001-06-13 13:48:46 +00001203 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001204
1205 ret = xmlOutputBufferClose(buf);
1206 return(ret);
1207}
Daniel Veillard95d845f2001-06-13 13:48:46 +00001208
1209/**
1210 * htmlSaveFileEnc:
1211 * @filename: the filename
1212 * @cur: the document
1213 * @encoding: the document encoding
1214 *
1215 * Dump an HTML document to a file using a given encoding
1216 * and formatting returns/spaces are added.
1217 *
1218 * returns: the number of byte written or -1 in case of failure.
1219 */
1220int
1221htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1222 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1223}
1224
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001225#endif /* LIBXML_OUTPUT_ENABLED */
Daniel Veillardc084e472002-08-12 13:27:28 +00001226
Daniel Veillard5d4644e2005-04-01 13:11:58 +00001227#define bottom_HTMLtree
1228#include "elfgcchack.h"
Owen Taylor3473f882001-02-23 17:55:21 +00001229#endif /* LIBXML_HTML_ENABLED */