blob: 2fd0c9c567188902b13c3258512b0ead3546d40b [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002 * HTMLtree.c : implementation of access function for an HTML tree.
Owen Taylor3473f882001-02-23 17:55:21 +00003 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00006 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00007 */
8
9
Daniel Veillard34ce8be2002-03-18 19:37:11 +000010#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000012#ifdef LIBXML_HTML_ENABLED
13
Daniel Veillard8db67d22002-11-27 19:39:27 +000014#include <string.h> /* for memset() only ! */
15
Owen Taylor3473f882001-02-23 17:55:21 +000016#ifdef HAVE_CTYPE_H
17#include <ctype.h>
18#endif
19#ifdef HAVE_STDLIB_H
20#include <stdlib.h>
21#endif
22
23#include <libxml/xmlmemory.h>
24#include <libxml/HTMLparser.h>
25#include <libxml/HTMLtree.h>
26#include <libxml/entities.h>
27#include <libxml/valid.h>
28#include <libxml/xmlerror.h>
29#include <libxml/parserInternals.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000030#include <libxml/globals.h>
Daniel Veillardeb475a32002-04-14 22:00:22 +000031#include <libxml/uri.h>
Owen Taylor3473f882001-02-23 17:55:21 +000032
Daniel Veillard7b9b0712012-07-16 14:58:02 +080033#include "buf.h"
34
Owen Taylor3473f882001-02-23 17:55:21 +000035/************************************************************************
36 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080037 * Getting/Setting encoding meta tags *
Owen Taylor3473f882001-02-23 17:55:21 +000038 * *
39 ************************************************************************/
40
41/**
42 * htmlGetMetaEncoding:
43 * @doc: the document
Daniel Veillardf8e3db02012-09-11 13:26:36 +080044 *
Owen Taylor3473f882001-02-23 17:55:21 +000045 * Encoding definition lookup in the Meta tags
46 *
47 * Returns the current encoding as flagged in the HTML source
48 */
49const xmlChar *
50htmlGetMetaEncoding(htmlDocPtr doc) {
51 htmlNodePtr cur;
52 const xmlChar *content;
53 const xmlChar *encoding;
54
55 if (doc == NULL)
56 return(NULL);
57 cur = doc->children;
58
59 /*
60 * Search the html
61 */
62 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000063 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000064 if (xmlStrEqual(cur->name, BAD_CAST"html"))
65 break;
66 if (xmlStrEqual(cur->name, BAD_CAST"head"))
67 goto found_head;
68 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
69 goto found_meta;
70 }
71 cur = cur->next;
72 }
73 if (cur == NULL)
74 return(NULL);
75 cur = cur->children;
76
77 /*
78 * Search the head
79 */
80 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000081 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000082 if (xmlStrEqual(cur->name, BAD_CAST"head"))
83 break;
84 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
85 goto found_meta;
86 }
87 cur = cur->next;
88 }
89 if (cur == NULL)
90 return(NULL);
91found_head:
92 cur = cur->children;
93
94 /*
95 * Search the meta elements
96 */
97found_meta:
98 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000099 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000100 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
101 xmlAttrPtr attr = cur->properties;
102 int http;
103 const xmlChar *value;
104
105 content = NULL;
106 http = 0;
107 while (attr != NULL) {
108 if ((attr->children != NULL) &&
109 (attr->children->type == XML_TEXT_NODE) &&
110 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000111 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000112 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
113 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
114 http = 1;
115 else if ((value != NULL)
116 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
117 content = value;
118 if ((http != 0) && (content != NULL))
119 goto found_content;
120 }
121 attr = attr->next;
122 }
123 }
124 }
125 cur = cur->next;
126 }
127 return(NULL);
128
129found_content:
130 encoding = xmlStrstr(content, BAD_CAST"charset=");
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800131 if (encoding == NULL)
Owen Taylor3473f882001-02-23 17:55:21 +0000132 encoding = xmlStrstr(content, BAD_CAST"Charset=");
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800133 if (encoding == NULL)
Owen Taylor3473f882001-02-23 17:55:21 +0000134 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
135 if (encoding != NULL) {
136 encoding += 8;
137 } else {
138 encoding = xmlStrstr(content, BAD_CAST"charset =");
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800139 if (encoding == NULL)
Owen Taylor3473f882001-02-23 17:55:21 +0000140 encoding = xmlStrstr(content, BAD_CAST"Charset =");
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800141 if (encoding == NULL)
Owen Taylor3473f882001-02-23 17:55:21 +0000142 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
143 if (encoding != NULL)
144 encoding += 9;
145 }
146 if (encoding != NULL) {
147 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
148 }
149 return(encoding);
150}
151
152/**
153 * htmlSetMetaEncoding:
154 * @doc: the document
155 * @encoding: the encoding string
Daniel Veillard39d027c2012-05-11 12:38:23 +0800156 *
Owen Taylor3473f882001-02-23 17:55:21 +0000157 * Sets the current encoding in the Meta tags
158 * NOTE: this will not change the document content encoding, just
159 * the META flag associated.
160 *
161 * Returns 0 in case of success and -1 in case of error
162 */
163int
164htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200165 htmlNodePtr cur, meta = NULL, head = NULL;
166 const xmlChar *content = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000167 char newcontent[100];
168
Daniel Veillard39d027c2012-05-11 12:38:23 +0800169 newcontent[0] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000170
171 if (doc == NULL)
172 return(-1);
173
Daniel Veillard74eb54b2009-08-12 15:59:01 +0200174 /* html isn't a real encoding it's just libxml2 way to get entities */
175 if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
176 return(-1);
177
Owen Taylor3473f882001-02-23 17:55:21 +0000178 if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000179 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
William M. Brack13dfa872004-09-18 04:52:08 +0000180 (char *)encoding);
Owen Taylor3473f882001-02-23 17:55:21 +0000181 newcontent[sizeof(newcontent) - 1] = 0;
182 }
183
184 cur = doc->children;
185
186 /*
187 * Search the html
188 */
189 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000190 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000191 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
192 break;
193 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
194 goto found_head;
195 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
196 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000197 }
198 cur = cur->next;
199 }
200 if (cur == NULL)
201 return(-1);
202 cur = cur->children;
203
204 /*
205 * Search the head
206 */
207 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000208 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000209 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
210 break;
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200211 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
212 head = cur->parent;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000213 goto found_meta;
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200214 }
Owen Taylor3473f882001-02-23 17:55:21 +0000215 }
216 cur = cur->next;
217 }
218 if (cur == NULL)
219 return(-1);
220found_head:
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200221 head = cur;
222 if (cur->children == NULL)
223 goto create;
Owen Taylor3473f882001-02-23 17:55:21 +0000224 cur = cur->children;
225
226found_meta:
Owen Taylor3473f882001-02-23 17:55:21 +0000227 /*
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200228 * Search and update all the remaining the meta elements carrying
Owen Taylor3473f882001-02-23 17:55:21 +0000229 * encoding informations
230 */
231 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000232 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000233 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000234 xmlAttrPtr attr = cur->properties;
235 int http;
236 const xmlChar *value;
237
238 content = NULL;
239 http = 0;
240 while (attr != NULL) {
241 if ((attr->children != NULL) &&
242 (attr->children->type == XML_TEXT_NODE) &&
243 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000244 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000245 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
246 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
247 http = 1;
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200248 else
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000249 {
Daniel Veillard39d027c2012-05-11 12:38:23 +0800250 if ((value != NULL) &&
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200251 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
252 content = value;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000253 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000254 if ((http != 0) && (content != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +0000255 break;
256 }
257 attr = attr->next;
258 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000259 if ((http != 0) && (content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000260 meta = cur;
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200261 break;
Owen Taylor3473f882001-02-23 17:55:21 +0000262 }
263
264 }
265 }
266 cur = cur->next;
267 }
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200268create:
269 if (meta == NULL) {
270 if ((encoding != NULL) && (head != NULL)) {
271 /*
272 * Create a new Meta element with the right attributes
273 */
274
275 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
276 if (head->children == NULL)
277 xmlAddChild(head, meta);
278 else
279 xmlAddPrevSibling(head->children, meta);
280 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
281 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
282 }
283 } else {
Daniel Veillard39d027c2012-05-11 12:38:23 +0800284 /* remove the meta tag if NULL is passed */
285 if (encoding == NULL) {
286 xmlUnlinkNode(meta);
287 xmlFreeNode(meta);
288 }
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200289 /* change the document only if there is a real encoding change */
Daniel Veillard39d027c2012-05-11 12:38:23 +0800290 else if (xmlStrcasestr(content, encoding) == NULL) {
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200291 xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
292 }
293 }
294
295
Owen Taylor3473f882001-02-23 17:55:21 +0000296 return(0);
297}
298
/**
 * htmlBooleanAttrs:
 *
 * NULL terminated list of the HTML attributes which are output
 * in minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 */
static const char* htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL
};
312
313
314/**
315 * htmlIsBooleanAttr:
316 * @name: the name of the attribute to check
317 *
318 * Determine if a given attribute is a boolean attribute.
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800319 *
Daniel Veillardc084e472002-08-12 13:27:28 +0000320 * returns: false if the attribute is not boolean, true otherwise.
321 */
322int
323htmlIsBooleanAttr(const xmlChar *name)
324{
325 int i = 0;
326
327 while (htmlBooleanAttrs[i] != NULL) {
Daniel Veillardabe01742002-09-26 12:40:03 +0000328 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
Daniel Veillardc084e472002-08-12 13:27:28 +0000329 return 1;
330 i++;
331 }
332 return 0;
333}
334
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000335#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardda3fee42008-09-01 13:08:57 +0000336/*
337 * private routine exported from xmlIO.c
338 */
339xmlOutputBufferPtr
340xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
Owen Taylor3473f882001-02-23 17:55:21 +0000341/************************************************************************
342 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800343 * Output error handlers *
Daniel Veillarde2238d52003-10-09 13:14:55 +0000344 * *
345 ************************************************************************/
346/**
347 * htmlSaveErrMemory:
348 * @extra: extra informations
349 *
350 * Handle an out of memory condition
351 */
352static void
353htmlSaveErrMemory(const char *extra)
354{
355 __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
356}
357
358/**
359 * htmlSaveErr:
360 * @code: the error number
361 * @node: the location of the error.
362 * @extra: extra informations
363 *
364 * Handle an out of memory condition
365 */
366static void
367htmlSaveErr(int code, xmlNodePtr node, const char *extra)
368{
369 const char *msg = NULL;
370
371 switch(code) {
372 case XML_SAVE_NOT_UTF8:
Rob Richards417b74d2006-08-15 23:14:24 +0000373 msg = "string is not in UTF-8\n";
Daniel Veillarde2238d52003-10-09 13:14:55 +0000374 break;
375 case XML_SAVE_CHAR_INVALID:
Rob Richards417b74d2006-08-15 23:14:24 +0000376 msg = "invalid character value\n";
Daniel Veillarde2238d52003-10-09 13:14:55 +0000377 break;
378 case XML_SAVE_UNKNOWN_ENCODING:
Rob Richards417b74d2006-08-15 23:14:24 +0000379 msg = "unknown encoding %s\n";
Daniel Veillarde2238d52003-10-09 13:14:55 +0000380 break;
381 case XML_SAVE_NO_DOCTYPE:
Rob Richards417b74d2006-08-15 23:14:24 +0000382 msg = "HTML has no DOCTYPE\n";
Daniel Veillarde2238d52003-10-09 13:14:55 +0000383 break;
384 default:
Rob Richards417b74d2006-08-15 23:14:24 +0000385 msg = "unexpected error number\n";
Daniel Veillarde2238d52003-10-09 13:14:55 +0000386 }
387 __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
388}
389
390/************************************************************************
391 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800392 * Dumping HTML tree content to a simple buffer *
Owen Taylor3473f882001-02-23 17:55:21 +0000393 * *
394 ************************************************************************/
395
Owen Taylor3473f882001-02-23 17:55:21 +0000396/**
Daniel Veillard7b9b0712012-07-16 14:58:02 +0800397 * htmlBufNodeDumpFormat:
398 * @buf: the xmlBufPtr output
Owen Taylor3473f882001-02-23 17:55:21 +0000399 * @doc: the document
400 * @cur: the current node
Daniel Veillard95d845f2001-06-13 13:48:46 +0000401 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000402 *
403 * Dump an HTML node, recursive behaviour,children are printed too.
Daniel Veillard8db67d22002-11-27 19:39:27 +0000404 *
405 * Returns the number of byte written or -1 in case of error
Owen Taylor3473f882001-02-23 17:55:21 +0000406 */
Daniel Veillard7b9b0712012-07-16 14:58:02 +0800407static size_t
408htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
Daniel Veillard95d845f2001-06-13 13:48:46 +0000409 int format) {
Daniel Veillard7b9b0712012-07-16 14:58:02 +0800410 size_t use;
Daniel Veillard8db67d22002-11-27 19:39:27 +0000411 int ret;
412 xmlOutputBufferPtr outbuf;
Owen Taylor3473f882001-02-23 17:55:21 +0000413
414 if (cur == NULL) {
Daniel Veillard8db67d22002-11-27 19:39:27 +0000415 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +0000416 }
Daniel Veillard8db67d22002-11-27 19:39:27 +0000417 if (buf == NULL) {
418 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +0000419 }
Daniel Veillard8db67d22002-11-27 19:39:27 +0000420 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
421 if (outbuf == NULL) {
Daniel Veillarde2238d52003-10-09 13:14:55 +0000422 htmlSaveErrMemory("allocating HTML output buffer");
Daniel Veillard8db67d22002-11-27 19:39:27 +0000423 return (-1);
424 }
425 memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
426 outbuf->buffer = buf;
427 outbuf->encoder = NULL;
428 outbuf->writecallback = NULL;
429 outbuf->closecallback = NULL;
430 outbuf->context = NULL;
431 outbuf->written = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000432
Daniel Veillard7b9b0712012-07-16 14:58:02 +0800433 use = xmlBufUse(buf);
Daniel Veillard8db67d22002-11-27 19:39:27 +0000434 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
435 xmlFree(outbuf);
Daniel Veillard7b9b0712012-07-16 14:58:02 +0800436 ret = xmlBufUse(buf) - use;
Daniel Veillard8db67d22002-11-27 19:39:27 +0000437 return (ret);
Owen Taylor3473f882001-02-23 17:55:21 +0000438}
439
440/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000441 * htmlNodeDump:
442 * @buf: the HTML buffer output
443 * @doc: the document
444 * @cur: the current node
445 *
446 * Dump an HTML node, recursive behaviour,children are printed too,
447 * and formatting returns are added.
Daniel Veillard8db67d22002-11-27 19:39:27 +0000448 *
449 * Returns the number of byte written or -1 in case of error
Daniel Veillard95d845f2001-06-13 13:48:46 +0000450 */
Daniel Veillard8db67d22002-11-27 19:39:27 +0000451int
Daniel Veillard95d845f2001-06-13 13:48:46 +0000452htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard7b9b0712012-07-16 14:58:02 +0800453 xmlBufPtr buffer;
454 size_t ret;
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000455
Daniel Veillard7b9b0712012-07-16 14:58:02 +0800456 if ((buf == NULL) || (cur == NULL))
457 return(-1);
458
459 xmlInitParser();
460 buffer = xmlBufFromBuffer(buf);
461 if (buffer == NULL)
462 return(-1);
463
464 ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1);
465
466 xmlBufBackToBuffer(buffer);
467
468 if (ret > INT_MAX)
469 return(-1);
470 return((int) ret);
Daniel Veillard95d845f2001-06-13 13:48:46 +0000471}
472
473/**
474 * htmlNodeDumpFileFormat:
475 * @out: the FILE pointer
476 * @doc: the document
477 * @cur: the current node
478 * @encoding: the document encoding
479 * @format: should formatting spaces been added
480 *
481 * Dump an HTML node, recursive behaviour,children are printed too.
482 *
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000483 * TODO: if encoding == NULL try to save in the doc encoding
484 *
485 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard95d845f2001-06-13 13:48:46 +0000486 */
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000487int
488htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
489 xmlNodePtr cur, const char *encoding, int format) {
490 xmlOutputBufferPtr buf;
491 xmlCharEncodingHandlerPtr handler = NULL;
492 int ret;
Daniel Veillard95d845f2001-06-13 13:48:46 +0000493
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000494 xmlInitParser();
495
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000496 if (encoding != NULL) {
497 xmlCharEncoding enc;
498
499 enc = xmlParseCharEncoding(encoding);
500 if (enc != XML_CHAR_ENCODING_UTF8) {
501 handler = xmlFindCharEncodingHandler(encoding);
502 if (handler == NULL)
Daniel Veillardc62efc82011-05-16 16:03:50 +0800503 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000504 }
505 }
506
507 /*
508 * Fallback to HTML or ASCII when the encoding is unspecified
509 */
510 if (handler == NULL)
511 handler = xmlFindCharEncodingHandler("HTML");
512 if (handler == NULL)
513 handler = xmlFindCharEncodingHandler("ascii");
514
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800515 /*
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000516 * save the content to a temp buffer.
517 */
518 buf = xmlOutputBufferCreateFile(out, handler);
519 if (buf == NULL) return(0);
520
521 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
522
523 ret = xmlOutputBufferClose(buf);
524 return(ret);
Daniel Veillard95d845f2001-06-13 13:48:46 +0000525}
526
527/**
Owen Taylor3473f882001-02-23 17:55:21 +0000528 * htmlNodeDumpFile:
529 * @out: the FILE pointer
530 * @doc: the document
531 * @cur: the current node
532 *
Daniel Veillard95d845f2001-06-13 13:48:46 +0000533 * Dump an HTML node, recursive behaviour,children are printed too,
534 * and formatting returns are added.
Owen Taylor3473f882001-02-23 17:55:21 +0000535 */
536void
537htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000538 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000539}
540
541/**
Rob Richards77b92ff2005-12-20 15:55:14 +0000542 * htmlDocDumpMemoryFormat:
Owen Taylor3473f882001-02-23 17:55:21 +0000543 * @cur: the document
544 * @mem: OUT: the memory pointer
Daniel Veillard2d703722001-05-30 18:32:34 +0000545 * @size: OUT: the memory length
Rob Richards77b92ff2005-12-20 15:55:14 +0000546 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000547 *
548 * Dump an HTML document in memory and return the xmlChar * and it's size.
549 * It's up to the caller to free the memory.
550 */
551void
Rob Richards77b92ff2005-12-20 15:55:14 +0000552htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
Daniel Veillard2d703722001-05-30 18:32:34 +0000553 xmlOutputBufferPtr buf;
554 xmlCharEncodingHandlerPtr handler = NULL;
555 const char *encoding;
Owen Taylor3473f882001-02-23 17:55:21 +0000556
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000557 xmlInitParser();
558
Daniel Veillardd5cc0f72004-11-06 19:24:28 +0000559 if ((mem == NULL) || (size == NULL))
560 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000561 if (cur == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000562 *mem = NULL;
563 *size = 0;
564 return;
565 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000566
567 encoding = (const char *) htmlGetMetaEncoding(cur);
568
569 if (encoding != NULL) {
570 xmlCharEncoding enc;
571
572 enc = xmlParseCharEncoding(encoding);
573 if (enc != cur->charset) {
574 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
575 /*
576 * Not supported yet
577 */
578 *mem = NULL;
579 *size = 0;
580 return;
581 }
582
583 handler = xmlFindCharEncodingHandler(encoding);
Daniel Veillardc62efc82011-05-16 16:03:50 +0800584 if (handler == NULL)
585 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
586
Daniel Veillardb8c80162005-08-08 13:46:45 +0000587 } else {
588 handler = xmlFindCharEncodingHandler(encoding);
Daniel Veillard2d703722001-05-30 18:32:34 +0000589 }
590 }
591
592 /*
593 * Fallback to HTML or ASCII when the encoding is unspecified
594 */
595 if (handler == NULL)
596 handler = xmlFindCharEncodingHandler("HTML");
597 if (handler == NULL)
598 handler = xmlFindCharEncodingHandler("ascii");
599
Daniel Veillardda3fee42008-09-01 13:08:57 +0000600 buf = xmlAllocOutputBufferInternal(handler);
Owen Taylor3473f882001-02-23 17:55:21 +0000601 if (buf == NULL) {
602 *mem = NULL;
603 *size = 0;
604 return;
605 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000606
Daniel Veillardc62efc82011-05-16 16:03:50 +0800607 htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
Rob Richards77b92ff2005-12-20 15:55:14 +0000608
Daniel Veillard2d703722001-05-30 18:32:34 +0000609 xmlOutputBufferFlush(buf);
610 if (buf->conv != NULL) {
Daniel Veillard7b9b0712012-07-16 14:58:02 +0800611 *size = xmlBufUse(buf->conv);
612 *mem = xmlStrndup(xmlBufContent(buf->conv), *size);
Daniel Veillard2d703722001-05-30 18:32:34 +0000613 } else {
Daniel Veillard7b9b0712012-07-16 14:58:02 +0800614 *size = xmlBufUse(buf->buffer);
615 *mem = xmlStrndup(xmlBufContent(buf->buffer), *size);
Daniel Veillard2d703722001-05-30 18:32:34 +0000616 }
617 (void)xmlOutputBufferClose(buf);
Owen Taylor3473f882001-02-23 17:55:21 +0000618}
619
Rob Richards77b92ff2005-12-20 15:55:14 +0000620/**
621 * htmlDocDumpMemory:
622 * @cur: the document
623 * @mem: OUT: the memory pointer
624 * @size: OUT: the memory length
625 *
626 * Dump an HTML document in memory and return the xmlChar * and it's size.
627 * It's up to the caller to free the memory.
628 */
629void
630htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
631 htmlDocDumpMemoryFormat(cur, mem, size, 1);
632}
633
Owen Taylor3473f882001-02-23 17:55:21 +0000634
635/************************************************************************
636 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800637 * Dumping HTML tree content to an I/O output buffer *
Owen Taylor3473f882001-02-23 17:55:21 +0000638 * *
639 ************************************************************************/
640
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000641void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
Daniel Veillardc084e472002-08-12 13:27:28 +0000642
Owen Taylor3473f882001-02-23 17:55:21 +0000643/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000644 * htmlDtdDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000645 * @buf: the HTML buffer output
646 * @doc: the document
647 * @encoding: the encoding string
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800648 *
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000649 * TODO: check whether encoding is needed
650 *
Owen Taylor3473f882001-02-23 17:55:21 +0000651 * Dump the HTML document DTD, if any.
652 */
653static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000654htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000655 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000656 xmlDtdPtr cur = doc->intSubset;
657
658 if (cur == NULL) {
Daniel Veillarde2238d52003-10-09 13:14:55 +0000659 htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000660 return;
661 }
662 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
663 xmlOutputBufferWriteString(buf, (const char *)cur->name);
664 if (cur->ExternalID != NULL) {
665 xmlOutputBufferWriteString(buf, " PUBLIC ");
Daniel Veillard7b9b0712012-07-16 14:58:02 +0800666 xmlBufWriteQuotedString(buf->buffer, cur->ExternalID);
Owen Taylor3473f882001-02-23 17:55:21 +0000667 if (cur->SystemID != NULL) {
668 xmlOutputBufferWriteString(buf, " ");
Daniel Veillard7b9b0712012-07-16 14:58:02 +0800669 xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800670 }
Shaun McCance7607d9d2015-04-03 22:52:36 +0800671 } else if (cur->SystemID != NULL &&
672 xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat")) {
Owen Taylor3473f882001-02-23 17:55:21 +0000673 xmlOutputBufferWriteString(buf, " SYSTEM ");
Daniel Veillard7b9b0712012-07-16 14:58:02 +0800674 xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +0000675 }
676 xmlOutputBufferWriteString(buf, ">\n");
677}
678
679/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000680 * htmlAttrDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000681 * @buf: the HTML buffer output
682 * @doc: the document
683 * @cur: the attribute pointer
684 * @encoding: the encoding string
685 *
686 * Dump an HTML attribute
687 */
688static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000689htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000690 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000691 xmlChar *value;
692
Daniel Veillardeca60d02001-06-13 07:45:41 +0000693 /*
Daniel Veillard7d4c5292012-09-05 11:45:32 +0800694 * The html output method should not escape a & character
695 * occurring in an attribute value immediately followed by
696 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
697 * This is implemented in xmlEncodeEntitiesReentrant
Daniel Veillardeca60d02001-06-13 07:45:41 +0000698 */
699
Owen Taylor3473f882001-02-23 17:55:21 +0000700 if (cur == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000701 return;
702 }
703 xmlOutputBufferWriteString(buf, " ");
William M. Brack3a6da762003-09-15 04:58:14 +0000704 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
705 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
706 xmlOutputBufferWriteString(buf, ":");
707 }
Owen Taylor3473f882001-02-23 17:55:21 +0000708 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillardc084e472002-08-12 13:27:28 +0000709 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000710 value = xmlNodeListGetString(doc, cur->children, 0);
711 if (value) {
712 xmlOutputBufferWriteString(buf, "=");
Daniel Veillardc7e9b192003-03-27 14:08:24 +0000713 if ((cur->ns == NULL) && (cur->parent != NULL) &&
714 (cur->parent->ns == NULL) &&
715 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
716 (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
Daniel Veillardaa9a9832005-03-29 20:30:17 +0000717 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
718 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
719 (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
Daniel Veillardeb475a32002-04-14 22:00:22 +0000720 xmlChar *tmp = value;
Romain Bondue960f0e22013-04-23 20:44:55 +0800721 /* xmlURIEscapeStr() escapes '"' so it can be safely used. */
722 xmlBufCCat(buf->buffer, "\"");
Daniel Veillardeb475a32002-04-14 22:00:22 +0000723
William M. Brack76e95df2003-10-18 16:20:14 +0000724 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillardeb475a32002-04-14 22:00:22 +0000725
Romain Bondue960f0e22013-04-23 20:44:55 +0800726 /* URI Escape everything, except server side includes. */
727 for ( ; ; ) {
728 xmlChar *escaped;
729 xmlChar endChar;
730 xmlChar *end = NULL;
731 xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "<!--");
732 if (start != NULL) {
733 end = (xmlChar *)xmlStrstr(tmp, BAD_CAST "-->");
734 if (end != NULL) {
735 *start = '\0';
736 }
737 }
738
739 /* Escape the whole string, or until start (set to '\0'). */
740 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
741 if (escaped != NULL) {
742 xmlBufCat(buf->buffer, escaped);
743 xmlFree(escaped);
744 } else {
745 xmlBufCat(buf->buffer, tmp);
746 }
747
748 if (end == NULL) { /* Everything has been written. */
749 break;
750 }
751
752 /* Do not escape anything within server side includes. */
753 *start = '<'; /* Restore the first character of "<!--". */
754 end += 3; /* strlen("-->") */
755 endChar = *end;
756 *end = '\0';
757 xmlBufCat(buf->buffer, start);
758 *end = endChar;
759 tmp = end;
Daniel Veillardeb475a32002-04-14 22:00:22 +0000760 }
Romain Bondue960f0e22013-04-23 20:44:55 +0800761
762 xmlBufCCat(buf->buffer, "\"");
Daniel Veillardeb475a32002-04-14 22:00:22 +0000763 } else {
Daniel Veillard7b9b0712012-07-16 14:58:02 +0800764 xmlBufWriteQuotedString(buf->buffer, value);
Daniel Veillardeb475a32002-04-14 22:00:22 +0000765 }
Owen Taylor3473f882001-02-23 17:55:21 +0000766 xmlFree(value);
767 } else {
768 xmlOutputBufferWriteString(buf, "=\"\"");
769 }
770 }
771}
772
773/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000774 * htmlAttrListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000775 * @buf: the HTML buffer output
776 * @doc: the document
777 * @cur: the first attribute pointer
778 * @encoding: the encoding string
779 *
780 * Dump a list of HTML attributes
781 */
782static void
783htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
784 if (cur == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000785 return;
786 }
787 while (cur != NULL) {
788 htmlAttrDumpOutput(buf, doc, cur, encoding);
789 cur = cur->next;
790 }
791}
792
793
Owen Taylor3473f882001-02-23 17:55:21 +0000794
795/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000796 * htmlNodeListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000797 * @buf: the HTML buffer output
798 * @doc: the document
799 * @cur: the first node
800 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000801 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000802 *
803 * Dump an HTML node list, recursive behaviour,children are printed too.
804 */
805static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000806htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
807 xmlNodePtr cur, const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000808 if (cur == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000809 return;
810 }
811 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000812 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000813 cur = cur->next;
814 }
815}
816
817/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000818 * htmlNodeDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000819 * @buf: the HTML buffer output
820 * @doc: the document
821 * @cur: the current node
822 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000823 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000824 *
825 * Dump an HTML node, recursive behaviour,children are printed too.
826 */
827void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000828htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
829 xmlNodePtr cur, const char *encoding, int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000830 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000831
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000832 xmlInitParser();
833
Daniel Veillardce244ad2004-11-05 10:03:46 +0000834 if ((cur == NULL) || (buf == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000835 return;
836 }
837 /*
838 * Special cases.
839 */
840 if (cur->type == XML_DTD_NODE)
841 return;
Daniel Veillardce244ad2004-11-05 10:03:46 +0000842 if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
843 (cur->type == XML_DOCUMENT_NODE)){
Owen Taylor3473f882001-02-23 17:55:21 +0000844 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
845 return;
846 }
Daniel Veillardfcd02ad2007-06-12 09:49:40 +0000847 if (cur->type == XML_ATTRIBUTE_NODE) {
848 htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
849 return;
850 }
Owen Taylor3473f882001-02-23 17:55:21 +0000851 if (cur->type == HTML_TEXT_NODE) {
852 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000853 if (((cur->name == (const xmlChar *)xmlStringText) ||
854 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000855 ((cur->parent == NULL) ||
Daniel Veillard44892f72002-10-16 15:23:26 +0000856 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
857 (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000858 xmlChar *buffer;
859
Owen Taylor3473f882001-02-23 17:55:21 +0000860 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000861 if (buffer != NULL) {
862 xmlOutputBufferWriteString(buf, (const char *)buffer);
863 xmlFree(buffer);
864 }
865 } else {
866 xmlOutputBufferWriteString(buf, (const char *)cur->content);
867 }
868 }
869 return;
870 }
871 if (cur->type == HTML_COMMENT_NODE) {
872 if (cur->content != NULL) {
873 xmlOutputBufferWriteString(buf, "<!--");
Owen Taylor3473f882001-02-23 17:55:21 +0000874 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000875 xmlOutputBufferWriteString(buf, "-->");
876 }
877 return;
878 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000879 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000880 if (cur->name == NULL)
881 return;
882 xmlOutputBufferWriteString(buf, "<?");
883 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000884 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000885 xmlOutputBufferWriteString(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000886 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000887 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000888 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000889 return;
890 }
Owen Taylor3473f882001-02-23 17:55:21 +0000891 if (cur->type == HTML_ENTITY_REF_NODE) {
892 xmlOutputBufferWriteString(buf, "&");
893 xmlOutputBufferWriteString(buf, (const char *)cur->name);
894 xmlOutputBufferWriteString(buf, ";");
895 return;
896 }
897 if (cur->type == HTML_PRESERVE_NODE) {
898 if (cur->content != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000899 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000900 }
901 return;
902 }
903
904 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000905 * Get specific HTML info for that node.
Owen Taylor3473f882001-02-23 17:55:21 +0000906 */
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000907 if (cur->ns == NULL)
908 info = htmlTagLookup(cur->name);
909 else
910 info = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000911
912 xmlOutputBufferWriteString(buf, "<");
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000913 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
914 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
915 xmlOutputBufferWriteString(buf, ":");
916 }
Owen Taylor3473f882001-02-23 17:55:21 +0000917 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000918 if (cur->nsDef)
919 xmlNsListDumpOutput(buf, cur->nsDef);
Owen Taylor3473f882001-02-23 17:55:21 +0000920 if (cur->properties != NULL)
921 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
922
923 if ((info != NULL) && (info->empty)) {
924 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000925 if ((format) && (!info->isinline) && (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000926 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +0000927 (cur->next->type != HTML_ENTITY_REF_NODE) &&
928 (cur->parent != NULL) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000929 (cur->parent->name != NULL) &&
930 (cur->parent->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000931 xmlOutputBufferWriteString(buf, "\n");
932 }
933 return;
934 }
Daniel Veillard7db37732001-07-12 01:20:08 +0000935 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
936 (cur->children == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000937 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +0000938 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
939 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000940 xmlOutputBufferWriteString(buf, ">");
941 } else {
942 xmlOutputBufferWriteString(buf, "></");
Daniel Veillard645c6902003-04-10 21:40:49 +0000943 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
944 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
945 xmlOutputBufferWriteString(buf, ":");
946 }
Owen Taylor3473f882001-02-23 17:55:21 +0000947 xmlOutputBufferWriteString(buf, (const char *)cur->name);
948 xmlOutputBufferWriteString(buf, ">");
949 }
Daniel Veillard02bb1702001-06-13 21:11:59 +0000950 if ((format) && (cur->next != NULL) &&
951 (info != NULL) && (!info->isinline)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000952 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +0000953 (cur->next->type != HTML_ENTITY_REF_NODE) &&
954 (cur->parent != NULL) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000955 (cur->parent->name != NULL) &&
956 (cur->parent->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000957 xmlOutputBufferWriteString(buf, "\n");
958 }
959 return;
960 }
961 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +0000962 if ((cur->type != XML_ELEMENT_NODE) &&
963 (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000964 /*
965 * Uses the OutputBuffer property to automatically convert
966 * invalids to charrefs
967 */
968
Owen Taylor3473f882001-02-23 17:55:21 +0000969 xmlOutputBufferWriteString(buf, (const char *) cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000970 }
971 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +0000972 if ((format) && (info != NULL) && (!info->isinline) &&
973 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000974 (cur->children->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +0000975 (cur->children != cur->last) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000976 (cur->name != NULL) &&
977 (cur->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000978 xmlOutputBufferWriteString(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +0000979 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +0000980 if ((format) && (info != NULL) && (!info->isinline) &&
981 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000982 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +0000983 (cur->children != cur->last) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000984 (cur->name != NULL) &&
985 (cur->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000986 xmlOutputBufferWriteString(buf, "\n");
987 }
Owen Taylor3473f882001-02-23 17:55:21 +0000988 xmlOutputBufferWriteString(buf, "</");
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000989 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
990 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
991 xmlOutputBufferWriteString(buf, ":");
992 }
Owen Taylor3473f882001-02-23 17:55:21 +0000993 xmlOutputBufferWriteString(buf, (const char *)cur->name);
994 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000995 if ((format) && (info != NULL) && (!info->isinline) &&
996 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000997 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +0000998 (cur->next->type != HTML_ENTITY_REF_NODE) &&
999 (cur->parent != NULL) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +00001000 (cur->parent->name != NULL) &&
1001 (cur->parent->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +00001002 xmlOutputBufferWriteString(buf, "\n");
1003 }
1004}
1005
1006/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001007 * htmlNodeDumpOutput:
1008 * @buf: the HTML buffer output
1009 * @doc: the document
1010 * @cur: the current node
1011 * @encoding: the encoding string
1012 *
1013 * Dump an HTML node, recursive behaviour,children are printed too,
1014 * and formatting returns/spaces are added.
1015 */
1016void
1017htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
1018 xmlNodePtr cur, const char *encoding) {
1019 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
1020}
1021
1022/**
1023 * htmlDocContentDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +00001024 * @buf: the HTML buffer output
1025 * @cur: the document
1026 * @encoding: the encoding string
Daniel Veillard9d06d302002-01-22 18:15:52 +00001027 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +00001028 *
1029 * Dump an HTML document.
1030 */
1031void
Daniel Veillard95d845f2001-06-13 13:48:46 +00001032htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1033 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001034 int type;
1035
Daniel Veillard70bcb0e2003-08-08 14:00:28 +00001036 xmlInitParser();
1037
Daniel Veillard3d97e662004-11-04 10:49:00 +00001038 if ((buf == NULL) || (cur == NULL))
1039 return;
1040
Owen Taylor3473f882001-02-23 17:55:21 +00001041 /*
1042 * force to output the stuff as HTML, especially for entities
1043 */
1044 type = cur->type;
1045 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard4dd93462001-04-02 15:16:19 +00001046 if (cur->intSubset != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001047 htmlDtdDumpOutput(buf, cur, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001048 }
1049 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +00001050 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001051 }
1052 xmlOutputBufferWriteString(buf, "\n");
1053 cur->type = (xmlElementType) type;
1054}
1055
Daniel Veillard95d845f2001-06-13 13:48:46 +00001056/**
1057 * htmlDocContentDumpOutput:
1058 * @buf: the HTML buffer output
1059 * @cur: the document
1060 * @encoding: the encoding string
1061 *
1062 * Dump an HTML document. Formating return/spaces are added.
1063 */
1064void
1065htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1066 const char *encoding) {
1067 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1068}
1069
Owen Taylor3473f882001-02-23 17:55:21 +00001070/************************************************************************
1071 * *
1072 * Saving functions front-ends *
1073 * *
1074 ************************************************************************/
1075
1076/**
1077 * htmlDocDump:
1078 * @f: the FILE*
1079 * @cur: the document
1080 *
1081 * Dump an HTML document to an open FILE.
1082 *
1083 * returns: the number of byte written or -1 in case of failure.
1084 */
1085int
1086htmlDocDump(FILE *f, xmlDocPtr cur) {
1087 xmlOutputBufferPtr buf;
1088 xmlCharEncodingHandlerPtr handler = NULL;
1089 const char *encoding;
1090 int ret;
1091
Daniel Veillard70bcb0e2003-08-08 14:00:28 +00001092 xmlInitParser();
1093
Daniel Veillard3d97e662004-11-04 10:49:00 +00001094 if ((cur == NULL) || (f == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001095 return(-1);
1096 }
1097
1098 encoding = (const char *) htmlGetMetaEncoding(cur);
1099
1100 if (encoding != NULL) {
1101 xmlCharEncoding enc;
1102
1103 enc = xmlParseCharEncoding(encoding);
1104 if (enc != cur->charset) {
1105 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1106 /*
1107 * Not supported yet
1108 */
1109 return(-1);
1110 }
1111
1112 handler = xmlFindCharEncodingHandler(encoding);
1113 if (handler == NULL)
Daniel Veillardc62efc82011-05-16 16:03:50 +08001114 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
Daniel Veillardb8c80162005-08-08 13:46:45 +00001115 } else {
1116 handler = xmlFindCharEncodingHandler(encoding);
Owen Taylor3473f882001-02-23 17:55:21 +00001117 }
1118 }
1119
1120 /*
1121 * Fallback to HTML or ASCII when the encoding is unspecified
1122 */
1123 if (handler == NULL)
1124 handler = xmlFindCharEncodingHandler("HTML");
1125 if (handler == NULL)
1126 handler = xmlFindCharEncodingHandler("ascii");
1127
1128 buf = xmlOutputBufferCreateFile(f, handler);
1129 if (buf == NULL) return(-1);
1130 htmlDocContentDumpOutput(buf, cur, NULL);
1131
1132 ret = xmlOutputBufferClose(buf);
1133 return(ret);
1134}
1135
1136/**
1137 * htmlSaveFile:
1138 * @filename: the filename (or URL)
1139 * @cur: the document
1140 *
1141 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1142 * used.
1143 * returns: the number of byte written or -1 in case of failure.
1144 */
1145int
1146htmlSaveFile(const char *filename, xmlDocPtr cur) {
1147 xmlOutputBufferPtr buf;
1148 xmlCharEncodingHandlerPtr handler = NULL;
1149 const char *encoding;
1150 int ret;
1151
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001152 if ((cur == NULL) || (filename == NULL))
1153 return(-1);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001154
Daniel Veillard70bcb0e2003-08-08 14:00:28 +00001155 xmlInitParser();
1156
Owen Taylor3473f882001-02-23 17:55:21 +00001157 encoding = (const char *) htmlGetMetaEncoding(cur);
1158
1159 if (encoding != NULL) {
1160 xmlCharEncoding enc;
1161
1162 enc = xmlParseCharEncoding(encoding);
1163 if (enc != cur->charset) {
1164 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1165 /*
1166 * Not supported yet
1167 */
1168 return(-1);
1169 }
1170
1171 handler = xmlFindCharEncodingHandler(encoding);
1172 if (handler == NULL)
Daniel Veillardc62efc82011-05-16 16:03:50 +08001173 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
Owen Taylor3473f882001-02-23 17:55:21 +00001174 }
1175 }
1176
1177 /*
1178 * Fallback to HTML or ASCII when the encoding is unspecified
1179 */
1180 if (handler == NULL)
1181 handler = xmlFindCharEncodingHandler("HTML");
1182 if (handler == NULL)
1183 handler = xmlFindCharEncodingHandler("ascii");
1184
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001185 /*
Owen Taylor3473f882001-02-23 17:55:21 +00001186 * save the content to a temp buffer.
1187 */
1188 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1189 if (buf == NULL) return(0);
1190
1191 htmlDocContentDumpOutput(buf, cur, NULL);
1192
1193 ret = xmlOutputBufferClose(buf);
1194 return(ret);
1195}
1196
1197/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001198 * htmlSaveFileFormat:
Owen Taylor3473f882001-02-23 17:55:21 +00001199 * @filename: the filename
1200 * @cur: the document
Daniel Veillard95d845f2001-06-13 13:48:46 +00001201 * @format: should formatting spaces been added
1202 * @encoding: the document encoding
Owen Taylor3473f882001-02-23 17:55:21 +00001203 *
1204 * Dump an HTML document to a file using a given encoding.
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001205 *
Owen Taylor3473f882001-02-23 17:55:21 +00001206 * returns: the number of byte written or -1 in case of failure.
1207 */
1208int
Daniel Veillard95d845f2001-06-13 13:48:46 +00001209htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1210 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001211 xmlOutputBufferPtr buf;
1212 xmlCharEncodingHandlerPtr handler = NULL;
1213 int ret;
1214
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001215 if ((cur == NULL) || (filename == NULL))
1216 return(-1);
Daniel Veillard8d7c1b72009-08-12 23:03:23 +02001217
Daniel Veillard70bcb0e2003-08-08 14:00:28 +00001218 xmlInitParser();
1219
Owen Taylor3473f882001-02-23 17:55:21 +00001220 if (encoding != NULL) {
1221 xmlCharEncoding enc;
1222
1223 enc = xmlParseCharEncoding(encoding);
1224 if (enc != cur->charset) {
1225 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1226 /*
1227 * Not supported yet
1228 */
1229 return(-1);
1230 }
1231
1232 handler = xmlFindCharEncodingHandler(encoding);
1233 if (handler == NULL)
Daniel Veillardc62efc82011-05-16 16:03:50 +08001234 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
Owen Taylor3473f882001-02-23 17:55:21 +00001235 }
Daniel Veillard8d7c1b72009-08-12 23:03:23 +02001236 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
Daniel Veillard4dd93462001-04-02 15:16:19 +00001237 } else {
1238 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
Owen Taylor3473f882001-02-23 17:55:21 +00001239 }
1240
1241 /*
1242 * Fallback to HTML or ASCII when the encoding is unspecified
1243 */
1244 if (handler == NULL)
1245 handler = xmlFindCharEncodingHandler("HTML");
1246 if (handler == NULL)
1247 handler = xmlFindCharEncodingHandler("ascii");
1248
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001249 /*
Owen Taylor3473f882001-02-23 17:55:21 +00001250 * save the content to a temp buffer.
1251 */
1252 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1253 if (buf == NULL) return(0);
1254
Daniel Veillard95d845f2001-06-13 13:48:46 +00001255 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001256
1257 ret = xmlOutputBufferClose(buf);
1258 return(ret);
1259}
Daniel Veillard95d845f2001-06-13 13:48:46 +00001260
1261/**
1262 * htmlSaveFileEnc:
1263 * @filename: the filename
1264 * @cur: the document
1265 * @encoding: the document encoding
1266 *
1267 * Dump an HTML document to a file using a given encoding
1268 * and formatting returns/spaces are added.
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001269 *
Daniel Veillard95d845f2001-06-13 13:48:46 +00001270 * returns: the number of byte written or -1 in case of failure.
1271 */
1272int
1273htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1274 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1275}
1276
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001277#endif /* LIBXML_OUTPUT_ENABLED */
Daniel Veillardc084e472002-08-12 13:27:28 +00001278
Daniel Veillard5d4644e2005-04-01 13:11:58 +00001279#define bottom_HTMLtree
1280#include "elfgcchack.h"
Owen Taylor3473f882001-02-23 17:55:21 +00001281#endif /* LIBXML_HTML_ENABLED */