blob: 5d0893b8792bfda6a67cd8c1325cf99769cd5853 [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access function for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
8
9
Daniel Veillard34ce8be2002-03-18 19:37:11 +000010#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000012#ifdef LIBXML_HTML_ENABLED
13
Daniel Veillard8db67d22002-11-27 19:39:27 +000014#include <string.h> /* for memset() only ! */
15
Owen Taylor3473f882001-02-23 17:55:21 +000016#ifdef HAVE_CTYPE_H
17#include <ctype.h>
18#endif
19#ifdef HAVE_STDLIB_H
20#include <stdlib.h>
21#endif
22
23#include <libxml/xmlmemory.h>
24#include <libxml/HTMLparser.h>
25#include <libxml/HTMLtree.h>
26#include <libxml/entities.h>
27#include <libxml/valid.h>
28#include <libxml/xmlerror.h>
29#include <libxml/parserInternals.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000030#include <libxml/globals.h>
Daniel Veillardeb475a32002-04-14 22:00:22 +000031#include <libxml/uri.h>
Owen Taylor3473f882001-02-23 17:55:21 +000032
33/************************************************************************
34 * *
35 * Getting/Setting encoding meta tags *
36 * *
37 ************************************************************************/
38
39/**
40 * htmlGetMetaEncoding:
41 * @doc: the document
42 *
43 * Encoding definition lookup in the Meta tags
44 *
45 * Returns the current encoding as flagged in the HTML source
46 */
47const xmlChar *
48htmlGetMetaEncoding(htmlDocPtr doc) {
49 htmlNodePtr cur;
50 const xmlChar *content;
51 const xmlChar *encoding;
52
53 if (doc == NULL)
54 return(NULL);
55 cur = doc->children;
56
57 /*
58 * Search the html
59 */
60 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000061 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000062 if (xmlStrEqual(cur->name, BAD_CAST"html"))
63 break;
64 if (xmlStrEqual(cur->name, BAD_CAST"head"))
65 goto found_head;
66 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
67 goto found_meta;
68 }
69 cur = cur->next;
70 }
71 if (cur == NULL)
72 return(NULL);
73 cur = cur->children;
74
75 /*
76 * Search the head
77 */
78 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000079 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000080 if (xmlStrEqual(cur->name, BAD_CAST"head"))
81 break;
82 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
83 goto found_meta;
84 }
85 cur = cur->next;
86 }
87 if (cur == NULL)
88 return(NULL);
89found_head:
90 cur = cur->children;
91
92 /*
93 * Search the meta elements
94 */
95found_meta:
96 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000097 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000098 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
99 xmlAttrPtr attr = cur->properties;
100 int http;
101 const xmlChar *value;
102
103 content = NULL;
104 http = 0;
105 while (attr != NULL) {
106 if ((attr->children != NULL) &&
107 (attr->children->type == XML_TEXT_NODE) &&
108 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000109 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000110 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
111 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
112 http = 1;
113 else if ((value != NULL)
114 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
115 content = value;
116 if ((http != 0) && (content != NULL))
117 goto found_content;
118 }
119 attr = attr->next;
120 }
121 }
122 }
123 cur = cur->next;
124 }
125 return(NULL);
126
127found_content:
128 encoding = xmlStrstr(content, BAD_CAST"charset=");
129 if (encoding == NULL)
130 encoding = xmlStrstr(content, BAD_CAST"Charset=");
131 if (encoding == NULL)
132 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
133 if (encoding != NULL) {
134 encoding += 8;
135 } else {
136 encoding = xmlStrstr(content, BAD_CAST"charset =");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"Charset =");
139 if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
141 if (encoding != NULL)
142 encoding += 9;
143 }
144 if (encoding != NULL) {
145 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
146 }
147 return(encoding);
148}
149
150/**
151 * htmlSetMetaEncoding:
152 * @doc: the document
153 * @encoding: the encoding string
Daniel Veillard39d027c2012-05-11 12:38:23 +0800154 *
Owen Taylor3473f882001-02-23 17:55:21 +0000155 * Sets the current encoding in the Meta tags
156 * NOTE: this will not change the document content encoding, just
157 * the META flag associated.
158 *
159 * Returns 0 in case of success and -1 in case of error
160 */
161int
162htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200163 htmlNodePtr cur, meta = NULL, head = NULL;
164 const xmlChar *content = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000165 char newcontent[100];
166
Daniel Veillard39d027c2012-05-11 12:38:23 +0800167 newcontent[0] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000168
169 if (doc == NULL)
170 return(-1);
171
Daniel Veillard74eb54b2009-08-12 15:59:01 +0200172 /* html isn't a real encoding it's just libxml2 way to get entities */
173 if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
174 return(-1);
175
Owen Taylor3473f882001-02-23 17:55:21 +0000176 if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000177 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
William M. Brack13dfa872004-09-18 04:52:08 +0000178 (char *)encoding);
Owen Taylor3473f882001-02-23 17:55:21 +0000179 newcontent[sizeof(newcontent) - 1] = 0;
180 }
181
182 cur = doc->children;
183
184 /*
185 * Search the html
186 */
187 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000188 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000189 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
190 break;
191 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
192 goto found_head;
193 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
194 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000195 }
196 cur = cur->next;
197 }
198 if (cur == NULL)
199 return(-1);
200 cur = cur->children;
201
202 /*
203 * Search the head
204 */
205 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000206 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000207 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
208 break;
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200209 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
210 head = cur->parent;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000211 goto found_meta;
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200212 }
Owen Taylor3473f882001-02-23 17:55:21 +0000213 }
214 cur = cur->next;
215 }
216 if (cur == NULL)
217 return(-1);
218found_head:
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200219 head = cur;
220 if (cur->children == NULL)
221 goto create;
Owen Taylor3473f882001-02-23 17:55:21 +0000222 cur = cur->children;
223
224found_meta:
Owen Taylor3473f882001-02-23 17:55:21 +0000225 /*
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200226 * Search and update all the remaining the meta elements carrying
Owen Taylor3473f882001-02-23 17:55:21 +0000227 * encoding informations
228 */
229 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000230 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000231 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000232 xmlAttrPtr attr = cur->properties;
233 int http;
234 const xmlChar *value;
235
236 content = NULL;
237 http = 0;
238 while (attr != NULL) {
239 if ((attr->children != NULL) &&
240 (attr->children->type == XML_TEXT_NODE) &&
241 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000242 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000243 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
244 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
245 http = 1;
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200246 else
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000247 {
Daniel Veillard39d027c2012-05-11 12:38:23 +0800248 if ((value != NULL) &&
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200249 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
250 content = value;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000251 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000252 if ((http != 0) && (content != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +0000253 break;
254 }
255 attr = attr->next;
256 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000257 if ((http != 0) && (content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000258 meta = cur;
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200259 break;
Owen Taylor3473f882001-02-23 17:55:21 +0000260 }
261
262 }
263 }
264 cur = cur->next;
265 }
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200266create:
267 if (meta == NULL) {
268 if ((encoding != NULL) && (head != NULL)) {
269 /*
270 * Create a new Meta element with the right attributes
271 */
272
273 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
274 if (head->children == NULL)
275 xmlAddChild(head, meta);
276 else
277 xmlAddPrevSibling(head->children, meta);
278 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
279 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
280 }
281 } else {
Daniel Veillard39d027c2012-05-11 12:38:23 +0800282 /* remove the meta tag if NULL is passed */
283 if (encoding == NULL) {
284 xmlUnlinkNode(meta);
285 xmlFreeNode(meta);
286 }
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200287 /* change the document only if there is a real encoding change */
Daniel Veillard39d027c2012-05-11 12:38:23 +0800288 else if (xmlStrcasestr(content, encoding) == NULL) {
Daniel Veillard8d7c1b72009-08-12 23:03:23 +0200289 xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
290 }
291 }
292
293
Owen Taylor3473f882001-02-23 17:55:21 +0000294 return(0);
295}
296
/**
 * htmlBooleanAttrs:
 *
 * These are the HTML attributes which will be output
 * in minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 *
 * The list is terminated by a NULL entry. Both the strings and the
 * array of pointers are constant since the table is only ever read.
 */
static const char * const htmlBooleanAttrs[] = {
    "checked", "compact", "declare", "defer", "disabled", "ismap",
    "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
    "selected", NULL
};
310
311
312/**
313 * htmlIsBooleanAttr:
314 * @name: the name of the attribute to check
315 *
316 * Determine if a given attribute is a boolean attribute.
317 *
318 * returns: false if the attribute is not boolean, true otherwise.
319 */
320int
321htmlIsBooleanAttr(const xmlChar *name)
322{
323 int i = 0;
324
325 while (htmlBooleanAttrs[i] != NULL) {
Daniel Veillardabe01742002-09-26 12:40:03 +0000326 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
Daniel Veillardc084e472002-08-12 13:27:28 +0000327 return 1;
328 i++;
329 }
330 return 0;
331}
332
Daniel Veillarda9cce9c2003-09-29 13:20:24 +0000333#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardda3fee42008-09-01 13:08:57 +0000334/*
335 * private routine exported from xmlIO.c
336 */
337xmlOutputBufferPtr
338xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
Owen Taylor3473f882001-02-23 17:55:21 +0000339/************************************************************************
340 * *
Daniel Veillarde2238d52003-10-09 13:14:55 +0000341 * Output error handlers *
342 * *
343 ************************************************************************/
344/**
345 * htmlSaveErrMemory:
346 * @extra: extra informations
347 *
348 * Handle an out of memory condition
349 */
350static void
351htmlSaveErrMemory(const char *extra)
352{
353 __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
354}
355
356/**
357 * htmlSaveErr:
358 * @code: the error number
359 * @node: the location of the error.
360 * @extra: extra informations
361 *
362 * Handle an out of memory condition
363 */
364static void
365htmlSaveErr(int code, xmlNodePtr node, const char *extra)
366{
367 const char *msg = NULL;
368
369 switch(code) {
370 case XML_SAVE_NOT_UTF8:
Rob Richards417b74d2006-08-15 23:14:24 +0000371 msg = "string is not in UTF-8\n";
Daniel Veillarde2238d52003-10-09 13:14:55 +0000372 break;
373 case XML_SAVE_CHAR_INVALID:
Rob Richards417b74d2006-08-15 23:14:24 +0000374 msg = "invalid character value\n";
Daniel Veillarde2238d52003-10-09 13:14:55 +0000375 break;
376 case XML_SAVE_UNKNOWN_ENCODING:
Rob Richards417b74d2006-08-15 23:14:24 +0000377 msg = "unknown encoding %s\n";
Daniel Veillarde2238d52003-10-09 13:14:55 +0000378 break;
379 case XML_SAVE_NO_DOCTYPE:
Rob Richards417b74d2006-08-15 23:14:24 +0000380 msg = "HTML has no DOCTYPE\n";
Daniel Veillarde2238d52003-10-09 13:14:55 +0000381 break;
382 default:
Rob Richards417b74d2006-08-15 23:14:24 +0000383 msg = "unexpected error number\n";
Daniel Veillarde2238d52003-10-09 13:14:55 +0000384 }
385 __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
386}
387
388/************************************************************************
389 * *
Owen Taylor3473f882001-02-23 17:55:21 +0000390 * Dumping HTML tree content to a simple buffer *
391 * *
392 ************************************************************************/
393
Daniel Veillard8db67d22002-11-27 19:39:27 +0000394static int
Daniel Veillard86fd5a72001-12-13 14:55:21 +0000395htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
396 int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000397
398/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000399 * htmlNodeDumpFormat:
Owen Taylor3473f882001-02-23 17:55:21 +0000400 * @buf: the HTML buffer output
401 * @doc: the document
402 * @cur: the current node
Daniel Veillard95d845f2001-06-13 13:48:46 +0000403 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000404 *
405 * Dump an HTML node, recursive behaviour,children are printed too.
Daniel Veillard8db67d22002-11-27 19:39:27 +0000406 *
407 * Returns the number of byte written or -1 in case of error
Owen Taylor3473f882001-02-23 17:55:21 +0000408 */
Daniel Veillard8db67d22002-11-27 19:39:27 +0000409static int
Daniel Veillard95d845f2001-06-13 13:48:46 +0000410htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
411 int format) {
Daniel Veillard8db67d22002-11-27 19:39:27 +0000412 unsigned int use;
413 int ret;
414 xmlOutputBufferPtr outbuf;
Owen Taylor3473f882001-02-23 17:55:21 +0000415
416 if (cur == NULL) {
Daniel Veillard8db67d22002-11-27 19:39:27 +0000417 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +0000418 }
Daniel Veillard8db67d22002-11-27 19:39:27 +0000419 if (buf == NULL) {
420 return (-1);
Owen Taylor3473f882001-02-23 17:55:21 +0000421 }
Daniel Veillard8db67d22002-11-27 19:39:27 +0000422 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
423 if (outbuf == NULL) {
Daniel Veillarde2238d52003-10-09 13:14:55 +0000424 htmlSaveErrMemory("allocating HTML output buffer");
Daniel Veillard8db67d22002-11-27 19:39:27 +0000425 return (-1);
426 }
427 memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
428 outbuf->buffer = buf;
429 outbuf->encoder = NULL;
430 outbuf->writecallback = NULL;
431 outbuf->closecallback = NULL;
432 outbuf->context = NULL;
433 outbuf->written = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000434
Daniel Veillard8db67d22002-11-27 19:39:27 +0000435 use = buf->use;
436 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
437 xmlFree(outbuf);
438 ret = buf->use - use;
439 return (ret);
Owen Taylor3473f882001-02-23 17:55:21 +0000440}
441
442/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000443 * htmlNodeDump:
444 * @buf: the HTML buffer output
445 * @doc: the document
446 * @cur: the current node
447 *
448 * Dump an HTML node, recursive behaviour,children are printed too,
449 * and formatting returns are added.
Daniel Veillard8db67d22002-11-27 19:39:27 +0000450 *
451 * Returns the number of byte written or -1 in case of error
Daniel Veillard95d845f2001-06-13 13:48:46 +0000452 */
Daniel Veillard8db67d22002-11-27 19:39:27 +0000453int
Daniel Veillard95d845f2001-06-13 13:48:46 +0000454htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000455 xmlInitParser();
456
Daniel Veillard8db67d22002-11-27 19:39:27 +0000457 return(htmlNodeDumpFormat(buf, doc, cur, 1));
Daniel Veillard95d845f2001-06-13 13:48:46 +0000458}
459
460/**
461 * htmlNodeDumpFileFormat:
462 * @out: the FILE pointer
463 * @doc: the document
464 * @cur: the current node
465 * @encoding: the document encoding
466 * @format: should formatting spaces been added
467 *
468 * Dump an HTML node, recursive behaviour,children are printed too.
469 *
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000470 * TODO: if encoding == NULL try to save in the doc encoding
471 *
472 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard95d845f2001-06-13 13:48:46 +0000473 */
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000474int
475htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
476 xmlNodePtr cur, const char *encoding, int format) {
477 xmlOutputBufferPtr buf;
478 xmlCharEncodingHandlerPtr handler = NULL;
479 int ret;
Daniel Veillard95d845f2001-06-13 13:48:46 +0000480
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000481 xmlInitParser();
482
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000483 if (encoding != NULL) {
484 xmlCharEncoding enc;
485
486 enc = xmlParseCharEncoding(encoding);
487 if (enc != XML_CHAR_ENCODING_UTF8) {
488 handler = xmlFindCharEncodingHandler(encoding);
489 if (handler == NULL)
Daniel Veillardc62efc82011-05-16 16:03:50 +0800490 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000491 }
492 }
493
494 /*
495 * Fallback to HTML or ASCII when the encoding is unspecified
496 */
497 if (handler == NULL)
498 handler = xmlFindCharEncodingHandler("HTML");
499 if (handler == NULL)
500 handler = xmlFindCharEncodingHandler("ascii");
501
502 /*
503 * save the content to a temp buffer.
504 */
505 buf = xmlOutputBufferCreateFile(out, handler);
506 if (buf == NULL) return(0);
507
508 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
509
510 ret = xmlOutputBufferClose(buf);
511 return(ret);
Daniel Veillard95d845f2001-06-13 13:48:46 +0000512}
513
514/**
Owen Taylor3473f882001-02-23 17:55:21 +0000515 * htmlNodeDumpFile:
516 * @out: the FILE pointer
517 * @doc: the document
518 * @cur: the current node
519 *
Daniel Veillard95d845f2001-06-13 13:48:46 +0000520 * Dump an HTML node, recursive behaviour,children are printed too,
521 * and formatting returns are added.
Owen Taylor3473f882001-02-23 17:55:21 +0000522 */
523void
524htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000525 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000526}
527
528/**
Rob Richards77b92ff2005-12-20 15:55:14 +0000529 * htmlDocDumpMemoryFormat:
Owen Taylor3473f882001-02-23 17:55:21 +0000530 * @cur: the document
531 * @mem: OUT: the memory pointer
Daniel Veillard2d703722001-05-30 18:32:34 +0000532 * @size: OUT: the memory length
Rob Richards77b92ff2005-12-20 15:55:14 +0000533 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000534 *
535 * Dump an HTML document in memory and return the xmlChar * and it's size.
536 * It's up to the caller to free the memory.
537 */
538void
Rob Richards77b92ff2005-12-20 15:55:14 +0000539htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
Daniel Veillard2d703722001-05-30 18:32:34 +0000540 xmlOutputBufferPtr buf;
541 xmlCharEncodingHandlerPtr handler = NULL;
542 const char *encoding;
Owen Taylor3473f882001-02-23 17:55:21 +0000543
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000544 xmlInitParser();
545
Daniel Veillardd5cc0f72004-11-06 19:24:28 +0000546 if ((mem == NULL) || (size == NULL))
547 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000548 if (cur == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000549 *mem = NULL;
550 *size = 0;
551 return;
552 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000553
554 encoding = (const char *) htmlGetMetaEncoding(cur);
555
556 if (encoding != NULL) {
557 xmlCharEncoding enc;
558
559 enc = xmlParseCharEncoding(encoding);
560 if (enc != cur->charset) {
561 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
562 /*
563 * Not supported yet
564 */
565 *mem = NULL;
566 *size = 0;
567 return;
568 }
569
570 handler = xmlFindCharEncodingHandler(encoding);
Daniel Veillardc62efc82011-05-16 16:03:50 +0800571 if (handler == NULL)
572 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
573
Daniel Veillardb8c80162005-08-08 13:46:45 +0000574 } else {
575 handler = xmlFindCharEncodingHandler(encoding);
Daniel Veillard2d703722001-05-30 18:32:34 +0000576 }
577 }
578
579 /*
580 * Fallback to HTML or ASCII when the encoding is unspecified
581 */
582 if (handler == NULL)
583 handler = xmlFindCharEncodingHandler("HTML");
584 if (handler == NULL)
585 handler = xmlFindCharEncodingHandler("ascii");
586
Daniel Veillardda3fee42008-09-01 13:08:57 +0000587 buf = xmlAllocOutputBufferInternal(handler);
Owen Taylor3473f882001-02-23 17:55:21 +0000588 if (buf == NULL) {
589 *mem = NULL;
590 *size = 0;
591 return;
592 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000593
Daniel Veillardc62efc82011-05-16 16:03:50 +0800594 htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
Rob Richards77b92ff2005-12-20 15:55:14 +0000595
Daniel Veillard2d703722001-05-30 18:32:34 +0000596 xmlOutputBufferFlush(buf);
597 if (buf->conv != NULL) {
598 *size = buf->conv->use;
599 *mem = xmlStrndup(buf->conv->content, *size);
600 } else {
601 *size = buf->buffer->use;
602 *mem = xmlStrndup(buf->buffer->content, *size);
603 }
604 (void)xmlOutputBufferClose(buf);
Owen Taylor3473f882001-02-23 17:55:21 +0000605}
606
Rob Richards77b92ff2005-12-20 15:55:14 +0000607/**
608 * htmlDocDumpMemory:
609 * @cur: the document
610 * @mem: OUT: the memory pointer
611 * @size: OUT: the memory length
612 *
613 * Dump an HTML document in memory and return the xmlChar * and it's size.
614 * It's up to the caller to free the memory.
615 */
616void
617htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
618 htmlDocDumpMemoryFormat(cur, mem, size, 1);
619}
620
Owen Taylor3473f882001-02-23 17:55:21 +0000621
622/************************************************************************
623 * *
624 * Dumping HTML tree content to an I/O output buffer *
625 * *
626 ************************************************************************/
627
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000628void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
Daniel Veillardc084e472002-08-12 13:27:28 +0000629
Owen Taylor3473f882001-02-23 17:55:21 +0000630/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000631 * htmlDtdDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000632 * @buf: the HTML buffer output
633 * @doc: the document
634 * @encoding: the encoding string
635 *
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000636 * TODO: check whether encoding is needed
637 *
Owen Taylor3473f882001-02-23 17:55:21 +0000638 * Dump the HTML document DTD, if any.
639 */
640static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000641htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000642 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000643 xmlDtdPtr cur = doc->intSubset;
644
645 if (cur == NULL) {
Daniel Veillarde2238d52003-10-09 13:14:55 +0000646 htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000647 return;
648 }
649 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
650 xmlOutputBufferWriteString(buf, (const char *)cur->name);
651 if (cur->ExternalID != NULL) {
652 xmlOutputBufferWriteString(buf, " PUBLIC ");
653 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
654 if (cur->SystemID != NULL) {
655 xmlOutputBufferWriteString(buf, " ");
656 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
657 }
658 } else if (cur->SystemID != NULL) {
659 xmlOutputBufferWriteString(buf, " SYSTEM ");
660 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
661 }
662 xmlOutputBufferWriteString(buf, ">\n");
663}
664
665/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000666 * htmlAttrDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000667 * @buf: the HTML buffer output
668 * @doc: the document
669 * @cur: the attribute pointer
670 * @encoding: the encoding string
671 *
672 * Dump an HTML attribute
673 */
674static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000675htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000676 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000677 xmlChar *value;
678
Daniel Veillardeca60d02001-06-13 07:45:41 +0000679 /*
680 * TODO: The html output method should not escape a & character
681 * occurring in an attribute value immediately followed by
682 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
683 */
684
Owen Taylor3473f882001-02-23 17:55:21 +0000685 if (cur == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000686 return;
687 }
688 xmlOutputBufferWriteString(buf, " ");
William M. Brack3a6da762003-09-15 04:58:14 +0000689 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
690 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
691 xmlOutputBufferWriteString(buf, ":");
692 }
Owen Taylor3473f882001-02-23 17:55:21 +0000693 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillardc084e472002-08-12 13:27:28 +0000694 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000695 value = xmlNodeListGetString(doc, cur->children, 0);
696 if (value) {
697 xmlOutputBufferWriteString(buf, "=");
Daniel Veillardc7e9b192003-03-27 14:08:24 +0000698 if ((cur->ns == NULL) && (cur->parent != NULL) &&
699 (cur->parent->ns == NULL) &&
700 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
701 (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
Daniel Veillardaa9a9832005-03-29 20:30:17 +0000702 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
703 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
704 (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
Daniel Veillardeb475a32002-04-14 22:00:22 +0000705 xmlChar *escaped;
706 xmlChar *tmp = value;
707
William M. Brack76e95df2003-10-18 16:20:14 +0000708 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillardeb475a32002-04-14 22:00:22 +0000709
Daniel Veillard5f5b7bb2003-05-16 17:19:40 +0000710 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
Daniel Veillardeb475a32002-04-14 22:00:22 +0000711 if (escaped != NULL) {
712 xmlBufferWriteQuotedString(buf->buffer, escaped);
713 xmlFree(escaped);
714 } else {
715 xmlBufferWriteQuotedString(buf->buffer, value);
716 }
717 } else {
718 xmlBufferWriteQuotedString(buf->buffer, value);
719 }
Owen Taylor3473f882001-02-23 17:55:21 +0000720 xmlFree(value);
721 } else {
722 xmlOutputBufferWriteString(buf, "=\"\"");
723 }
724 }
725}
726
727/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000728 * htmlAttrListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000729 * @buf: the HTML buffer output
730 * @doc: the document
731 * @cur: the first attribute pointer
732 * @encoding: the encoding string
733 *
734 * Dump a list of HTML attributes
735 */
736static void
737htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
738 if (cur == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000739 return;
740 }
741 while (cur != NULL) {
742 htmlAttrDumpOutput(buf, doc, cur, encoding);
743 cur = cur->next;
744 }
745}
746
747
Owen Taylor3473f882001-02-23 17:55:21 +0000748
749/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000750 * htmlNodeListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000751 * @buf: the HTML buffer output
752 * @doc: the document
753 * @cur: the first node
754 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000755 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000756 *
757 * Dump an HTML node list, recursive behaviour,children are printed too.
758 */
759static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000760htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
761 xmlNodePtr cur, const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000762 if (cur == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000763 return;
764 }
765 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000766 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000767 cur = cur->next;
768 }
769}
770
771/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000772 * htmlNodeDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000773 * @buf: the HTML buffer output
774 * @doc: the document
775 * @cur: the current node
776 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000777 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000778 *
779 * Dump an HTML node, recursive behaviour,children are printed too.
780 */
781void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000782htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
783 xmlNodePtr cur, const char *encoding, int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000784 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000785
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000786 xmlInitParser();
787
Daniel Veillardce244ad2004-11-05 10:03:46 +0000788 if ((cur == NULL) || (buf == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000789 return;
790 }
791 /*
792 * Special cases.
793 */
794 if (cur->type == XML_DTD_NODE)
795 return;
Daniel Veillardce244ad2004-11-05 10:03:46 +0000796 if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
797 (cur->type == XML_DOCUMENT_NODE)){
Owen Taylor3473f882001-02-23 17:55:21 +0000798 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
799 return;
800 }
Daniel Veillardfcd02ad2007-06-12 09:49:40 +0000801 if (cur->type == XML_ATTRIBUTE_NODE) {
802 htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
803 return;
804 }
Owen Taylor3473f882001-02-23 17:55:21 +0000805 if (cur->type == HTML_TEXT_NODE) {
806 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000807 if (((cur->name == (const xmlChar *)xmlStringText) ||
808 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000809 ((cur->parent == NULL) ||
Daniel Veillard44892f72002-10-16 15:23:26 +0000810 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
811 (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000812 xmlChar *buffer;
813
Owen Taylor3473f882001-02-23 17:55:21 +0000814 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000815 if (buffer != NULL) {
816 xmlOutputBufferWriteString(buf, (const char *)buffer);
817 xmlFree(buffer);
818 }
819 } else {
820 xmlOutputBufferWriteString(buf, (const char *)cur->content);
821 }
822 }
823 return;
824 }
825 if (cur->type == HTML_COMMENT_NODE) {
826 if (cur->content != NULL) {
827 xmlOutputBufferWriteString(buf, "<!--");
Owen Taylor3473f882001-02-23 17:55:21 +0000828 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000829 xmlOutputBufferWriteString(buf, "-->");
830 }
831 return;
832 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000833 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000834 if (cur->name == NULL)
835 return;
836 xmlOutputBufferWriteString(buf, "<?");
837 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000838 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000839 xmlOutputBufferWriteString(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000840 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000841 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000842 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000843 return;
844 }
Owen Taylor3473f882001-02-23 17:55:21 +0000845 if (cur->type == HTML_ENTITY_REF_NODE) {
846 xmlOutputBufferWriteString(buf, "&");
847 xmlOutputBufferWriteString(buf, (const char *)cur->name);
848 xmlOutputBufferWriteString(buf, ";");
849 return;
850 }
851 if (cur->type == HTML_PRESERVE_NODE) {
852 if (cur->content != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000853 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000854 }
855 return;
856 }
857
858 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000859 * Get specific HTML info for that node.
Owen Taylor3473f882001-02-23 17:55:21 +0000860 */
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000861 if (cur->ns == NULL)
862 info = htmlTagLookup(cur->name);
863 else
864 info = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000865
866 xmlOutputBufferWriteString(buf, "<");
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000867 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
868 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
869 xmlOutputBufferWriteString(buf, ":");
870 }
Owen Taylor3473f882001-02-23 17:55:21 +0000871 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000872 if (cur->nsDef)
873 xmlNsListDumpOutput(buf, cur->nsDef);
Owen Taylor3473f882001-02-23 17:55:21 +0000874 if (cur->properties != NULL)
875 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
876
877 if ((info != NULL) && (info->empty)) {
878 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000879 if ((format) && (!info->isinline) && (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000880 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +0000881 (cur->next->type != HTML_ENTITY_REF_NODE) &&
882 (cur->parent != NULL) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000883 (cur->parent->name != NULL) &&
884 (cur->parent->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000885 xmlOutputBufferWriteString(buf, "\n");
886 }
887 return;
888 }
Daniel Veillard7db37732001-07-12 01:20:08 +0000889 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
890 (cur->children == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000891 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +0000892 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
893 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000894 xmlOutputBufferWriteString(buf, ">");
895 } else {
896 xmlOutputBufferWriteString(buf, "></");
Daniel Veillard645c6902003-04-10 21:40:49 +0000897 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
898 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
899 xmlOutputBufferWriteString(buf, ":");
900 }
Owen Taylor3473f882001-02-23 17:55:21 +0000901 xmlOutputBufferWriteString(buf, (const char *)cur->name);
902 xmlOutputBufferWriteString(buf, ">");
903 }
Daniel Veillard02bb1702001-06-13 21:11:59 +0000904 if ((format) && (cur->next != NULL) &&
905 (info != NULL) && (!info->isinline)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000906 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +0000907 (cur->next->type != HTML_ENTITY_REF_NODE) &&
908 (cur->parent != NULL) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000909 (cur->parent->name != NULL) &&
910 (cur->parent->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000911 xmlOutputBufferWriteString(buf, "\n");
912 }
913 return;
914 }
915 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +0000916 if ((cur->type != XML_ELEMENT_NODE) &&
917 (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000918 /*
919 * Uses the OutputBuffer property to automatically convert
920 * invalids to charrefs
921 */
922
Owen Taylor3473f882001-02-23 17:55:21 +0000923 xmlOutputBufferWriteString(buf, (const char *) cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000924 }
925 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +0000926 if ((format) && (info != NULL) && (!info->isinline) &&
927 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000928 (cur->children->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +0000929 (cur->children != cur->last) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000930 (cur->name != NULL) &&
931 (cur->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000932 xmlOutputBufferWriteString(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +0000933 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +0000934 if ((format) && (info != NULL) && (!info->isinline) &&
935 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000936 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +0000937 (cur->children != cur->last) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000938 (cur->name != NULL) &&
939 (cur->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000940 xmlOutputBufferWriteString(buf, "\n");
941 }
Owen Taylor3473f882001-02-23 17:55:21 +0000942 xmlOutputBufferWriteString(buf, "</");
Daniel Veillard5ecaf7f2003-01-09 13:19:33 +0000943 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
944 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
945 xmlOutputBufferWriteString(buf, ":");
946 }
Owen Taylor3473f882001-02-23 17:55:21 +0000947 xmlOutputBufferWriteString(buf, (const char *)cur->name);
948 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000949 if ((format) && (info != NULL) && (!info->isinline) &&
950 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000951 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +0000952 (cur->next->type != HTML_ENTITY_REF_NODE) &&
953 (cur->parent != NULL) &&
Daniel Veillard42fd4122003-11-04 08:47:48 +0000954 (cur->parent->name != NULL) &&
955 (cur->parent->name[0] != 'p')) /* p, pre, param */
Owen Taylor3473f882001-02-23 17:55:21 +0000956 xmlOutputBufferWriteString(buf, "\n");
957 }
958}
959
960/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000961 * htmlNodeDumpOutput:
962 * @buf: the HTML buffer output
963 * @doc: the document
964 * @cur: the current node
965 * @encoding: the encoding string
966 *
967 * Dump an HTML node, recursive behaviour,children are printed too,
968 * and formatting returns/spaces are added.
969 */
970void
971htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
972 xmlNodePtr cur, const char *encoding) {
973 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
974}
975
976/**
977 * htmlDocContentDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000978 * @buf: the HTML buffer output
979 * @cur: the document
980 * @encoding: the encoding string
Daniel Veillard9d06d302002-01-22 18:15:52 +0000981 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000982 *
983 * Dump an HTML document.
984 */
985void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000986htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
987 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000988 int type;
989
Daniel Veillard70bcb0e2003-08-08 14:00:28 +0000990 xmlInitParser();
991
Daniel Veillard3d97e662004-11-04 10:49:00 +0000992 if ((buf == NULL) || (cur == NULL))
993 return;
994
Owen Taylor3473f882001-02-23 17:55:21 +0000995 /*
996 * force to output the stuff as HTML, especially for entities
997 */
998 type = cur->type;
999 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard4dd93462001-04-02 15:16:19 +00001000 if (cur->intSubset != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001001 htmlDtdDumpOutput(buf, cur, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001002 }
1003 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +00001004 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001005 }
1006 xmlOutputBufferWriteString(buf, "\n");
1007 cur->type = (xmlElementType) type;
1008}
1009
Daniel Veillard95d845f2001-06-13 13:48:46 +00001010/**
1011 * htmlDocContentDumpOutput:
1012 * @buf: the HTML buffer output
1013 * @cur: the document
1014 * @encoding: the encoding string
1015 *
1016 * Dump an HTML document. Formating return/spaces are added.
1017 */
1018void
1019htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1020 const char *encoding) {
1021 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1022}
1023
Owen Taylor3473f882001-02-23 17:55:21 +00001024/************************************************************************
1025 * *
1026 * Saving functions front-ends *
1027 * *
1028 ************************************************************************/
1029
1030/**
1031 * htmlDocDump:
1032 * @f: the FILE*
1033 * @cur: the document
1034 *
1035 * Dump an HTML document to an open FILE.
1036 *
1037 * returns: the number of byte written or -1 in case of failure.
1038 */
1039int
1040htmlDocDump(FILE *f, xmlDocPtr cur) {
1041 xmlOutputBufferPtr buf;
1042 xmlCharEncodingHandlerPtr handler = NULL;
1043 const char *encoding;
1044 int ret;
1045
Daniel Veillard70bcb0e2003-08-08 14:00:28 +00001046 xmlInitParser();
1047
Daniel Veillard3d97e662004-11-04 10:49:00 +00001048 if ((cur == NULL) || (f == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001049 return(-1);
1050 }
1051
1052 encoding = (const char *) htmlGetMetaEncoding(cur);
1053
1054 if (encoding != NULL) {
1055 xmlCharEncoding enc;
1056
1057 enc = xmlParseCharEncoding(encoding);
1058 if (enc != cur->charset) {
1059 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1060 /*
1061 * Not supported yet
1062 */
1063 return(-1);
1064 }
1065
1066 handler = xmlFindCharEncodingHandler(encoding);
1067 if (handler == NULL)
Daniel Veillardc62efc82011-05-16 16:03:50 +08001068 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
Daniel Veillardb8c80162005-08-08 13:46:45 +00001069 } else {
1070 handler = xmlFindCharEncodingHandler(encoding);
Owen Taylor3473f882001-02-23 17:55:21 +00001071 }
1072 }
1073
1074 /*
1075 * Fallback to HTML or ASCII when the encoding is unspecified
1076 */
1077 if (handler == NULL)
1078 handler = xmlFindCharEncodingHandler("HTML");
1079 if (handler == NULL)
1080 handler = xmlFindCharEncodingHandler("ascii");
1081
1082 buf = xmlOutputBufferCreateFile(f, handler);
1083 if (buf == NULL) return(-1);
1084 htmlDocContentDumpOutput(buf, cur, NULL);
1085
1086 ret = xmlOutputBufferClose(buf);
1087 return(ret);
1088}
1089
1090/**
1091 * htmlSaveFile:
1092 * @filename: the filename (or URL)
1093 * @cur: the document
1094 *
1095 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1096 * used.
1097 * returns: the number of byte written or -1 in case of failure.
1098 */
1099int
1100htmlSaveFile(const char *filename, xmlDocPtr cur) {
1101 xmlOutputBufferPtr buf;
1102 xmlCharEncodingHandlerPtr handler = NULL;
1103 const char *encoding;
1104 int ret;
1105
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001106 if ((cur == NULL) || (filename == NULL))
1107 return(-1);
1108
Daniel Veillard70bcb0e2003-08-08 14:00:28 +00001109 xmlInitParser();
1110
Owen Taylor3473f882001-02-23 17:55:21 +00001111 encoding = (const char *) htmlGetMetaEncoding(cur);
1112
1113 if (encoding != NULL) {
1114 xmlCharEncoding enc;
1115
1116 enc = xmlParseCharEncoding(encoding);
1117 if (enc != cur->charset) {
1118 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1119 /*
1120 * Not supported yet
1121 */
1122 return(-1);
1123 }
1124
1125 handler = xmlFindCharEncodingHandler(encoding);
1126 if (handler == NULL)
Daniel Veillardc62efc82011-05-16 16:03:50 +08001127 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
Owen Taylor3473f882001-02-23 17:55:21 +00001128 }
1129 }
1130
1131 /*
1132 * Fallback to HTML or ASCII when the encoding is unspecified
1133 */
1134 if (handler == NULL)
1135 handler = xmlFindCharEncodingHandler("HTML");
1136 if (handler == NULL)
1137 handler = xmlFindCharEncodingHandler("ascii");
1138
1139 /*
1140 * save the content to a temp buffer.
1141 */
1142 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1143 if (buf == NULL) return(0);
1144
1145 htmlDocContentDumpOutput(buf, cur, NULL);
1146
1147 ret = xmlOutputBufferClose(buf);
1148 return(ret);
1149}
1150
1151/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001152 * htmlSaveFileFormat:
Owen Taylor3473f882001-02-23 17:55:21 +00001153 * @filename: the filename
1154 * @cur: the document
Daniel Veillard95d845f2001-06-13 13:48:46 +00001155 * @format: should formatting spaces been added
1156 * @encoding: the document encoding
Owen Taylor3473f882001-02-23 17:55:21 +00001157 *
1158 * Dump an HTML document to a file using a given encoding.
1159 *
1160 * returns: the number of byte written or -1 in case of failure.
1161 */
1162int
Daniel Veillard95d845f2001-06-13 13:48:46 +00001163htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1164 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001165 xmlOutputBufferPtr buf;
1166 xmlCharEncodingHandlerPtr handler = NULL;
1167 int ret;
1168
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001169 if ((cur == NULL) || (filename == NULL))
1170 return(-1);
Daniel Veillard8d7c1b72009-08-12 23:03:23 +02001171
Daniel Veillard70bcb0e2003-08-08 14:00:28 +00001172 xmlInitParser();
1173
Owen Taylor3473f882001-02-23 17:55:21 +00001174 if (encoding != NULL) {
1175 xmlCharEncoding enc;
1176
1177 enc = xmlParseCharEncoding(encoding);
1178 if (enc != cur->charset) {
1179 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1180 /*
1181 * Not supported yet
1182 */
1183 return(-1);
1184 }
1185
1186 handler = xmlFindCharEncodingHandler(encoding);
1187 if (handler == NULL)
Daniel Veillardc62efc82011-05-16 16:03:50 +08001188 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
Owen Taylor3473f882001-02-23 17:55:21 +00001189 }
Daniel Veillard8d7c1b72009-08-12 23:03:23 +02001190 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
Daniel Veillard4dd93462001-04-02 15:16:19 +00001191 } else {
1192 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
Owen Taylor3473f882001-02-23 17:55:21 +00001193 }
1194
1195 /*
1196 * Fallback to HTML or ASCII when the encoding is unspecified
1197 */
1198 if (handler == NULL)
1199 handler = xmlFindCharEncodingHandler("HTML");
1200 if (handler == NULL)
1201 handler = xmlFindCharEncodingHandler("ascii");
1202
1203 /*
1204 * save the content to a temp buffer.
1205 */
1206 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1207 if (buf == NULL) return(0);
1208
Daniel Veillard95d845f2001-06-13 13:48:46 +00001209 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001210
1211 ret = xmlOutputBufferClose(buf);
1212 return(ret);
1213}
Daniel Veillard95d845f2001-06-13 13:48:46 +00001214
1215/**
1216 * htmlSaveFileEnc:
1217 * @filename: the filename
1218 * @cur: the document
1219 * @encoding: the document encoding
1220 *
1221 * Dump an HTML document to a file using a given encoding
1222 * and formatting returns/spaces are added.
1223 *
1224 * returns: the number of byte written or -1 in case of failure.
1225 */
1226int
1227htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1228 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1229}
1230
Daniel Veillarda9cce9c2003-09-29 13:20:24 +00001231#endif /* LIBXML_OUTPUT_ENABLED */
Daniel Veillardc084e472002-08-12 13:27:28 +00001232
Daniel Veillard5d4644e2005-04-01 13:11:58 +00001233#define bottom_HTMLtree
1234#include "elfgcchack.h"
Owen Taylor3473f882001-02-23 17:55:21 +00001235#endif /* LIBXML_HTML_ENABLED */