blob: 7650184be73779c3ec4df47c417af14b6fdca4f5 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002 * HTMLtree.c : implementation of access function for an HTML tree.
Owen Taylor3473f882001-02-23 17:55:21 +00003 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00006 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00007 */
8
9
Daniel Veillard34ce8be2002-03-18 19:37:11 +000010#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000012#ifdef LIBXML_HTML_ENABLED
13
Owen Taylor3473f882001-02-23 17:55:21 +000014#ifdef HAVE_CTYPE_H
15#include <ctype.h>
16#endif
17#ifdef HAVE_STDLIB_H
18#include <stdlib.h>
19#endif
20
21#include <libxml/xmlmemory.h>
22#include <libxml/HTMLparser.h>
23#include <libxml/HTMLtree.h>
24#include <libxml/entities.h>
25#include <libxml/valid.h>
26#include <libxml/xmlerror.h>
27#include <libxml/parserInternals.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000028#include <libxml/globals.h>
Daniel Veillardeb475a32002-04-14 22:00:22 +000029#include <libxml/uri.h>
Owen Taylor3473f882001-02-23 17:55:21 +000030
31/************************************************************************
32 * *
33 * Getting/Setting encoding meta tags *
34 * *
35 ************************************************************************/
36
37/**
38 * htmlGetMetaEncoding:
39 * @doc: the document
40 *
41 * Encoding definition lookup in the Meta tags
42 *
43 * Returns the current encoding as flagged in the HTML source
44 */
45const xmlChar *
46htmlGetMetaEncoding(htmlDocPtr doc) {
47 htmlNodePtr cur;
48 const xmlChar *content;
49 const xmlChar *encoding;
50
51 if (doc == NULL)
52 return(NULL);
53 cur = doc->children;
54
55 /*
56 * Search the html
57 */
58 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000059 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000060 if (xmlStrEqual(cur->name, BAD_CAST"html"))
61 break;
62 if (xmlStrEqual(cur->name, BAD_CAST"head"))
63 goto found_head;
64 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
65 goto found_meta;
66 }
67 cur = cur->next;
68 }
69 if (cur == NULL)
70 return(NULL);
71 cur = cur->children;
72
73 /*
74 * Search the head
75 */
76 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000077 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000078 if (xmlStrEqual(cur->name, BAD_CAST"head"))
79 break;
80 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
81 goto found_meta;
82 }
83 cur = cur->next;
84 }
85 if (cur == NULL)
86 return(NULL);
87found_head:
88 cur = cur->children;
89
90 /*
91 * Search the meta elements
92 */
93found_meta:
94 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000095 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000096 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
97 xmlAttrPtr attr = cur->properties;
98 int http;
99 const xmlChar *value;
100
101 content = NULL;
102 http = 0;
103 while (attr != NULL) {
104 if ((attr->children != NULL) &&
105 (attr->children->type == XML_TEXT_NODE) &&
106 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000107 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000108 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
109 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
110 http = 1;
111 else if ((value != NULL)
112 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
113 content = value;
114 if ((http != 0) && (content != NULL))
115 goto found_content;
116 }
117 attr = attr->next;
118 }
119 }
120 }
121 cur = cur->next;
122 }
123 return(NULL);
124
125found_content:
126 encoding = xmlStrstr(content, BAD_CAST"charset=");
127 if (encoding == NULL)
128 encoding = xmlStrstr(content, BAD_CAST"Charset=");
129 if (encoding == NULL)
130 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
131 if (encoding != NULL) {
132 encoding += 8;
133 } else {
134 encoding = xmlStrstr(content, BAD_CAST"charset =");
135 if (encoding == NULL)
136 encoding = xmlStrstr(content, BAD_CAST"Charset =");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
139 if (encoding != NULL)
140 encoding += 9;
141 }
142 if (encoding != NULL) {
143 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
144 }
145 return(encoding);
146}
147
148/**
149 * htmlSetMetaEncoding:
150 * @doc: the document
151 * @encoding: the encoding string
152 *
153 * Sets the current encoding in the Meta tags
154 * NOTE: this will not change the document content encoding, just
155 * the META flag associated.
156 *
157 * Returns 0 in case of success and -1 in case of error
158 */
159int
160htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
161 htmlNodePtr cur, meta;
162 const xmlChar *content;
163 char newcontent[100];
164
165
166 if (doc == NULL)
167 return(-1);
168
169 if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000170 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
171 encoding);
Owen Taylor3473f882001-02-23 17:55:21 +0000172 newcontent[sizeof(newcontent) - 1] = 0;
173 }
174
175 cur = doc->children;
176
177 /*
178 * Search the html
179 */
180 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000181 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000182 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
183 break;
184 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
185 goto found_head;
186 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
187 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000188 }
189 cur = cur->next;
190 }
191 if (cur == NULL)
192 return(-1);
193 cur = cur->children;
194
195 /*
196 * Search the head
197 */
198 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000199 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000200 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
201 break;
202 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
203 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000204 }
205 cur = cur->next;
206 }
207 if (cur == NULL)
208 return(-1);
209found_head:
210 if (cur->children == NULL) {
211 if (encoding == NULL)
212 return(0);
213 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
214 xmlAddChild(cur, meta);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000215 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Daniel Veillard3a42f3f2002-07-17 17:57:34 +0000216 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Owen Taylor3473f882001-02-23 17:55:21 +0000217 return(0);
218 }
219 cur = cur->children;
220
221found_meta:
222 if (encoding != NULL) {
223 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000224 * Create a new Meta element with the right attributes
Owen Taylor3473f882001-02-23 17:55:21 +0000225 */
226
227 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
228 xmlAddPrevSibling(cur, meta);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000229 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Daniel Veillard3a42f3f2002-07-17 17:57:34 +0000230 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Owen Taylor3473f882001-02-23 17:55:21 +0000231 }
232
233 /*
234 * Search and destroy all the remaining the meta elements carrying
235 * encoding informations
236 */
237 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000238 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000239 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000240 xmlAttrPtr attr = cur->properties;
241 int http;
242 const xmlChar *value;
243
244 content = NULL;
245 http = 0;
246 while (attr != NULL) {
247 if ((attr->children != NULL) &&
248 (attr->children->type == XML_TEXT_NODE) &&
249 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000250 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000251 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
252 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
253 http = 1;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000254 else
255 {
256 if ((value != NULL) &&
257 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
258 content = value;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000259 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000260 if ((http != 0) && (content != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +0000261 break;
262 }
263 attr = attr->next;
264 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000265 if ((http != 0) && (content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000266 meta = cur;
267 cur = cur->next;
268 xmlUnlinkNode(meta);
269 xmlFreeNode(meta);
270 continue;
271 }
272
273 }
274 }
275 cur = cur->next;
276 }
277 return(0);
278}
279
/**
 * htmlBooleanAttrs:
 *
 * NULL terminated table of the HTML attributes which are output
 * in minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 */
static const char* htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL
};
293
294
295/**
296 * htmlIsBooleanAttr:
297 * @name: the name of the attribute to check
298 *
299 * Determine if a given attribute is a boolean attribute.
300 *
301 * returns: false if the attribute is not boolean, true otherwise.
302 */
303int
304htmlIsBooleanAttr(const xmlChar *name)
305{
306 int i = 0;
307
308 while (htmlBooleanAttrs[i] != NULL) {
Daniel Veillardabe01742002-09-26 12:40:03 +0000309 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
Daniel Veillardc084e472002-08-12 13:27:28 +0000310 return 1;
311 i++;
312 }
313 return 0;
314}
315
Owen Taylor3473f882001-02-23 17:55:21 +0000316/************************************************************************
317 * *
318 * Dumping HTML tree content to a simple buffer *
319 * *
320 ************************************************************************/
321
322static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000323htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format);
Daniel Veillard86fd5a72001-12-13 14:55:21 +0000324static void
325htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
326 int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000327
328/**
329 * htmlDtdDump:
330 * @buf: the HTML buffer output
331 * @doc: the document
332 *
333 * Dump the HTML document DTD, if any.
334 */
335static void
336htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
337 xmlDtdPtr cur = doc->intSubset;
338
339 if (cur == NULL) {
340 xmlGenericError(xmlGenericErrorContext,
341 "htmlDtdDump : no internal subset\n");
342 return;
343 }
344 xmlBufferWriteChar(buf, "<!DOCTYPE ");
345 xmlBufferWriteCHAR(buf, cur->name);
346 if (cur->ExternalID != NULL) {
347 xmlBufferWriteChar(buf, " PUBLIC ");
348 xmlBufferWriteQuotedString(buf, cur->ExternalID);
349 if (cur->SystemID != NULL) {
350 xmlBufferWriteChar(buf, " ");
351 xmlBufferWriteQuotedString(buf, cur->SystemID);
352 }
353 } else if (cur->SystemID != NULL) {
354 xmlBufferWriteChar(buf, " SYSTEM ");
355 xmlBufferWriteQuotedString(buf, cur->SystemID);
356 }
357 xmlBufferWriteChar(buf, ">\n");
358}
359
360/**
361 * htmlAttrDump:
362 * @buf: the HTML buffer output
363 * @doc: the document
364 * @cur: the attribute pointer
365 *
366 * Dump an HTML attribute
367 */
368static void
369htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
370 xmlChar *value;
371
Daniel Veillardeca60d02001-06-13 07:45:41 +0000372 /*
373 * TODO: The html output method should not escape a & character
374 * occurring in an attribute value immediately followed by
375 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
376 */
377
Owen Taylor3473f882001-02-23 17:55:21 +0000378 if (cur == NULL) {
379 xmlGenericError(xmlGenericErrorContext,
380 "htmlAttrDump : property == NULL\n");
381 return;
382 }
383 xmlBufferWriteChar(buf, " ");
384 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillardc084e472002-08-12 13:27:28 +0000385 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000386 value = xmlNodeListGetString(doc, cur->children, 0);
387 if (value) {
388 xmlBufferWriteChar(buf, "=");
Daniel Veillardad11b302002-08-12 14:53:41 +0000389 if ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
390 (!xmlStrcasecmp(cur->name, BAD_CAST "src"))) {
Daniel Veillardeb475a32002-04-14 22:00:22 +0000391 xmlChar *escaped;
392 xmlChar *tmp = value;
393
394 while (IS_BLANK(*tmp)) tmp++;
395
Daniel Veillard6231e842002-04-18 11:54:04 +0000396 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&");
Daniel Veillardeb475a32002-04-14 22:00:22 +0000397 if (escaped != NULL) {
398 xmlBufferWriteQuotedString(buf, escaped);
399 xmlFree(escaped);
400 } else {
401 xmlBufferWriteQuotedString(buf, value);
402 }
403 } else {
404 xmlBufferWriteQuotedString(buf, value);
405 }
Owen Taylor3473f882001-02-23 17:55:21 +0000406 xmlFree(value);
407 } else {
408 xmlBufferWriteChar(buf, "=\"\"");
409 }
410 }
411}
412
413/**
414 * htmlAttrListDump:
415 * @buf: the HTML buffer output
416 * @doc: the document
417 * @cur: the first attribute pointer
418 *
419 * Dump a list of HTML attributes
420 */
421static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000422htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, int format) {
423 int i = 0;
424
Owen Taylor3473f882001-02-23 17:55:21 +0000425 if (cur == NULL) {
426 xmlGenericError(xmlGenericErrorContext,
427 "htmlAttrListDump : property == NULL\n");
428 return;
429 }
430 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000431 i++;
432 if ((format) && (i >= 5)) {
433 i = 0;
434 xmlBufferWriteChar(buf, "\n");
435 }
Owen Taylor3473f882001-02-23 17:55:21 +0000436 htmlAttrDump(buf, doc, cur);
437 cur = cur->next;
438 }
439}
440
Owen Taylor3473f882001-02-23 17:55:21 +0000441/**
442 * htmlNodeListDump:
443 * @buf: the HTML buffer output
444 * @doc: the document
445 * @cur: the first node
446 *
447 * Dump an HTML node list, recursive behaviour,children are printed too.
448 */
449static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000450htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000451 if (cur == NULL) {
452 xmlGenericError(xmlGenericErrorContext,
453 "htmlNodeListDump : node == NULL\n");
454 return;
455 }
456 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000457 htmlNodeDumpFormat(buf, doc, cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000458 cur = cur->next;
459 }
460}
461
462/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000463 * htmlNodeDumpFormat:
Owen Taylor3473f882001-02-23 17:55:21 +0000464 * @buf: the HTML buffer output
465 * @doc: the document
466 * @cur: the current node
Daniel Veillard95d845f2001-06-13 13:48:46 +0000467 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000468 *
469 * Dump an HTML node, recursive behaviour,children are printed too.
470 */
Daniel Veillard86fd5a72001-12-13 14:55:21 +0000471static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000472htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
473 int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000474 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000475
476 if (cur == NULL) {
477 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000478 "htmlNodeDumpFormat : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000479 return;
480 }
481 /*
482 * Special cases.
483 */
484 if (cur->type == XML_DTD_NODE)
485 return;
486 if (cur->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000487 htmlDocContentDump(buf, (xmlDocPtr) cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000488 return;
489 }
490 if (cur->type == HTML_TEXT_NODE) {
491 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000492 if (((cur->name == (const xmlChar *)xmlStringText) ||
493 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000494 ((cur->parent == NULL) ||
Daniel Veillard44892f72002-10-16 15:23:26 +0000495 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
496 (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000497 xmlChar *buffer;
498
Owen Taylor3473f882001-02-23 17:55:21 +0000499 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000500 if (buffer != NULL) {
501 xmlBufferWriteCHAR(buf, buffer);
502 xmlFree(buffer);
503 }
504 } else {
505 xmlBufferWriteCHAR(buf, cur->content);
506 }
507 }
508 return;
509 }
510 if (cur->type == HTML_COMMENT_NODE) {
511 if (cur->content != NULL) {
512 xmlBufferWriteChar(buf, "<!--");
Owen Taylor3473f882001-02-23 17:55:21 +0000513 xmlBufferWriteCHAR(buf, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000514 xmlBufferWriteChar(buf, "-->");
515 }
516 return;
517 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000518 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000519 if (cur->name == NULL)
520 return;
521 xmlBufferWriteChar(buf, "<?");
522 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000523 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000524 xmlBufferWriteChar(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000525 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000526 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000527 xmlBufferWriteChar(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000528 return;
529 }
Owen Taylor3473f882001-02-23 17:55:21 +0000530 if (cur->type == HTML_ENTITY_REF_NODE) {
531 xmlBufferWriteChar(buf, "&");
532 xmlBufferWriteCHAR(buf, cur->name);
533 xmlBufferWriteChar(buf, ";");
534 return;
535 }
Daniel Veillard083c2662001-05-08 08:27:14 +0000536 if (cur->type == HTML_PRESERVE_NODE) {
537 if (cur->content != NULL) {
Daniel Veillard083c2662001-05-08 08:27:14 +0000538 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillard083c2662001-05-08 08:27:14 +0000539 }
540 return;
541 }
Owen Taylor3473f882001-02-23 17:55:21 +0000542
543 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000544 * Get specific HTML info for that node.
Owen Taylor3473f882001-02-23 17:55:21 +0000545 */
546 info = htmlTagLookup(cur->name);
547
548 xmlBufferWriteChar(buf, "<");
549 xmlBufferWriteCHAR(buf, cur->name);
550 if (cur->properties != NULL)
Daniel Veillard95d845f2001-06-13 13:48:46 +0000551 htmlAttrListDump(buf, doc, cur->properties, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000552
553 if ((info != NULL) && (info->empty)) {
554 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000555 if ((format) && (info != NULL) && (!info->isinline) &&
556 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000557 if ((cur->next->type != HTML_TEXT_NODE) &&
558 (cur->next->type != HTML_ENTITY_REF_NODE))
559 xmlBufferWriteChar(buf, "\n");
560 }
561 return;
562 }
Daniel Veillard7db37732001-07-12 01:20:08 +0000563 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
564 (cur->children == NULL)) {
Daniel Veillard083c2662001-05-08 08:27:14 +0000565 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +0000566 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
567 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000568 xmlBufferWriteChar(buf, ">");
Daniel Veillard083c2662001-05-08 08:27:14 +0000569 } else {
Owen Taylor3473f882001-02-23 17:55:21 +0000570 xmlBufferWriteChar(buf, "></");
571 xmlBufferWriteCHAR(buf, cur->name);
572 xmlBufferWriteChar(buf, ">");
573 }
Daniel Veillard02bb1702001-06-13 21:11:59 +0000574 if ((format) && (info != NULL) && (!info->isinline) &&
575 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000576 if ((cur->next->type != HTML_TEXT_NODE) &&
577 (cur->next->type != HTML_ENTITY_REF_NODE))
578 xmlBufferWriteChar(buf, "\n");
579 }
580 return;
581 }
582 xmlBufferWriteChar(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +0000583 if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000584 xmlChar *buffer;
585
Daniel Veillard083c2662001-05-08 08:27:14 +0000586 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000587 if (buffer != NULL) {
588 xmlBufferWriteCHAR(buf, buffer);
589 xmlFree(buffer);
590 }
591 }
592 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +0000593 if ((format) && (info != NULL) && (!info->isinline) &&
594 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000595 (cur->children->type != HTML_ENTITY_REF_NODE) &&
596 (cur->children != cur->last))
597 xmlBufferWriteChar(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +0000598 htmlNodeListDump(buf, doc, cur->children, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +0000599 if ((format) && (info != NULL) && (!info->isinline) &&
600 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000601 (cur->last->type != HTML_ENTITY_REF_NODE) &&
602 (cur->children != cur->last))
603 xmlBufferWriteChar(buf, "\n");
604 }
Owen Taylor3473f882001-02-23 17:55:21 +0000605 xmlBufferWriteChar(buf, "</");
606 xmlBufferWriteCHAR(buf, cur->name);
607 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000608 if ((format) && (info != NULL) && (!info->isinline) &&
609 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000610 if ((cur->next->type != HTML_TEXT_NODE) &&
611 (cur->next->type != HTML_ENTITY_REF_NODE))
612 xmlBufferWriteChar(buf, "\n");
613 }
614}
615
616/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000617 * htmlNodeDump:
618 * @buf: the HTML buffer output
619 * @doc: the document
620 * @cur: the current node
621 *
622 * Dump an HTML node, recursive behaviour,children are printed too,
623 * and formatting returns are added.
624 */
625void
626htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
627 htmlNodeDumpFormat(buf, doc, cur, 1);
628}
629
630/**
631 * htmlNodeDumpFileFormat:
632 * @out: the FILE pointer
633 * @doc: the document
634 * @cur: the current node
635 * @encoding: the document encoding
636 * @format: should formatting spaces been added
637 *
638 * Dump an HTML node, recursive behaviour,children are printed too.
639 *
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000640 * TODO: if encoding == NULL try to save in the doc encoding
641 *
642 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard95d845f2001-06-13 13:48:46 +0000643 */
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000644int
645htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
646 xmlNodePtr cur, const char *encoding, int format) {
647 xmlOutputBufferPtr buf;
648 xmlCharEncodingHandlerPtr handler = NULL;
649 int ret;
Daniel Veillard95d845f2001-06-13 13:48:46 +0000650
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000651 if (encoding != NULL) {
652 xmlCharEncoding enc;
653
654 enc = xmlParseCharEncoding(encoding);
655 if (enc != XML_CHAR_ENCODING_UTF8) {
656 handler = xmlFindCharEncodingHandler(encoding);
657 if (handler == NULL)
658 return(-1);
659 }
660 }
661
662 /*
663 * Fallback to HTML or ASCII when the encoding is unspecified
664 */
665 if (handler == NULL)
666 handler = xmlFindCharEncodingHandler("HTML");
667 if (handler == NULL)
668 handler = xmlFindCharEncodingHandler("ascii");
669
670 /*
671 * save the content to a temp buffer.
672 */
673 buf = xmlOutputBufferCreateFile(out, handler);
674 if (buf == NULL) return(0);
675
676 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
677
678 ret = xmlOutputBufferClose(buf);
679 return(ret);
Daniel Veillard95d845f2001-06-13 13:48:46 +0000680}
681
682/**
Owen Taylor3473f882001-02-23 17:55:21 +0000683 * htmlNodeDumpFile:
684 * @out: the FILE pointer
685 * @doc: the document
686 * @cur: the current node
687 *
Daniel Veillard95d845f2001-06-13 13:48:46 +0000688 * Dump an HTML node, recursive behaviour,children are printed too,
689 * and formatting returns are added.
Owen Taylor3473f882001-02-23 17:55:21 +0000690 */
691void
692htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000693 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000694}
695
696/**
697 * htmlDocContentDump:
698 * @buf: the HTML buffer output
699 * @cur: the document
700 *
701 * Dump an HTML document.
702 */
703static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000704htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000705 int type;
706
707 /*
708 * force to output the stuff as HTML, especially for entities
709 */
710 type = cur->type;
711 cur->type = XML_HTML_DOCUMENT_NODE;
712 if (cur->intSubset != NULL)
713 htmlDtdDump(buf, cur);
714 else {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000715 /* Default to HTML-4.0 transitional @@@@ */
Owen Taylor3473f882001-02-23 17:55:21 +0000716 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
717
718 }
719 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000720 htmlNodeListDump(buf, cur, cur->children, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000721 }
722 xmlBufferWriteChar(buf, "\n");
723 cur->type = (xmlElementType) type;
724}
725
726/**
727 * htmlDocDumpMemory:
728 * @cur: the document
729 * @mem: OUT: the memory pointer
Daniel Veillard2d703722001-05-30 18:32:34 +0000730 * @size: OUT: the memory length
Owen Taylor3473f882001-02-23 17:55:21 +0000731 *
732 * Dump an HTML document in memory and return the xmlChar * and it's size.
733 * It's up to the caller to free the memory.
734 */
735void
736htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard2d703722001-05-30 18:32:34 +0000737 xmlOutputBufferPtr buf;
738 xmlCharEncodingHandlerPtr handler = NULL;
739 const char *encoding;
Owen Taylor3473f882001-02-23 17:55:21 +0000740
741 if (cur == NULL) {
742#ifdef DEBUG_TREE
743 xmlGenericError(xmlGenericErrorContext,
Daniel Veillard2d703722001-05-30 18:32:34 +0000744 "htmlDocDumpMemory : document == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000745#endif
746 *mem = NULL;
747 *size = 0;
748 return;
749 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000750
751 encoding = (const char *) htmlGetMetaEncoding(cur);
752
753 if (encoding != NULL) {
754 xmlCharEncoding enc;
755
756 enc = xmlParseCharEncoding(encoding);
757 if (enc != cur->charset) {
758 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
759 /*
760 * Not supported yet
761 */
762 *mem = NULL;
763 *size = 0;
764 return;
765 }
766
767 handler = xmlFindCharEncodingHandler(encoding);
768 if (handler == NULL) {
769 *mem = NULL;
770 *size = 0;
771 return;
772 }
773 }
774 }
775
776 /*
777 * Fallback to HTML or ASCII when the encoding is unspecified
778 */
779 if (handler == NULL)
780 handler = xmlFindCharEncodingHandler("HTML");
781 if (handler == NULL)
782 handler = xmlFindCharEncodingHandler("ascii");
783
784 buf = xmlAllocOutputBuffer(handler);
Owen Taylor3473f882001-02-23 17:55:21 +0000785 if (buf == NULL) {
786 *mem = NULL;
787 *size = 0;
788 return;
789 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000790
791 htmlDocContentDumpOutput(buf, cur, NULL);
792 xmlOutputBufferFlush(buf);
793 if (buf->conv != NULL) {
794 *size = buf->conv->use;
795 *mem = xmlStrndup(buf->conv->content, *size);
796 } else {
797 *size = buf->buffer->use;
798 *mem = xmlStrndup(buf->buffer->content, *size);
799 }
800 (void)xmlOutputBufferClose(buf);
Owen Taylor3473f882001-02-23 17:55:21 +0000801}
802
803
804/************************************************************************
805 * *
806 * Dumping HTML tree content to an I/O output buffer *
807 * *
808 ************************************************************************/
809
Daniel Veillardc084e472002-08-12 13:27:28 +0000810
Owen Taylor3473f882001-02-23 17:55:21 +0000811/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000812 * htmlDtdDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000813 * @buf: the HTML buffer output
814 * @doc: the document
815 * @encoding: the encoding string
816 *
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000817 * TODO: check whether encoding is needed
818 *
Owen Taylor3473f882001-02-23 17:55:21 +0000819 * Dump the HTML document DTD, if any.
820 */
821static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000822htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000823 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000824 xmlDtdPtr cur = doc->intSubset;
825
826 if (cur == NULL) {
827 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000828 "htmlDtdDumpOutput : no internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000829 return;
830 }
831 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
832 xmlOutputBufferWriteString(buf, (const char *)cur->name);
833 if (cur->ExternalID != NULL) {
834 xmlOutputBufferWriteString(buf, " PUBLIC ");
835 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
836 if (cur->SystemID != NULL) {
837 xmlOutputBufferWriteString(buf, " ");
838 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
839 }
840 } else if (cur->SystemID != NULL) {
841 xmlOutputBufferWriteString(buf, " SYSTEM ");
842 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
843 }
844 xmlOutputBufferWriteString(buf, ">\n");
845}
846
847/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000848 * htmlAttrDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000849 * @buf: the HTML buffer output
850 * @doc: the document
851 * @cur: the attribute pointer
852 * @encoding: the encoding string
853 *
854 * Dump an HTML attribute
855 */
856static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000857htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000858 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000859 xmlChar *value;
860
Daniel Veillardeca60d02001-06-13 07:45:41 +0000861 /*
862 * TODO: The html output method should not escape a & character
863 * occurring in an attribute value immediately followed by
864 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
865 */
866
Owen Taylor3473f882001-02-23 17:55:21 +0000867 if (cur == NULL) {
868 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000869 "htmlAttrDumpOutput : property == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000870 return;
871 }
872 xmlOutputBufferWriteString(buf, " ");
873 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillardc084e472002-08-12 13:27:28 +0000874 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000875 value = xmlNodeListGetString(doc, cur->children, 0);
876 if (value) {
877 xmlOutputBufferWriteString(buf, "=");
Daniel Veillardad11b302002-08-12 14:53:41 +0000878 if ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
879 (!xmlStrcasecmp(cur->name, BAD_CAST "src"))) {
Daniel Veillardeb475a32002-04-14 22:00:22 +0000880 xmlChar *escaped;
881 xmlChar *tmp = value;
882
883 while (IS_BLANK(*tmp)) tmp++;
884
Daniel Veillard6231e842002-04-18 11:54:04 +0000885 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&");
Daniel Veillardeb475a32002-04-14 22:00:22 +0000886 if (escaped != NULL) {
887 xmlBufferWriteQuotedString(buf->buffer, escaped);
888 xmlFree(escaped);
889 } else {
890 xmlBufferWriteQuotedString(buf->buffer, value);
891 }
892 } else {
893 xmlBufferWriteQuotedString(buf->buffer, value);
894 }
Owen Taylor3473f882001-02-23 17:55:21 +0000895 xmlFree(value);
896 } else {
897 xmlOutputBufferWriteString(buf, "=\"\"");
898 }
899 }
900}
901
902/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000903 * htmlAttrListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000904 * @buf: the HTML buffer output
905 * @doc: the document
906 * @cur: the first attribute pointer
907 * @encoding: the encoding string
908 *
909 * Dump a list of HTML attributes
910 */
911static void
912htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
913 if (cur == NULL) {
914 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000915 "htmlAttrListDumpOutput : property == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000916 return;
917 }
918 while (cur != NULL) {
919 htmlAttrDumpOutput(buf, doc, cur, encoding);
920 cur = cur->next;
921 }
922}
923
924
925void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
926 xmlNodePtr cur, const char *encoding);
927
928/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000929 * htmlNodeListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000930 * @buf: the HTML buffer output
931 * @doc: the document
932 * @cur: the first node
933 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000934 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000935 *
936 * Dump an HTML node list, recursive behaviour,children are printed too.
937 */
938static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000939htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
940 xmlNodePtr cur, const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000941 if (cur == NULL) {
942 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000943 "htmlNodeListDumpOutput : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000944 return;
945 }
946 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000947 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000948 cur = cur->next;
949 }
950}
951
952/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000953 * htmlNodeDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000954 * @buf: the HTML buffer output
955 * @doc: the document
956 * @cur: the current node
957 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000958 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000959 *
960 * Dump an HTML node, recursive behaviour,children are printed too.
961 */
962void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000963htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
964 xmlNodePtr cur, const char *encoding, int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000965 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000966
967 if (cur == NULL) {
968 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000969 "htmlNodeDumpFormatOutput : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000970 return;
971 }
972 /*
973 * Special cases.
974 */
975 if (cur->type == XML_DTD_NODE)
976 return;
977 if (cur->type == XML_HTML_DOCUMENT_NODE) {
978 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
979 return;
980 }
981 if (cur->type == HTML_TEXT_NODE) {
982 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000983 if (((cur->name == (const xmlChar *)xmlStringText) ||
984 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000985 ((cur->parent == NULL) ||
Daniel Veillard44892f72002-10-16 15:23:26 +0000986 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
987 (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000988 xmlChar *buffer;
989
Owen Taylor3473f882001-02-23 17:55:21 +0000990 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000991 if (buffer != NULL) {
992 xmlOutputBufferWriteString(buf, (const char *)buffer);
993 xmlFree(buffer);
994 }
995 } else {
996 xmlOutputBufferWriteString(buf, (const char *)cur->content);
997 }
998 }
999 return;
1000 }
1001 if (cur->type == HTML_COMMENT_NODE) {
1002 if (cur->content != NULL) {
1003 xmlOutputBufferWriteString(buf, "<!--");
Owen Taylor3473f882001-02-23 17:55:21 +00001004 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +00001005 xmlOutputBufferWriteString(buf, "-->");
1006 }
1007 return;
1008 }
Daniel Veillard7533cc82001-04-24 15:52:00 +00001009 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +00001010 if (cur->name == NULL)
1011 return;
1012 xmlOutputBufferWriteString(buf, "<?");
1013 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +00001014 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +00001015 xmlOutputBufferWriteString(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +00001016 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Daniel Veillard7533cc82001-04-24 15:52:00 +00001017 }
Daniel Veillard5146f202001-04-25 10:29:44 +00001018 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +00001019 return;
1020 }
Owen Taylor3473f882001-02-23 17:55:21 +00001021 if (cur->type == HTML_ENTITY_REF_NODE) {
1022 xmlOutputBufferWriteString(buf, "&");
1023 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1024 xmlOutputBufferWriteString(buf, ";");
1025 return;
1026 }
1027 if (cur->type == HTML_PRESERVE_NODE) {
1028 if (cur->content != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001029 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +00001030 }
1031 return;
1032 }
1033
1034 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001035 * Get specific HTML info for that node.
Owen Taylor3473f882001-02-23 17:55:21 +00001036 */
1037 info = htmlTagLookup(cur->name);
1038
1039 xmlOutputBufferWriteString(buf, "<");
1040 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1041 if (cur->properties != NULL)
1042 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
1043
1044 if ((info != NULL) && (info->empty)) {
1045 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +00001046 if ((format) && (!info->isinline) && (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001047 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +00001048 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1049 (cur->parent != NULL) &&
1050 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001051 xmlOutputBufferWriteString(buf, "\n");
1052 }
1053 return;
1054 }
Daniel Veillard7db37732001-07-12 01:20:08 +00001055 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
1056 (cur->children == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001057 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +00001058 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
1059 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001060 xmlOutputBufferWriteString(buf, ">");
1061 } else {
1062 xmlOutputBufferWriteString(buf, "></");
1063 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1064 xmlOutputBufferWriteString(buf, ">");
1065 }
Daniel Veillard02bb1702001-06-13 21:11:59 +00001066 if ((format) && (cur->next != NULL) &&
1067 (info != NULL) && (!info->isinline)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001068 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +00001069 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1070 (cur->parent != NULL) &&
1071 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001072 xmlOutputBufferWriteString(buf, "\n");
1073 }
1074 return;
1075 }
1076 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +00001077 if ((cur->type != XML_ELEMENT_NODE) &&
1078 (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001079 /*
1080 * Uses the OutputBuffer property to automatically convert
1081 * invalids to charrefs
1082 */
1083
Owen Taylor3473f882001-02-23 17:55:21 +00001084 xmlOutputBufferWriteString(buf, (const char *) cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +00001085 }
1086 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +00001087 if ((format) && (info != NULL) && (!info->isinline) &&
1088 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001089 (cur->children->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001090 (cur->children != cur->last) &&
1091 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001092 xmlOutputBufferWriteString(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +00001093 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +00001094 if ((format) && (info != NULL) && (!info->isinline) &&
1095 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001096 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001097 (cur->children != cur->last) &&
1098 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001099 xmlOutputBufferWriteString(buf, "\n");
1100 }
Owen Taylor3473f882001-02-23 17:55:21 +00001101 xmlOutputBufferWriteString(buf, "</");
1102 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1103 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +00001104 if ((format) && (info != NULL) && (!info->isinline) &&
1105 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001106 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001107 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1108 (cur->parent != NULL) &&
1109 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001110 xmlOutputBufferWriteString(buf, "\n");
1111 }
1112}
1113
1114/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001115 * htmlNodeDumpOutput:
1116 * @buf: the HTML buffer output
1117 * @doc: the document
1118 * @cur: the current node
1119 * @encoding: the encoding string
1120 *
1121 * Dump an HTML node, recursive behaviour,children are printed too,
1122 * and formatting returns/spaces are added.
1123 */
1124void
1125htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
1126 xmlNodePtr cur, const char *encoding) {
1127 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
1128}
1129
1130/**
1131 * htmlDocContentDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +00001132 * @buf: the HTML buffer output
1133 * @cur: the document
1134 * @encoding: the encoding string
Daniel Veillard9d06d302002-01-22 18:15:52 +00001135 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +00001136 *
1137 * Dump an HTML document.
1138 */
1139void
Daniel Veillard95d845f2001-06-13 13:48:46 +00001140htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1141 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001142 int type;
1143
1144 /*
1145 * force to output the stuff as HTML, especially for entities
1146 */
1147 type = cur->type;
1148 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard4dd93462001-04-02 15:16:19 +00001149 if (cur->intSubset != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001150 htmlDtdDumpOutput(buf, cur, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001151 }
1152 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +00001153 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001154 }
1155 xmlOutputBufferWriteString(buf, "\n");
1156 cur->type = (xmlElementType) type;
1157}
1158
Daniel Veillard95d845f2001-06-13 13:48:46 +00001159/**
1160 * htmlDocContentDumpOutput:
1161 * @buf: the HTML buffer output
1162 * @cur: the document
1163 * @encoding: the encoding string
1164 *
1165 * Dump an HTML document. Formating return/spaces are added.
1166 */
1167void
1168htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1169 const char *encoding) {
1170 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1171}
1172
Owen Taylor3473f882001-02-23 17:55:21 +00001173/************************************************************************
1174 * *
1175 * Saving functions front-ends *
1176 * *
1177 ************************************************************************/
1178
1179/**
1180 * htmlDocDump:
1181 * @f: the FILE*
1182 * @cur: the document
1183 *
1184 * Dump an HTML document to an open FILE.
1185 *
1186 * returns: the number of byte written or -1 in case of failure.
1187 */
1188int
1189htmlDocDump(FILE *f, xmlDocPtr cur) {
1190 xmlOutputBufferPtr buf;
1191 xmlCharEncodingHandlerPtr handler = NULL;
1192 const char *encoding;
1193 int ret;
1194
1195 if (cur == NULL) {
1196#ifdef DEBUG_TREE
1197 xmlGenericError(xmlGenericErrorContext,
1198 "htmlDocDump : document == NULL\n");
1199#endif
1200 return(-1);
1201 }
1202
1203 encoding = (const char *) htmlGetMetaEncoding(cur);
1204
1205 if (encoding != NULL) {
1206 xmlCharEncoding enc;
1207
1208 enc = xmlParseCharEncoding(encoding);
1209 if (enc != cur->charset) {
1210 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1211 /*
1212 * Not supported yet
1213 */
1214 return(-1);
1215 }
1216
1217 handler = xmlFindCharEncodingHandler(encoding);
1218 if (handler == NULL)
1219 return(-1);
1220 }
1221 }
1222
1223 /*
1224 * Fallback to HTML or ASCII when the encoding is unspecified
1225 */
1226 if (handler == NULL)
1227 handler = xmlFindCharEncodingHandler("HTML");
1228 if (handler == NULL)
1229 handler = xmlFindCharEncodingHandler("ascii");
1230
1231 buf = xmlOutputBufferCreateFile(f, handler);
1232 if (buf == NULL) return(-1);
1233 htmlDocContentDumpOutput(buf, cur, NULL);
1234
1235 ret = xmlOutputBufferClose(buf);
1236 return(ret);
1237}
1238
1239/**
1240 * htmlSaveFile:
1241 * @filename: the filename (or URL)
1242 * @cur: the document
1243 *
1244 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1245 * used.
1246 * returns: the number of byte written or -1 in case of failure.
1247 */
1248int
1249htmlSaveFile(const char *filename, xmlDocPtr cur) {
1250 xmlOutputBufferPtr buf;
1251 xmlCharEncodingHandlerPtr handler = NULL;
1252 const char *encoding;
1253 int ret;
1254
1255 encoding = (const char *) htmlGetMetaEncoding(cur);
1256
1257 if (encoding != NULL) {
1258 xmlCharEncoding enc;
1259
1260 enc = xmlParseCharEncoding(encoding);
1261 if (enc != cur->charset) {
1262 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1263 /*
1264 * Not supported yet
1265 */
1266 return(-1);
1267 }
1268
1269 handler = xmlFindCharEncodingHandler(encoding);
1270 if (handler == NULL)
1271 return(-1);
1272 }
1273 }
1274
1275 /*
1276 * Fallback to HTML or ASCII when the encoding is unspecified
1277 */
1278 if (handler == NULL)
1279 handler = xmlFindCharEncodingHandler("HTML");
1280 if (handler == NULL)
1281 handler = xmlFindCharEncodingHandler("ascii");
1282
1283 /*
1284 * save the content to a temp buffer.
1285 */
1286 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1287 if (buf == NULL) return(0);
1288
1289 htmlDocContentDumpOutput(buf, cur, NULL);
1290
1291 ret = xmlOutputBufferClose(buf);
1292 return(ret);
1293}
1294
1295/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001296 * htmlSaveFileFormat:
Owen Taylor3473f882001-02-23 17:55:21 +00001297 * @filename: the filename
1298 * @cur: the document
Daniel Veillard95d845f2001-06-13 13:48:46 +00001299 * @format: should formatting spaces been added
1300 * @encoding: the document encoding
Owen Taylor3473f882001-02-23 17:55:21 +00001301 *
1302 * Dump an HTML document to a file using a given encoding.
1303 *
1304 * returns: the number of byte written or -1 in case of failure.
1305 */
1306int
Daniel Veillard95d845f2001-06-13 13:48:46 +00001307htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1308 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001309 xmlOutputBufferPtr buf;
1310 xmlCharEncodingHandlerPtr handler = NULL;
1311 int ret;
1312
1313 if (encoding != NULL) {
1314 xmlCharEncoding enc;
1315
1316 enc = xmlParseCharEncoding(encoding);
1317 if (enc != cur->charset) {
1318 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1319 /*
1320 * Not supported yet
1321 */
1322 return(-1);
1323 }
1324
1325 handler = xmlFindCharEncodingHandler(encoding);
1326 if (handler == NULL)
1327 return(-1);
1328 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1329 }
Daniel Veillard4dd93462001-04-02 15:16:19 +00001330 } else {
1331 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
Owen Taylor3473f882001-02-23 17:55:21 +00001332 }
1333
1334 /*
1335 * Fallback to HTML or ASCII when the encoding is unspecified
1336 */
1337 if (handler == NULL)
1338 handler = xmlFindCharEncodingHandler("HTML");
1339 if (handler == NULL)
1340 handler = xmlFindCharEncodingHandler("ascii");
1341
1342 /*
1343 * save the content to a temp buffer.
1344 */
1345 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1346 if (buf == NULL) return(0);
1347
Daniel Veillard95d845f2001-06-13 13:48:46 +00001348 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001349
1350 ret = xmlOutputBufferClose(buf);
1351 return(ret);
1352}
Daniel Veillard95d845f2001-06-13 13:48:46 +00001353
1354/**
1355 * htmlSaveFileEnc:
1356 * @filename: the filename
1357 * @cur: the document
1358 * @encoding: the document encoding
1359 *
1360 * Dump an HTML document to a file using a given encoding
1361 * and formatting returns/spaces are added.
1362 *
1363 * returns: the number of byte written or -1 in case of failure.
1364 */
1365int
1366htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1367 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1368}
1369
Daniel Veillardc084e472002-08-12 13:27:28 +00001370
1371
Owen Taylor3473f882001-02-23 17:55:21 +00001372#endif /* LIBXML_HTML_ENABLED */