blob: b0946292218d87f17a49f93100a06516ffdd250d [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002 * HTMLtree.c : implementation of access function for an HTML tree.
Owen Taylor3473f882001-02-23 17:55:21 +00003 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00006 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00007 */
8
9
Daniel Veillard34ce8be2002-03-18 19:37:11 +000010#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000012#ifdef LIBXML_HTML_ENABLED
13
Owen Taylor3473f882001-02-23 17:55:21 +000014#ifdef HAVE_CTYPE_H
15#include <ctype.h>
16#endif
17#ifdef HAVE_STDLIB_H
18#include <stdlib.h>
19#endif
20
21#include <libxml/xmlmemory.h>
22#include <libxml/HTMLparser.h>
23#include <libxml/HTMLtree.h>
24#include <libxml/entities.h>
25#include <libxml/valid.h>
26#include <libxml/xmlerror.h>
27#include <libxml/parserInternals.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000028#include <libxml/globals.h>
Daniel Veillardeb475a32002-04-14 22:00:22 +000029#include <libxml/uri.h>
Owen Taylor3473f882001-02-23 17:55:21 +000030
31/************************************************************************
32 * *
33 * Getting/Setting encoding meta tags *
34 * *
35 ************************************************************************/
36
37/**
38 * htmlGetMetaEncoding:
39 * @doc: the document
40 *
41 * Encoding definition lookup in the Meta tags
42 *
43 * Returns the current encoding as flagged in the HTML source
44 */
45const xmlChar *
46htmlGetMetaEncoding(htmlDocPtr doc) {
47 htmlNodePtr cur;
48 const xmlChar *content;
49 const xmlChar *encoding;
50
51 if (doc == NULL)
52 return(NULL);
53 cur = doc->children;
54
55 /*
56 * Search the html
57 */
58 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000059 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000060 if (xmlStrEqual(cur->name, BAD_CAST"html"))
61 break;
62 if (xmlStrEqual(cur->name, BAD_CAST"head"))
63 goto found_head;
64 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
65 goto found_meta;
66 }
67 cur = cur->next;
68 }
69 if (cur == NULL)
70 return(NULL);
71 cur = cur->children;
72
73 /*
74 * Search the head
75 */
76 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000077 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000078 if (xmlStrEqual(cur->name, BAD_CAST"head"))
79 break;
80 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
81 goto found_meta;
82 }
83 cur = cur->next;
84 }
85 if (cur == NULL)
86 return(NULL);
87found_head:
88 cur = cur->children;
89
90 /*
91 * Search the meta elements
92 */
93found_meta:
94 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000095 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000096 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
97 xmlAttrPtr attr = cur->properties;
98 int http;
99 const xmlChar *value;
100
101 content = NULL;
102 http = 0;
103 while (attr != NULL) {
104 if ((attr->children != NULL) &&
105 (attr->children->type == XML_TEXT_NODE) &&
106 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000107 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000108 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
109 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
110 http = 1;
111 else if ((value != NULL)
112 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
113 content = value;
114 if ((http != 0) && (content != NULL))
115 goto found_content;
116 }
117 attr = attr->next;
118 }
119 }
120 }
121 cur = cur->next;
122 }
123 return(NULL);
124
125found_content:
126 encoding = xmlStrstr(content, BAD_CAST"charset=");
127 if (encoding == NULL)
128 encoding = xmlStrstr(content, BAD_CAST"Charset=");
129 if (encoding == NULL)
130 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
131 if (encoding != NULL) {
132 encoding += 8;
133 } else {
134 encoding = xmlStrstr(content, BAD_CAST"charset =");
135 if (encoding == NULL)
136 encoding = xmlStrstr(content, BAD_CAST"Charset =");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
139 if (encoding != NULL)
140 encoding += 9;
141 }
142 if (encoding != NULL) {
143 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
144 }
145 return(encoding);
146}
147
148/**
149 * htmlSetMetaEncoding:
150 * @doc: the document
151 * @encoding: the encoding string
152 *
153 * Sets the current encoding in the Meta tags
154 * NOTE: this will not change the document content encoding, just
155 * the META flag associated.
156 *
157 * Returns 0 in case of success and -1 in case of error
158 */
159int
160htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
161 htmlNodePtr cur, meta;
162 const xmlChar *content;
163 char newcontent[100];
164
165
166 if (doc == NULL)
167 return(-1);
168
169 if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000170 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
171 encoding);
Owen Taylor3473f882001-02-23 17:55:21 +0000172 newcontent[sizeof(newcontent) - 1] = 0;
173 }
174
175 cur = doc->children;
176
177 /*
178 * Search the html
179 */
180 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000181 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000182 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
183 break;
184 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
185 goto found_head;
186 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
187 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000188 }
189 cur = cur->next;
190 }
191 if (cur == NULL)
192 return(-1);
193 cur = cur->children;
194
195 /*
196 * Search the head
197 */
198 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000199 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000200 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
201 break;
202 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
203 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000204 }
205 cur = cur->next;
206 }
207 if (cur == NULL)
208 return(-1);
209found_head:
210 if (cur->children == NULL) {
211 if (encoding == NULL)
212 return(0);
213 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
214 xmlAddChild(cur, meta);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000215 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Daniel Veillard3a42f3f2002-07-17 17:57:34 +0000216 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Owen Taylor3473f882001-02-23 17:55:21 +0000217 return(0);
218 }
219 cur = cur->children;
220
221found_meta:
222 if (encoding != NULL) {
223 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000224 * Create a new Meta element with the right attributes
Owen Taylor3473f882001-02-23 17:55:21 +0000225 */
226
227 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
228 xmlAddPrevSibling(cur, meta);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000229 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Daniel Veillard3a42f3f2002-07-17 17:57:34 +0000230 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Owen Taylor3473f882001-02-23 17:55:21 +0000231 }
232
233 /*
234 * Search and destroy all the remaining the meta elements carrying
235 * encoding informations
236 */
237 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000238 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000239 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000240 xmlAttrPtr attr = cur->properties;
241 int http;
242 const xmlChar *value;
243
244 content = NULL;
245 http = 0;
246 while (attr != NULL) {
247 if ((attr->children != NULL) &&
248 (attr->children->type == XML_TEXT_NODE) &&
249 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000250 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000251 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
252 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
253 http = 1;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000254 else
255 {
256 if ((value != NULL) &&
257 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
258 content = value;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000259 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000260 if ((http != 0) && (content != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +0000261 break;
262 }
263 attr = attr->next;
264 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000265 if ((http != 0) && (content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000266 meta = cur;
267 cur = cur->next;
268 xmlUnlinkNode(meta);
269 xmlFreeNode(meta);
270 continue;
271 }
272
273 }
274 }
275 cur = cur->next;
276 }
277 return(0);
278}
279
280/************************************************************************
281 * *
282 * Dumping HTML tree content to a simple buffer *
283 * *
284 ************************************************************************/
285
286static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000287htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format);
Daniel Veillard86fd5a72001-12-13 14:55:21 +0000288static void
289htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
290 int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000291
292/**
293 * htmlDtdDump:
294 * @buf: the HTML buffer output
295 * @doc: the document
296 *
297 * Dump the HTML document DTD, if any.
298 */
299static void
300htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
301 xmlDtdPtr cur = doc->intSubset;
302
303 if (cur == NULL) {
304 xmlGenericError(xmlGenericErrorContext,
305 "htmlDtdDump : no internal subset\n");
306 return;
307 }
308 xmlBufferWriteChar(buf, "<!DOCTYPE ");
309 xmlBufferWriteCHAR(buf, cur->name);
310 if (cur->ExternalID != NULL) {
311 xmlBufferWriteChar(buf, " PUBLIC ");
312 xmlBufferWriteQuotedString(buf, cur->ExternalID);
313 if (cur->SystemID != NULL) {
314 xmlBufferWriteChar(buf, " ");
315 xmlBufferWriteQuotedString(buf, cur->SystemID);
316 }
317 } else if (cur->SystemID != NULL) {
318 xmlBufferWriteChar(buf, " SYSTEM ");
319 xmlBufferWriteQuotedString(buf, cur->SystemID);
320 }
321 xmlBufferWriteChar(buf, ">\n");
322}
323
324/**
325 * htmlAttrDump:
326 * @buf: the HTML buffer output
327 * @doc: the document
328 * @cur: the attribute pointer
329 *
330 * Dump an HTML attribute
331 */
332static void
333htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
334 xmlChar *value;
335
Daniel Veillardeca60d02001-06-13 07:45:41 +0000336 /*
337 * TODO: The html output method should not escape a & character
338 * occurring in an attribute value immediately followed by
339 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
340 */
341
Owen Taylor3473f882001-02-23 17:55:21 +0000342 if (cur == NULL) {
343 xmlGenericError(xmlGenericErrorContext,
344 "htmlAttrDump : property == NULL\n");
345 return;
346 }
347 xmlBufferWriteChar(buf, " ");
348 xmlBufferWriteCHAR(buf, cur->name);
349 if (cur->children != NULL) {
350 value = xmlNodeListGetString(doc, cur->children, 0);
351 if (value) {
352 xmlBufferWriteChar(buf, "=");
Daniel Veillardeb475a32002-04-14 22:00:22 +0000353 if ((xmlStrEqual(cur->name, BAD_CAST "href")) ||
354 (xmlStrEqual(cur->name, BAD_CAST "src"))) {
355 xmlChar *escaped;
356 xmlChar *tmp = value;
357
358 while (IS_BLANK(*tmp)) tmp++;
359
Daniel Veillard6231e842002-04-18 11:54:04 +0000360 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&");
Daniel Veillardeb475a32002-04-14 22:00:22 +0000361 if (escaped != NULL) {
362 xmlBufferWriteQuotedString(buf, escaped);
363 xmlFree(escaped);
364 } else {
365 xmlBufferWriteQuotedString(buf, value);
366 }
367 } else {
368 xmlBufferWriteQuotedString(buf, value);
369 }
Owen Taylor3473f882001-02-23 17:55:21 +0000370 xmlFree(value);
371 } else {
372 xmlBufferWriteChar(buf, "=\"\"");
373 }
374 }
375}
376
377/**
378 * htmlAttrListDump:
379 * @buf: the HTML buffer output
380 * @doc: the document
381 * @cur: the first attribute pointer
382 *
383 * Dump a list of HTML attributes
384 */
385static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000386htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, int format) {
387 int i = 0;
388
Owen Taylor3473f882001-02-23 17:55:21 +0000389 if (cur == NULL) {
390 xmlGenericError(xmlGenericErrorContext,
391 "htmlAttrListDump : property == NULL\n");
392 return;
393 }
394 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000395 i++;
396 if ((format) && (i >= 5)) {
397 i = 0;
398 xmlBufferWriteChar(buf, "\n");
399 }
Owen Taylor3473f882001-02-23 17:55:21 +0000400 htmlAttrDump(buf, doc, cur);
401 cur = cur->next;
402 }
403}
404
Owen Taylor3473f882001-02-23 17:55:21 +0000405/**
406 * htmlNodeListDump:
407 * @buf: the HTML buffer output
408 * @doc: the document
409 * @cur: the first node
410 *
411 * Dump an HTML node list, recursive behaviour,children are printed too.
412 */
413static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000414htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000415 if (cur == NULL) {
416 xmlGenericError(xmlGenericErrorContext,
417 "htmlNodeListDump : node == NULL\n");
418 return;
419 }
420 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000421 htmlNodeDumpFormat(buf, doc, cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000422 cur = cur->next;
423 }
424}
425
426/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000427 * htmlNodeDumpFormat:
Owen Taylor3473f882001-02-23 17:55:21 +0000428 * @buf: the HTML buffer output
429 * @doc: the document
430 * @cur: the current node
Daniel Veillard95d845f2001-06-13 13:48:46 +0000431 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000432 *
433 * Dump an HTML node, recursive behaviour,children are printed too.
434 */
Daniel Veillard86fd5a72001-12-13 14:55:21 +0000435static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000436htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
437 int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000438 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000439
440 if (cur == NULL) {
441 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000442 "htmlNodeDumpFormat : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000443 return;
444 }
445 /*
446 * Special cases.
447 */
448 if (cur->type == XML_DTD_NODE)
449 return;
450 if (cur->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000451 htmlDocContentDump(buf, (xmlDocPtr) cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000452 return;
453 }
454 if (cur->type == HTML_TEXT_NODE) {
455 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000456 if (((cur->name == (const xmlChar *)xmlStringText) ||
457 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000458 ((cur->parent == NULL) ||
Daniel Veillard0b22def2002-07-29 16:23:03 +0000459 ((!xmlStrEqual(cur->parent->name, BAD_CAST "script")) &&
460 (!xmlStrEqual(cur->parent->name, BAD_CAST "style"))))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000461 xmlChar *buffer;
462
Owen Taylor3473f882001-02-23 17:55:21 +0000463 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000464 if (buffer != NULL) {
465 xmlBufferWriteCHAR(buf, buffer);
466 xmlFree(buffer);
467 }
468 } else {
469 xmlBufferWriteCHAR(buf, cur->content);
470 }
471 }
472 return;
473 }
474 if (cur->type == HTML_COMMENT_NODE) {
475 if (cur->content != NULL) {
476 xmlBufferWriteChar(buf, "<!--");
Owen Taylor3473f882001-02-23 17:55:21 +0000477 xmlBufferWriteCHAR(buf, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000478 xmlBufferWriteChar(buf, "-->");
479 }
480 return;
481 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000482 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000483 if (cur->name == NULL)
484 return;
485 xmlBufferWriteChar(buf, "<?");
486 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000487 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000488 xmlBufferWriteChar(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000489 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000490 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000491 xmlBufferWriteChar(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000492 return;
493 }
Owen Taylor3473f882001-02-23 17:55:21 +0000494 if (cur->type == HTML_ENTITY_REF_NODE) {
495 xmlBufferWriteChar(buf, "&");
496 xmlBufferWriteCHAR(buf, cur->name);
497 xmlBufferWriteChar(buf, ";");
498 return;
499 }
Daniel Veillard083c2662001-05-08 08:27:14 +0000500 if (cur->type == HTML_PRESERVE_NODE) {
501 if (cur->content != NULL) {
Daniel Veillard083c2662001-05-08 08:27:14 +0000502 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillard083c2662001-05-08 08:27:14 +0000503 }
504 return;
505 }
Owen Taylor3473f882001-02-23 17:55:21 +0000506
507 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000508 * Get specific HTML info for that node.
Owen Taylor3473f882001-02-23 17:55:21 +0000509 */
510 info = htmlTagLookup(cur->name);
511
512 xmlBufferWriteChar(buf, "<");
513 xmlBufferWriteCHAR(buf, cur->name);
514 if (cur->properties != NULL)
Daniel Veillard95d845f2001-06-13 13:48:46 +0000515 htmlAttrListDump(buf, doc, cur->properties, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000516
517 if ((info != NULL) && (info->empty)) {
518 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000519 if ((format) && (info != NULL) && (!info->isinline) &&
520 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000521 if ((cur->next->type != HTML_TEXT_NODE) &&
522 (cur->next->type != HTML_ENTITY_REF_NODE))
523 xmlBufferWriteChar(buf, "\n");
524 }
525 return;
526 }
Daniel Veillard7db37732001-07-12 01:20:08 +0000527 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
528 (cur->children == NULL)) {
Daniel Veillard083c2662001-05-08 08:27:14 +0000529 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +0000530 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
531 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000532 xmlBufferWriteChar(buf, ">");
Daniel Veillard083c2662001-05-08 08:27:14 +0000533 } else {
Owen Taylor3473f882001-02-23 17:55:21 +0000534 xmlBufferWriteChar(buf, "></");
535 xmlBufferWriteCHAR(buf, cur->name);
536 xmlBufferWriteChar(buf, ">");
537 }
Daniel Veillard02bb1702001-06-13 21:11:59 +0000538 if ((format) && (info != NULL) && (!info->isinline) &&
539 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000540 if ((cur->next->type != HTML_TEXT_NODE) &&
541 (cur->next->type != HTML_ENTITY_REF_NODE))
542 xmlBufferWriteChar(buf, "\n");
543 }
544 return;
545 }
546 xmlBufferWriteChar(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +0000547 if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000548 xmlChar *buffer;
549
Daniel Veillard083c2662001-05-08 08:27:14 +0000550 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000551 if (buffer != NULL) {
552 xmlBufferWriteCHAR(buf, buffer);
553 xmlFree(buffer);
554 }
555 }
556 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +0000557 if ((format) && (info != NULL) && (!info->isinline) &&
558 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000559 (cur->children->type != HTML_ENTITY_REF_NODE) &&
560 (cur->children != cur->last))
561 xmlBufferWriteChar(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +0000562 htmlNodeListDump(buf, doc, cur->children, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +0000563 if ((format) && (info != NULL) && (!info->isinline) &&
564 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000565 (cur->last->type != HTML_ENTITY_REF_NODE) &&
566 (cur->children != cur->last))
567 xmlBufferWriteChar(buf, "\n");
568 }
Owen Taylor3473f882001-02-23 17:55:21 +0000569 xmlBufferWriteChar(buf, "</");
570 xmlBufferWriteCHAR(buf, cur->name);
571 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000572 if ((format) && (info != NULL) && (!info->isinline) &&
573 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000574 if ((cur->next->type != HTML_TEXT_NODE) &&
575 (cur->next->type != HTML_ENTITY_REF_NODE))
576 xmlBufferWriteChar(buf, "\n");
577 }
578}
579
580/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000581 * htmlNodeDump:
582 * @buf: the HTML buffer output
583 * @doc: the document
584 * @cur: the current node
585 *
586 * Dump an HTML node, recursive behaviour,children are printed too,
587 * and formatting returns are added.
588 */
589void
590htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
591 htmlNodeDumpFormat(buf, doc, cur, 1);
592}
593
594/**
595 * htmlNodeDumpFileFormat:
596 * @out: the FILE pointer
597 * @doc: the document
598 * @cur: the current node
599 * @encoding: the document encoding
600 * @format: should formatting spaces been added
601 *
602 * Dump an HTML node, recursive behaviour,children are printed too.
603 *
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000604 * TODO: if encoding == NULL try to save in the doc encoding
605 *
606 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard95d845f2001-06-13 13:48:46 +0000607 */
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000608int
609htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
610 xmlNodePtr cur, const char *encoding, int format) {
611 xmlOutputBufferPtr buf;
612 xmlCharEncodingHandlerPtr handler = NULL;
613 int ret;
Daniel Veillard95d845f2001-06-13 13:48:46 +0000614
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000615 if (encoding != NULL) {
616 xmlCharEncoding enc;
617
618 enc = xmlParseCharEncoding(encoding);
619 if (enc != XML_CHAR_ENCODING_UTF8) {
620 handler = xmlFindCharEncodingHandler(encoding);
621 if (handler == NULL)
622 return(-1);
623 }
624 }
625
626 /*
627 * Fallback to HTML or ASCII when the encoding is unspecified
628 */
629 if (handler == NULL)
630 handler = xmlFindCharEncodingHandler("HTML");
631 if (handler == NULL)
632 handler = xmlFindCharEncodingHandler("ascii");
633
634 /*
635 * save the content to a temp buffer.
636 */
637 buf = xmlOutputBufferCreateFile(out, handler);
638 if (buf == NULL) return(0);
639
640 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
641
642 ret = xmlOutputBufferClose(buf);
643 return(ret);
Daniel Veillard95d845f2001-06-13 13:48:46 +0000644}
645
646/**
Owen Taylor3473f882001-02-23 17:55:21 +0000647 * htmlNodeDumpFile:
648 * @out: the FILE pointer
649 * @doc: the document
650 * @cur: the current node
651 *
Daniel Veillard95d845f2001-06-13 13:48:46 +0000652 * Dump an HTML node, recursive behaviour,children are printed too,
653 * and formatting returns are added.
Owen Taylor3473f882001-02-23 17:55:21 +0000654 */
655void
656htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000657 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000658}
659
660/**
661 * htmlDocContentDump:
662 * @buf: the HTML buffer output
663 * @cur: the document
664 *
665 * Dump an HTML document.
666 */
667static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000668htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000669 int type;
670
671 /*
672 * force to output the stuff as HTML, especially for entities
673 */
674 type = cur->type;
675 cur->type = XML_HTML_DOCUMENT_NODE;
676 if (cur->intSubset != NULL)
677 htmlDtdDump(buf, cur);
678 else {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000679 /* Default to HTML-4.0 transitional @@@@ */
Owen Taylor3473f882001-02-23 17:55:21 +0000680 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
681
682 }
683 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000684 htmlNodeListDump(buf, cur, cur->children, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000685 }
686 xmlBufferWriteChar(buf, "\n");
687 cur->type = (xmlElementType) type;
688}
689
690/**
691 * htmlDocDumpMemory:
692 * @cur: the document
693 * @mem: OUT: the memory pointer
Daniel Veillard2d703722001-05-30 18:32:34 +0000694 * @size: OUT: the memory length
Owen Taylor3473f882001-02-23 17:55:21 +0000695 *
696 * Dump an HTML document in memory and return the xmlChar * and it's size.
697 * It's up to the caller to free the memory.
698 */
699void
700htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard2d703722001-05-30 18:32:34 +0000701 xmlOutputBufferPtr buf;
702 xmlCharEncodingHandlerPtr handler = NULL;
703 const char *encoding;
Owen Taylor3473f882001-02-23 17:55:21 +0000704
705 if (cur == NULL) {
706#ifdef DEBUG_TREE
707 xmlGenericError(xmlGenericErrorContext,
Daniel Veillard2d703722001-05-30 18:32:34 +0000708 "htmlDocDumpMemory : document == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000709#endif
710 *mem = NULL;
711 *size = 0;
712 return;
713 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000714
715 encoding = (const char *) htmlGetMetaEncoding(cur);
716
717 if (encoding != NULL) {
718 xmlCharEncoding enc;
719
720 enc = xmlParseCharEncoding(encoding);
721 if (enc != cur->charset) {
722 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
723 /*
724 * Not supported yet
725 */
726 *mem = NULL;
727 *size = 0;
728 return;
729 }
730
731 handler = xmlFindCharEncodingHandler(encoding);
732 if (handler == NULL) {
733 *mem = NULL;
734 *size = 0;
735 return;
736 }
737 }
738 }
739
740 /*
741 * Fallback to HTML or ASCII when the encoding is unspecified
742 */
743 if (handler == NULL)
744 handler = xmlFindCharEncodingHandler("HTML");
745 if (handler == NULL)
746 handler = xmlFindCharEncodingHandler("ascii");
747
748 buf = xmlAllocOutputBuffer(handler);
Owen Taylor3473f882001-02-23 17:55:21 +0000749 if (buf == NULL) {
750 *mem = NULL;
751 *size = 0;
752 return;
753 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000754
755 htmlDocContentDumpOutput(buf, cur, NULL);
756 xmlOutputBufferFlush(buf);
757 if (buf->conv != NULL) {
758 *size = buf->conv->use;
759 *mem = xmlStrndup(buf->conv->content, *size);
760 } else {
761 *size = buf->buffer->use;
762 *mem = xmlStrndup(buf->buffer->content, *size);
763 }
764 (void)xmlOutputBufferClose(buf);
Owen Taylor3473f882001-02-23 17:55:21 +0000765}
766
767
768/************************************************************************
769 * *
770 * Dumping HTML tree content to an I/O output buffer *
771 * *
772 ************************************************************************/
773
774/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000775 * htmlDtdDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000776 * @buf: the HTML buffer output
777 * @doc: the document
778 * @encoding: the encoding string
779 *
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000780 * TODO: check whether encoding is needed
781 *
Owen Taylor3473f882001-02-23 17:55:21 +0000782 * Dump the HTML document DTD, if any.
783 */
784static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000785htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000786 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000787 xmlDtdPtr cur = doc->intSubset;
788
789 if (cur == NULL) {
790 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000791 "htmlDtdDumpOutput : no internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000792 return;
793 }
794 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
795 xmlOutputBufferWriteString(buf, (const char *)cur->name);
796 if (cur->ExternalID != NULL) {
797 xmlOutputBufferWriteString(buf, " PUBLIC ");
798 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
799 if (cur->SystemID != NULL) {
800 xmlOutputBufferWriteString(buf, " ");
801 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
802 }
803 } else if (cur->SystemID != NULL) {
804 xmlOutputBufferWriteString(buf, " SYSTEM ");
805 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
806 }
807 xmlOutputBufferWriteString(buf, ">\n");
808}
809
810/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000811 * htmlAttrDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000812 * @buf: the HTML buffer output
813 * @doc: the document
814 * @cur: the attribute pointer
815 * @encoding: the encoding string
816 *
817 * Dump an HTML attribute
818 */
819static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000820htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000821 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000822 xmlChar *value;
823
Daniel Veillardeca60d02001-06-13 07:45:41 +0000824 /*
825 * TODO: The html output method should not escape a & character
826 * occurring in an attribute value immediately followed by
827 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
828 */
829
Owen Taylor3473f882001-02-23 17:55:21 +0000830 if (cur == NULL) {
831 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000832 "htmlAttrDumpOutput : property == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000833 return;
834 }
835 xmlOutputBufferWriteString(buf, " ");
836 xmlOutputBufferWriteString(buf, (const char *)cur->name);
837 if (cur->children != NULL) {
838 value = xmlNodeListGetString(doc, cur->children, 0);
839 if (value) {
840 xmlOutputBufferWriteString(buf, "=");
Daniel Veillardeb475a32002-04-14 22:00:22 +0000841 if ((xmlStrEqual(cur->name, BAD_CAST "href")) ||
842 (xmlStrEqual(cur->name, BAD_CAST "src"))) {
843 xmlChar *escaped;
844 xmlChar *tmp = value;
845
846 while (IS_BLANK(*tmp)) tmp++;
847
Daniel Veillard6231e842002-04-18 11:54:04 +0000848 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&");
Daniel Veillardeb475a32002-04-14 22:00:22 +0000849 if (escaped != NULL) {
850 xmlBufferWriteQuotedString(buf->buffer, escaped);
851 xmlFree(escaped);
852 } else {
853 xmlBufferWriteQuotedString(buf->buffer, value);
854 }
855 } else {
856 xmlBufferWriteQuotedString(buf->buffer, value);
857 }
Owen Taylor3473f882001-02-23 17:55:21 +0000858 xmlFree(value);
859 } else {
860 xmlOutputBufferWriteString(buf, "=\"\"");
861 }
862 }
863}
864
865/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000866 * htmlAttrListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000867 * @buf: the HTML buffer output
868 * @doc: the document
869 * @cur: the first attribute pointer
870 * @encoding: the encoding string
871 *
872 * Dump a list of HTML attributes
873 */
874static void
875htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
876 if (cur == NULL) {
877 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000878 "htmlAttrListDumpOutput : property == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000879 return;
880 }
881 while (cur != NULL) {
882 htmlAttrDumpOutput(buf, doc, cur, encoding);
883 cur = cur->next;
884 }
885}
886
887
888void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
889 xmlNodePtr cur, const char *encoding);
890
891/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000892 * htmlNodeListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000893 * @buf: the HTML buffer output
894 * @doc: the document
895 * @cur: the first node
896 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000897 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000898 *
899 * Dump an HTML node list, recursive behaviour,children are printed too.
900 */
901static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000902htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
903 xmlNodePtr cur, const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000904 if (cur == NULL) {
905 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000906 "htmlNodeListDumpOutput : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000907 return;
908 }
909 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000910 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000911 cur = cur->next;
912 }
913}
914
915/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000916 * htmlNodeDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000917 * @buf: the HTML buffer output
918 * @doc: the document
919 * @cur: the current node
920 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000921 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000922 *
923 * Dump an HTML node, recursive behaviour,children are printed too.
924 */
925void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000926htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
927 xmlNodePtr cur, const char *encoding, int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000928 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000929
930 if (cur == NULL) {
931 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000932 "htmlNodeDumpFormatOutput : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000933 return;
934 }
935 /*
936 * Special cases.
937 */
938 if (cur->type == XML_DTD_NODE)
939 return;
940 if (cur->type == XML_HTML_DOCUMENT_NODE) {
941 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
942 return;
943 }
944 if (cur->type == HTML_TEXT_NODE) {
945 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000946 if (((cur->name == (const xmlChar *)xmlStringText) ||
947 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000948 ((cur->parent == NULL) ||
Daniel Veillard0b22def2002-07-29 16:23:03 +0000949 ((!xmlStrEqual(cur->parent->name, BAD_CAST "script")) &&
950 (!xmlStrEqual(cur->parent->name, BAD_CAST "style"))))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000951 xmlChar *buffer;
952
Owen Taylor3473f882001-02-23 17:55:21 +0000953 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000954 if (buffer != NULL) {
955 xmlOutputBufferWriteString(buf, (const char *)buffer);
956 xmlFree(buffer);
957 }
958 } else {
959 xmlOutputBufferWriteString(buf, (const char *)cur->content);
960 }
961 }
962 return;
963 }
964 if (cur->type == HTML_COMMENT_NODE) {
965 if (cur->content != NULL) {
966 xmlOutputBufferWriteString(buf, "<!--");
Owen Taylor3473f882001-02-23 17:55:21 +0000967 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000968 xmlOutputBufferWriteString(buf, "-->");
969 }
970 return;
971 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000972 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000973 if (cur->name == NULL)
974 return;
975 xmlOutputBufferWriteString(buf, "<?");
976 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000977 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000978 xmlOutputBufferWriteString(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000979 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000980 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000981 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000982 return;
983 }
Owen Taylor3473f882001-02-23 17:55:21 +0000984 if (cur->type == HTML_ENTITY_REF_NODE) {
985 xmlOutputBufferWriteString(buf, "&");
986 xmlOutputBufferWriteString(buf, (const char *)cur->name);
987 xmlOutputBufferWriteString(buf, ";");
988 return;
989 }
990 if (cur->type == HTML_PRESERVE_NODE) {
991 if (cur->content != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000992 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000993 }
994 return;
995 }
996
997 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000998 * Get specific HTML info for that node.
Owen Taylor3473f882001-02-23 17:55:21 +0000999 */
1000 info = htmlTagLookup(cur->name);
1001
1002 xmlOutputBufferWriteString(buf, "<");
1003 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1004 if (cur->properties != NULL)
1005 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
1006
1007 if ((info != NULL) && (info->empty)) {
1008 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +00001009 if ((format) && (!info->isinline) && (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001010 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +00001011 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1012 (cur->parent != NULL) &&
1013 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001014 xmlOutputBufferWriteString(buf, "\n");
1015 }
1016 return;
1017 }
Daniel Veillard7db37732001-07-12 01:20:08 +00001018 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
1019 (cur->children == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001020 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +00001021 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
1022 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001023 xmlOutputBufferWriteString(buf, ">");
1024 } else {
1025 xmlOutputBufferWriteString(buf, "></");
1026 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1027 xmlOutputBufferWriteString(buf, ">");
1028 }
Daniel Veillard02bb1702001-06-13 21:11:59 +00001029 if ((format) && (cur->next != NULL) &&
1030 (info != NULL) && (!info->isinline)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001031 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +00001032 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1033 (cur->parent != NULL) &&
1034 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001035 xmlOutputBufferWriteString(buf, "\n");
1036 }
1037 return;
1038 }
1039 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +00001040 if ((cur->type != XML_ELEMENT_NODE) &&
1041 (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001042 /*
1043 * Uses the OutputBuffer property to automatically convert
1044 * invalids to charrefs
1045 */
1046
Owen Taylor3473f882001-02-23 17:55:21 +00001047 xmlOutputBufferWriteString(buf, (const char *) cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +00001048 }
1049 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +00001050 if ((format) && (info != NULL) && (!info->isinline) &&
1051 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001052 (cur->children->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001053 (cur->children != cur->last) &&
1054 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001055 xmlOutputBufferWriteString(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +00001056 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +00001057 if ((format) && (info != NULL) && (!info->isinline) &&
1058 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001059 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001060 (cur->children != cur->last) &&
1061 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001062 xmlOutputBufferWriteString(buf, "\n");
1063 }
Owen Taylor3473f882001-02-23 17:55:21 +00001064 xmlOutputBufferWriteString(buf, "</");
1065 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1066 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +00001067 if ((format) && (info != NULL) && (!info->isinline) &&
1068 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001069 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001070 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1071 (cur->parent != NULL) &&
1072 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001073 xmlOutputBufferWriteString(buf, "\n");
1074 }
1075}
1076
1077/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001078 * htmlNodeDumpOutput:
1079 * @buf: the HTML buffer output
1080 * @doc: the document
1081 * @cur: the current node
1082 * @encoding: the encoding string
1083 *
1084 * Dump an HTML node, recursive behaviour,children are printed too,
1085 * and formatting returns/spaces are added.
1086 */
1087void
1088htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
1089 xmlNodePtr cur, const char *encoding) {
1090 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
1091}
1092
1093/**
1094 * htmlDocContentDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +00001095 * @buf: the HTML buffer output
1096 * @cur: the document
1097 * @encoding: the encoding string
Daniel Veillard9d06d302002-01-22 18:15:52 +00001098 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +00001099 *
1100 * Dump an HTML document.
1101 */
1102void
Daniel Veillard95d845f2001-06-13 13:48:46 +00001103htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1104 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001105 int type;
1106
1107 /*
1108 * force to output the stuff as HTML, especially for entities
1109 */
1110 type = cur->type;
1111 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard4dd93462001-04-02 15:16:19 +00001112 if (cur->intSubset != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001113 htmlDtdDumpOutput(buf, cur, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001114 }
1115 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +00001116 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001117 }
1118 xmlOutputBufferWriteString(buf, "\n");
1119 cur->type = (xmlElementType) type;
1120}
1121
Daniel Veillard95d845f2001-06-13 13:48:46 +00001122/**
1123 * htmlDocContentDumpOutput:
1124 * @buf: the HTML buffer output
1125 * @cur: the document
1126 * @encoding: the encoding string
1127 *
1128 * Dump an HTML document. Formating return/spaces are added.
1129 */
1130void
1131htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1132 const char *encoding) {
1133 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1134}
1135
Owen Taylor3473f882001-02-23 17:55:21 +00001136/************************************************************************
1137 * *
1138 * Saving functions front-ends *
1139 * *
1140 ************************************************************************/
1141
1142/**
1143 * htmlDocDump:
1144 * @f: the FILE*
1145 * @cur: the document
1146 *
1147 * Dump an HTML document to an open FILE.
1148 *
1149 * returns: the number of byte written or -1 in case of failure.
1150 */
1151int
1152htmlDocDump(FILE *f, xmlDocPtr cur) {
1153 xmlOutputBufferPtr buf;
1154 xmlCharEncodingHandlerPtr handler = NULL;
1155 const char *encoding;
1156 int ret;
1157
1158 if (cur == NULL) {
1159#ifdef DEBUG_TREE
1160 xmlGenericError(xmlGenericErrorContext,
1161 "htmlDocDump : document == NULL\n");
1162#endif
1163 return(-1);
1164 }
1165
1166 encoding = (const char *) htmlGetMetaEncoding(cur);
1167
1168 if (encoding != NULL) {
1169 xmlCharEncoding enc;
1170
1171 enc = xmlParseCharEncoding(encoding);
1172 if (enc != cur->charset) {
1173 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1174 /*
1175 * Not supported yet
1176 */
1177 return(-1);
1178 }
1179
1180 handler = xmlFindCharEncodingHandler(encoding);
1181 if (handler == NULL)
1182 return(-1);
1183 }
1184 }
1185
1186 /*
1187 * Fallback to HTML or ASCII when the encoding is unspecified
1188 */
1189 if (handler == NULL)
1190 handler = xmlFindCharEncodingHandler("HTML");
1191 if (handler == NULL)
1192 handler = xmlFindCharEncodingHandler("ascii");
1193
1194 buf = xmlOutputBufferCreateFile(f, handler);
1195 if (buf == NULL) return(-1);
1196 htmlDocContentDumpOutput(buf, cur, NULL);
1197
1198 ret = xmlOutputBufferClose(buf);
1199 return(ret);
1200}
1201
1202/**
1203 * htmlSaveFile:
1204 * @filename: the filename (or URL)
1205 * @cur: the document
1206 *
1207 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1208 * used.
1209 * returns: the number of byte written or -1 in case of failure.
1210 */
1211int
1212htmlSaveFile(const char *filename, xmlDocPtr cur) {
1213 xmlOutputBufferPtr buf;
1214 xmlCharEncodingHandlerPtr handler = NULL;
1215 const char *encoding;
1216 int ret;
1217
1218 encoding = (const char *) htmlGetMetaEncoding(cur);
1219
1220 if (encoding != NULL) {
1221 xmlCharEncoding enc;
1222
1223 enc = xmlParseCharEncoding(encoding);
1224 if (enc != cur->charset) {
1225 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1226 /*
1227 * Not supported yet
1228 */
1229 return(-1);
1230 }
1231
1232 handler = xmlFindCharEncodingHandler(encoding);
1233 if (handler == NULL)
1234 return(-1);
1235 }
1236 }
1237
1238 /*
1239 * Fallback to HTML or ASCII when the encoding is unspecified
1240 */
1241 if (handler == NULL)
1242 handler = xmlFindCharEncodingHandler("HTML");
1243 if (handler == NULL)
1244 handler = xmlFindCharEncodingHandler("ascii");
1245
1246 /*
1247 * save the content to a temp buffer.
1248 */
1249 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1250 if (buf == NULL) return(0);
1251
1252 htmlDocContentDumpOutput(buf, cur, NULL);
1253
1254 ret = xmlOutputBufferClose(buf);
1255 return(ret);
1256}
1257
1258/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001259 * htmlSaveFileFormat:
Owen Taylor3473f882001-02-23 17:55:21 +00001260 * @filename: the filename
1261 * @cur: the document
Daniel Veillard95d845f2001-06-13 13:48:46 +00001262 * @format: should formatting spaces been added
1263 * @encoding: the document encoding
Owen Taylor3473f882001-02-23 17:55:21 +00001264 *
1265 * Dump an HTML document to a file using a given encoding.
1266 *
1267 * returns: the number of byte written or -1 in case of failure.
1268 */
1269int
Daniel Veillard95d845f2001-06-13 13:48:46 +00001270htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1271 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001272 xmlOutputBufferPtr buf;
1273 xmlCharEncodingHandlerPtr handler = NULL;
1274 int ret;
1275
1276 if (encoding != NULL) {
1277 xmlCharEncoding enc;
1278
1279 enc = xmlParseCharEncoding(encoding);
1280 if (enc != cur->charset) {
1281 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1282 /*
1283 * Not supported yet
1284 */
1285 return(-1);
1286 }
1287
1288 handler = xmlFindCharEncodingHandler(encoding);
1289 if (handler == NULL)
1290 return(-1);
1291 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1292 }
Daniel Veillard4dd93462001-04-02 15:16:19 +00001293 } else {
1294 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
Owen Taylor3473f882001-02-23 17:55:21 +00001295 }
1296
1297 /*
1298 * Fallback to HTML or ASCII when the encoding is unspecified
1299 */
1300 if (handler == NULL)
1301 handler = xmlFindCharEncodingHandler("HTML");
1302 if (handler == NULL)
1303 handler = xmlFindCharEncodingHandler("ascii");
1304
1305 /*
1306 * save the content to a temp buffer.
1307 */
1308 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1309 if (buf == NULL) return(0);
1310
Daniel Veillard95d845f2001-06-13 13:48:46 +00001311 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001312
1313 ret = xmlOutputBufferClose(buf);
1314 return(ret);
1315}
Daniel Veillard95d845f2001-06-13 13:48:46 +00001316
1317/**
1318 * htmlSaveFileEnc:
1319 * @filename: the filename
1320 * @cur: the document
1321 * @encoding: the document encoding
1322 *
1323 * Dump an HTML document to a file using a given encoding
1324 * and formatting returns/spaces are added.
1325 *
1326 * returns: the number of byte written or -1 in case of failure.
1327 */
1328int
1329htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1330 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1331}
1332
Owen Taylor3473f882001-02-23 17:55:21 +00001333#endif /* LIBXML_HTML_ENABLED */