blob: 872f2f015ae2ed289976b5207d86729132f48986 [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
8
9
Daniel Veillard34ce8be2002-03-18 19:37:11 +000010#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000011#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000012#ifdef LIBXML_HTML_ENABLED
13
Owen Taylor3473f882001-02-23 17:55:21 +000014#ifdef HAVE_CTYPE_H
15#include <ctype.h>
16#endif
17#ifdef HAVE_STDLIB_H
18#include <stdlib.h>
19#endif
20
21#include <libxml/xmlmemory.h>
22#include <libxml/HTMLparser.h>
23#include <libxml/HTMLtree.h>
24#include <libxml/entities.h>
25#include <libxml/valid.h>
26#include <libxml/xmlerror.h>
27#include <libxml/parserInternals.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000028#include <libxml/globals.h>
Daniel Veillardeb475a32002-04-14 22:00:22 +000029#include <libxml/uri.h>
Owen Taylor3473f882001-02-23 17:55:21 +000030
/************************************************************************
 *                                                                      *
 *              Getting/Setting encoding meta tags                      *
 *                                                                      *
 ************************************************************************/
36
37/**
38 * htmlGetMetaEncoding:
39 * @doc: the document
40 *
41 * Encoding definition lookup in the Meta tags
42 *
43 * Returns the current encoding as flagged in the HTML source
44 */
45const xmlChar *
46htmlGetMetaEncoding(htmlDocPtr doc) {
47 htmlNodePtr cur;
48 const xmlChar *content;
49 const xmlChar *encoding;
50
51 if (doc == NULL)
52 return(NULL);
53 cur = doc->children;
54
55 /*
56 * Search the html
57 */
58 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000059 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000060 if (xmlStrEqual(cur->name, BAD_CAST"html"))
61 break;
62 if (xmlStrEqual(cur->name, BAD_CAST"head"))
63 goto found_head;
64 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
65 goto found_meta;
66 }
67 cur = cur->next;
68 }
69 if (cur == NULL)
70 return(NULL);
71 cur = cur->children;
72
73 /*
74 * Search the head
75 */
76 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000077 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000078 if (xmlStrEqual(cur->name, BAD_CAST"head"))
79 break;
80 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
81 goto found_meta;
82 }
83 cur = cur->next;
84 }
85 if (cur == NULL)
86 return(NULL);
87found_head:
88 cur = cur->children;
89
90 /*
91 * Search the meta elements
92 */
93found_meta:
94 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000095 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000096 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
97 xmlAttrPtr attr = cur->properties;
98 int http;
99 const xmlChar *value;
100
101 content = NULL;
102 http = 0;
103 while (attr != NULL) {
104 if ((attr->children != NULL) &&
105 (attr->children->type == XML_TEXT_NODE) &&
106 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000107 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000108 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
109 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
110 http = 1;
111 else if ((value != NULL)
112 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
113 content = value;
114 if ((http != 0) && (content != NULL))
115 goto found_content;
116 }
117 attr = attr->next;
118 }
119 }
120 }
121 cur = cur->next;
122 }
123 return(NULL);
124
125found_content:
126 encoding = xmlStrstr(content, BAD_CAST"charset=");
127 if (encoding == NULL)
128 encoding = xmlStrstr(content, BAD_CAST"Charset=");
129 if (encoding == NULL)
130 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
131 if (encoding != NULL) {
132 encoding += 8;
133 } else {
134 encoding = xmlStrstr(content, BAD_CAST"charset =");
135 if (encoding == NULL)
136 encoding = xmlStrstr(content, BAD_CAST"Charset =");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
139 if (encoding != NULL)
140 encoding += 9;
141 }
142 if (encoding != NULL) {
143 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
144 }
145 return(encoding);
146}
147
148/**
149 * htmlSetMetaEncoding:
150 * @doc: the document
151 * @encoding: the encoding string
152 *
153 * Sets the current encoding in the Meta tags
154 * NOTE: this will not change the document content encoding, just
155 * the META flag associated.
156 *
157 * Returns 0 in case of success and -1 in case of error
158 */
159int
160htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
161 htmlNodePtr cur, meta;
162 const xmlChar *content;
163 char newcontent[100];
164
165
166 if (doc == NULL)
167 return(-1);
168
169 if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000170 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
171 encoding);
Owen Taylor3473f882001-02-23 17:55:21 +0000172 newcontent[sizeof(newcontent) - 1] = 0;
173 }
174
175 cur = doc->children;
176
177 /*
178 * Search the html
179 */
180 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000181 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000182 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
183 break;
184 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
185 goto found_head;
186 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
187 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000188 }
189 cur = cur->next;
190 }
191 if (cur == NULL)
192 return(-1);
193 cur = cur->children;
194
195 /*
196 * Search the head
197 */
198 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000199 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000200 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
201 break;
202 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
203 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000204 }
205 cur = cur->next;
206 }
207 if (cur == NULL)
208 return(-1);
209found_head:
210 if (cur->children == NULL) {
211 if (encoding == NULL)
212 return(0);
213 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
214 xmlAddChild(cur, meta);
Owen Taylor3473f882001-02-23 17:55:21 +0000215 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000216 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Owen Taylor3473f882001-02-23 17:55:21 +0000217 return(0);
218 }
219 cur = cur->children;
220
221found_meta:
222 if (encoding != NULL) {
223 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000224 * Create a new Meta element with the right attributes
Owen Taylor3473f882001-02-23 17:55:21 +0000225 */
226
227 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
228 xmlAddPrevSibling(cur, meta);
Owen Taylor3473f882001-02-23 17:55:21 +0000229 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000230 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Owen Taylor3473f882001-02-23 17:55:21 +0000231 }
232
233 /*
234 * Search and destroy all the remaining the meta elements carrying
235 * encoding informations
236 */
237 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000238 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000239 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000240 xmlAttrPtr attr = cur->properties;
241 int http;
242 const xmlChar *value;
243
244 content = NULL;
245 http = 0;
246 while (attr != NULL) {
247 if ((attr->children != NULL) &&
248 (attr->children->type == XML_TEXT_NODE) &&
249 (attr->children->next == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000250 value = attr->children->content;
Owen Taylor3473f882001-02-23 17:55:21 +0000251 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
252 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
253 http = 1;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000254 else
255 {
256 if ((value != NULL) &&
257 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
258 content = value;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000259 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000260 if ((http != 0) && (content != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +0000261 break;
262 }
263 attr = attr->next;
264 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000265 if ((http != 0) && (content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000266 meta = cur;
267 cur = cur->next;
268 xmlUnlinkNode(meta);
269 xmlFreeNode(meta);
270 continue;
271 }
272
273 }
274 }
275 cur = cur->next;
276 }
277 return(0);
278}
279
/************************************************************************
 *                                                                      *
 *              Dumping HTML tree content to a simple buffer            *
 *                                                                      *
 ************************************************************************/
285
286static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000287htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format);
Daniel Veillard86fd5a72001-12-13 14:55:21 +0000288static void
289htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
290 int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000291
292/**
293 * htmlDtdDump:
294 * @buf: the HTML buffer output
295 * @doc: the document
296 *
297 * Dump the HTML document DTD, if any.
298 */
299static void
300htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
301 xmlDtdPtr cur = doc->intSubset;
302
303 if (cur == NULL) {
304 xmlGenericError(xmlGenericErrorContext,
305 "htmlDtdDump : no internal subset\n");
306 return;
307 }
308 xmlBufferWriteChar(buf, "<!DOCTYPE ");
309 xmlBufferWriteCHAR(buf, cur->name);
310 if (cur->ExternalID != NULL) {
311 xmlBufferWriteChar(buf, " PUBLIC ");
312 xmlBufferWriteQuotedString(buf, cur->ExternalID);
313 if (cur->SystemID != NULL) {
314 xmlBufferWriteChar(buf, " ");
315 xmlBufferWriteQuotedString(buf, cur->SystemID);
316 }
317 } else if (cur->SystemID != NULL) {
318 xmlBufferWriteChar(buf, " SYSTEM ");
319 xmlBufferWriteQuotedString(buf, cur->SystemID);
320 }
321 xmlBufferWriteChar(buf, ">\n");
322}
323
324/**
325 * htmlAttrDump:
326 * @buf: the HTML buffer output
327 * @doc: the document
328 * @cur: the attribute pointer
329 *
330 * Dump an HTML attribute
331 */
332static void
333htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
334 xmlChar *value;
335
Daniel Veillardeca60d02001-06-13 07:45:41 +0000336 /*
337 * TODO: The html output method should not escape a & character
338 * occurring in an attribute value immediately followed by
339 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
340 */
341
Owen Taylor3473f882001-02-23 17:55:21 +0000342 if (cur == NULL) {
343 xmlGenericError(xmlGenericErrorContext,
344 "htmlAttrDump : property == NULL\n");
345 return;
346 }
347 xmlBufferWriteChar(buf, " ");
348 xmlBufferWriteCHAR(buf, cur->name);
349 if (cur->children != NULL) {
350 value = xmlNodeListGetString(doc, cur->children, 0);
351 if (value) {
352 xmlBufferWriteChar(buf, "=");
Daniel Veillardeb475a32002-04-14 22:00:22 +0000353 if ((xmlStrEqual(cur->name, BAD_CAST "href")) ||
354 (xmlStrEqual(cur->name, BAD_CAST "src"))) {
355 xmlChar *escaped;
356 xmlChar *tmp = value;
357
358 while (IS_BLANK(*tmp)) tmp++;
359
Daniel Veillard6231e842002-04-18 11:54:04 +0000360 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&");
Daniel Veillardeb475a32002-04-14 22:00:22 +0000361 if (escaped != NULL) {
362 xmlBufferWriteQuotedString(buf, escaped);
363 xmlFree(escaped);
364 } else {
365 xmlBufferWriteQuotedString(buf, value);
366 }
367 } else {
368 xmlBufferWriteQuotedString(buf, value);
369 }
Owen Taylor3473f882001-02-23 17:55:21 +0000370 xmlFree(value);
371 } else {
372 xmlBufferWriteChar(buf, "=\"\"");
373 }
374 }
375}
376
377/**
378 * htmlAttrListDump:
379 * @buf: the HTML buffer output
380 * @doc: the document
381 * @cur: the first attribute pointer
382 *
383 * Dump a list of HTML attributes
384 */
385static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000386htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, int format) {
387 int i = 0;
388
Owen Taylor3473f882001-02-23 17:55:21 +0000389 if (cur == NULL) {
390 xmlGenericError(xmlGenericErrorContext,
391 "htmlAttrListDump : property == NULL\n");
392 return;
393 }
394 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000395 i++;
396 if ((format) && (i >= 5)) {
397 i = 0;
398 xmlBufferWriteChar(buf, "\n");
399 }
Owen Taylor3473f882001-02-23 17:55:21 +0000400 htmlAttrDump(buf, doc, cur);
401 cur = cur->next;
402 }
403}
404
Owen Taylor3473f882001-02-23 17:55:21 +0000405/**
406 * htmlNodeListDump:
407 * @buf: the HTML buffer output
408 * @doc: the document
409 * @cur: the first node
410 *
411 * Dump an HTML node list, recursive behaviour,children are printed too.
412 */
413static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000414htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000415 if (cur == NULL) {
416 xmlGenericError(xmlGenericErrorContext,
417 "htmlNodeListDump : node == NULL\n");
418 return;
419 }
420 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000421 htmlNodeDumpFormat(buf, doc, cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000422 cur = cur->next;
423 }
424}
425
426/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000427 * htmlNodeDumpFormat:
Owen Taylor3473f882001-02-23 17:55:21 +0000428 * @buf: the HTML buffer output
429 * @doc: the document
430 * @cur: the current node
Daniel Veillard95d845f2001-06-13 13:48:46 +0000431 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000432 *
433 * Dump an HTML node, recursive behaviour,children are printed too.
434 */
Daniel Veillard86fd5a72001-12-13 14:55:21 +0000435static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000436htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
437 int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000438 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000439
440 if (cur == NULL) {
441 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000442 "htmlNodeDumpFormat : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000443 return;
444 }
445 /*
446 * Special cases.
447 */
448 if (cur->type == XML_DTD_NODE)
449 return;
450 if (cur->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000451 htmlDocContentDump(buf, (xmlDocPtr) cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000452 return;
453 }
454 if (cur->type == HTML_TEXT_NODE) {
455 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000456 if (((cur->name == (const xmlChar *)xmlStringText) ||
457 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000458 ((cur->parent == NULL) ||
459 (!xmlStrEqual(cur->parent->name, BAD_CAST "script")))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000460 xmlChar *buffer;
461
Owen Taylor3473f882001-02-23 17:55:21 +0000462 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000463 if (buffer != NULL) {
464 xmlBufferWriteCHAR(buf, buffer);
465 xmlFree(buffer);
466 }
467 } else {
468 xmlBufferWriteCHAR(buf, cur->content);
469 }
470 }
471 return;
472 }
473 if (cur->type == HTML_COMMENT_NODE) {
474 if (cur->content != NULL) {
475 xmlBufferWriteChar(buf, "<!--");
Owen Taylor3473f882001-02-23 17:55:21 +0000476 xmlBufferWriteCHAR(buf, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000477 xmlBufferWriteChar(buf, "-->");
478 }
479 return;
480 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000481 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000482 if (cur->name == NULL)
483 return;
484 xmlBufferWriteChar(buf, "<?");
485 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000486 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000487 xmlBufferWriteChar(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000488 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000489 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000490 xmlBufferWriteChar(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000491 return;
492 }
Owen Taylor3473f882001-02-23 17:55:21 +0000493 if (cur->type == HTML_ENTITY_REF_NODE) {
494 xmlBufferWriteChar(buf, "&");
495 xmlBufferWriteCHAR(buf, cur->name);
496 xmlBufferWriteChar(buf, ";");
497 return;
498 }
Daniel Veillard083c2662001-05-08 08:27:14 +0000499 if (cur->type == HTML_PRESERVE_NODE) {
500 if (cur->content != NULL) {
Daniel Veillard083c2662001-05-08 08:27:14 +0000501 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillard083c2662001-05-08 08:27:14 +0000502 }
503 return;
504 }
Owen Taylor3473f882001-02-23 17:55:21 +0000505
506 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000507 * Get specific HTML info for that node.
Owen Taylor3473f882001-02-23 17:55:21 +0000508 */
509 info = htmlTagLookup(cur->name);
510
511 xmlBufferWriteChar(buf, "<");
512 xmlBufferWriteCHAR(buf, cur->name);
513 if (cur->properties != NULL)
Daniel Veillard95d845f2001-06-13 13:48:46 +0000514 htmlAttrListDump(buf, doc, cur->properties, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000515
516 if ((info != NULL) && (info->empty)) {
517 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000518 if ((format) && (info != NULL) && (!info->isinline) &&
519 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000520 if ((cur->next->type != HTML_TEXT_NODE) &&
521 (cur->next->type != HTML_ENTITY_REF_NODE))
522 xmlBufferWriteChar(buf, "\n");
523 }
524 return;
525 }
Daniel Veillard7db37732001-07-12 01:20:08 +0000526 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
527 (cur->children == NULL)) {
Daniel Veillard083c2662001-05-08 08:27:14 +0000528 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +0000529 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
530 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000531 xmlBufferWriteChar(buf, ">");
Daniel Veillard083c2662001-05-08 08:27:14 +0000532 } else {
Owen Taylor3473f882001-02-23 17:55:21 +0000533 xmlBufferWriteChar(buf, "></");
534 xmlBufferWriteCHAR(buf, cur->name);
535 xmlBufferWriteChar(buf, ">");
536 }
Daniel Veillard02bb1702001-06-13 21:11:59 +0000537 if ((format) && (info != NULL) && (!info->isinline) &&
538 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000539 if ((cur->next->type != HTML_TEXT_NODE) &&
540 (cur->next->type != HTML_ENTITY_REF_NODE))
541 xmlBufferWriteChar(buf, "\n");
542 }
543 return;
544 }
545 xmlBufferWriteChar(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +0000546 if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000547 xmlChar *buffer;
548
Daniel Veillard083c2662001-05-08 08:27:14 +0000549 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000550 if (buffer != NULL) {
551 xmlBufferWriteCHAR(buf, buffer);
552 xmlFree(buffer);
553 }
554 }
555 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +0000556 if ((format) && (info != NULL) && (!info->isinline) &&
557 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000558 (cur->children->type != HTML_ENTITY_REF_NODE) &&
559 (cur->children != cur->last))
560 xmlBufferWriteChar(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +0000561 htmlNodeListDump(buf, doc, cur->children, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +0000562 if ((format) && (info != NULL) && (!info->isinline) &&
563 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000564 (cur->last->type != HTML_ENTITY_REF_NODE) &&
565 (cur->children != cur->last))
566 xmlBufferWriteChar(buf, "\n");
567 }
Owen Taylor3473f882001-02-23 17:55:21 +0000568 xmlBufferWriteChar(buf, "</");
569 xmlBufferWriteCHAR(buf, cur->name);
570 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000571 if ((format) && (info != NULL) && (!info->isinline) &&
572 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000573 if ((cur->next->type != HTML_TEXT_NODE) &&
574 (cur->next->type != HTML_ENTITY_REF_NODE))
575 xmlBufferWriteChar(buf, "\n");
576 }
577}
578
579/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000580 * htmlNodeDump:
581 * @buf: the HTML buffer output
582 * @doc: the document
583 * @cur: the current node
584 *
585 * Dump an HTML node, recursive behaviour,children are printed too,
586 * and formatting returns are added.
587 */
588void
589htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
590 htmlNodeDumpFormat(buf, doc, cur, 1);
591}
592
593/**
594 * htmlNodeDumpFileFormat:
595 * @out: the FILE pointer
596 * @doc: the document
597 * @cur: the current node
598 * @encoding: the document encoding
599 * @format: should formatting spaces been added
600 *
601 * Dump an HTML node, recursive behaviour,children are printed too.
602 *
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000603 * TODO: if encoding == NULL try to save in the doc encoding
604 *
605 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard95d845f2001-06-13 13:48:46 +0000606 */
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000607int
608htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
609 xmlNodePtr cur, const char *encoding, int format) {
610 xmlOutputBufferPtr buf;
611 xmlCharEncodingHandlerPtr handler = NULL;
612 int ret;
Daniel Veillard95d845f2001-06-13 13:48:46 +0000613
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000614 if (encoding != NULL) {
615 xmlCharEncoding enc;
616
617 enc = xmlParseCharEncoding(encoding);
618 if (enc != XML_CHAR_ENCODING_UTF8) {
619 handler = xmlFindCharEncodingHandler(encoding);
620 if (handler == NULL)
621 return(-1);
622 }
623 }
624
625 /*
626 * Fallback to HTML or ASCII when the encoding is unspecified
627 */
628 if (handler == NULL)
629 handler = xmlFindCharEncodingHandler("HTML");
630 if (handler == NULL)
631 handler = xmlFindCharEncodingHandler("ascii");
632
633 /*
634 * save the content to a temp buffer.
635 */
636 buf = xmlOutputBufferCreateFile(out, handler);
637 if (buf == NULL) return(0);
638
639 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
640
641 ret = xmlOutputBufferClose(buf);
642 return(ret);
Daniel Veillard95d845f2001-06-13 13:48:46 +0000643}
644
645/**
Owen Taylor3473f882001-02-23 17:55:21 +0000646 * htmlNodeDumpFile:
647 * @out: the FILE pointer
648 * @doc: the document
649 * @cur: the current node
650 *
Daniel Veillard95d845f2001-06-13 13:48:46 +0000651 * Dump an HTML node, recursive behaviour,children are printed too,
652 * and formatting returns are added.
Owen Taylor3473f882001-02-23 17:55:21 +0000653 */
654void
655htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000656 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000657}
658
659/**
660 * htmlDocContentDump:
661 * @buf: the HTML buffer output
662 * @cur: the document
663 *
664 * Dump an HTML document.
665 */
666static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000667htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000668 int type;
669
670 /*
671 * force to output the stuff as HTML, especially for entities
672 */
673 type = cur->type;
674 cur->type = XML_HTML_DOCUMENT_NODE;
675 if (cur->intSubset != NULL)
676 htmlDtdDump(buf, cur);
677 else {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000678 /* Default to HTML-4.0 transitional @@@@ */
Owen Taylor3473f882001-02-23 17:55:21 +0000679 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
680
681 }
682 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000683 htmlNodeListDump(buf, cur, cur->children, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000684 }
685 xmlBufferWriteChar(buf, "\n");
686 cur->type = (xmlElementType) type;
687}
688
689/**
690 * htmlDocDumpMemory:
691 * @cur: the document
692 * @mem: OUT: the memory pointer
Daniel Veillard2d703722001-05-30 18:32:34 +0000693 * @size: OUT: the memory length
Owen Taylor3473f882001-02-23 17:55:21 +0000694 *
695 * Dump an HTML document in memory and return the xmlChar * and it's size.
696 * It's up to the caller to free the memory.
697 */
698void
699htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard2d703722001-05-30 18:32:34 +0000700 xmlOutputBufferPtr buf;
701 xmlCharEncodingHandlerPtr handler = NULL;
702 const char *encoding;
Owen Taylor3473f882001-02-23 17:55:21 +0000703
704 if (cur == NULL) {
705#ifdef DEBUG_TREE
706 xmlGenericError(xmlGenericErrorContext,
Daniel Veillard2d703722001-05-30 18:32:34 +0000707 "htmlDocDumpMemory : document == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000708#endif
709 *mem = NULL;
710 *size = 0;
711 return;
712 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000713
714 encoding = (const char *) htmlGetMetaEncoding(cur);
715
716 if (encoding != NULL) {
717 xmlCharEncoding enc;
718
719 enc = xmlParseCharEncoding(encoding);
720 if (enc != cur->charset) {
721 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
722 /*
723 * Not supported yet
724 */
725 *mem = NULL;
726 *size = 0;
727 return;
728 }
729
730 handler = xmlFindCharEncodingHandler(encoding);
731 if (handler == NULL) {
732 *mem = NULL;
733 *size = 0;
734 return;
735 }
736 }
737 }
738
739 /*
740 * Fallback to HTML or ASCII when the encoding is unspecified
741 */
742 if (handler == NULL)
743 handler = xmlFindCharEncodingHandler("HTML");
744 if (handler == NULL)
745 handler = xmlFindCharEncodingHandler("ascii");
746
747 buf = xmlAllocOutputBuffer(handler);
Owen Taylor3473f882001-02-23 17:55:21 +0000748 if (buf == NULL) {
749 *mem = NULL;
750 *size = 0;
751 return;
752 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000753
754 htmlDocContentDumpOutput(buf, cur, NULL);
755 xmlOutputBufferFlush(buf);
756 if (buf->conv != NULL) {
757 *size = buf->conv->use;
758 *mem = xmlStrndup(buf->conv->content, *size);
759 } else {
760 *size = buf->buffer->use;
761 *mem = xmlStrndup(buf->buffer->content, *size);
762 }
763 (void)xmlOutputBufferClose(buf);
Owen Taylor3473f882001-02-23 17:55:21 +0000764}
765
766
/************************************************************************
 *                                                                      *
 *              Dumping HTML tree content to an I/O output buffer       *
 *                                                                      *
 ************************************************************************/
772
773/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000774 * htmlDtdDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000775 * @buf: the HTML buffer output
776 * @doc: the document
777 * @encoding: the encoding string
778 *
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000779 * TODO: check whether encoding is needed
780 *
Owen Taylor3473f882001-02-23 17:55:21 +0000781 * Dump the HTML document DTD, if any.
782 */
783static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000784htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000785 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000786 xmlDtdPtr cur = doc->intSubset;
787
788 if (cur == NULL) {
789 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000790 "htmlDtdDumpOutput : no internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000791 return;
792 }
793 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
794 xmlOutputBufferWriteString(buf, (const char *)cur->name);
795 if (cur->ExternalID != NULL) {
796 xmlOutputBufferWriteString(buf, " PUBLIC ");
797 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
798 if (cur->SystemID != NULL) {
799 xmlOutputBufferWriteString(buf, " ");
800 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
801 }
802 } else if (cur->SystemID != NULL) {
803 xmlOutputBufferWriteString(buf, " SYSTEM ");
804 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
805 }
806 xmlOutputBufferWriteString(buf, ">\n");
807}
808
809/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000810 * htmlAttrDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000811 * @buf: the HTML buffer output
812 * @doc: the document
813 * @cur: the attribute pointer
814 * @encoding: the encoding string
815 *
816 * Dump an HTML attribute
817 */
818static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000819htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000820 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000821 xmlChar *value;
822
Daniel Veillardeca60d02001-06-13 07:45:41 +0000823 /*
824 * TODO: The html output method should not escape a & character
825 * occurring in an attribute value immediately followed by
826 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
827 */
828
Owen Taylor3473f882001-02-23 17:55:21 +0000829 if (cur == NULL) {
830 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000831 "htmlAttrDumpOutput : property == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000832 return;
833 }
834 xmlOutputBufferWriteString(buf, " ");
835 xmlOutputBufferWriteString(buf, (const char *)cur->name);
836 if (cur->children != NULL) {
837 value = xmlNodeListGetString(doc, cur->children, 0);
838 if (value) {
839 xmlOutputBufferWriteString(buf, "=");
Daniel Veillardeb475a32002-04-14 22:00:22 +0000840 if ((xmlStrEqual(cur->name, BAD_CAST "href")) ||
841 (xmlStrEqual(cur->name, BAD_CAST "src"))) {
842 xmlChar *escaped;
843 xmlChar *tmp = value;
844
845 while (IS_BLANK(*tmp)) tmp++;
846
Daniel Veillard6231e842002-04-18 11:54:04 +0000847 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&");
Daniel Veillardeb475a32002-04-14 22:00:22 +0000848 if (escaped != NULL) {
849 xmlBufferWriteQuotedString(buf->buffer, escaped);
850 xmlFree(escaped);
851 } else {
852 xmlBufferWriteQuotedString(buf->buffer, value);
853 }
854 } else {
855 xmlBufferWriteQuotedString(buf->buffer, value);
856 }
Owen Taylor3473f882001-02-23 17:55:21 +0000857 xmlFree(value);
858 } else {
859 xmlOutputBufferWriteString(buf, "=\"\"");
860 }
861 }
862}
863
864/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000865 * htmlAttrListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000866 * @buf: the HTML buffer output
867 * @doc: the document
868 * @cur: the first attribute pointer
869 * @encoding: the encoding string
870 *
871 * Dump a list of HTML attributes
872 */
873static void
874htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
875 if (cur == NULL) {
876 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000877 "htmlAttrListDumpOutput : property == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000878 return;
879 }
880 while (cur != NULL) {
881 htmlAttrDumpOutput(buf, doc, cur, encoding);
882 cur = cur->next;
883 }
884}
885
886
887void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
888 xmlNodePtr cur, const char *encoding);
889
890/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000891 * htmlNodeListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000892 * @buf: the HTML buffer output
893 * @doc: the document
894 * @cur: the first node
895 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000896 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000897 *
898 * Dump an HTML node list, recursive behaviour,children are printed too.
899 */
900static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000901htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
902 xmlNodePtr cur, const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000903 if (cur == NULL) {
904 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000905 "htmlNodeListDumpOutput : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000906 return;
907 }
908 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000909 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000910 cur = cur->next;
911 }
912}
913
914/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000915 * htmlNodeDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000916 * @buf: the HTML buffer output
917 * @doc: the document
918 * @cur: the current node
919 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000920 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000921 *
922 * Dump an HTML node, recursive behaviour,children are printed too.
923 */
924void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000925htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
926 xmlNodePtr cur, const char *encoding, int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000927 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000928
929 if (cur == NULL) {
930 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000931 "htmlNodeDumpFormatOutput : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000932 return;
933 }
934 /*
935 * Special cases.
936 */
937 if (cur->type == XML_DTD_NODE)
938 return;
939 if (cur->type == XML_HTML_DOCUMENT_NODE) {
940 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
941 return;
942 }
943 if (cur->type == HTML_TEXT_NODE) {
944 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000945 if (((cur->name == (const xmlChar *)xmlStringText) ||
946 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000947 ((cur->parent == NULL) ||
948 (!xmlStrEqual(cur->parent->name, BAD_CAST "script")))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000949 xmlChar *buffer;
950
Owen Taylor3473f882001-02-23 17:55:21 +0000951 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000952 if (buffer != NULL) {
953 xmlOutputBufferWriteString(buf, (const char *)buffer);
954 xmlFree(buffer);
955 }
956 } else {
957 xmlOutputBufferWriteString(buf, (const char *)cur->content);
958 }
959 }
960 return;
961 }
962 if (cur->type == HTML_COMMENT_NODE) {
963 if (cur->content != NULL) {
964 xmlOutputBufferWriteString(buf, "<!--");
Owen Taylor3473f882001-02-23 17:55:21 +0000965 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000966 xmlOutputBufferWriteString(buf, "-->");
967 }
968 return;
969 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000970 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000971 if (cur->name == NULL)
972 return;
973 xmlOutputBufferWriteString(buf, "<?");
974 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000975 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000976 xmlOutputBufferWriteString(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000977 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000978 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000979 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000980 return;
981 }
Owen Taylor3473f882001-02-23 17:55:21 +0000982 if (cur->type == HTML_ENTITY_REF_NODE) {
983 xmlOutputBufferWriteString(buf, "&");
984 xmlOutputBufferWriteString(buf, (const char *)cur->name);
985 xmlOutputBufferWriteString(buf, ";");
986 return;
987 }
988 if (cur->type == HTML_PRESERVE_NODE) {
989 if (cur->content != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000990 xmlOutputBufferWriteString(buf, (const char *)cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000991 }
992 return;
993 }
994
995 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000996 * Get specific HTML info for that node.
Owen Taylor3473f882001-02-23 17:55:21 +0000997 */
998 info = htmlTagLookup(cur->name);
999
1000 xmlOutputBufferWriteString(buf, "<");
1001 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1002 if (cur->properties != NULL)
1003 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
1004
1005 if ((info != NULL) && (info->empty)) {
1006 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +00001007 if ((format) && (!info->isinline) && (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001008 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +00001009 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1010 (cur->parent != NULL) &&
1011 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001012 xmlOutputBufferWriteString(buf, "\n");
1013 }
1014 return;
1015 }
Daniel Veillard7db37732001-07-12 01:20:08 +00001016 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
1017 (cur->children == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001018 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +00001019 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
1020 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001021 xmlOutputBufferWriteString(buf, ">");
1022 } else {
1023 xmlOutputBufferWriteString(buf, "></");
1024 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1025 xmlOutputBufferWriteString(buf, ">");
1026 }
Daniel Veillard02bb1702001-06-13 21:11:59 +00001027 if ((format) && (cur->next != NULL) &&
1028 (info != NULL) && (!info->isinline)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001029 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +00001030 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1031 (cur->parent != NULL) &&
1032 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001033 xmlOutputBufferWriteString(buf, "\n");
1034 }
1035 return;
1036 }
1037 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +00001038 if ((cur->type != XML_ELEMENT_NODE) &&
1039 (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001040 /*
1041 * Uses the OutputBuffer property to automatically convert
1042 * invalids to charrefs
1043 */
1044
Owen Taylor3473f882001-02-23 17:55:21 +00001045 xmlOutputBufferWriteString(buf, (const char *) cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +00001046 }
1047 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +00001048 if ((format) && (info != NULL) && (!info->isinline) &&
1049 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001050 (cur->children->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001051 (cur->children != cur->last) &&
1052 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001053 xmlOutputBufferWriteString(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +00001054 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +00001055 if ((format) && (info != NULL) && (!info->isinline) &&
1056 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001057 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001058 (cur->children != cur->last) &&
1059 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001060 xmlOutputBufferWriteString(buf, "\n");
1061 }
Owen Taylor3473f882001-02-23 17:55:21 +00001062 xmlOutputBufferWriteString(buf, "</");
1063 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1064 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +00001065 if ((format) && (info != NULL) && (!info->isinline) &&
1066 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001067 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001068 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1069 (cur->parent != NULL) &&
1070 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001071 xmlOutputBufferWriteString(buf, "\n");
1072 }
1073}
1074
1075/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001076 * htmlNodeDumpOutput:
1077 * @buf: the HTML buffer output
1078 * @doc: the document
1079 * @cur: the current node
1080 * @encoding: the encoding string
1081 *
1082 * Dump an HTML node, recursive behaviour,children are printed too,
1083 * and formatting returns/spaces are added.
1084 */
1085void
1086htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
1087 xmlNodePtr cur, const char *encoding) {
1088 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
1089}
1090
1091/**
1092 * htmlDocContentDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +00001093 * @buf: the HTML buffer output
1094 * @cur: the document
1095 * @encoding: the encoding string
Daniel Veillard9d06d302002-01-22 18:15:52 +00001096 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +00001097 *
1098 * Dump an HTML document.
1099 */
1100void
Daniel Veillard95d845f2001-06-13 13:48:46 +00001101htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1102 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001103 int type;
1104
1105 /*
1106 * force to output the stuff as HTML, especially for entities
1107 */
1108 type = cur->type;
1109 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard4dd93462001-04-02 15:16:19 +00001110 if (cur->intSubset != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001111 htmlDtdDumpOutput(buf, cur, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001112 }
1113 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +00001114 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001115 }
1116 xmlOutputBufferWriteString(buf, "\n");
1117 cur->type = (xmlElementType) type;
1118}
1119
Daniel Veillard95d845f2001-06-13 13:48:46 +00001120/**
1121 * htmlDocContentDumpOutput:
1122 * @buf: the HTML buffer output
1123 * @cur: the document
1124 * @encoding: the encoding string
1125 *
1126 * Dump an HTML document. Formating return/spaces are added.
1127 */
1128void
1129htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1130 const char *encoding) {
1131 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1132}
1133
Owen Taylor3473f882001-02-23 17:55:21 +00001134/************************************************************************
1135 * *
1136 * Saving functions front-ends *
1137 * *
1138 ************************************************************************/
1139
1140/**
1141 * htmlDocDump:
1142 * @f: the FILE*
1143 * @cur: the document
1144 *
1145 * Dump an HTML document to an open FILE.
1146 *
1147 * returns: the number of byte written or -1 in case of failure.
1148 */
1149int
1150htmlDocDump(FILE *f, xmlDocPtr cur) {
1151 xmlOutputBufferPtr buf;
1152 xmlCharEncodingHandlerPtr handler = NULL;
1153 const char *encoding;
1154 int ret;
1155
1156 if (cur == NULL) {
1157#ifdef DEBUG_TREE
1158 xmlGenericError(xmlGenericErrorContext,
1159 "htmlDocDump : document == NULL\n");
1160#endif
1161 return(-1);
1162 }
1163
1164 encoding = (const char *) htmlGetMetaEncoding(cur);
1165
1166 if (encoding != NULL) {
1167 xmlCharEncoding enc;
1168
1169 enc = xmlParseCharEncoding(encoding);
1170 if (enc != cur->charset) {
1171 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1172 /*
1173 * Not supported yet
1174 */
1175 return(-1);
1176 }
1177
1178 handler = xmlFindCharEncodingHandler(encoding);
1179 if (handler == NULL)
1180 return(-1);
1181 }
1182 }
1183
1184 /*
1185 * Fallback to HTML or ASCII when the encoding is unspecified
1186 */
1187 if (handler == NULL)
1188 handler = xmlFindCharEncodingHandler("HTML");
1189 if (handler == NULL)
1190 handler = xmlFindCharEncodingHandler("ascii");
1191
1192 buf = xmlOutputBufferCreateFile(f, handler);
1193 if (buf == NULL) return(-1);
1194 htmlDocContentDumpOutput(buf, cur, NULL);
1195
1196 ret = xmlOutputBufferClose(buf);
1197 return(ret);
1198}
1199
1200/**
1201 * htmlSaveFile:
1202 * @filename: the filename (or URL)
1203 * @cur: the document
1204 *
1205 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1206 * used.
1207 * returns: the number of byte written or -1 in case of failure.
1208 */
1209int
1210htmlSaveFile(const char *filename, xmlDocPtr cur) {
1211 xmlOutputBufferPtr buf;
1212 xmlCharEncodingHandlerPtr handler = NULL;
1213 const char *encoding;
1214 int ret;
1215
1216 encoding = (const char *) htmlGetMetaEncoding(cur);
1217
1218 if (encoding != NULL) {
1219 xmlCharEncoding enc;
1220
1221 enc = xmlParseCharEncoding(encoding);
1222 if (enc != cur->charset) {
1223 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1224 /*
1225 * Not supported yet
1226 */
1227 return(-1);
1228 }
1229
1230 handler = xmlFindCharEncodingHandler(encoding);
1231 if (handler == NULL)
1232 return(-1);
1233 }
1234 }
1235
1236 /*
1237 * Fallback to HTML or ASCII when the encoding is unspecified
1238 */
1239 if (handler == NULL)
1240 handler = xmlFindCharEncodingHandler("HTML");
1241 if (handler == NULL)
1242 handler = xmlFindCharEncodingHandler("ascii");
1243
1244 /*
1245 * save the content to a temp buffer.
1246 */
1247 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1248 if (buf == NULL) return(0);
1249
1250 htmlDocContentDumpOutput(buf, cur, NULL);
1251
1252 ret = xmlOutputBufferClose(buf);
1253 return(ret);
1254}
1255
1256/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001257 * htmlSaveFileFormat:
Owen Taylor3473f882001-02-23 17:55:21 +00001258 * @filename: the filename
1259 * @cur: the document
Daniel Veillard95d845f2001-06-13 13:48:46 +00001260 * @format: should formatting spaces been added
1261 * @encoding: the document encoding
Owen Taylor3473f882001-02-23 17:55:21 +00001262 *
1263 * Dump an HTML document to a file using a given encoding.
1264 *
1265 * returns: the number of byte written or -1 in case of failure.
1266 */
1267int
Daniel Veillard95d845f2001-06-13 13:48:46 +00001268htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1269 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001270 xmlOutputBufferPtr buf;
1271 xmlCharEncodingHandlerPtr handler = NULL;
1272 int ret;
1273
1274 if (encoding != NULL) {
1275 xmlCharEncoding enc;
1276
1277 enc = xmlParseCharEncoding(encoding);
1278 if (enc != cur->charset) {
1279 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1280 /*
1281 * Not supported yet
1282 */
1283 return(-1);
1284 }
1285
1286 handler = xmlFindCharEncodingHandler(encoding);
1287 if (handler == NULL)
1288 return(-1);
1289 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1290 }
Daniel Veillard4dd93462001-04-02 15:16:19 +00001291 } else {
1292 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
Owen Taylor3473f882001-02-23 17:55:21 +00001293 }
1294
1295 /*
1296 * Fallback to HTML or ASCII when the encoding is unspecified
1297 */
1298 if (handler == NULL)
1299 handler = xmlFindCharEncodingHandler("HTML");
1300 if (handler == NULL)
1301 handler = xmlFindCharEncodingHandler("ascii");
1302
1303 /*
1304 * save the content to a temp buffer.
1305 */
1306 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1307 if (buf == NULL) return(0);
1308
Daniel Veillard95d845f2001-06-13 13:48:46 +00001309 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001310
1311 ret = xmlOutputBufferClose(buf);
1312 return(ret);
1313}
Daniel Veillard95d845f2001-06-13 13:48:46 +00001314
1315/**
1316 * htmlSaveFileEnc:
1317 * @filename: the filename
1318 * @cur: the document
1319 * @encoding: the document encoding
1320 *
1321 * Dump an HTML document to a file using a given encoding
1322 * and formatting returns/spaces are added.
1323 *
1324 * returns: the number of byte written or -1 in case of failure.
1325 */
1326int
1327htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1328 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1329}
1330
Owen Taylor3473f882001-02-23 17:55:21 +00001331#endif /* LIBXML_HTML_ENABLED */