blob: cb5f3080475b8a5d784f9ab1def621ce6eb8799b [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * HTMLtree.c : implementation of access function for an HTML tree.
3 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00006 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00007 */
8
9
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000011#ifdef LIBXML_HTML_ENABLED
12
Owen Taylor3473f882001-02-23 17:55:21 +000013#ifdef HAVE_CTYPE_H
14#include <ctype.h>
15#endif
16#ifdef HAVE_STDLIB_H
17#include <stdlib.h>
18#endif
19
20#include <libxml/xmlmemory.h>
21#include <libxml/HTMLparser.h>
22#include <libxml/HTMLtree.h>
23#include <libxml/entities.h>
24#include <libxml/valid.h>
25#include <libxml/xmlerror.h>
26#include <libxml/parserInternals.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000027#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000028
29/************************************************************************
30 * *
31 * Getting/Setting encoding meta tags *
32 * *
33 ************************************************************************/
34
35/**
36 * htmlGetMetaEncoding:
37 * @doc: the document
38 *
39 * Encoding definition lookup in the Meta tags
40 *
41 * Returns the current encoding as flagged in the HTML source
42 */
43const xmlChar *
44htmlGetMetaEncoding(htmlDocPtr doc) {
45 htmlNodePtr cur;
46 const xmlChar *content;
47 const xmlChar *encoding;
48
49 if (doc == NULL)
50 return(NULL);
51 cur = doc->children;
52
53 /*
54 * Search the html
55 */
56 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000057 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000058 if (xmlStrEqual(cur->name, BAD_CAST"html"))
59 break;
60 if (xmlStrEqual(cur->name, BAD_CAST"head"))
61 goto found_head;
62 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
63 goto found_meta;
64 }
65 cur = cur->next;
66 }
67 if (cur == NULL)
68 return(NULL);
69 cur = cur->children;
70
71 /*
72 * Search the head
73 */
74 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000075 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000076 if (xmlStrEqual(cur->name, BAD_CAST"head"))
77 break;
78 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
79 goto found_meta;
80 }
81 cur = cur->next;
82 }
83 if (cur == NULL)
84 return(NULL);
85found_head:
86 cur = cur->children;
87
88 /*
89 * Search the meta elements
90 */
91found_meta:
92 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000093 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000094 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
95 xmlAttrPtr attr = cur->properties;
96 int http;
97 const xmlChar *value;
98
99 content = NULL;
100 http = 0;
101 while (attr != NULL) {
102 if ((attr->children != NULL) &&
103 (attr->children->type == XML_TEXT_NODE) &&
104 (attr->children->next == NULL)) {
105#ifndef XML_USE_BUFFER_CONTENT
106 value = attr->children->content;
107#else
108 value = xmlBufferContent(attr->children->content);
109#endif
110 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
111 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
112 http = 1;
113 else if ((value != NULL)
114 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
115 content = value;
116 if ((http != 0) && (content != NULL))
117 goto found_content;
118 }
119 attr = attr->next;
120 }
121 }
122 }
123 cur = cur->next;
124 }
125 return(NULL);
126
127found_content:
128 encoding = xmlStrstr(content, BAD_CAST"charset=");
129 if (encoding == NULL)
130 encoding = xmlStrstr(content, BAD_CAST"Charset=");
131 if (encoding == NULL)
132 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
133 if (encoding != NULL) {
134 encoding += 8;
135 } else {
136 encoding = xmlStrstr(content, BAD_CAST"charset =");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"Charset =");
139 if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
141 if (encoding != NULL)
142 encoding += 9;
143 }
144 if (encoding != NULL) {
145 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
146 }
147 return(encoding);
148}
149
150/**
151 * htmlSetMetaEncoding:
152 * @doc: the document
153 * @encoding: the encoding string
154 *
155 * Sets the current encoding in the Meta tags
156 * NOTE: this will not change the document content encoding, just
157 * the META flag associated.
158 *
159 * Returns 0 in case of success and -1 in case of error
160 */
161int
162htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
163 htmlNodePtr cur, meta;
164 const xmlChar *content;
165 char newcontent[100];
166
167
168 if (doc == NULL)
169 return(-1);
170
171 if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000172 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
173 encoding);
Owen Taylor3473f882001-02-23 17:55:21 +0000174 newcontent[sizeof(newcontent) - 1] = 0;
175 }
176
177 cur = doc->children;
178
179 /*
180 * Search the html
181 */
182 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000183 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000184 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
185 break;
186 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
187 goto found_head;
188 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
189 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000190 }
191 cur = cur->next;
192 }
193 if (cur == NULL)
194 return(-1);
195 cur = cur->children;
196
197 /*
198 * Search the head
199 */
200 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000201 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000202 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
203 break;
204 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
205 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000206 }
207 cur = cur->next;
208 }
209 if (cur == NULL)
210 return(-1);
211found_head:
212 if (cur->children == NULL) {
213 if (encoding == NULL)
214 return(0);
215 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
216 xmlAddChild(cur, meta);
Owen Taylor3473f882001-02-23 17:55:21 +0000217 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000218 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Owen Taylor3473f882001-02-23 17:55:21 +0000219 return(0);
220 }
221 cur = cur->children;
222
223found_meta:
224 if (encoding != NULL) {
225 /*
226 * Create a new Meta element with the right aatributes
227 */
228
229 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
230 xmlAddPrevSibling(cur, meta);
Owen Taylor3473f882001-02-23 17:55:21 +0000231 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000232 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Owen Taylor3473f882001-02-23 17:55:21 +0000233 }
234
235 /*
236 * Search and destroy all the remaining the meta elements carrying
237 * encoding informations
238 */
239 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000240 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000241 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000242 xmlAttrPtr attr = cur->properties;
243 int http;
244 const xmlChar *value;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000245 int same_charset;
Owen Taylor3473f882001-02-23 17:55:21 +0000246
247 content = NULL;
248 http = 0;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000249 same_charset = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000250 while (attr != NULL) {
251 if ((attr->children != NULL) &&
252 (attr->children->type == XML_TEXT_NODE) &&
253 (attr->children->next == NULL)) {
254#ifndef XML_USE_BUFFER_CONTENT
255 value = attr->children->content;
256#else
257 value = xmlBufferContent(attr->children->content);
258#endif
259 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
260 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
261 http = 1;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000262 else
263 {
264 if ((value != NULL) &&
265 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
266 content = value;
267 else
268 if ((!xmlStrcasecmp(attr->name, BAD_CAST"charset"))
269 && (!xmlStrcasecmp(value, encoding)))
270 same_charset = 1;
271 }
272 if ((http != 0) && (content != NULL) && (same_charset != 0))
Owen Taylor3473f882001-02-23 17:55:21 +0000273 break;
274 }
275 attr = attr->next;
276 }
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000277 if ((http != 0) && (content != NULL) && (same_charset != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000278 meta = cur;
279 cur = cur->next;
280 xmlUnlinkNode(meta);
281 xmlFreeNode(meta);
282 continue;
283 }
284
285 }
286 }
287 cur = cur->next;
288 }
289 return(0);
290}
291
292/************************************************************************
293 * *
294 * Dumping HTML tree content to a simple buffer *
295 * *
296 ************************************************************************/
297
298static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000299htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000300
301/**
302 * htmlDtdDump:
303 * @buf: the HTML buffer output
304 * @doc: the document
305 *
306 * Dump the HTML document DTD, if any.
307 */
308static void
309htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
310 xmlDtdPtr cur = doc->intSubset;
311
312 if (cur == NULL) {
313 xmlGenericError(xmlGenericErrorContext,
314 "htmlDtdDump : no internal subset\n");
315 return;
316 }
317 xmlBufferWriteChar(buf, "<!DOCTYPE ");
318 xmlBufferWriteCHAR(buf, cur->name);
319 if (cur->ExternalID != NULL) {
320 xmlBufferWriteChar(buf, " PUBLIC ");
321 xmlBufferWriteQuotedString(buf, cur->ExternalID);
322 if (cur->SystemID != NULL) {
323 xmlBufferWriteChar(buf, " ");
324 xmlBufferWriteQuotedString(buf, cur->SystemID);
325 }
326 } else if (cur->SystemID != NULL) {
327 xmlBufferWriteChar(buf, " SYSTEM ");
328 xmlBufferWriteQuotedString(buf, cur->SystemID);
329 }
330 xmlBufferWriteChar(buf, ">\n");
331}
332
333/**
334 * htmlAttrDump:
335 * @buf: the HTML buffer output
336 * @doc: the document
337 * @cur: the attribute pointer
338 *
339 * Dump an HTML attribute
340 */
341static void
342htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
343 xmlChar *value;
344
Daniel Veillardeca60d02001-06-13 07:45:41 +0000345 /*
346 * TODO: The html output method should not escape a & character
347 * occurring in an attribute value immediately followed by
348 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
349 */
350
Owen Taylor3473f882001-02-23 17:55:21 +0000351 if (cur == NULL) {
352 xmlGenericError(xmlGenericErrorContext,
353 "htmlAttrDump : property == NULL\n");
354 return;
355 }
356 xmlBufferWriteChar(buf, " ");
357 xmlBufferWriteCHAR(buf, cur->name);
358 if (cur->children != NULL) {
359 value = xmlNodeListGetString(doc, cur->children, 0);
360 if (value) {
361 xmlBufferWriteChar(buf, "=");
362 xmlBufferWriteQuotedString(buf, value);
363 xmlFree(value);
364 } else {
365 xmlBufferWriteChar(buf, "=\"\"");
366 }
367 }
368}
369
370/**
371 * htmlAttrListDump:
372 * @buf: the HTML buffer output
373 * @doc: the document
374 * @cur: the first attribute pointer
375 *
376 * Dump a list of HTML attributes
377 */
378static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000379htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, int format) {
380 int i = 0;
381
Owen Taylor3473f882001-02-23 17:55:21 +0000382 if (cur == NULL) {
383 xmlGenericError(xmlGenericErrorContext,
384 "htmlAttrListDump : property == NULL\n");
385 return;
386 }
387 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000388 i++;
389 if ((format) && (i >= 5)) {
390 i = 0;
391 xmlBufferWriteChar(buf, "\n");
392 }
Owen Taylor3473f882001-02-23 17:55:21 +0000393 htmlAttrDump(buf, doc, cur);
394 cur = cur->next;
395 }
396}
397
Daniel Veillard95d845f2001-06-13 13:48:46 +0000398static void
399htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000400
Owen Taylor3473f882001-02-23 17:55:21 +0000401/**
402 * htmlNodeListDump:
403 * @buf: the HTML buffer output
404 * @doc: the document
405 * @cur: the first node
406 *
407 * Dump an HTML node list, recursive behaviour,children are printed too.
408 */
409static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000410htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000411 if (cur == NULL) {
412 xmlGenericError(xmlGenericErrorContext,
413 "htmlNodeListDump : node == NULL\n");
414 return;
415 }
416 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000417 htmlNodeDumpFormat(buf, doc, cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000418 cur = cur->next;
419 }
420}
421
422/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000423 * htmlNodeDumpFormat:
Owen Taylor3473f882001-02-23 17:55:21 +0000424 * @buf: the HTML buffer output
425 * @doc: the document
426 * @cur: the current node
Daniel Veillard95d845f2001-06-13 13:48:46 +0000427 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000428 *
429 * Dump an HTML node, recursive behaviour,children are printed too.
430 */
431void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000432htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
433 int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000434 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000435
436 if (cur == NULL) {
437 xmlGenericError(xmlGenericErrorContext,
438 "htmlNodeDump : node == NULL\n");
439 return;
440 }
441 /*
442 * Special cases.
443 */
444 if (cur->type == XML_DTD_NODE)
445 return;
446 if (cur->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000447 htmlDocContentDump(buf, (xmlDocPtr) cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000448 return;
449 }
450 if (cur->type == HTML_TEXT_NODE) {
451 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000452 if (((cur->name == (const xmlChar *)xmlStringText) ||
453 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000454 ((cur->parent == NULL) ||
455 (!xmlStrEqual(cur->parent->name, BAD_CAST "script")))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000456 xmlChar *buffer;
457
458#ifndef XML_USE_BUFFER_CONTENT
459 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
460#else
461 buffer = xmlEncodeEntitiesReentrant(doc,
462 xmlBufferContent(cur->content));
463#endif
464 if (buffer != NULL) {
465 xmlBufferWriteCHAR(buf, buffer);
466 xmlFree(buffer);
467 }
468 } else {
469 xmlBufferWriteCHAR(buf, cur->content);
470 }
471 }
472 return;
473 }
474 if (cur->type == HTML_COMMENT_NODE) {
475 if (cur->content != NULL) {
476 xmlBufferWriteChar(buf, "<!--");
477#ifndef XML_USE_BUFFER_CONTENT
478 xmlBufferWriteCHAR(buf, cur->content);
479#else
480 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
481#endif
482 xmlBufferWriteChar(buf, "-->");
483 }
484 return;
485 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000486 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000487 if (cur->name == NULL)
488 return;
489 xmlBufferWriteChar(buf, "<?");
490 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000491 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000492 xmlBufferWriteChar(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000493#ifndef XML_USE_BUFFER_CONTENT
494 xmlBufferWriteCHAR(buf, cur->content);
495#else
496 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
497#endif
Daniel Veillard7533cc82001-04-24 15:52:00 +0000498 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000499 xmlBufferWriteChar(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000500 return;
501 }
Owen Taylor3473f882001-02-23 17:55:21 +0000502 if (cur->type == HTML_ENTITY_REF_NODE) {
503 xmlBufferWriteChar(buf, "&");
504 xmlBufferWriteCHAR(buf, cur->name);
505 xmlBufferWriteChar(buf, ";");
506 return;
507 }
Daniel Veillard083c2662001-05-08 08:27:14 +0000508 if (cur->type == HTML_PRESERVE_NODE) {
509 if (cur->content != NULL) {
510#ifndef XML_USE_BUFFER_CONTENT
511 xmlBufferWriteCHAR(buf, cur->content);
512#else
513 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
514#endif
515 }
516 return;
517 }
Owen Taylor3473f882001-02-23 17:55:21 +0000518
519 /*
Daniel Veillard083c2662001-05-08 08:27:14 +0000520 * Get specific HTML info for taht node.
Owen Taylor3473f882001-02-23 17:55:21 +0000521 */
522 info = htmlTagLookup(cur->name);
523
524 xmlBufferWriteChar(buf, "<");
525 xmlBufferWriteCHAR(buf, cur->name);
526 if (cur->properties != NULL)
Daniel Veillard95d845f2001-06-13 13:48:46 +0000527 htmlAttrListDump(buf, doc, cur->properties, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000528
529 if ((info != NULL) && (info->empty)) {
530 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000531 if ((format) && (info != NULL) && (!info->isinline) &&
532 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000533 if ((cur->next->type != HTML_TEXT_NODE) &&
534 (cur->next->type != HTML_ENTITY_REF_NODE))
535 xmlBufferWriteChar(buf, "\n");
536 }
537 return;
538 }
Daniel Veillard7db37732001-07-12 01:20:08 +0000539 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
540 (cur->children == NULL)) {
Daniel Veillard083c2662001-05-08 08:27:14 +0000541 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +0000542 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
543 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000544 xmlBufferWriteChar(buf, ">");
Daniel Veillard083c2662001-05-08 08:27:14 +0000545 } else {
Owen Taylor3473f882001-02-23 17:55:21 +0000546 xmlBufferWriteChar(buf, "></");
547 xmlBufferWriteCHAR(buf, cur->name);
548 xmlBufferWriteChar(buf, ">");
549 }
Daniel Veillard02bb1702001-06-13 21:11:59 +0000550 if ((format) && (info != NULL) && (!info->isinline) &&
551 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000552 if ((cur->next->type != HTML_TEXT_NODE) &&
553 (cur->next->type != HTML_ENTITY_REF_NODE))
554 xmlBufferWriteChar(buf, "\n");
555 }
556 return;
557 }
558 xmlBufferWriteChar(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +0000559 if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000560 xmlChar *buffer;
561
562#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard083c2662001-05-08 08:27:14 +0000563 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000564#else
Daniel Veillard083c2662001-05-08 08:27:14 +0000565 buffer = xmlEncodeEntitiesReentrant(doc,
566 xmlBufferContent(cur->content));
Owen Taylor3473f882001-02-23 17:55:21 +0000567#endif
568 if (buffer != NULL) {
569 xmlBufferWriteCHAR(buf, buffer);
570 xmlFree(buffer);
571 }
572 }
573 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +0000574 if ((format) && (info != NULL) && (!info->isinline) &&
575 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000576 (cur->children->type != HTML_ENTITY_REF_NODE) &&
577 (cur->children != cur->last))
578 xmlBufferWriteChar(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +0000579 htmlNodeListDump(buf, doc, cur->children, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +0000580 if ((format) && (info != NULL) && (!info->isinline) &&
581 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000582 (cur->last->type != HTML_ENTITY_REF_NODE) &&
583 (cur->children != cur->last))
584 xmlBufferWriteChar(buf, "\n");
585 }
Owen Taylor3473f882001-02-23 17:55:21 +0000586 xmlBufferWriteChar(buf, "</");
587 xmlBufferWriteCHAR(buf, cur->name);
588 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000589 if ((format) && (info != NULL) && (!info->isinline) &&
590 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000591 if ((cur->next->type != HTML_TEXT_NODE) &&
592 (cur->next->type != HTML_ENTITY_REF_NODE))
593 xmlBufferWriteChar(buf, "\n");
594 }
595}
596
597/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000598 * htmlNodeDump:
599 * @buf: the HTML buffer output
600 * @doc: the document
601 * @cur: the current node
602 *
603 * Dump an HTML node, recursive behaviour,children are printed too,
604 * and formatting returns are added.
605 */
606void
607htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
608 htmlNodeDumpFormat(buf, doc, cur, 1);
609}
610
611/**
612 * htmlNodeDumpFileFormat:
613 * @out: the FILE pointer
614 * @doc: the document
615 * @cur: the current node
616 * @encoding: the document encoding
617 * @format: should formatting spaces been added
618 *
619 * Dump an HTML node, recursive behaviour,children are printed too.
620 *
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000621 * TODO: if encoding == NULL try to save in the doc encoding
622 *
623 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard95d845f2001-06-13 13:48:46 +0000624 */
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000625int
626htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
627 xmlNodePtr cur, const char *encoding, int format) {
628 xmlOutputBufferPtr buf;
629 xmlCharEncodingHandlerPtr handler = NULL;
630 int ret;
Daniel Veillard95d845f2001-06-13 13:48:46 +0000631
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000632 if (encoding != NULL) {
633 xmlCharEncoding enc;
634
635 enc = xmlParseCharEncoding(encoding);
636 if (enc != XML_CHAR_ENCODING_UTF8) {
637 handler = xmlFindCharEncodingHandler(encoding);
638 if (handler == NULL)
639 return(-1);
640 }
641 }
642
643 /*
644 * Fallback to HTML or ASCII when the encoding is unspecified
645 */
646 if (handler == NULL)
647 handler = xmlFindCharEncodingHandler("HTML");
648 if (handler == NULL)
649 handler = xmlFindCharEncodingHandler("ascii");
650
651 /*
652 * save the content to a temp buffer.
653 */
654 buf = xmlOutputBufferCreateFile(out, handler);
655 if (buf == NULL) return(0);
656
657 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
658
659 ret = xmlOutputBufferClose(buf);
660 return(ret);
Daniel Veillard95d845f2001-06-13 13:48:46 +0000661}
662
663/**
Owen Taylor3473f882001-02-23 17:55:21 +0000664 * htmlNodeDumpFile:
665 * @out: the FILE pointer
666 * @doc: the document
667 * @cur: the current node
668 *
Daniel Veillard95d845f2001-06-13 13:48:46 +0000669 * Dump an HTML node, recursive behaviour,children are printed too,
670 * and formatting returns are added.
Owen Taylor3473f882001-02-23 17:55:21 +0000671 */
672void
673htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000674 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000675}
676
677/**
678 * htmlDocContentDump:
679 * @buf: the HTML buffer output
680 * @cur: the document
681 *
682 * Dump an HTML document.
683 */
684static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000685htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000686 int type;
687
688 /*
689 * force to output the stuff as HTML, especially for entities
690 */
691 type = cur->type;
692 cur->type = XML_HTML_DOCUMENT_NODE;
693 if (cur->intSubset != NULL)
694 htmlDtdDump(buf, cur);
695 else {
696 /* Default to HTML-4.0 transitionnal @@@@ */
697 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
698
699 }
700 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000701 htmlNodeListDump(buf, cur, cur->children, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000702 }
703 xmlBufferWriteChar(buf, "\n");
704 cur->type = (xmlElementType) type;
705}
706
707/**
708 * htmlDocDumpMemory:
709 * @cur: the document
710 * @mem: OUT: the memory pointer
Daniel Veillard2d703722001-05-30 18:32:34 +0000711 * @size: OUT: the memory length
Owen Taylor3473f882001-02-23 17:55:21 +0000712 *
713 * Dump an HTML document in memory and return the xmlChar * and it's size.
714 * It's up to the caller to free the memory.
715 */
716void
717htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard2d703722001-05-30 18:32:34 +0000718 xmlOutputBufferPtr buf;
719 xmlCharEncodingHandlerPtr handler = NULL;
720 const char *encoding;
Owen Taylor3473f882001-02-23 17:55:21 +0000721
722 if (cur == NULL) {
723#ifdef DEBUG_TREE
724 xmlGenericError(xmlGenericErrorContext,
Daniel Veillard2d703722001-05-30 18:32:34 +0000725 "htmlDocDumpMemory : document == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000726#endif
727 *mem = NULL;
728 *size = 0;
729 return;
730 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000731
732 encoding = (const char *) htmlGetMetaEncoding(cur);
733
734 if (encoding != NULL) {
735 xmlCharEncoding enc;
736
737 enc = xmlParseCharEncoding(encoding);
738 if (enc != cur->charset) {
739 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
740 /*
741 * Not supported yet
742 */
743 *mem = NULL;
744 *size = 0;
745 return;
746 }
747
748 handler = xmlFindCharEncodingHandler(encoding);
749 if (handler == NULL) {
750 *mem = NULL;
751 *size = 0;
752 return;
753 }
754 }
755 }
756
757 /*
758 * Fallback to HTML or ASCII when the encoding is unspecified
759 */
760 if (handler == NULL)
761 handler = xmlFindCharEncodingHandler("HTML");
762 if (handler == NULL)
763 handler = xmlFindCharEncodingHandler("ascii");
764
765 buf = xmlAllocOutputBuffer(handler);
Owen Taylor3473f882001-02-23 17:55:21 +0000766 if (buf == NULL) {
767 *mem = NULL;
768 *size = 0;
769 return;
770 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000771
772 htmlDocContentDumpOutput(buf, cur, NULL);
773 xmlOutputBufferFlush(buf);
774 if (buf->conv != NULL) {
775 *size = buf->conv->use;
776 *mem = xmlStrndup(buf->conv->content, *size);
777 } else {
778 *size = buf->buffer->use;
779 *mem = xmlStrndup(buf->buffer->content, *size);
780 }
781 (void)xmlOutputBufferClose(buf);
Owen Taylor3473f882001-02-23 17:55:21 +0000782}
783
784
785/************************************************************************
786 * *
787 * Dumping HTML tree content to an I/O output buffer *
788 * *
789 ************************************************************************/
790
Daniel Veillard95d845f2001-06-13 13:48:46 +0000791void
792htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
793 const char *encoding, int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000794/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000795 * htmlDtdDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000796 * @buf: the HTML buffer output
797 * @doc: the document
798 * @encoding: the encoding string
799 *
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000800 * TODO: check whether encoding is needed
801 *
Owen Taylor3473f882001-02-23 17:55:21 +0000802 * Dump the HTML document DTD, if any.
803 */
804static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000805htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000806 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000807 xmlDtdPtr cur = doc->intSubset;
808
809 if (cur == NULL) {
810 xmlGenericError(xmlGenericErrorContext,
811 "htmlDtdDump : no internal subset\n");
812 return;
813 }
814 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
815 xmlOutputBufferWriteString(buf, (const char *)cur->name);
816 if (cur->ExternalID != NULL) {
817 xmlOutputBufferWriteString(buf, " PUBLIC ");
818 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
819 if (cur->SystemID != NULL) {
820 xmlOutputBufferWriteString(buf, " ");
821 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
822 }
823 } else if (cur->SystemID != NULL) {
824 xmlOutputBufferWriteString(buf, " SYSTEM ");
825 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
826 }
827 xmlOutputBufferWriteString(buf, ">\n");
828}
829
830/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000831 * htmlAttrDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000832 * @buf: the HTML buffer output
833 * @doc: the document
834 * @cur: the attribute pointer
835 * @encoding: the encoding string
836 *
837 * Dump an HTML attribute
838 */
839static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000840htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000841 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000842 xmlChar *value;
843
Daniel Veillardeca60d02001-06-13 07:45:41 +0000844 /*
845 * TODO: The html output method should not escape a & character
846 * occurring in an attribute value immediately followed by
847 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
848 */
849
Owen Taylor3473f882001-02-23 17:55:21 +0000850 if (cur == NULL) {
851 xmlGenericError(xmlGenericErrorContext,
852 "htmlAttrDump : property == NULL\n");
853 return;
854 }
855 xmlOutputBufferWriteString(buf, " ");
856 xmlOutputBufferWriteString(buf, (const char *)cur->name);
857 if (cur->children != NULL) {
858 value = xmlNodeListGetString(doc, cur->children, 0);
859 if (value) {
860 xmlOutputBufferWriteString(buf, "=");
861 xmlBufferWriteQuotedString(buf->buffer, value);
862 xmlFree(value);
863 } else {
864 xmlOutputBufferWriteString(buf, "=\"\"");
865 }
866 }
867}
868
869/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000870 * htmlAttrListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000871 * @buf: the HTML buffer output
872 * @doc: the document
873 * @cur: the first attribute pointer
874 * @encoding: the encoding string
875 *
876 * Dump a list of HTML attributes
877 */
878static void
879htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
880 if (cur == NULL) {
881 xmlGenericError(xmlGenericErrorContext,
882 "htmlAttrListDump : property == NULL\n");
883 return;
884 }
885 while (cur != NULL) {
886 htmlAttrDumpOutput(buf, doc, cur, encoding);
887 cur = cur->next;
888 }
889}
890
891
892void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
893 xmlNodePtr cur, const char *encoding);
894
895/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000896 * htmlNodeListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000897 * @buf: the HTML buffer output
898 * @doc: the document
899 * @cur: the first node
900 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000901 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000902 *
903 * Dump an HTML node list, recursive behaviour,children are printed too.
904 */
905static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000906htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
907 xmlNodePtr cur, const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000908 if (cur == NULL) {
909 xmlGenericError(xmlGenericErrorContext,
910 "htmlNodeListDump : node == NULL\n");
911 return;
912 }
913 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000914 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000915 cur = cur->next;
916 }
917}
918
919/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000920 * htmlNodeDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000921 * @buf: the HTML buffer output
922 * @doc: the document
923 * @cur: the current node
924 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000925 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000926 *
927 * Dump an HTML node, recursive behaviour,children are printed too.
928 */
929void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000930htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
931 xmlNodePtr cur, const char *encoding, int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000932 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000933
934 if (cur == NULL) {
935 xmlGenericError(xmlGenericErrorContext,
936 "htmlNodeDump : node == NULL\n");
937 return;
938 }
939 /*
940 * Special cases.
941 */
942 if (cur->type == XML_DTD_NODE)
943 return;
944 if (cur->type == XML_HTML_DOCUMENT_NODE) {
945 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
946 return;
947 }
948 if (cur->type == HTML_TEXT_NODE) {
949 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000950 if (((cur->name == (const xmlChar *)xmlStringText) ||
951 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000952 ((cur->parent == NULL) ||
953 (!xmlStrEqual(cur->parent->name, BAD_CAST "script")))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000954 xmlChar *buffer;
955
956#ifndef XML_USE_BUFFER_CONTENT
957 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
958#else
959 buffer = xmlEncodeEntitiesReentrant(doc,
960 xmlBufferContent(cur->content));
961#endif
962 if (buffer != NULL) {
963 xmlOutputBufferWriteString(buf, (const char *)buffer);
964 xmlFree(buffer);
965 }
966 } else {
967 xmlOutputBufferWriteString(buf, (const char *)cur->content);
968 }
969 }
970 return;
971 }
972 if (cur->type == HTML_COMMENT_NODE) {
973 if (cur->content != NULL) {
974 xmlOutputBufferWriteString(buf, "<!--");
975#ifndef XML_USE_BUFFER_CONTENT
976 xmlOutputBufferWriteString(buf, (const char *)cur->content);
977#else
978 xmlOutputBufferWriteString(buf, (const char *)
979 xmlBufferContent(cur->content));
980#endif
981 xmlOutputBufferWriteString(buf, "-->");
982 }
983 return;
984 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000985 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000986 if (cur->name == NULL)
987 return;
988 xmlOutputBufferWriteString(buf, "<?");
989 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000990 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000991 xmlOutputBufferWriteString(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000992#ifndef XML_USE_BUFFER_CONTENT
993 xmlOutputBufferWriteString(buf, (const char *)cur->content);
994#else
995 xmlOutputBufferWriteString(buf, (const char *)
996 xmlBufferContent(cur->content));
997#endif
Daniel Veillard7533cc82001-04-24 15:52:00 +0000998 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000999 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +00001000 return;
1001 }
Owen Taylor3473f882001-02-23 17:55:21 +00001002 if (cur->type == HTML_ENTITY_REF_NODE) {
1003 xmlOutputBufferWriteString(buf, "&");
1004 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1005 xmlOutputBufferWriteString(buf, ";");
1006 return;
1007 }
1008 if (cur->type == HTML_PRESERVE_NODE) {
1009 if (cur->content != NULL) {
1010#ifndef XML_USE_BUFFER_CONTENT
1011 xmlOutputBufferWriteString(buf, (const char *)cur->content);
1012#else
1013 xmlOutputBufferWriteString(buf, (const char *)
1014 xmlBufferContent(cur->content));
1015#endif
1016 }
1017 return;
1018 }
1019
1020 /*
Daniel Veillard1ed3f882001-04-18 09:45:35 +00001021 * Get specific HTML info for taht node.
Owen Taylor3473f882001-02-23 17:55:21 +00001022 */
1023 info = htmlTagLookup(cur->name);
1024
1025 xmlOutputBufferWriteString(buf, "<");
1026 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1027 if (cur->properties != NULL)
1028 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
1029
1030 if ((info != NULL) && (info->empty)) {
1031 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +00001032 if ((format) && (!info->isinline) && (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001033 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +00001034 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1035 (cur->parent != NULL) &&
1036 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001037 xmlOutputBufferWriteString(buf, "\n");
1038 }
1039 return;
1040 }
Daniel Veillard7db37732001-07-12 01:20:08 +00001041 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
1042 (cur->children == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001043 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +00001044 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
1045 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001046 xmlOutputBufferWriteString(buf, ">");
1047 } else {
1048 xmlOutputBufferWriteString(buf, "></");
1049 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1050 xmlOutputBufferWriteString(buf, ">");
1051 }
Daniel Veillard02bb1702001-06-13 21:11:59 +00001052 if ((format) && (cur->next != NULL) &&
1053 (info != NULL) && (!info->isinline)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001054 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +00001055 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1056 (cur->parent != NULL) &&
1057 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001058 xmlOutputBufferWriteString(buf, "\n");
1059 }
1060 return;
1061 }
1062 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +00001063 if ((cur->type != XML_ELEMENT_NODE) &&
1064 (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001065 /*
1066 * Uses the OutputBuffer property to automatically convert
1067 * invalids to charrefs
1068 */
1069
1070#ifndef XML_USE_BUFFER_CONTENT
1071 xmlOutputBufferWriteString(buf, (const char *) cur->content);
1072#else
1073 xmlOutputBufferWriteString(buf,
1074 (const char *) xmlBufferContent(cur->content));
1075#endif
1076 }
1077 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +00001078 if ((format) && (info != NULL) && (!info->isinline) &&
1079 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001080 (cur->children->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001081 (cur->children != cur->last) &&
1082 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001083 xmlOutputBufferWriteString(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +00001084 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +00001085 if ((format) && (info != NULL) && (!info->isinline) &&
1086 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001087 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001088 (cur->children != cur->last) &&
1089 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001090 xmlOutputBufferWriteString(buf, "\n");
1091 }
Owen Taylor3473f882001-02-23 17:55:21 +00001092 xmlOutputBufferWriteString(buf, "</");
1093 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1094 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +00001095 if ((format) && (info != NULL) && (!info->isinline) &&
1096 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001097 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001098 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1099 (cur->parent != NULL) &&
1100 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001101 xmlOutputBufferWriteString(buf, "\n");
1102 }
1103}
1104
1105/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001106 * htmlNodeDumpOutput:
1107 * @buf: the HTML buffer output
1108 * @doc: the document
1109 * @cur: the current node
1110 * @encoding: the encoding string
1111 *
1112 * Dump an HTML node, recursive behaviour,children are printed too,
1113 * and formatting returns/spaces are added.
1114 */
1115void
1116htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
1117 xmlNodePtr cur, const char *encoding) {
1118 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
1119}
1120
1121/**
1122 * htmlDocContentDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +00001123 * @buf: the HTML buffer output
1124 * @cur: the document
1125 * @encoding: the encoding string
1126 *
1127 * Dump an HTML document.
1128 */
1129void
Daniel Veillard95d845f2001-06-13 13:48:46 +00001130htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1131 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001132 int type;
1133
1134 /*
1135 * force to output the stuff as HTML, especially for entities
1136 */
1137 type = cur->type;
1138 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard4dd93462001-04-02 15:16:19 +00001139 if (cur->intSubset != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001140 htmlDtdDumpOutput(buf, cur, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001141 }
1142 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +00001143 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001144 }
1145 xmlOutputBufferWriteString(buf, "\n");
1146 cur->type = (xmlElementType) type;
1147}
1148
Daniel Veillard95d845f2001-06-13 13:48:46 +00001149/**
1150 * htmlDocContentDumpOutput:
1151 * @buf: the HTML buffer output
1152 * @cur: the document
1153 * @encoding: the encoding string
1154 *
1155 * Dump an HTML document. Formating return/spaces are added.
1156 */
1157void
1158htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1159 const char *encoding) {
1160 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1161}
1162
Owen Taylor3473f882001-02-23 17:55:21 +00001163/************************************************************************
1164 * *
1165 * Saving functions front-ends *
1166 * *
1167 ************************************************************************/
1168
1169/**
1170 * htmlDocDump:
1171 * @f: the FILE*
1172 * @cur: the document
1173 *
1174 * Dump an HTML document to an open FILE.
1175 *
1176 * returns: the number of byte written or -1 in case of failure.
1177 */
1178int
1179htmlDocDump(FILE *f, xmlDocPtr cur) {
1180 xmlOutputBufferPtr buf;
1181 xmlCharEncodingHandlerPtr handler = NULL;
1182 const char *encoding;
1183 int ret;
1184
1185 if (cur == NULL) {
1186#ifdef DEBUG_TREE
1187 xmlGenericError(xmlGenericErrorContext,
1188 "htmlDocDump : document == NULL\n");
1189#endif
1190 return(-1);
1191 }
1192
1193 encoding = (const char *) htmlGetMetaEncoding(cur);
1194
1195 if (encoding != NULL) {
1196 xmlCharEncoding enc;
1197
1198 enc = xmlParseCharEncoding(encoding);
1199 if (enc != cur->charset) {
1200 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1201 /*
1202 * Not supported yet
1203 */
1204 return(-1);
1205 }
1206
1207 handler = xmlFindCharEncodingHandler(encoding);
1208 if (handler == NULL)
1209 return(-1);
1210 }
1211 }
1212
1213 /*
1214 * Fallback to HTML or ASCII when the encoding is unspecified
1215 */
1216 if (handler == NULL)
1217 handler = xmlFindCharEncodingHandler("HTML");
1218 if (handler == NULL)
1219 handler = xmlFindCharEncodingHandler("ascii");
1220
1221 buf = xmlOutputBufferCreateFile(f, handler);
1222 if (buf == NULL) return(-1);
1223 htmlDocContentDumpOutput(buf, cur, NULL);
1224
1225 ret = xmlOutputBufferClose(buf);
1226 return(ret);
1227}
1228
1229/**
1230 * htmlSaveFile:
1231 * @filename: the filename (or URL)
1232 * @cur: the document
1233 *
1234 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1235 * used.
1236 * returns: the number of byte written or -1 in case of failure.
1237 */
1238int
1239htmlSaveFile(const char *filename, xmlDocPtr cur) {
1240 xmlOutputBufferPtr buf;
1241 xmlCharEncodingHandlerPtr handler = NULL;
1242 const char *encoding;
1243 int ret;
1244
1245 encoding = (const char *) htmlGetMetaEncoding(cur);
1246
1247 if (encoding != NULL) {
1248 xmlCharEncoding enc;
1249
1250 enc = xmlParseCharEncoding(encoding);
1251 if (enc != cur->charset) {
1252 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1253 /*
1254 * Not supported yet
1255 */
1256 return(-1);
1257 }
1258
1259 handler = xmlFindCharEncodingHandler(encoding);
1260 if (handler == NULL)
1261 return(-1);
1262 }
1263 }
1264
1265 /*
1266 * Fallback to HTML or ASCII when the encoding is unspecified
1267 */
1268 if (handler == NULL)
1269 handler = xmlFindCharEncodingHandler("HTML");
1270 if (handler == NULL)
1271 handler = xmlFindCharEncodingHandler("ascii");
1272
1273 /*
1274 * save the content to a temp buffer.
1275 */
1276 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1277 if (buf == NULL) return(0);
1278
1279 htmlDocContentDumpOutput(buf, cur, NULL);
1280
1281 ret = xmlOutputBufferClose(buf);
1282 return(ret);
1283}
1284
1285/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001286 * htmlSaveFileFormat:
Owen Taylor3473f882001-02-23 17:55:21 +00001287 * @filename: the filename
1288 * @cur: the document
Daniel Veillard95d845f2001-06-13 13:48:46 +00001289 * @format: should formatting spaces been added
1290 * @encoding: the document encoding
Owen Taylor3473f882001-02-23 17:55:21 +00001291 *
1292 * Dump an HTML document to a file using a given encoding.
1293 *
1294 * returns: the number of byte written or -1 in case of failure.
1295 */
1296int
Daniel Veillard95d845f2001-06-13 13:48:46 +00001297htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1298 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001299 xmlOutputBufferPtr buf;
1300 xmlCharEncodingHandlerPtr handler = NULL;
1301 int ret;
1302
1303 if (encoding != NULL) {
1304 xmlCharEncoding enc;
1305
1306 enc = xmlParseCharEncoding(encoding);
1307 if (enc != cur->charset) {
1308 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1309 /*
1310 * Not supported yet
1311 */
1312 return(-1);
1313 }
1314
1315 handler = xmlFindCharEncodingHandler(encoding);
1316 if (handler == NULL)
1317 return(-1);
1318 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1319 }
Daniel Veillard4dd93462001-04-02 15:16:19 +00001320 } else {
1321 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
Owen Taylor3473f882001-02-23 17:55:21 +00001322 }
1323
1324 /*
1325 * Fallback to HTML or ASCII when the encoding is unspecified
1326 */
1327 if (handler == NULL)
1328 handler = xmlFindCharEncodingHandler("HTML");
1329 if (handler == NULL)
1330 handler = xmlFindCharEncodingHandler("ascii");
1331
1332 /*
1333 * save the content to a temp buffer.
1334 */
1335 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1336 if (buf == NULL) return(0);
1337
Daniel Veillard95d845f2001-06-13 13:48:46 +00001338 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001339
1340 ret = xmlOutputBufferClose(buf);
1341 return(ret);
1342}
Daniel Veillard95d845f2001-06-13 13:48:46 +00001343
1344/**
1345 * htmlSaveFileEnc:
1346 * @filename: the filename
1347 * @cur: the document
1348 * @encoding: the document encoding
1349 *
1350 * Dump an HTML document to a file using a given encoding
1351 * and formatting returns/spaces are added.
1352 *
1353 * returns: the number of byte written or -1 in case of failure.
1354 */
1355int
1356htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1357 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1358}
1359
Owen Taylor3473f882001-02-23 17:55:21 +00001360#endif /* LIBXML_HTML_ENABLED */