blob: cd9417c61858d45ba8e341afb0654097c1f7f232 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * HTMLtree.c : implementation of access functions for an HTML tree.
3 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00006 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00007 */
8
9
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000011#ifdef LIBXML_HTML_ENABLED
12
Owen Taylor3473f882001-02-23 17:55:21 +000013#ifdef HAVE_CTYPE_H
14#include <ctype.h>
15#endif
16#ifdef HAVE_STDLIB_H
17#include <stdlib.h>
18#endif
19
20#include <libxml/xmlmemory.h>
21#include <libxml/HTMLparser.h>
22#include <libxml/HTMLtree.h>
23#include <libxml/entities.h>
24#include <libxml/valid.h>
25#include <libxml/xmlerror.h>
26#include <libxml/parserInternals.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000027#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000028
29/************************************************************************
30 * *
31 * Getting/Setting encoding meta tags *
32 * *
33 ************************************************************************/
34
35/**
36 * htmlGetMetaEncoding:
37 * @doc: the document
38 *
39 * Encoding definition lookup in the Meta tags
40 *
41 * Returns the current encoding as flagged in the HTML source
42 */
43const xmlChar *
44htmlGetMetaEncoding(htmlDocPtr doc) {
45 htmlNodePtr cur;
46 const xmlChar *content;
47 const xmlChar *encoding;
48
49 if (doc == NULL)
50 return(NULL);
51 cur = doc->children;
52
53 /*
54 * Search the html
55 */
56 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000057 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000058 if (xmlStrEqual(cur->name, BAD_CAST"html"))
59 break;
60 if (xmlStrEqual(cur->name, BAD_CAST"head"))
61 goto found_head;
62 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
63 goto found_meta;
64 }
65 cur = cur->next;
66 }
67 if (cur == NULL)
68 return(NULL);
69 cur = cur->children;
70
71 /*
72 * Search the head
73 */
74 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000075 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000076 if (xmlStrEqual(cur->name, BAD_CAST"head"))
77 break;
78 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
79 goto found_meta;
80 }
81 cur = cur->next;
82 }
83 if (cur == NULL)
84 return(NULL);
85found_head:
86 cur = cur->children;
87
88 /*
89 * Search the meta elements
90 */
91found_meta:
92 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000093 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000094 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
95 xmlAttrPtr attr = cur->properties;
96 int http;
97 const xmlChar *value;
98
99 content = NULL;
100 http = 0;
101 while (attr != NULL) {
102 if ((attr->children != NULL) &&
103 (attr->children->type == XML_TEXT_NODE) &&
104 (attr->children->next == NULL)) {
105#ifndef XML_USE_BUFFER_CONTENT
106 value = attr->children->content;
107#else
108 value = xmlBufferContent(attr->children->content);
109#endif
110 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
111 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
112 http = 1;
113 else if ((value != NULL)
114 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
115 content = value;
116 if ((http != 0) && (content != NULL))
117 goto found_content;
118 }
119 attr = attr->next;
120 }
121 }
122 }
123 cur = cur->next;
124 }
125 return(NULL);
126
127found_content:
128 encoding = xmlStrstr(content, BAD_CAST"charset=");
129 if (encoding == NULL)
130 encoding = xmlStrstr(content, BAD_CAST"Charset=");
131 if (encoding == NULL)
132 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
133 if (encoding != NULL) {
134 encoding += 8;
135 } else {
136 encoding = xmlStrstr(content, BAD_CAST"charset =");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"Charset =");
139 if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
141 if (encoding != NULL)
142 encoding += 9;
143 }
144 if (encoding != NULL) {
145 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
146 }
147 return(encoding);
148}
149
150/**
151 * htmlSetMetaEncoding:
152 * @doc: the document
153 * @encoding: the encoding string
154 *
155 * Sets the current encoding in the Meta tags
156 * NOTE: this will not change the document content encoding, just
157 * the META flag associated.
158 *
159 * Returns 0 in case of success and -1 in case of error
160 */
161int
162htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
163 htmlNodePtr cur, meta;
164 const xmlChar *content;
165 char newcontent[100];
166
167
168 if (doc == NULL)
169 return(-1);
170
171 if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000172 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
173 encoding);
Owen Taylor3473f882001-02-23 17:55:21 +0000174 newcontent[sizeof(newcontent) - 1] = 0;
175 }
176
177 cur = doc->children;
178
179 /*
180 * Search the html
181 */
182 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000183 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000184 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
185 break;
186 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
187 goto found_head;
188 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
189 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000190 }
191 cur = cur->next;
192 }
193 if (cur == NULL)
194 return(-1);
195 cur = cur->children;
196
197 /*
198 * Search the head
199 */
200 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000201 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000202 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
203 break;
204 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
205 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000206 }
207 cur = cur->next;
208 }
209 if (cur == NULL)
210 return(-1);
211found_head:
212 if (cur->children == NULL) {
213 if (encoding == NULL)
214 return(0);
215 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
216 xmlAddChild(cur, meta);
Owen Taylor3473f882001-02-23 17:55:21 +0000217 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000218 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Owen Taylor3473f882001-02-23 17:55:21 +0000219 return(0);
220 }
221 cur = cur->children;
222
223found_meta:
224 if (encoding != NULL) {
225 /*
226 * Create a new Meta element with the right aatributes
227 */
228
229 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
230 xmlAddPrevSibling(cur, meta);
Owen Taylor3473f882001-02-23 17:55:21 +0000231 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000232 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Owen Taylor3473f882001-02-23 17:55:21 +0000233 }
234
235 /*
236 * Search and destroy all the remaining the meta elements carrying
237 * encoding informations
238 */
239 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000240 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000241 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000242 xmlAttrPtr attr = cur->properties;
243 int http;
244 const xmlChar *value;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000245 int same_charset;
Owen Taylor3473f882001-02-23 17:55:21 +0000246
247 content = NULL;
248 http = 0;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000249 same_charset = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000250 while (attr != NULL) {
251 if ((attr->children != NULL) &&
252 (attr->children->type == XML_TEXT_NODE) &&
253 (attr->children->next == NULL)) {
254#ifndef XML_USE_BUFFER_CONTENT
255 value = attr->children->content;
256#else
257 value = xmlBufferContent(attr->children->content);
258#endif
259 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
260 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
261 http = 1;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000262 else
263 {
264 if ((value != NULL) &&
265 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
266 content = value;
267 else
268 if ((!xmlStrcasecmp(attr->name, BAD_CAST"charset"))
269 && (!xmlStrcasecmp(value, encoding)))
270 same_charset = 1;
271 }
272 if ((http != 0) && (content != NULL) && (same_charset != 0))
Owen Taylor3473f882001-02-23 17:55:21 +0000273 break;
274 }
275 attr = attr->next;
276 }
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000277 if ((http != 0) && (content != NULL) && (same_charset != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000278 meta = cur;
279 cur = cur->next;
280 xmlUnlinkNode(meta);
281 xmlFreeNode(meta);
282 continue;
283 }
284
285 }
286 }
287 cur = cur->next;
288 }
289 return(0);
290}
291
292/************************************************************************
293 * *
294 * Dumping HTML tree content to a simple buffer *
295 * *
296 ************************************************************************/
297
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000298void htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
299 xmlNodePtr cur, const char *encoding, int format);
300
Owen Taylor3473f882001-02-23 17:55:21 +0000301static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000302htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000303
304/**
305 * htmlDtdDump:
306 * @buf: the HTML buffer output
307 * @doc: the document
308 *
309 * Dump the HTML document DTD, if any.
310 */
311static void
312htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
313 xmlDtdPtr cur = doc->intSubset;
314
315 if (cur == NULL) {
316 xmlGenericError(xmlGenericErrorContext,
317 "htmlDtdDump : no internal subset\n");
318 return;
319 }
320 xmlBufferWriteChar(buf, "<!DOCTYPE ");
321 xmlBufferWriteCHAR(buf, cur->name);
322 if (cur->ExternalID != NULL) {
323 xmlBufferWriteChar(buf, " PUBLIC ");
324 xmlBufferWriteQuotedString(buf, cur->ExternalID);
325 if (cur->SystemID != NULL) {
326 xmlBufferWriteChar(buf, " ");
327 xmlBufferWriteQuotedString(buf, cur->SystemID);
328 }
329 } else if (cur->SystemID != NULL) {
330 xmlBufferWriteChar(buf, " SYSTEM ");
331 xmlBufferWriteQuotedString(buf, cur->SystemID);
332 }
333 xmlBufferWriteChar(buf, ">\n");
334}
335
336/**
337 * htmlAttrDump:
338 * @buf: the HTML buffer output
339 * @doc: the document
340 * @cur: the attribute pointer
341 *
342 * Dump an HTML attribute
343 */
344static void
345htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
346 xmlChar *value;
347
Daniel Veillardeca60d02001-06-13 07:45:41 +0000348 /*
349 * TODO: The html output method should not escape a & character
350 * occurring in an attribute value immediately followed by
351 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
352 */
353
Owen Taylor3473f882001-02-23 17:55:21 +0000354 if (cur == NULL) {
355 xmlGenericError(xmlGenericErrorContext,
356 "htmlAttrDump : property == NULL\n");
357 return;
358 }
359 xmlBufferWriteChar(buf, " ");
360 xmlBufferWriteCHAR(buf, cur->name);
361 if (cur->children != NULL) {
362 value = xmlNodeListGetString(doc, cur->children, 0);
363 if (value) {
364 xmlBufferWriteChar(buf, "=");
365 xmlBufferWriteQuotedString(buf, value);
366 xmlFree(value);
367 } else {
368 xmlBufferWriteChar(buf, "=\"\"");
369 }
370 }
371}
372
373/**
374 * htmlAttrListDump:
375 * @buf: the HTML buffer output
376 * @doc: the document
377 * @cur: the first attribute pointer
378 *
379 * Dump a list of HTML attributes
380 */
381static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000382htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, int format) {
383 int i = 0;
384
Owen Taylor3473f882001-02-23 17:55:21 +0000385 if (cur == NULL) {
386 xmlGenericError(xmlGenericErrorContext,
387 "htmlAttrListDump : property == NULL\n");
388 return;
389 }
390 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000391 i++;
392 if ((format) && (i >= 5)) {
393 i = 0;
394 xmlBufferWriteChar(buf, "\n");
395 }
Owen Taylor3473f882001-02-23 17:55:21 +0000396 htmlAttrDump(buf, doc, cur);
397 cur = cur->next;
398 }
399}
400
Daniel Veillard95d845f2001-06-13 13:48:46 +0000401static void
402htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000403
Owen Taylor3473f882001-02-23 17:55:21 +0000404/**
405 * htmlNodeListDump:
406 * @buf: the HTML buffer output
407 * @doc: the document
408 * @cur: the first node
409 *
410 * Dump an HTML node list, recursive behaviour,children are printed too.
411 */
412static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000413htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000414 if (cur == NULL) {
415 xmlGenericError(xmlGenericErrorContext,
416 "htmlNodeListDump : node == NULL\n");
417 return;
418 }
419 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000420 htmlNodeDumpFormat(buf, doc, cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000421 cur = cur->next;
422 }
423}
424
425/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000426 * htmlNodeDumpFormat:
Owen Taylor3473f882001-02-23 17:55:21 +0000427 * @buf: the HTML buffer output
428 * @doc: the document
429 * @cur: the current node
Daniel Veillard95d845f2001-06-13 13:48:46 +0000430 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000431 *
432 * Dump an HTML node, recursive behaviour,children are printed too.
433 */
434void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000435htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
436 int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000437 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000438
439 if (cur == NULL) {
440 xmlGenericError(xmlGenericErrorContext,
441 "htmlNodeDump : node == NULL\n");
442 return;
443 }
444 /*
445 * Special cases.
446 */
447 if (cur->type == XML_DTD_NODE)
448 return;
449 if (cur->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000450 htmlDocContentDump(buf, (xmlDocPtr) cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000451 return;
452 }
453 if (cur->type == HTML_TEXT_NODE) {
454 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000455 if (((cur->name == (const xmlChar *)xmlStringText) ||
456 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000457 ((cur->parent == NULL) ||
458 (!xmlStrEqual(cur->parent->name, BAD_CAST "script")))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000459 xmlChar *buffer;
460
461#ifndef XML_USE_BUFFER_CONTENT
462 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
463#else
464 buffer = xmlEncodeEntitiesReentrant(doc,
465 xmlBufferContent(cur->content));
466#endif
467 if (buffer != NULL) {
468 xmlBufferWriteCHAR(buf, buffer);
469 xmlFree(buffer);
470 }
471 } else {
472 xmlBufferWriteCHAR(buf, cur->content);
473 }
474 }
475 return;
476 }
477 if (cur->type == HTML_COMMENT_NODE) {
478 if (cur->content != NULL) {
479 xmlBufferWriteChar(buf, "<!--");
480#ifndef XML_USE_BUFFER_CONTENT
481 xmlBufferWriteCHAR(buf, cur->content);
482#else
483 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
484#endif
485 xmlBufferWriteChar(buf, "-->");
486 }
487 return;
488 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000489 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000490 if (cur->name == NULL)
491 return;
492 xmlBufferWriteChar(buf, "<?");
493 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000494 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000495 xmlBufferWriteChar(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000496#ifndef XML_USE_BUFFER_CONTENT
497 xmlBufferWriteCHAR(buf, cur->content);
498#else
499 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
500#endif
Daniel Veillard7533cc82001-04-24 15:52:00 +0000501 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000502 xmlBufferWriteChar(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000503 return;
504 }
Owen Taylor3473f882001-02-23 17:55:21 +0000505 if (cur->type == HTML_ENTITY_REF_NODE) {
506 xmlBufferWriteChar(buf, "&");
507 xmlBufferWriteCHAR(buf, cur->name);
508 xmlBufferWriteChar(buf, ";");
509 return;
510 }
Daniel Veillard083c2662001-05-08 08:27:14 +0000511 if (cur->type == HTML_PRESERVE_NODE) {
512 if (cur->content != NULL) {
513#ifndef XML_USE_BUFFER_CONTENT
514 xmlBufferWriteCHAR(buf, cur->content);
515#else
516 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
517#endif
518 }
519 return;
520 }
Owen Taylor3473f882001-02-23 17:55:21 +0000521
522 /*
Daniel Veillard083c2662001-05-08 08:27:14 +0000523 * Get specific HTML info for taht node.
Owen Taylor3473f882001-02-23 17:55:21 +0000524 */
525 info = htmlTagLookup(cur->name);
526
527 xmlBufferWriteChar(buf, "<");
528 xmlBufferWriteCHAR(buf, cur->name);
529 if (cur->properties != NULL)
Daniel Veillard95d845f2001-06-13 13:48:46 +0000530 htmlAttrListDump(buf, doc, cur->properties, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000531
532 if ((info != NULL) && (info->empty)) {
533 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000534 if ((format) && (info != NULL) && (!info->isinline) &&
535 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000536 if ((cur->next->type != HTML_TEXT_NODE) &&
537 (cur->next->type != HTML_ENTITY_REF_NODE))
538 xmlBufferWriteChar(buf, "\n");
539 }
540 return;
541 }
Daniel Veillard7db37732001-07-12 01:20:08 +0000542 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
543 (cur->children == NULL)) {
Daniel Veillard083c2662001-05-08 08:27:14 +0000544 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +0000545 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
546 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000547 xmlBufferWriteChar(buf, ">");
Daniel Veillard083c2662001-05-08 08:27:14 +0000548 } else {
Owen Taylor3473f882001-02-23 17:55:21 +0000549 xmlBufferWriteChar(buf, "></");
550 xmlBufferWriteCHAR(buf, cur->name);
551 xmlBufferWriteChar(buf, ">");
552 }
Daniel Veillard02bb1702001-06-13 21:11:59 +0000553 if ((format) && (info != NULL) && (!info->isinline) &&
554 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000555 if ((cur->next->type != HTML_TEXT_NODE) &&
556 (cur->next->type != HTML_ENTITY_REF_NODE))
557 xmlBufferWriteChar(buf, "\n");
558 }
559 return;
560 }
561 xmlBufferWriteChar(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +0000562 if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000563 xmlChar *buffer;
564
565#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard083c2662001-05-08 08:27:14 +0000566 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000567#else
Daniel Veillard083c2662001-05-08 08:27:14 +0000568 buffer = xmlEncodeEntitiesReentrant(doc,
569 xmlBufferContent(cur->content));
Owen Taylor3473f882001-02-23 17:55:21 +0000570#endif
571 if (buffer != NULL) {
572 xmlBufferWriteCHAR(buf, buffer);
573 xmlFree(buffer);
574 }
575 }
576 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +0000577 if ((format) && (info != NULL) && (!info->isinline) &&
578 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000579 (cur->children->type != HTML_ENTITY_REF_NODE) &&
580 (cur->children != cur->last))
581 xmlBufferWriteChar(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +0000582 htmlNodeListDump(buf, doc, cur->children, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +0000583 if ((format) && (info != NULL) && (!info->isinline) &&
584 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000585 (cur->last->type != HTML_ENTITY_REF_NODE) &&
586 (cur->children != cur->last))
587 xmlBufferWriteChar(buf, "\n");
588 }
Owen Taylor3473f882001-02-23 17:55:21 +0000589 xmlBufferWriteChar(buf, "</");
590 xmlBufferWriteCHAR(buf, cur->name);
591 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000592 if ((format) && (info != NULL) && (!info->isinline) &&
593 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000594 if ((cur->next->type != HTML_TEXT_NODE) &&
595 (cur->next->type != HTML_ENTITY_REF_NODE))
596 xmlBufferWriteChar(buf, "\n");
597 }
598}
599
600/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000601 * htmlNodeDump:
602 * @buf: the HTML buffer output
603 * @doc: the document
604 * @cur: the current node
605 *
606 * Dump an HTML node, recursive behaviour,children are printed too,
607 * and formatting returns are added.
608 */
609void
610htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
611 htmlNodeDumpFormat(buf, doc, cur, 1);
612}
613
614/**
615 * htmlNodeDumpFileFormat:
616 * @out: the FILE pointer
617 * @doc: the document
618 * @cur: the current node
619 * @encoding: the document encoding
620 * @format: should formatting spaces been added
621 *
622 * Dump an HTML node, recursive behaviour,children are printed too.
623 *
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000624 * TODO: if encoding == NULL try to save in the doc encoding
625 *
626 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard95d845f2001-06-13 13:48:46 +0000627 */
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000628int
629htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
630 xmlNodePtr cur, const char *encoding, int format) {
631 xmlOutputBufferPtr buf;
632 xmlCharEncodingHandlerPtr handler = NULL;
633 int ret;
Daniel Veillard95d845f2001-06-13 13:48:46 +0000634
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000635 if (encoding != NULL) {
636 xmlCharEncoding enc;
637
638 enc = xmlParseCharEncoding(encoding);
639 if (enc != XML_CHAR_ENCODING_UTF8) {
640 handler = xmlFindCharEncodingHandler(encoding);
641 if (handler == NULL)
642 return(-1);
643 }
644 }
645
646 /*
647 * Fallback to HTML or ASCII when the encoding is unspecified
648 */
649 if (handler == NULL)
650 handler = xmlFindCharEncodingHandler("HTML");
651 if (handler == NULL)
652 handler = xmlFindCharEncodingHandler("ascii");
653
654 /*
655 * save the content to a temp buffer.
656 */
657 buf = xmlOutputBufferCreateFile(out, handler);
658 if (buf == NULL) return(0);
659
660 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
661
662 ret = xmlOutputBufferClose(buf);
663 return(ret);
Daniel Veillard95d845f2001-06-13 13:48:46 +0000664}
665
666/**
Owen Taylor3473f882001-02-23 17:55:21 +0000667 * htmlNodeDumpFile:
668 * @out: the FILE pointer
669 * @doc: the document
670 * @cur: the current node
671 *
Daniel Veillard95d845f2001-06-13 13:48:46 +0000672 * Dump an HTML node, recursive behaviour,children are printed too,
673 * and formatting returns are added.
Owen Taylor3473f882001-02-23 17:55:21 +0000674 */
675void
676htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000677 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000678}
679
680/**
681 * htmlDocContentDump:
682 * @buf: the HTML buffer output
683 * @cur: the document
684 *
685 * Dump an HTML document.
686 */
687static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000688htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000689 int type;
690
691 /*
692 * force to output the stuff as HTML, especially for entities
693 */
694 type = cur->type;
695 cur->type = XML_HTML_DOCUMENT_NODE;
696 if (cur->intSubset != NULL)
697 htmlDtdDump(buf, cur);
698 else {
699 /* Default to HTML-4.0 transitionnal @@@@ */
700 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
701
702 }
703 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000704 htmlNodeListDump(buf, cur, cur->children, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000705 }
706 xmlBufferWriteChar(buf, "\n");
707 cur->type = (xmlElementType) type;
708}
709
710/**
711 * htmlDocDumpMemory:
712 * @cur: the document
713 * @mem: OUT: the memory pointer
Daniel Veillard2d703722001-05-30 18:32:34 +0000714 * @size: OUT: the memory length
Owen Taylor3473f882001-02-23 17:55:21 +0000715 *
716 * Dump an HTML document in memory and return the xmlChar * and it's size.
717 * It's up to the caller to free the memory.
718 */
719void
720htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard2d703722001-05-30 18:32:34 +0000721 xmlOutputBufferPtr buf;
722 xmlCharEncodingHandlerPtr handler = NULL;
723 const char *encoding;
Owen Taylor3473f882001-02-23 17:55:21 +0000724
725 if (cur == NULL) {
726#ifdef DEBUG_TREE
727 xmlGenericError(xmlGenericErrorContext,
Daniel Veillard2d703722001-05-30 18:32:34 +0000728 "htmlDocDumpMemory : document == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000729#endif
730 *mem = NULL;
731 *size = 0;
732 return;
733 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000734
735 encoding = (const char *) htmlGetMetaEncoding(cur);
736
737 if (encoding != NULL) {
738 xmlCharEncoding enc;
739
740 enc = xmlParseCharEncoding(encoding);
741 if (enc != cur->charset) {
742 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
743 /*
744 * Not supported yet
745 */
746 *mem = NULL;
747 *size = 0;
748 return;
749 }
750
751 handler = xmlFindCharEncodingHandler(encoding);
752 if (handler == NULL) {
753 *mem = NULL;
754 *size = 0;
755 return;
756 }
757 }
758 }
759
760 /*
761 * Fallback to HTML or ASCII when the encoding is unspecified
762 */
763 if (handler == NULL)
764 handler = xmlFindCharEncodingHandler("HTML");
765 if (handler == NULL)
766 handler = xmlFindCharEncodingHandler("ascii");
767
768 buf = xmlAllocOutputBuffer(handler);
Owen Taylor3473f882001-02-23 17:55:21 +0000769 if (buf == NULL) {
770 *mem = NULL;
771 *size = 0;
772 return;
773 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000774
775 htmlDocContentDumpOutput(buf, cur, NULL);
776 xmlOutputBufferFlush(buf);
777 if (buf->conv != NULL) {
778 *size = buf->conv->use;
779 *mem = xmlStrndup(buf->conv->content, *size);
780 } else {
781 *size = buf->buffer->use;
782 *mem = xmlStrndup(buf->buffer->content, *size);
783 }
784 (void)xmlOutputBufferClose(buf);
Owen Taylor3473f882001-02-23 17:55:21 +0000785}
786
787
788/************************************************************************
789 * *
790 * Dumping HTML tree content to an I/O output buffer *
791 * *
792 ************************************************************************/
793
Daniel Veillard95d845f2001-06-13 13:48:46 +0000794void
795htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
796 const char *encoding, int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000797/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000798 * htmlDtdDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000799 * @buf: the HTML buffer output
800 * @doc: the document
801 * @encoding: the encoding string
802 *
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000803 * TODO: check whether encoding is needed
804 *
Owen Taylor3473f882001-02-23 17:55:21 +0000805 * Dump the HTML document DTD, if any.
806 */
807static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000808htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000809 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000810 xmlDtdPtr cur = doc->intSubset;
811
812 if (cur == NULL) {
813 xmlGenericError(xmlGenericErrorContext,
814 "htmlDtdDump : no internal subset\n");
815 return;
816 }
817 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
818 xmlOutputBufferWriteString(buf, (const char *)cur->name);
819 if (cur->ExternalID != NULL) {
820 xmlOutputBufferWriteString(buf, " PUBLIC ");
821 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
822 if (cur->SystemID != NULL) {
823 xmlOutputBufferWriteString(buf, " ");
824 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
825 }
826 } else if (cur->SystemID != NULL) {
827 xmlOutputBufferWriteString(buf, " SYSTEM ");
828 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
829 }
830 xmlOutputBufferWriteString(buf, ">\n");
831}
832
833/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000834 * htmlAttrDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000835 * @buf: the HTML buffer output
836 * @doc: the document
837 * @cur: the attribute pointer
838 * @encoding: the encoding string
839 *
840 * Dump an HTML attribute
841 */
842static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000843htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000844 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000845 xmlChar *value;
846
Daniel Veillardeca60d02001-06-13 07:45:41 +0000847 /*
848 * TODO: The html output method should not escape a & character
849 * occurring in an attribute value immediately followed by
850 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
851 */
852
Owen Taylor3473f882001-02-23 17:55:21 +0000853 if (cur == NULL) {
854 xmlGenericError(xmlGenericErrorContext,
855 "htmlAttrDump : property == NULL\n");
856 return;
857 }
858 xmlOutputBufferWriteString(buf, " ");
859 xmlOutputBufferWriteString(buf, (const char *)cur->name);
860 if (cur->children != NULL) {
861 value = xmlNodeListGetString(doc, cur->children, 0);
862 if (value) {
863 xmlOutputBufferWriteString(buf, "=");
864 xmlBufferWriteQuotedString(buf->buffer, value);
865 xmlFree(value);
866 } else {
867 xmlOutputBufferWriteString(buf, "=\"\"");
868 }
869 }
870}
871
872/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000873 * htmlAttrListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000874 * @buf: the HTML buffer output
875 * @doc: the document
876 * @cur: the first attribute pointer
877 * @encoding: the encoding string
878 *
879 * Dump a list of HTML attributes
880 */
881static void
882htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
883 if (cur == NULL) {
884 xmlGenericError(xmlGenericErrorContext,
885 "htmlAttrListDump : property == NULL\n");
886 return;
887 }
888 while (cur != NULL) {
889 htmlAttrDumpOutput(buf, doc, cur, encoding);
890 cur = cur->next;
891 }
892}
893
894
895void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
896 xmlNodePtr cur, const char *encoding);
897
898/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000899 * htmlNodeListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000900 * @buf: the HTML buffer output
901 * @doc: the document
902 * @cur: the first node
903 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000904 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000905 *
906 * Dump an HTML node list, recursive behaviour,children are printed too.
907 */
908static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000909htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
910 xmlNodePtr cur, const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000911 if (cur == NULL) {
912 xmlGenericError(xmlGenericErrorContext,
913 "htmlNodeListDump : node == NULL\n");
914 return;
915 }
916 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000917 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000918 cur = cur->next;
919 }
920}
921
922/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000923 * htmlNodeDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000924 * @buf: the HTML buffer output
925 * @doc: the document
926 * @cur: the current node
927 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000928 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000929 *
930 * Dump an HTML node, recursive behaviour,children are printed too.
931 */
932void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000933htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
934 xmlNodePtr cur, const char *encoding, int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000935 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000936
937 if (cur == NULL) {
938 xmlGenericError(xmlGenericErrorContext,
939 "htmlNodeDump : node == NULL\n");
940 return;
941 }
942 /*
943 * Special cases.
944 */
945 if (cur->type == XML_DTD_NODE)
946 return;
947 if (cur->type == XML_HTML_DOCUMENT_NODE) {
948 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
949 return;
950 }
951 if (cur->type == HTML_TEXT_NODE) {
952 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000953 if (((cur->name == (const xmlChar *)xmlStringText) ||
954 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000955 ((cur->parent == NULL) ||
956 (!xmlStrEqual(cur->parent->name, BAD_CAST "script")))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000957 xmlChar *buffer;
958
959#ifndef XML_USE_BUFFER_CONTENT
960 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
961#else
962 buffer = xmlEncodeEntitiesReentrant(doc,
963 xmlBufferContent(cur->content));
964#endif
965 if (buffer != NULL) {
966 xmlOutputBufferWriteString(buf, (const char *)buffer);
967 xmlFree(buffer);
968 }
969 } else {
970 xmlOutputBufferWriteString(buf, (const char *)cur->content);
971 }
972 }
973 return;
974 }
975 if (cur->type == HTML_COMMENT_NODE) {
976 if (cur->content != NULL) {
977 xmlOutputBufferWriteString(buf, "<!--");
978#ifndef XML_USE_BUFFER_CONTENT
979 xmlOutputBufferWriteString(buf, (const char *)cur->content);
980#else
981 xmlOutputBufferWriteString(buf, (const char *)
982 xmlBufferContent(cur->content));
983#endif
984 xmlOutputBufferWriteString(buf, "-->");
985 }
986 return;
987 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000988 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000989 if (cur->name == NULL)
990 return;
991 xmlOutputBufferWriteString(buf, "<?");
992 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000993 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000994 xmlOutputBufferWriteString(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000995#ifndef XML_USE_BUFFER_CONTENT
996 xmlOutputBufferWriteString(buf, (const char *)cur->content);
997#else
998 xmlOutputBufferWriteString(buf, (const char *)
999 xmlBufferContent(cur->content));
1000#endif
Daniel Veillard7533cc82001-04-24 15:52:00 +00001001 }
Daniel Veillard5146f202001-04-25 10:29:44 +00001002 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +00001003 return;
1004 }
Owen Taylor3473f882001-02-23 17:55:21 +00001005 if (cur->type == HTML_ENTITY_REF_NODE) {
1006 xmlOutputBufferWriteString(buf, "&");
1007 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1008 xmlOutputBufferWriteString(buf, ";");
1009 return;
1010 }
1011 if (cur->type == HTML_PRESERVE_NODE) {
1012 if (cur->content != NULL) {
1013#ifndef XML_USE_BUFFER_CONTENT
1014 xmlOutputBufferWriteString(buf, (const char *)cur->content);
1015#else
1016 xmlOutputBufferWriteString(buf, (const char *)
1017 xmlBufferContent(cur->content));
1018#endif
1019 }
1020 return;
1021 }
1022
1023 /*
Daniel Veillard1ed3f882001-04-18 09:45:35 +00001024 * Get specific HTML info for taht node.
Owen Taylor3473f882001-02-23 17:55:21 +00001025 */
1026 info = htmlTagLookup(cur->name);
1027
1028 xmlOutputBufferWriteString(buf, "<");
1029 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1030 if (cur->properties != NULL)
1031 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
1032
1033 if ((info != NULL) && (info->empty)) {
1034 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +00001035 if ((format) && (!info->isinline) && (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001036 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +00001037 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1038 (cur->parent != NULL) &&
1039 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001040 xmlOutputBufferWriteString(buf, "\n");
1041 }
1042 return;
1043 }
Daniel Veillard7db37732001-07-12 01:20:08 +00001044 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
1045 (cur->children == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001046 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +00001047 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
1048 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001049 xmlOutputBufferWriteString(buf, ">");
1050 } else {
1051 xmlOutputBufferWriteString(buf, "></");
1052 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1053 xmlOutputBufferWriteString(buf, ">");
1054 }
Daniel Veillard02bb1702001-06-13 21:11:59 +00001055 if ((format) && (cur->next != NULL) &&
1056 (info != NULL) && (!info->isinline)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001057 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +00001058 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1059 (cur->parent != NULL) &&
1060 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001061 xmlOutputBufferWriteString(buf, "\n");
1062 }
1063 return;
1064 }
1065 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +00001066 if ((cur->type != XML_ELEMENT_NODE) &&
1067 (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001068 /*
1069 * Uses the OutputBuffer property to automatically convert
1070 * invalids to charrefs
1071 */
1072
1073#ifndef XML_USE_BUFFER_CONTENT
1074 xmlOutputBufferWriteString(buf, (const char *) cur->content);
1075#else
1076 xmlOutputBufferWriteString(buf,
1077 (const char *) xmlBufferContent(cur->content));
1078#endif
1079 }
1080 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +00001081 if ((format) && (info != NULL) && (!info->isinline) &&
1082 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001083 (cur->children->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001084 (cur->children != cur->last) &&
1085 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001086 xmlOutputBufferWriteString(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +00001087 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +00001088 if ((format) && (info != NULL) && (!info->isinline) &&
1089 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001090 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001091 (cur->children != cur->last) &&
1092 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001093 xmlOutputBufferWriteString(buf, "\n");
1094 }
Owen Taylor3473f882001-02-23 17:55:21 +00001095 xmlOutputBufferWriteString(buf, "</");
1096 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1097 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +00001098 if ((format) && (info != NULL) && (!info->isinline) &&
1099 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001100 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001101 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1102 (cur->parent != NULL) &&
1103 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001104 xmlOutputBufferWriteString(buf, "\n");
1105 }
1106}
1107
1108/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001109 * htmlNodeDumpOutput:
1110 * @buf: the HTML buffer output
1111 * @doc: the document
1112 * @cur: the current node
1113 * @encoding: the encoding string
1114 *
1115 * Dump an HTML node, recursive behaviour,children are printed too,
1116 * and formatting returns/spaces are added.
1117 */
1118void
1119htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
1120 xmlNodePtr cur, const char *encoding) {
1121 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
1122}
1123
1124/**
1125 * htmlDocContentDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +00001126 * @buf: the HTML buffer output
1127 * @cur: the document
1128 * @encoding: the encoding string
1129 *
1130 * Dump an HTML document.
1131 */
1132void
Daniel Veillard95d845f2001-06-13 13:48:46 +00001133htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1134 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001135 int type;
1136
1137 /*
1138 * force to output the stuff as HTML, especially for entities
1139 */
1140 type = cur->type;
1141 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard4dd93462001-04-02 15:16:19 +00001142 if (cur->intSubset != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001143 htmlDtdDumpOutput(buf, cur, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001144 }
1145 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +00001146 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001147 }
1148 xmlOutputBufferWriteString(buf, "\n");
1149 cur->type = (xmlElementType) type;
1150}
1151
Daniel Veillard95d845f2001-06-13 13:48:46 +00001152/**
1153 * htmlDocContentDumpOutput:
1154 * @buf: the HTML buffer output
1155 * @cur: the document
1156 * @encoding: the encoding string
1157 *
1158 * Dump an HTML document. Formating return/spaces are added.
1159 */
1160void
1161htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1162 const char *encoding) {
1163 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1164}
1165
Owen Taylor3473f882001-02-23 17:55:21 +00001166/************************************************************************
1167 * *
1168 * Saving functions front-ends *
1169 * *
1170 ************************************************************************/
1171
1172/**
1173 * htmlDocDump:
1174 * @f: the FILE*
1175 * @cur: the document
1176 *
1177 * Dump an HTML document to an open FILE.
1178 *
1179 * returns: the number of byte written or -1 in case of failure.
1180 */
1181int
1182htmlDocDump(FILE *f, xmlDocPtr cur) {
1183 xmlOutputBufferPtr buf;
1184 xmlCharEncodingHandlerPtr handler = NULL;
1185 const char *encoding;
1186 int ret;
1187
1188 if (cur == NULL) {
1189#ifdef DEBUG_TREE
1190 xmlGenericError(xmlGenericErrorContext,
1191 "htmlDocDump : document == NULL\n");
1192#endif
1193 return(-1);
1194 }
1195
1196 encoding = (const char *) htmlGetMetaEncoding(cur);
1197
1198 if (encoding != NULL) {
1199 xmlCharEncoding enc;
1200
1201 enc = xmlParseCharEncoding(encoding);
1202 if (enc != cur->charset) {
1203 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1204 /*
1205 * Not supported yet
1206 */
1207 return(-1);
1208 }
1209
1210 handler = xmlFindCharEncodingHandler(encoding);
1211 if (handler == NULL)
1212 return(-1);
1213 }
1214 }
1215
1216 /*
1217 * Fallback to HTML or ASCII when the encoding is unspecified
1218 */
1219 if (handler == NULL)
1220 handler = xmlFindCharEncodingHandler("HTML");
1221 if (handler == NULL)
1222 handler = xmlFindCharEncodingHandler("ascii");
1223
1224 buf = xmlOutputBufferCreateFile(f, handler);
1225 if (buf == NULL) return(-1);
1226 htmlDocContentDumpOutput(buf, cur, NULL);
1227
1228 ret = xmlOutputBufferClose(buf);
1229 return(ret);
1230}
1231
1232/**
1233 * htmlSaveFile:
1234 * @filename: the filename (or URL)
1235 * @cur: the document
1236 *
1237 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1238 * used.
1239 * returns: the number of byte written or -1 in case of failure.
1240 */
1241int
1242htmlSaveFile(const char *filename, xmlDocPtr cur) {
1243 xmlOutputBufferPtr buf;
1244 xmlCharEncodingHandlerPtr handler = NULL;
1245 const char *encoding;
1246 int ret;
1247
1248 encoding = (const char *) htmlGetMetaEncoding(cur);
1249
1250 if (encoding != NULL) {
1251 xmlCharEncoding enc;
1252
1253 enc = xmlParseCharEncoding(encoding);
1254 if (enc != cur->charset) {
1255 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1256 /*
1257 * Not supported yet
1258 */
1259 return(-1);
1260 }
1261
1262 handler = xmlFindCharEncodingHandler(encoding);
1263 if (handler == NULL)
1264 return(-1);
1265 }
1266 }
1267
1268 /*
1269 * Fallback to HTML or ASCII when the encoding is unspecified
1270 */
1271 if (handler == NULL)
1272 handler = xmlFindCharEncodingHandler("HTML");
1273 if (handler == NULL)
1274 handler = xmlFindCharEncodingHandler("ascii");
1275
1276 /*
1277 * save the content to a temp buffer.
1278 */
1279 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1280 if (buf == NULL) return(0);
1281
1282 htmlDocContentDumpOutput(buf, cur, NULL);
1283
1284 ret = xmlOutputBufferClose(buf);
1285 return(ret);
1286}
1287
1288/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001289 * htmlSaveFileFormat:
Owen Taylor3473f882001-02-23 17:55:21 +00001290 * @filename: the filename
1291 * @cur: the document
Daniel Veillard95d845f2001-06-13 13:48:46 +00001292 * @format: should formatting spaces been added
1293 * @encoding: the document encoding
Owen Taylor3473f882001-02-23 17:55:21 +00001294 *
1295 * Dump an HTML document to a file using a given encoding.
1296 *
1297 * returns: the number of byte written or -1 in case of failure.
1298 */
1299int
Daniel Veillard95d845f2001-06-13 13:48:46 +00001300htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1301 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001302 xmlOutputBufferPtr buf;
1303 xmlCharEncodingHandlerPtr handler = NULL;
1304 int ret;
1305
1306 if (encoding != NULL) {
1307 xmlCharEncoding enc;
1308
1309 enc = xmlParseCharEncoding(encoding);
1310 if (enc != cur->charset) {
1311 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1312 /*
1313 * Not supported yet
1314 */
1315 return(-1);
1316 }
1317
1318 handler = xmlFindCharEncodingHandler(encoding);
1319 if (handler == NULL)
1320 return(-1);
1321 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1322 }
Daniel Veillard4dd93462001-04-02 15:16:19 +00001323 } else {
1324 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
Owen Taylor3473f882001-02-23 17:55:21 +00001325 }
1326
1327 /*
1328 * Fallback to HTML or ASCII when the encoding is unspecified
1329 */
1330 if (handler == NULL)
1331 handler = xmlFindCharEncodingHandler("HTML");
1332 if (handler == NULL)
1333 handler = xmlFindCharEncodingHandler("ascii");
1334
1335 /*
1336 * save the content to a temp buffer.
1337 */
1338 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1339 if (buf == NULL) return(0);
1340
Daniel Veillard95d845f2001-06-13 13:48:46 +00001341 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001342
1343 ret = xmlOutputBufferClose(buf);
1344 return(ret);
1345}
Daniel Veillard95d845f2001-06-13 13:48:46 +00001346
1347/**
1348 * htmlSaveFileEnc:
1349 * @filename: the filename
1350 * @cur: the document
1351 * @encoding: the document encoding
1352 *
1353 * Dump an HTML document to a file using a given encoding
1354 * and formatting returns/spaces are added.
1355 *
1356 * returns: the number of byte written or -1 in case of failure.
1357 */
1358int
1359htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1360 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1361}
1362
Owen Taylor3473f882001-02-23 17:55:21 +00001363#endif /* LIBXML_HTML_ENABLED */