blob: cc61ef949c46dfc139f7e533902d63c24f4955d7 [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access function for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
8
9
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000011#ifdef LIBXML_HTML_ENABLED
12
Owen Taylor3473f882001-02-23 17:55:21 +000013#ifdef HAVE_CTYPE_H
14#include <ctype.h>
15#endif
16#ifdef HAVE_STDLIB_H
17#include <stdlib.h>
18#endif
19
20#include <libxml/xmlmemory.h>
21#include <libxml/HTMLparser.h>
22#include <libxml/HTMLtree.h>
23#include <libxml/entities.h>
24#include <libxml/valid.h>
25#include <libxml/xmlerror.h>
26#include <libxml/parserInternals.h>
27
28/************************************************************************
29 * *
30 * Getting/Setting encoding meta tags *
31 * *
32 ************************************************************************/
33
34/**
35 * htmlGetMetaEncoding:
36 * @doc: the document
37 *
38 * Encoding definition lookup in the Meta tags
39 *
40 * Returns the current encoding as flagged in the HTML source
41 */
42const xmlChar *
43htmlGetMetaEncoding(htmlDocPtr doc) {
44 htmlNodePtr cur;
45 const xmlChar *content;
46 const xmlChar *encoding;
47
48 if (doc == NULL)
49 return(NULL);
50 cur = doc->children;
51
52 /*
53 * Search the html
54 */
55 while (cur != NULL) {
56 if (cur->name != NULL) {
57 if (xmlStrEqual(cur->name, BAD_CAST"html"))
58 break;
59 if (xmlStrEqual(cur->name, BAD_CAST"head"))
60 goto found_head;
61 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
62 goto found_meta;
63 }
64 cur = cur->next;
65 }
66 if (cur == NULL)
67 return(NULL);
68 cur = cur->children;
69
70 /*
71 * Search the head
72 */
73 while (cur != NULL) {
74 if (cur->name != NULL) {
75 if (xmlStrEqual(cur->name, BAD_CAST"head"))
76 break;
77 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
78 goto found_meta;
79 }
80 cur = cur->next;
81 }
82 if (cur == NULL)
83 return(NULL);
84found_head:
85 cur = cur->children;
86
87 /*
88 * Search the meta elements
89 */
90found_meta:
91 while (cur != NULL) {
92 if (cur->name != NULL) {
93 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
94 xmlAttrPtr attr = cur->properties;
95 int http;
96 const xmlChar *value;
97
98 content = NULL;
99 http = 0;
100 while (attr != NULL) {
101 if ((attr->children != NULL) &&
102 (attr->children->type == XML_TEXT_NODE) &&
103 (attr->children->next == NULL)) {
104#ifndef XML_USE_BUFFER_CONTENT
105 value = attr->children->content;
106#else
107 value = xmlBufferContent(attr->children->content);
108#endif
109 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
110 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
111 http = 1;
112 else if ((value != NULL)
113 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
114 content = value;
115 if ((http != 0) && (content != NULL))
116 goto found_content;
117 }
118 attr = attr->next;
119 }
120 }
121 }
122 cur = cur->next;
123 }
124 return(NULL);
125
126found_content:
127 encoding = xmlStrstr(content, BAD_CAST"charset=");
128 if (encoding == NULL)
129 encoding = xmlStrstr(content, BAD_CAST"Charset=");
130 if (encoding == NULL)
131 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
132 if (encoding != NULL) {
133 encoding += 8;
134 } else {
135 encoding = xmlStrstr(content, BAD_CAST"charset =");
136 if (encoding == NULL)
137 encoding = xmlStrstr(content, BAD_CAST"Charset =");
138 if (encoding == NULL)
139 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
140 if (encoding != NULL)
141 encoding += 9;
142 }
143 if (encoding != NULL) {
144 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
145 }
146 return(encoding);
147}
148
149/**
150 * htmlSetMetaEncoding:
151 * @doc: the document
152 * @encoding: the encoding string
153 *
154 * Sets the current encoding in the Meta tags
155 * NOTE: this will not change the document content encoding, just
156 * the META flag associated.
157 *
158 * Returns 0 in case of success and -1 in case of error
159 */
160int
161htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
162 htmlNodePtr cur, meta;
163 const xmlChar *content;
164 char newcontent[100];
165
166
167 if (doc == NULL)
168 return(-1);
169
170 if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000171 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
172 encoding);
Owen Taylor3473f882001-02-23 17:55:21 +0000173 newcontent[sizeof(newcontent) - 1] = 0;
174 }
175
176 cur = doc->children;
177
178 /*
179 * Search the html
180 */
181 while (cur != NULL) {
182 if (cur->name != NULL) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000183 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
184 break;
185 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
186 goto found_head;
187 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
188 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000189 }
190 cur = cur->next;
191 }
192 if (cur == NULL)
193 return(-1);
194 cur = cur->children;
195
196 /*
197 * Search the head
198 */
199 while (cur != NULL) {
200 if (cur->name != NULL) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000201 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
202 break;
203 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
204 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000205 }
206 cur = cur->next;
207 }
208 if (cur == NULL)
209 return(-1);
210found_head:
211 if (cur->children == NULL) {
212 if (encoding == NULL)
213 return(0);
214 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
215 xmlAddChild(cur, meta);
Owen Taylor3473f882001-02-23 17:55:21 +0000216 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000217 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Owen Taylor3473f882001-02-23 17:55:21 +0000218 return(0);
219 }
220 cur = cur->children;
221
222found_meta:
223 if (encoding != NULL) {
224 /*
225 * Create a new Meta element with the right aatributes
226 */
227
228 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
229 xmlAddPrevSibling(cur, meta);
Owen Taylor3473f882001-02-23 17:55:21 +0000230 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000231 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Owen Taylor3473f882001-02-23 17:55:21 +0000232 }
233
234 /*
235 * Search and destroy all the remaining the meta elements carrying
236 * encoding informations
237 */
238 while (cur != NULL) {
239 if (cur->name != NULL) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000240 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000241 xmlAttrPtr attr = cur->properties;
242 int http;
243 const xmlChar *value;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000244 int same_charset;
Owen Taylor3473f882001-02-23 17:55:21 +0000245
246 content = NULL;
247 http = 0;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000248 same_charset = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000249 while (attr != NULL) {
250 if ((attr->children != NULL) &&
251 (attr->children->type == XML_TEXT_NODE) &&
252 (attr->children->next == NULL)) {
253#ifndef XML_USE_BUFFER_CONTENT
254 value = attr->children->content;
255#else
256 value = xmlBufferContent(attr->children->content);
257#endif
258 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
259 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
260 http = 1;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000261 else
262 {
263 if ((value != NULL) &&
264 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
265 content = value;
266 else
267 if ((!xmlStrcasecmp(attr->name, BAD_CAST"charset"))
268 && (!xmlStrcasecmp(value, encoding)))
269 same_charset = 1;
270 }
271 if ((http != 0) && (content != NULL) && (same_charset != 0))
Owen Taylor3473f882001-02-23 17:55:21 +0000272 break;
273 }
274 attr = attr->next;
275 }
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000276 if ((http != 0) && (content != NULL) && (same_charset != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000277 meta = cur;
278 cur = cur->next;
279 xmlUnlinkNode(meta);
280 xmlFreeNode(meta);
281 continue;
282 }
283
284 }
285 }
286 cur = cur->next;
287 }
288 return(0);
289}
290
291/************************************************************************
292 * *
293 * Dumping HTML tree content to a simple buffer *
294 * *
295 ************************************************************************/
296
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000297void htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
298 xmlNodePtr cur, const char *encoding, int format);
299
Owen Taylor3473f882001-02-23 17:55:21 +0000300static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000301htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000302
303/**
304 * htmlDtdDump:
305 * @buf: the HTML buffer output
306 * @doc: the document
307 *
308 * Dump the HTML document DTD, if any.
309 */
310static void
311htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
312 xmlDtdPtr cur = doc->intSubset;
313
314 if (cur == NULL) {
315 xmlGenericError(xmlGenericErrorContext,
316 "htmlDtdDump : no internal subset\n");
317 return;
318 }
319 xmlBufferWriteChar(buf, "<!DOCTYPE ");
320 xmlBufferWriteCHAR(buf, cur->name);
321 if (cur->ExternalID != NULL) {
322 xmlBufferWriteChar(buf, " PUBLIC ");
323 xmlBufferWriteQuotedString(buf, cur->ExternalID);
324 if (cur->SystemID != NULL) {
325 xmlBufferWriteChar(buf, " ");
326 xmlBufferWriteQuotedString(buf, cur->SystemID);
327 }
328 } else if (cur->SystemID != NULL) {
329 xmlBufferWriteChar(buf, " SYSTEM ");
330 xmlBufferWriteQuotedString(buf, cur->SystemID);
331 }
332 xmlBufferWriteChar(buf, ">\n");
333}
334
335/**
336 * htmlAttrDump:
337 * @buf: the HTML buffer output
338 * @doc: the document
339 * @cur: the attribute pointer
340 *
341 * Dump an HTML attribute
342 */
343static void
344htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
345 xmlChar *value;
346
Daniel Veillardeca60d02001-06-13 07:45:41 +0000347 /*
348 * TODO: The html output method should not escape a & character
349 * occurring in an attribute value immediately followed by
350 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
351 */
352
Owen Taylor3473f882001-02-23 17:55:21 +0000353 if (cur == NULL) {
354 xmlGenericError(xmlGenericErrorContext,
355 "htmlAttrDump : property == NULL\n");
356 return;
357 }
358 xmlBufferWriteChar(buf, " ");
359 xmlBufferWriteCHAR(buf, cur->name);
360 if (cur->children != NULL) {
361 value = xmlNodeListGetString(doc, cur->children, 0);
362 if (value) {
363 xmlBufferWriteChar(buf, "=");
364 xmlBufferWriteQuotedString(buf, value);
365 xmlFree(value);
366 } else {
367 xmlBufferWriteChar(buf, "=\"\"");
368 }
369 }
370}
371
372/**
373 * htmlAttrListDump:
374 * @buf: the HTML buffer output
375 * @doc: the document
376 * @cur: the first attribute pointer
377 *
378 * Dump a list of HTML attributes
379 */
380static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000381htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, int format) {
382 int i = 0;
383
Owen Taylor3473f882001-02-23 17:55:21 +0000384 if (cur == NULL) {
385 xmlGenericError(xmlGenericErrorContext,
386 "htmlAttrListDump : property == NULL\n");
387 return;
388 }
389 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000390 i++;
391 if ((format) && (i >= 5)) {
392 i = 0;
393 xmlBufferWriteChar(buf, "\n");
394 }
Owen Taylor3473f882001-02-23 17:55:21 +0000395 htmlAttrDump(buf, doc, cur);
396 cur = cur->next;
397 }
398}
399
Daniel Veillard95d845f2001-06-13 13:48:46 +0000400static void
401htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000402
Owen Taylor3473f882001-02-23 17:55:21 +0000403/**
404 * htmlNodeListDump:
405 * @buf: the HTML buffer output
406 * @doc: the document
407 * @cur: the first node
408 *
409 * Dump an HTML node list, recursive behaviour,children are printed too.
410 */
411static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000412htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000413 if (cur == NULL) {
414 xmlGenericError(xmlGenericErrorContext,
415 "htmlNodeListDump : node == NULL\n");
416 return;
417 }
418 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000419 htmlNodeDumpFormat(buf, doc, cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000420 cur = cur->next;
421 }
422}
423
424/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000425 * htmlNodeDumpFormat:
Owen Taylor3473f882001-02-23 17:55:21 +0000426 * @buf: the HTML buffer output
427 * @doc: the document
428 * @cur: the current node
Daniel Veillard95d845f2001-06-13 13:48:46 +0000429 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000430 *
431 * Dump an HTML node, recursive behaviour,children are printed too.
432 */
433void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000434htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
435 int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000436 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000437
438 if (cur == NULL) {
439 xmlGenericError(xmlGenericErrorContext,
440 "htmlNodeDump : node == NULL\n");
441 return;
442 }
443 /*
444 * Special cases.
445 */
446 if (cur->type == XML_DTD_NODE)
447 return;
448 if (cur->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000449 htmlDocContentDump(buf, (xmlDocPtr) cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000450 return;
451 }
452 if (cur->type == HTML_TEXT_NODE) {
453 if (cur->content != NULL) {
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000454 if (((cur->name == xmlStringText) ||
455 (cur->name != xmlStringTextNoenc)) &&
456 ((cur->parent == NULL) ||
457 (!xmlStrEqual(cur->parent->name, BAD_CAST "script")))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000458 xmlChar *buffer;
459
460#ifndef XML_USE_BUFFER_CONTENT
461 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
462#else
463 buffer = xmlEncodeEntitiesReentrant(doc,
464 xmlBufferContent(cur->content));
465#endif
466 if (buffer != NULL) {
467 xmlBufferWriteCHAR(buf, buffer);
468 xmlFree(buffer);
469 }
470 } else {
471 xmlBufferWriteCHAR(buf, cur->content);
472 }
473 }
474 return;
475 }
476 if (cur->type == HTML_COMMENT_NODE) {
477 if (cur->content != NULL) {
478 xmlBufferWriteChar(buf, "<!--");
479#ifndef XML_USE_BUFFER_CONTENT
480 xmlBufferWriteCHAR(buf, cur->content);
481#else
482 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
483#endif
484 xmlBufferWriteChar(buf, "-->");
485 }
486 return;
487 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000488 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000489 if (cur->name == NULL)
490 return;
491 xmlBufferWriteChar(buf, "<?");
492 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000493 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000494 xmlBufferWriteChar(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000495#ifndef XML_USE_BUFFER_CONTENT
496 xmlBufferWriteCHAR(buf, cur->content);
497#else
498 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
499#endif
Daniel Veillard7533cc82001-04-24 15:52:00 +0000500 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000501 xmlBufferWriteChar(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000502 return;
503 }
Owen Taylor3473f882001-02-23 17:55:21 +0000504 if (cur->type == HTML_ENTITY_REF_NODE) {
505 xmlBufferWriteChar(buf, "&");
506 xmlBufferWriteCHAR(buf, cur->name);
507 xmlBufferWriteChar(buf, ";");
508 return;
509 }
Daniel Veillard083c2662001-05-08 08:27:14 +0000510 if (cur->type == HTML_PRESERVE_NODE) {
511 if (cur->content != NULL) {
512#ifndef XML_USE_BUFFER_CONTENT
513 xmlBufferWriteCHAR(buf, cur->content);
514#else
515 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
516#endif
517 }
518 return;
519 }
Owen Taylor3473f882001-02-23 17:55:21 +0000520
521 /*
Daniel Veillard083c2662001-05-08 08:27:14 +0000522 * Get specific HTML info for taht node.
Owen Taylor3473f882001-02-23 17:55:21 +0000523 */
524 info = htmlTagLookup(cur->name);
525
526 xmlBufferWriteChar(buf, "<");
527 xmlBufferWriteCHAR(buf, cur->name);
528 if (cur->properties != NULL)
Daniel Veillard95d845f2001-06-13 13:48:46 +0000529 htmlAttrListDump(buf, doc, cur->properties, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000530
531 if ((info != NULL) && (info->empty)) {
532 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000533 if ((format) && (info != NULL) && (!info->isinline) &&
534 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000535 if ((cur->next->type != HTML_TEXT_NODE) &&
536 (cur->next->type != HTML_ENTITY_REF_NODE))
537 xmlBufferWriteChar(buf, "\n");
538 }
539 return;
540 }
Daniel Veillard7db37732001-07-12 01:20:08 +0000541 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
542 (cur->children == NULL)) {
Daniel Veillard083c2662001-05-08 08:27:14 +0000543 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +0000544 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
545 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000546 xmlBufferWriteChar(buf, ">");
Daniel Veillard083c2662001-05-08 08:27:14 +0000547 } else {
Owen Taylor3473f882001-02-23 17:55:21 +0000548 xmlBufferWriteChar(buf, "></");
549 xmlBufferWriteCHAR(buf, cur->name);
550 xmlBufferWriteChar(buf, ">");
551 }
Daniel Veillard02bb1702001-06-13 21:11:59 +0000552 if ((format) && (info != NULL) && (!info->isinline) &&
553 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000554 if ((cur->next->type != HTML_TEXT_NODE) &&
555 (cur->next->type != HTML_ENTITY_REF_NODE))
556 xmlBufferWriteChar(buf, "\n");
557 }
558 return;
559 }
560 xmlBufferWriteChar(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +0000561 if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000562 xmlChar *buffer;
563
564#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard083c2662001-05-08 08:27:14 +0000565 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000566#else
Daniel Veillard083c2662001-05-08 08:27:14 +0000567 buffer = xmlEncodeEntitiesReentrant(doc,
568 xmlBufferContent(cur->content));
Owen Taylor3473f882001-02-23 17:55:21 +0000569#endif
570 if (buffer != NULL) {
571 xmlBufferWriteCHAR(buf, buffer);
572 xmlFree(buffer);
573 }
574 }
575 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +0000576 if ((format) && (info != NULL) && (!info->isinline) &&
577 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000578 (cur->children->type != HTML_ENTITY_REF_NODE) &&
579 (cur->children != cur->last))
580 xmlBufferWriteChar(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +0000581 htmlNodeListDump(buf, doc, cur->children, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +0000582 if ((format) && (info != NULL) && (!info->isinline) &&
583 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000584 (cur->last->type != HTML_ENTITY_REF_NODE) &&
585 (cur->children != cur->last))
586 xmlBufferWriteChar(buf, "\n");
587 }
Owen Taylor3473f882001-02-23 17:55:21 +0000588 xmlBufferWriteChar(buf, "</");
589 xmlBufferWriteCHAR(buf, cur->name);
590 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000591 if ((format) && (info != NULL) && (!info->isinline) &&
592 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000593 if ((cur->next->type != HTML_TEXT_NODE) &&
594 (cur->next->type != HTML_ENTITY_REF_NODE))
595 xmlBufferWriteChar(buf, "\n");
596 }
597}
598
599/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000600 * htmlNodeDump:
601 * @buf: the HTML buffer output
602 * @doc: the document
603 * @cur: the current node
604 *
605 * Dump an HTML node, recursive behaviour,children are printed too,
606 * and formatting returns are added.
607 */
608void
609htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
610 htmlNodeDumpFormat(buf, doc, cur, 1);
611}
612
613/**
614 * htmlNodeDumpFileFormat:
615 * @out: the FILE pointer
616 * @doc: the document
617 * @cur: the current node
618 * @encoding: the document encoding
619 * @format: should formatting spaces been added
620 *
621 * Dump an HTML node, recursive behaviour,children are printed too.
622 *
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000623 * TODO: if encoding == NULL try to save in the doc encoding
624 *
625 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard95d845f2001-06-13 13:48:46 +0000626 */
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000627int
628htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
629 xmlNodePtr cur, const char *encoding, int format) {
630 xmlOutputBufferPtr buf;
631 xmlCharEncodingHandlerPtr handler = NULL;
632 int ret;
Daniel Veillard95d845f2001-06-13 13:48:46 +0000633
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000634 if (encoding != NULL) {
635 xmlCharEncoding enc;
636
637 enc = xmlParseCharEncoding(encoding);
638 if (enc != XML_CHAR_ENCODING_UTF8) {
639 handler = xmlFindCharEncodingHandler(encoding);
640 if (handler == NULL)
641 return(-1);
642 }
643 }
644
645 /*
646 * Fallback to HTML or ASCII when the encoding is unspecified
647 */
648 if (handler == NULL)
649 handler = xmlFindCharEncodingHandler("HTML");
650 if (handler == NULL)
651 handler = xmlFindCharEncodingHandler("ascii");
652
653 /*
654 * save the content to a temp buffer.
655 */
656 buf = xmlOutputBufferCreateFile(out, handler);
657 if (buf == NULL) return(0);
658
659 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
660
661 ret = xmlOutputBufferClose(buf);
662 return(ret);
Daniel Veillard95d845f2001-06-13 13:48:46 +0000663}
664
665/**
Owen Taylor3473f882001-02-23 17:55:21 +0000666 * htmlNodeDumpFile:
667 * @out: the FILE pointer
668 * @doc: the document
669 * @cur: the current node
670 *
Daniel Veillard95d845f2001-06-13 13:48:46 +0000671 * Dump an HTML node, recursive behaviour,children are printed too,
672 * and formatting returns are added.
Owen Taylor3473f882001-02-23 17:55:21 +0000673 */
674void
675htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000676 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000677}
678
679/**
680 * htmlDocContentDump:
681 * @buf: the HTML buffer output
682 * @cur: the document
683 *
684 * Dump an HTML document.
685 */
686static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000687htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000688 int type;
689
690 /*
691 * force to output the stuff as HTML, especially for entities
692 */
693 type = cur->type;
694 cur->type = XML_HTML_DOCUMENT_NODE;
695 if (cur->intSubset != NULL)
696 htmlDtdDump(buf, cur);
697 else {
698 /* Default to HTML-4.0 transitionnal @@@@ */
699 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
700
701 }
702 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000703 htmlNodeListDump(buf, cur, cur->children, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000704 }
705 xmlBufferWriteChar(buf, "\n");
706 cur->type = (xmlElementType) type;
707}
708
709/**
710 * htmlDocDumpMemory:
711 * @cur: the document
712 * @mem: OUT: the memory pointer
Daniel Veillard2d703722001-05-30 18:32:34 +0000713 * @size: OUT: the memory length
Owen Taylor3473f882001-02-23 17:55:21 +0000714 *
715 * Dump an HTML document in memory and return the xmlChar * and it's size.
716 * It's up to the caller to free the memory.
717 */
718void
719htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard2d703722001-05-30 18:32:34 +0000720 xmlOutputBufferPtr buf;
721 xmlCharEncodingHandlerPtr handler = NULL;
722 const char *encoding;
Owen Taylor3473f882001-02-23 17:55:21 +0000723
724 if (cur == NULL) {
725#ifdef DEBUG_TREE
726 xmlGenericError(xmlGenericErrorContext,
Daniel Veillard2d703722001-05-30 18:32:34 +0000727 "htmlDocDumpMemory : document == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000728#endif
729 *mem = NULL;
730 *size = 0;
731 return;
732 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000733
734 encoding = (const char *) htmlGetMetaEncoding(cur);
735
736 if (encoding != NULL) {
737 xmlCharEncoding enc;
738
739 enc = xmlParseCharEncoding(encoding);
740 if (enc != cur->charset) {
741 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
742 /*
743 * Not supported yet
744 */
745 *mem = NULL;
746 *size = 0;
747 return;
748 }
749
750 handler = xmlFindCharEncodingHandler(encoding);
751 if (handler == NULL) {
752 *mem = NULL;
753 *size = 0;
754 return;
755 }
756 }
757 }
758
759 /*
760 * Fallback to HTML or ASCII when the encoding is unspecified
761 */
762 if (handler == NULL)
763 handler = xmlFindCharEncodingHandler("HTML");
764 if (handler == NULL)
765 handler = xmlFindCharEncodingHandler("ascii");
766
767 buf = xmlAllocOutputBuffer(handler);
Owen Taylor3473f882001-02-23 17:55:21 +0000768 if (buf == NULL) {
769 *mem = NULL;
770 *size = 0;
771 return;
772 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000773
774 htmlDocContentDumpOutput(buf, cur, NULL);
775 xmlOutputBufferFlush(buf);
776 if (buf->conv != NULL) {
777 *size = buf->conv->use;
778 *mem = xmlStrndup(buf->conv->content, *size);
779 } else {
780 *size = buf->buffer->use;
781 *mem = xmlStrndup(buf->buffer->content, *size);
782 }
783 (void)xmlOutputBufferClose(buf);
Owen Taylor3473f882001-02-23 17:55:21 +0000784}
785
786
787/************************************************************************
788 * *
789 * Dumping HTML tree content to an I/O output buffer *
790 * *
791 ************************************************************************/
792
Daniel Veillard95d845f2001-06-13 13:48:46 +0000793void
794htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
795 const char *encoding, int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000796/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000797 * htmlDtdDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000798 * @buf: the HTML buffer output
799 * @doc: the document
800 * @encoding: the encoding string
801 *
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000802 * TODO: check whether encoding is needed
803 *
Owen Taylor3473f882001-02-23 17:55:21 +0000804 * Dump the HTML document DTD, if any.
805 */
806static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000807htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000808 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000809 xmlDtdPtr cur = doc->intSubset;
810
811 if (cur == NULL) {
812 xmlGenericError(xmlGenericErrorContext,
813 "htmlDtdDump : no internal subset\n");
814 return;
815 }
816 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
817 xmlOutputBufferWriteString(buf, (const char *)cur->name);
818 if (cur->ExternalID != NULL) {
819 xmlOutputBufferWriteString(buf, " PUBLIC ");
820 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
821 if (cur->SystemID != NULL) {
822 xmlOutputBufferWriteString(buf, " ");
823 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
824 }
825 } else if (cur->SystemID != NULL) {
826 xmlOutputBufferWriteString(buf, " SYSTEM ");
827 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
828 }
829 xmlOutputBufferWriteString(buf, ">\n");
830}
831
832/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000833 * htmlAttrDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000834 * @buf: the HTML buffer output
835 * @doc: the document
836 * @cur: the attribute pointer
837 * @encoding: the encoding string
838 *
839 * Dump an HTML attribute
840 */
841static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000842htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000843 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000844 xmlChar *value;
845
Daniel Veillardeca60d02001-06-13 07:45:41 +0000846 /*
847 * TODO: The html output method should not escape a & character
848 * occurring in an attribute value immediately followed by
849 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
850 */
851
Owen Taylor3473f882001-02-23 17:55:21 +0000852 if (cur == NULL) {
853 xmlGenericError(xmlGenericErrorContext,
854 "htmlAttrDump : property == NULL\n");
855 return;
856 }
857 xmlOutputBufferWriteString(buf, " ");
858 xmlOutputBufferWriteString(buf, (const char *)cur->name);
859 if (cur->children != NULL) {
860 value = xmlNodeListGetString(doc, cur->children, 0);
861 if (value) {
862 xmlOutputBufferWriteString(buf, "=");
863 xmlBufferWriteQuotedString(buf->buffer, value);
864 xmlFree(value);
865 } else {
866 xmlOutputBufferWriteString(buf, "=\"\"");
867 }
868 }
869}
870
871/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000872 * htmlAttrListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000873 * @buf: the HTML buffer output
874 * @doc: the document
875 * @cur: the first attribute pointer
876 * @encoding: the encoding string
877 *
878 * Dump a list of HTML attributes
879 */
880static void
881htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
882 if (cur == NULL) {
883 xmlGenericError(xmlGenericErrorContext,
884 "htmlAttrListDump : property == NULL\n");
885 return;
886 }
887 while (cur != NULL) {
888 htmlAttrDumpOutput(buf, doc, cur, encoding);
889 cur = cur->next;
890 }
891}
892
893
894void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
895 xmlNodePtr cur, const char *encoding);
896
897/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000898 * htmlNodeListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000899 * @buf: the HTML buffer output
900 * @doc: the document
901 * @cur: the first node
902 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000903 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000904 *
905 * Dump an HTML node list, recursive behaviour,children are printed too.
906 */
907static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000908htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
909 xmlNodePtr cur, const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000910 if (cur == NULL) {
911 xmlGenericError(xmlGenericErrorContext,
912 "htmlNodeListDump : node == NULL\n");
913 return;
914 }
915 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000916 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000917 cur = cur->next;
918 }
919}
920
921/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000922 * htmlNodeDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000923 * @buf: the HTML buffer output
924 * @doc: the document
925 * @cur: the current node
926 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000927 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000928 *
929 * Dump an HTML node, recursive behaviour,children are printed too.
930 */
931void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000932htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
933 xmlNodePtr cur, const char *encoding, int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000934 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000935
936 if (cur == NULL) {
937 xmlGenericError(xmlGenericErrorContext,
938 "htmlNodeDump : node == NULL\n");
939 return;
940 }
941 /*
942 * Special cases.
943 */
944 if (cur->type == XML_DTD_NODE)
945 return;
946 if (cur->type == XML_HTML_DOCUMENT_NODE) {
947 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
948 return;
949 }
950 if (cur->type == HTML_TEXT_NODE) {
951 if (cur->content != NULL) {
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000952 if (((cur->name == xmlStringText) ||
953 (cur->name != xmlStringTextNoenc)) &&
954 ((cur->parent == NULL) ||
955 (!xmlStrEqual(cur->parent->name, BAD_CAST "script")))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000956 xmlChar *buffer;
957
958#ifndef XML_USE_BUFFER_CONTENT
959 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
960#else
961 buffer = xmlEncodeEntitiesReentrant(doc,
962 xmlBufferContent(cur->content));
963#endif
964 if (buffer != NULL) {
965 xmlOutputBufferWriteString(buf, (const char *)buffer);
966 xmlFree(buffer);
967 }
968 } else {
969 xmlOutputBufferWriteString(buf, (const char *)cur->content);
970 }
971 }
972 return;
973 }
974 if (cur->type == HTML_COMMENT_NODE) {
975 if (cur->content != NULL) {
976 xmlOutputBufferWriteString(buf, "<!--");
977#ifndef XML_USE_BUFFER_CONTENT
978 xmlOutputBufferWriteString(buf, (const char *)cur->content);
979#else
980 xmlOutputBufferWriteString(buf, (const char *)
981 xmlBufferContent(cur->content));
982#endif
983 xmlOutputBufferWriteString(buf, "-->");
984 }
985 return;
986 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000987 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000988 if (cur->name == NULL)
989 return;
990 xmlOutputBufferWriteString(buf, "<?");
991 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000992 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000993 xmlOutputBufferWriteString(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000994#ifndef XML_USE_BUFFER_CONTENT
995 xmlOutputBufferWriteString(buf, (const char *)cur->content);
996#else
997 xmlOutputBufferWriteString(buf, (const char *)
998 xmlBufferContent(cur->content));
999#endif
Daniel Veillard7533cc82001-04-24 15:52:00 +00001000 }
Daniel Veillard5146f202001-04-25 10:29:44 +00001001 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +00001002 return;
1003 }
Owen Taylor3473f882001-02-23 17:55:21 +00001004 if (cur->type == HTML_ENTITY_REF_NODE) {
1005 xmlOutputBufferWriteString(buf, "&");
1006 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1007 xmlOutputBufferWriteString(buf, ";");
1008 return;
1009 }
1010 if (cur->type == HTML_PRESERVE_NODE) {
1011 if (cur->content != NULL) {
1012#ifndef XML_USE_BUFFER_CONTENT
1013 xmlOutputBufferWriteString(buf, (const char *)cur->content);
1014#else
1015 xmlOutputBufferWriteString(buf, (const char *)
1016 xmlBufferContent(cur->content));
1017#endif
1018 }
1019 return;
1020 }
1021
1022 /*
Daniel Veillard1ed3f882001-04-18 09:45:35 +00001023 * Get specific HTML info for taht node.
Owen Taylor3473f882001-02-23 17:55:21 +00001024 */
1025 info = htmlTagLookup(cur->name);
1026
1027 xmlOutputBufferWriteString(buf, "<");
1028 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1029 if (cur->properties != NULL)
1030 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
1031
1032 if ((info != NULL) && (info->empty)) {
1033 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +00001034 if ((format) && (!info->isinline) && (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001035 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +00001036 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1037 (cur->parent != NULL) &&
1038 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001039 xmlOutputBufferWriteString(buf, "\n");
1040 }
1041 return;
1042 }
Daniel Veillard7db37732001-07-12 01:20:08 +00001043 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
1044 (cur->children == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001045 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +00001046 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
1047 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001048 xmlOutputBufferWriteString(buf, ">");
1049 } else {
1050 xmlOutputBufferWriteString(buf, "></");
1051 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1052 xmlOutputBufferWriteString(buf, ">");
1053 }
Daniel Veillard02bb1702001-06-13 21:11:59 +00001054 if ((format) && (cur->next != NULL) &&
1055 (info != NULL) && (!info->isinline)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001056 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +00001057 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1058 (cur->parent != NULL) &&
1059 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001060 xmlOutputBufferWriteString(buf, "\n");
1061 }
1062 return;
1063 }
1064 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +00001065 if ((cur->type != XML_ELEMENT_NODE) &&
1066 (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001067 /*
1068 * Uses the OutputBuffer property to automatically convert
1069 * invalids to charrefs
1070 */
1071
1072#ifndef XML_USE_BUFFER_CONTENT
1073 xmlOutputBufferWriteString(buf, (const char *) cur->content);
1074#else
1075 xmlOutputBufferWriteString(buf,
1076 (const char *) xmlBufferContent(cur->content));
1077#endif
1078 }
1079 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +00001080 if ((format) && (info != NULL) && (!info->isinline) &&
1081 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001082 (cur->children->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001083 (cur->children != cur->last) &&
1084 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001085 xmlOutputBufferWriteString(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +00001086 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +00001087 if ((format) && (info != NULL) && (!info->isinline) &&
1088 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001089 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001090 (cur->children != cur->last) &&
1091 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001092 xmlOutputBufferWriteString(buf, "\n");
1093 }
Owen Taylor3473f882001-02-23 17:55:21 +00001094 xmlOutputBufferWriteString(buf, "</");
1095 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1096 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +00001097 if ((format) && (info != NULL) && (!info->isinline) &&
1098 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001099 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001100 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1101 (cur->parent != NULL) &&
1102 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001103 xmlOutputBufferWriteString(buf, "\n");
1104 }
1105}
1106
1107/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001108 * htmlNodeDumpOutput:
1109 * @buf: the HTML buffer output
1110 * @doc: the document
1111 * @cur: the current node
1112 * @encoding: the encoding string
1113 *
1114 * Dump an HTML node, recursive behaviour,children are printed too,
1115 * and formatting returns/spaces are added.
1116 */
1117void
1118htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
1119 xmlNodePtr cur, const char *encoding) {
1120 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
1121}
1122
1123/**
1124 * htmlDocContentDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +00001125 * @buf: the HTML buffer output
1126 * @cur: the document
1127 * @encoding: the encoding string
1128 *
1129 * Dump an HTML document.
1130 */
1131void
Daniel Veillard95d845f2001-06-13 13:48:46 +00001132htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1133 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001134 int type;
1135
1136 /*
1137 * force to output the stuff as HTML, especially for entities
1138 */
1139 type = cur->type;
1140 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard4dd93462001-04-02 15:16:19 +00001141 if (cur->intSubset != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001142 htmlDtdDumpOutput(buf, cur, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001143 }
1144 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +00001145 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001146 }
1147 xmlOutputBufferWriteString(buf, "\n");
1148 cur->type = (xmlElementType) type;
1149}
1150
Daniel Veillard95d845f2001-06-13 13:48:46 +00001151/**
1152 * htmlDocContentDumpOutput:
1153 * @buf: the HTML buffer output
1154 * @cur: the document
1155 * @encoding: the encoding string
1156 *
1157 * Dump an HTML document. Formating return/spaces are added.
1158 */
1159void
1160htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1161 const char *encoding) {
1162 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1163}
1164
Owen Taylor3473f882001-02-23 17:55:21 +00001165/************************************************************************
1166 * *
1167 * Saving functions front-ends *
1168 * *
1169 ************************************************************************/
1170
1171/**
1172 * htmlDocDump:
1173 * @f: the FILE*
1174 * @cur: the document
1175 *
1176 * Dump an HTML document to an open FILE.
1177 *
1178 * returns: the number of byte written or -1 in case of failure.
1179 */
1180int
1181htmlDocDump(FILE *f, xmlDocPtr cur) {
1182 xmlOutputBufferPtr buf;
1183 xmlCharEncodingHandlerPtr handler = NULL;
1184 const char *encoding;
1185 int ret;
1186
1187 if (cur == NULL) {
1188#ifdef DEBUG_TREE
1189 xmlGenericError(xmlGenericErrorContext,
1190 "htmlDocDump : document == NULL\n");
1191#endif
1192 return(-1);
1193 }
1194
1195 encoding = (const char *) htmlGetMetaEncoding(cur);
1196
1197 if (encoding != NULL) {
1198 xmlCharEncoding enc;
1199
1200 enc = xmlParseCharEncoding(encoding);
1201 if (enc != cur->charset) {
1202 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1203 /*
1204 * Not supported yet
1205 */
1206 return(-1);
1207 }
1208
1209 handler = xmlFindCharEncodingHandler(encoding);
1210 if (handler == NULL)
1211 return(-1);
1212 }
1213 }
1214
1215 /*
1216 * Fallback to HTML or ASCII when the encoding is unspecified
1217 */
1218 if (handler == NULL)
1219 handler = xmlFindCharEncodingHandler("HTML");
1220 if (handler == NULL)
1221 handler = xmlFindCharEncodingHandler("ascii");
1222
1223 buf = xmlOutputBufferCreateFile(f, handler);
1224 if (buf == NULL) return(-1);
1225 htmlDocContentDumpOutput(buf, cur, NULL);
1226
1227 ret = xmlOutputBufferClose(buf);
1228 return(ret);
1229}
1230
1231/**
1232 * htmlSaveFile:
1233 * @filename: the filename (or URL)
1234 * @cur: the document
1235 *
1236 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1237 * used.
1238 * returns: the number of byte written or -1 in case of failure.
1239 */
1240int
1241htmlSaveFile(const char *filename, xmlDocPtr cur) {
1242 xmlOutputBufferPtr buf;
1243 xmlCharEncodingHandlerPtr handler = NULL;
1244 const char *encoding;
1245 int ret;
1246
1247 encoding = (const char *) htmlGetMetaEncoding(cur);
1248
1249 if (encoding != NULL) {
1250 xmlCharEncoding enc;
1251
1252 enc = xmlParseCharEncoding(encoding);
1253 if (enc != cur->charset) {
1254 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1255 /*
1256 * Not supported yet
1257 */
1258 return(-1);
1259 }
1260
1261 handler = xmlFindCharEncodingHandler(encoding);
1262 if (handler == NULL)
1263 return(-1);
1264 }
1265 }
1266
1267 /*
1268 * Fallback to HTML or ASCII when the encoding is unspecified
1269 */
1270 if (handler == NULL)
1271 handler = xmlFindCharEncodingHandler("HTML");
1272 if (handler == NULL)
1273 handler = xmlFindCharEncodingHandler("ascii");
1274
1275 /*
1276 * save the content to a temp buffer.
1277 */
1278 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1279 if (buf == NULL) return(0);
1280
1281 htmlDocContentDumpOutput(buf, cur, NULL);
1282
1283 ret = xmlOutputBufferClose(buf);
1284 return(ret);
1285}
1286
1287/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001288 * htmlSaveFileFormat:
Owen Taylor3473f882001-02-23 17:55:21 +00001289 * @filename: the filename
1290 * @cur: the document
Daniel Veillard95d845f2001-06-13 13:48:46 +00001291 * @format: should formatting spaces been added
1292 * @encoding: the document encoding
Owen Taylor3473f882001-02-23 17:55:21 +00001293 *
1294 * Dump an HTML document to a file using a given encoding.
1295 *
1296 * returns: the number of byte written or -1 in case of failure.
1297 */
1298int
Daniel Veillard95d845f2001-06-13 13:48:46 +00001299htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1300 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001301 xmlOutputBufferPtr buf;
1302 xmlCharEncodingHandlerPtr handler = NULL;
1303 int ret;
1304
1305 if (encoding != NULL) {
1306 xmlCharEncoding enc;
1307
1308 enc = xmlParseCharEncoding(encoding);
1309 if (enc != cur->charset) {
1310 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1311 /*
1312 * Not supported yet
1313 */
1314 return(-1);
1315 }
1316
1317 handler = xmlFindCharEncodingHandler(encoding);
1318 if (handler == NULL)
1319 return(-1);
1320 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1321 }
Daniel Veillard4dd93462001-04-02 15:16:19 +00001322 } else {
1323 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
Owen Taylor3473f882001-02-23 17:55:21 +00001324 }
1325
1326 /*
1327 * Fallback to HTML or ASCII when the encoding is unspecified
1328 */
1329 if (handler == NULL)
1330 handler = xmlFindCharEncodingHandler("HTML");
1331 if (handler == NULL)
1332 handler = xmlFindCharEncodingHandler("ascii");
1333
1334 /*
1335 * save the content to a temp buffer.
1336 */
1337 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1338 if (buf == NULL) return(0);
1339
Daniel Veillard95d845f2001-06-13 13:48:46 +00001340 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001341
1342 ret = xmlOutputBufferClose(buf);
1343 return(ret);
1344}
Daniel Veillard95d845f2001-06-13 13:48:46 +00001345
1346/**
1347 * htmlSaveFileEnc:
1348 * @filename: the filename
1349 * @cur: the document
1350 * @encoding: the document encoding
1351 *
1352 * Dump an HTML document to a file using a given encoding
1353 * and formatting returns/spaces are added.
1354 *
1355 * returns: the number of byte written or -1 in case of failure.
1356 */
1357int
1358htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1359 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1360}
1361
Owen Taylor3473f882001-02-23 17:55:21 +00001362#endif /* LIBXML_HTML_ENABLED */