blob: ae0374c8128e5d3585b5575d72001b6e5e3630d1 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * HTMLtree.c : implemetation of access function for an HTML tree.
3 *
4 * See Copyright for the status of this software.
5 *
6 * Daniel.Veillard@w3.org
7 */
8
9
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000011#ifdef LIBXML_HTML_ENABLED
12
Owen Taylor3473f882001-02-23 17:55:21 +000013#ifdef HAVE_CTYPE_H
14#include <ctype.h>
15#endif
16#ifdef HAVE_STDLIB_H
17#include <stdlib.h>
18#endif
19
20#include <libxml/xmlmemory.h>
21#include <libxml/HTMLparser.h>
22#include <libxml/HTMLtree.h>
23#include <libxml/entities.h>
24#include <libxml/valid.h>
25#include <libxml/xmlerror.h>
26#include <libxml/parserInternals.h>
27
28/************************************************************************
29 * *
30 * Getting/Setting encoding meta tags *
31 * *
32 ************************************************************************/
33
34/**
35 * htmlGetMetaEncoding:
36 * @doc: the document
37 *
38 * Encoding definition lookup in the Meta tags
39 *
40 * Returns the current encoding as flagged in the HTML source
41 */
42const xmlChar *
43htmlGetMetaEncoding(htmlDocPtr doc) {
44 htmlNodePtr cur;
45 const xmlChar *content;
46 const xmlChar *encoding;
47
48 if (doc == NULL)
49 return(NULL);
50 cur = doc->children;
51
52 /*
53 * Search the html
54 */
55 while (cur != NULL) {
56 if (cur->name != NULL) {
57 if (xmlStrEqual(cur->name, BAD_CAST"html"))
58 break;
59 if (xmlStrEqual(cur->name, BAD_CAST"head"))
60 goto found_head;
61 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
62 goto found_meta;
63 }
64 cur = cur->next;
65 }
66 if (cur == NULL)
67 return(NULL);
68 cur = cur->children;
69
70 /*
71 * Search the head
72 */
73 while (cur != NULL) {
74 if (cur->name != NULL) {
75 if (xmlStrEqual(cur->name, BAD_CAST"head"))
76 break;
77 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
78 goto found_meta;
79 }
80 cur = cur->next;
81 }
82 if (cur == NULL)
83 return(NULL);
84found_head:
85 cur = cur->children;
86
87 /*
88 * Search the meta elements
89 */
90found_meta:
91 while (cur != NULL) {
92 if (cur->name != NULL) {
93 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
94 xmlAttrPtr attr = cur->properties;
95 int http;
96 const xmlChar *value;
97
98 content = NULL;
99 http = 0;
100 while (attr != NULL) {
101 if ((attr->children != NULL) &&
102 (attr->children->type == XML_TEXT_NODE) &&
103 (attr->children->next == NULL)) {
104#ifndef XML_USE_BUFFER_CONTENT
105 value = attr->children->content;
106#else
107 value = xmlBufferContent(attr->children->content);
108#endif
109 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
110 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
111 http = 1;
112 else if ((value != NULL)
113 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
114 content = value;
115 if ((http != 0) && (content != NULL))
116 goto found_content;
117 }
118 attr = attr->next;
119 }
120 }
121 }
122 cur = cur->next;
123 }
124 return(NULL);
125
126found_content:
127 encoding = xmlStrstr(content, BAD_CAST"charset=");
128 if (encoding == NULL)
129 encoding = xmlStrstr(content, BAD_CAST"Charset=");
130 if (encoding == NULL)
131 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
132 if (encoding != NULL) {
133 encoding += 8;
134 } else {
135 encoding = xmlStrstr(content, BAD_CAST"charset =");
136 if (encoding == NULL)
137 encoding = xmlStrstr(content, BAD_CAST"Charset =");
138 if (encoding == NULL)
139 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
140 if (encoding != NULL)
141 encoding += 9;
142 }
143 if (encoding != NULL) {
144 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
145 }
146 return(encoding);
147}
148
149/**
150 * htmlSetMetaEncoding:
151 * @doc: the document
152 * @encoding: the encoding string
153 *
154 * Sets the current encoding in the Meta tags
155 * NOTE: this will not change the document content encoding, just
156 * the META flag associated.
157 *
158 * Returns 0 in case of success and -1 in case of error
159 */
160int
161htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
162 htmlNodePtr cur, meta;
163 const xmlChar *content;
164 char newcontent[100];
165
166
167 if (doc == NULL)
168 return(-1);
169
170 if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000171 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
172 encoding);
Owen Taylor3473f882001-02-23 17:55:21 +0000173 newcontent[sizeof(newcontent) - 1] = 0;
174 }
175
176 cur = doc->children;
177
178 /*
179 * Search the html
180 */
181 while (cur != NULL) {
182 if (cur->name != NULL) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000183 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
184 break;
185 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
186 goto found_head;
187 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
188 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000189 }
190 cur = cur->next;
191 }
192 if (cur == NULL)
193 return(-1);
194 cur = cur->children;
195
196 /*
197 * Search the head
198 */
199 while (cur != NULL) {
200 if (cur->name != NULL) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000201 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
202 break;
203 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
204 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000205 }
206 cur = cur->next;
207 }
208 if (cur == NULL)
209 return(-1);
210found_head:
211 if (cur->children == NULL) {
212 if (encoding == NULL)
213 return(0);
214 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
215 xmlAddChild(cur, meta);
Owen Taylor3473f882001-02-23 17:55:21 +0000216 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000217 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Owen Taylor3473f882001-02-23 17:55:21 +0000218 return(0);
219 }
220 cur = cur->children;
221
222found_meta:
223 if (encoding != NULL) {
224 /*
225 * Create a new Meta element with the right aatributes
226 */
227
228 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
229 xmlAddPrevSibling(cur, meta);
Owen Taylor3473f882001-02-23 17:55:21 +0000230 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000231 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Owen Taylor3473f882001-02-23 17:55:21 +0000232 }
233
234 /*
235 * Search and destroy all the remaining the meta elements carrying
236 * encoding informations
237 */
238 while (cur != NULL) {
239 if (cur->name != NULL) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000240 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000241 xmlAttrPtr attr = cur->properties;
242 int http;
243 const xmlChar *value;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000244 int same_charset;
Owen Taylor3473f882001-02-23 17:55:21 +0000245
246 content = NULL;
247 http = 0;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000248 same_charset = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000249 while (attr != NULL) {
250 if ((attr->children != NULL) &&
251 (attr->children->type == XML_TEXT_NODE) &&
252 (attr->children->next == NULL)) {
253#ifndef XML_USE_BUFFER_CONTENT
254 value = attr->children->content;
255#else
256 value = xmlBufferContent(attr->children->content);
257#endif
258 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
259 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
260 http = 1;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000261 else
262 {
263 if ((value != NULL) &&
264 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
265 content = value;
266 else
267 if ((!xmlStrcasecmp(attr->name, BAD_CAST"charset"))
268 && (!xmlStrcasecmp(value, encoding)))
269 same_charset = 1;
270 }
271 if ((http != 0) && (content != NULL) && (same_charset != 0))
Owen Taylor3473f882001-02-23 17:55:21 +0000272 break;
273 }
274 attr = attr->next;
275 }
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000276 if ((http != 0) && (content != NULL) && (same_charset != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000277 meta = cur;
278 cur = cur->next;
279 xmlUnlinkNode(meta);
280 xmlFreeNode(meta);
281 continue;
282 }
283
284 }
285 }
286 cur = cur->next;
287 }
288 return(0);
289}
290
291/************************************************************************
292 * *
293 * Dumping HTML tree content to a simple buffer *
294 * *
295 ************************************************************************/
296
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000297void htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
298 xmlNodePtr cur, const char *encoding, int format);
299
Owen Taylor3473f882001-02-23 17:55:21 +0000300static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000301htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000302
303/**
304 * htmlDtdDump:
305 * @buf: the HTML buffer output
306 * @doc: the document
307 *
308 * Dump the HTML document DTD, if any.
309 */
310static void
311htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
312 xmlDtdPtr cur = doc->intSubset;
313
314 if (cur == NULL) {
315 xmlGenericError(xmlGenericErrorContext,
316 "htmlDtdDump : no internal subset\n");
317 return;
318 }
319 xmlBufferWriteChar(buf, "<!DOCTYPE ");
320 xmlBufferWriteCHAR(buf, cur->name);
321 if (cur->ExternalID != NULL) {
322 xmlBufferWriteChar(buf, " PUBLIC ");
323 xmlBufferWriteQuotedString(buf, cur->ExternalID);
324 if (cur->SystemID != NULL) {
325 xmlBufferWriteChar(buf, " ");
326 xmlBufferWriteQuotedString(buf, cur->SystemID);
327 }
328 } else if (cur->SystemID != NULL) {
329 xmlBufferWriteChar(buf, " SYSTEM ");
330 xmlBufferWriteQuotedString(buf, cur->SystemID);
331 }
332 xmlBufferWriteChar(buf, ">\n");
333}
334
335/**
336 * htmlAttrDump:
337 * @buf: the HTML buffer output
338 * @doc: the document
339 * @cur: the attribute pointer
340 *
341 * Dump an HTML attribute
342 */
343static void
344htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
345 xmlChar *value;
346
Daniel Veillardeca60d02001-06-13 07:45:41 +0000347 /*
348 * TODO: The html output method should not escape a & character
349 * occurring in an attribute value immediately followed by
350 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
351 */
352
Owen Taylor3473f882001-02-23 17:55:21 +0000353 if (cur == NULL) {
354 xmlGenericError(xmlGenericErrorContext,
355 "htmlAttrDump : property == NULL\n");
356 return;
357 }
358 xmlBufferWriteChar(buf, " ");
359 xmlBufferWriteCHAR(buf, cur->name);
360 if (cur->children != NULL) {
361 value = xmlNodeListGetString(doc, cur->children, 0);
362 if (value) {
363 xmlBufferWriteChar(buf, "=");
364 xmlBufferWriteQuotedString(buf, value);
365 xmlFree(value);
366 } else {
367 xmlBufferWriteChar(buf, "=\"\"");
368 }
369 }
370}
371
372/**
373 * htmlAttrListDump:
374 * @buf: the HTML buffer output
375 * @doc: the document
376 * @cur: the first attribute pointer
377 *
378 * Dump a list of HTML attributes
379 */
380static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000381htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, int format) {
382 int i = 0;
383
Owen Taylor3473f882001-02-23 17:55:21 +0000384 if (cur == NULL) {
385 xmlGenericError(xmlGenericErrorContext,
386 "htmlAttrListDump : property == NULL\n");
387 return;
388 }
389 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000390 i++;
391 if ((format) && (i >= 5)) {
392 i = 0;
393 xmlBufferWriteChar(buf, "\n");
394 }
Owen Taylor3473f882001-02-23 17:55:21 +0000395 htmlAttrDump(buf, doc, cur);
396 cur = cur->next;
397 }
398}
399
Daniel Veillard95d845f2001-06-13 13:48:46 +0000400static void
401htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000402
Owen Taylor3473f882001-02-23 17:55:21 +0000403/**
404 * htmlNodeListDump:
405 * @buf: the HTML buffer output
406 * @doc: the document
407 * @cur: the first node
408 *
409 * Dump an HTML node list, recursive behaviour,children are printed too.
410 */
411static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000412htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000413 if (cur == NULL) {
414 xmlGenericError(xmlGenericErrorContext,
415 "htmlNodeListDump : node == NULL\n");
416 return;
417 }
418 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000419 htmlNodeDumpFormat(buf, doc, cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000420 cur = cur->next;
421 }
422}
423
424/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000425 * htmlNodeDumpFormat:
Owen Taylor3473f882001-02-23 17:55:21 +0000426 * @buf: the HTML buffer output
427 * @doc: the document
428 * @cur: the current node
Daniel Veillard95d845f2001-06-13 13:48:46 +0000429 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000430 *
431 * Dump an HTML node, recursive behaviour,children are printed too.
432 */
433void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000434htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
435 int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000436 htmlElemDescPtr info;
437
438 if (cur == NULL) {
439 xmlGenericError(xmlGenericErrorContext,
440 "htmlNodeDump : node == NULL\n");
441 return;
442 }
443 /*
444 * Special cases.
445 */
446 if (cur->type == XML_DTD_NODE)
447 return;
448 if (cur->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000449 htmlDocContentDump(buf, (xmlDocPtr) cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000450 return;
451 }
452 if (cur->type == HTML_TEXT_NODE) {
453 if (cur->content != NULL) {
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000454 if (((cur->name == xmlStringText) ||
455 (cur->name != xmlStringTextNoenc)) &&
456 ((cur->parent == NULL) ||
457 (!xmlStrEqual(cur->parent->name, BAD_CAST "script")))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000458 xmlChar *buffer;
459
460#ifndef XML_USE_BUFFER_CONTENT
461 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
462#else
463 buffer = xmlEncodeEntitiesReentrant(doc,
464 xmlBufferContent(cur->content));
465#endif
466 if (buffer != NULL) {
467 xmlBufferWriteCHAR(buf, buffer);
468 xmlFree(buffer);
469 }
470 } else {
471 xmlBufferWriteCHAR(buf, cur->content);
472 }
473 }
474 return;
475 }
476 if (cur->type == HTML_COMMENT_NODE) {
477 if (cur->content != NULL) {
478 xmlBufferWriteChar(buf, "<!--");
479#ifndef XML_USE_BUFFER_CONTENT
480 xmlBufferWriteCHAR(buf, cur->content);
481#else
482 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
483#endif
484 xmlBufferWriteChar(buf, "-->");
485 }
486 return;
487 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000488 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000489 if (cur->name == NULL)
490 return;
491 xmlBufferWriteChar(buf, "<?");
492 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000493 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000494 xmlBufferWriteChar(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000495#ifndef XML_USE_BUFFER_CONTENT
496 xmlBufferWriteCHAR(buf, cur->content);
497#else
498 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
499#endif
Daniel Veillard7533cc82001-04-24 15:52:00 +0000500 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000501 xmlBufferWriteChar(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000502 return;
503 }
Owen Taylor3473f882001-02-23 17:55:21 +0000504 if (cur->type == HTML_ENTITY_REF_NODE) {
505 xmlBufferWriteChar(buf, "&");
506 xmlBufferWriteCHAR(buf, cur->name);
507 xmlBufferWriteChar(buf, ";");
508 return;
509 }
Daniel Veillard083c2662001-05-08 08:27:14 +0000510 if (cur->type == HTML_PRESERVE_NODE) {
511 if (cur->content != NULL) {
512#ifndef XML_USE_BUFFER_CONTENT
513 xmlBufferWriteCHAR(buf, cur->content);
514#else
515 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
516#endif
517 }
518 return;
519 }
Owen Taylor3473f882001-02-23 17:55:21 +0000520
521 /*
Daniel Veillard083c2662001-05-08 08:27:14 +0000522 * Get specific HTML info for taht node.
Owen Taylor3473f882001-02-23 17:55:21 +0000523 */
524 info = htmlTagLookup(cur->name);
525
526 xmlBufferWriteChar(buf, "<");
527 xmlBufferWriteCHAR(buf, cur->name);
528 if (cur->properties != NULL)
Daniel Veillard95d845f2001-06-13 13:48:46 +0000529 htmlAttrListDump(buf, doc, cur->properties, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000530
531 if ((info != NULL) && (info->empty)) {
532 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000533 if ((format) && (info != NULL) && (!info->isinline) &&
534 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000535 if ((cur->next->type != HTML_TEXT_NODE) &&
536 (cur->next->type != HTML_ENTITY_REF_NODE))
537 xmlBufferWriteChar(buf, "\n");
538 }
539 return;
540 }
541 if ((cur->content == NULL) && (cur->children == NULL)) {
Daniel Veillard083c2662001-05-08 08:27:14 +0000542 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +0000543 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
544 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000545 xmlBufferWriteChar(buf, ">");
Daniel Veillard083c2662001-05-08 08:27:14 +0000546 } else {
Owen Taylor3473f882001-02-23 17:55:21 +0000547 xmlBufferWriteChar(buf, "></");
548 xmlBufferWriteCHAR(buf, cur->name);
549 xmlBufferWriteChar(buf, ">");
550 }
Daniel Veillard02bb1702001-06-13 21:11:59 +0000551 if ((format) && (info != NULL) && (!info->isinline) &&
552 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000553 if ((cur->next->type != HTML_TEXT_NODE) &&
554 (cur->next->type != HTML_ENTITY_REF_NODE))
555 xmlBufferWriteChar(buf, "\n");
556 }
557 return;
558 }
559 xmlBufferWriteChar(buf, ">");
560 if (cur->content != NULL) {
561 xmlChar *buffer;
562
563#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard083c2662001-05-08 08:27:14 +0000564 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000565#else
Daniel Veillard083c2662001-05-08 08:27:14 +0000566 buffer = xmlEncodeEntitiesReentrant(doc,
567 xmlBufferContent(cur->content));
Owen Taylor3473f882001-02-23 17:55:21 +0000568#endif
569 if (buffer != NULL) {
570 xmlBufferWriteCHAR(buf, buffer);
571 xmlFree(buffer);
572 }
573 }
574 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +0000575 if ((format) && (info != NULL) && (!info->isinline) &&
576 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000577 (cur->children->type != HTML_ENTITY_REF_NODE) &&
578 (cur->children != cur->last))
579 xmlBufferWriteChar(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +0000580 htmlNodeListDump(buf, doc, cur->children, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +0000581 if ((format) && (info != NULL) && (!info->isinline) &&
582 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000583 (cur->last->type != HTML_ENTITY_REF_NODE) &&
584 (cur->children != cur->last))
585 xmlBufferWriteChar(buf, "\n");
586 }
Owen Taylor3473f882001-02-23 17:55:21 +0000587 xmlBufferWriteChar(buf, "</");
588 xmlBufferWriteCHAR(buf, cur->name);
589 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000590 if ((format) && (info != NULL) && (!info->isinline) &&
591 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000592 if ((cur->next->type != HTML_TEXT_NODE) &&
593 (cur->next->type != HTML_ENTITY_REF_NODE))
594 xmlBufferWriteChar(buf, "\n");
595 }
596}
597
598/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000599 * htmlNodeDump:
600 * @buf: the HTML buffer output
601 * @doc: the document
602 * @cur: the current node
603 *
604 * Dump an HTML node, recursive behaviour,children are printed too,
605 * and formatting returns are added.
606 */
607void
608htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
609 htmlNodeDumpFormat(buf, doc, cur, 1);
610}
611
612/**
613 * htmlNodeDumpFileFormat:
614 * @out: the FILE pointer
615 * @doc: the document
616 * @cur: the current node
617 * @encoding: the document encoding
618 * @format: should formatting spaces been added
619 *
620 * Dump an HTML node, recursive behaviour,children are printed too.
621 *
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000622 * TODO: if encoding == NULL try to save in the doc encoding
623 *
624 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard95d845f2001-06-13 13:48:46 +0000625 */
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000626int
627htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
628 xmlNodePtr cur, const char *encoding, int format) {
629 xmlOutputBufferPtr buf;
630 xmlCharEncodingHandlerPtr handler = NULL;
631 int ret;
Daniel Veillard95d845f2001-06-13 13:48:46 +0000632
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000633 if (encoding != NULL) {
634 xmlCharEncoding enc;
635
636 enc = xmlParseCharEncoding(encoding);
637 if (enc != XML_CHAR_ENCODING_UTF8) {
638 handler = xmlFindCharEncodingHandler(encoding);
639 if (handler == NULL)
640 return(-1);
641 }
642 }
643
644 /*
645 * Fallback to HTML or ASCII when the encoding is unspecified
646 */
647 if (handler == NULL)
648 handler = xmlFindCharEncodingHandler("HTML");
649 if (handler == NULL)
650 handler = xmlFindCharEncodingHandler("ascii");
651
652 /*
653 * save the content to a temp buffer.
654 */
655 buf = xmlOutputBufferCreateFile(out, handler);
656 if (buf == NULL) return(0);
657
658 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
659
660 ret = xmlOutputBufferClose(buf);
661 return(ret);
Daniel Veillard95d845f2001-06-13 13:48:46 +0000662}
663
664/**
Owen Taylor3473f882001-02-23 17:55:21 +0000665 * htmlNodeDumpFile:
666 * @out: the FILE pointer
667 * @doc: the document
668 * @cur: the current node
669 *
Daniel Veillard95d845f2001-06-13 13:48:46 +0000670 * Dump an HTML node, recursive behaviour,children are printed too,
671 * and formatting returns are added.
Owen Taylor3473f882001-02-23 17:55:21 +0000672 */
673void
674htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000675 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000676}
677
678/**
679 * htmlDocContentDump:
680 * @buf: the HTML buffer output
681 * @cur: the document
682 *
683 * Dump an HTML document.
684 */
685static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000686htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000687 int type;
688
689 /*
690 * force to output the stuff as HTML, especially for entities
691 */
692 type = cur->type;
693 cur->type = XML_HTML_DOCUMENT_NODE;
694 if (cur->intSubset != NULL)
695 htmlDtdDump(buf, cur);
696 else {
697 /* Default to HTML-4.0 transitionnal @@@@ */
698 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
699
700 }
701 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000702 htmlNodeListDump(buf, cur, cur->children, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000703 }
704 xmlBufferWriteChar(buf, "\n");
705 cur->type = (xmlElementType) type;
706}
707
708/**
709 * htmlDocDumpMemory:
710 * @cur: the document
711 * @mem: OUT: the memory pointer
Daniel Veillard2d703722001-05-30 18:32:34 +0000712 * @size: OUT: the memory length
Owen Taylor3473f882001-02-23 17:55:21 +0000713 *
714 * Dump an HTML document in memory and return the xmlChar * and it's size.
715 * It's up to the caller to free the memory.
716 */
717void
718htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard2d703722001-05-30 18:32:34 +0000719 xmlOutputBufferPtr buf;
720 xmlCharEncodingHandlerPtr handler = NULL;
721 const char *encoding;
Owen Taylor3473f882001-02-23 17:55:21 +0000722
723 if (cur == NULL) {
724#ifdef DEBUG_TREE
725 xmlGenericError(xmlGenericErrorContext,
Daniel Veillard2d703722001-05-30 18:32:34 +0000726 "htmlDocDumpMemory : document == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000727#endif
728 *mem = NULL;
729 *size = 0;
730 return;
731 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000732
733 encoding = (const char *) htmlGetMetaEncoding(cur);
734
735 if (encoding != NULL) {
736 xmlCharEncoding enc;
737
738 enc = xmlParseCharEncoding(encoding);
739 if (enc != cur->charset) {
740 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
741 /*
742 * Not supported yet
743 */
744 *mem = NULL;
745 *size = 0;
746 return;
747 }
748
749 handler = xmlFindCharEncodingHandler(encoding);
750 if (handler == NULL) {
751 *mem = NULL;
752 *size = 0;
753 return;
754 }
755 }
756 }
757
758 /*
759 * Fallback to HTML or ASCII when the encoding is unspecified
760 */
761 if (handler == NULL)
762 handler = xmlFindCharEncodingHandler("HTML");
763 if (handler == NULL)
764 handler = xmlFindCharEncodingHandler("ascii");
765
766 buf = xmlAllocOutputBuffer(handler);
Owen Taylor3473f882001-02-23 17:55:21 +0000767 if (buf == NULL) {
768 *mem = NULL;
769 *size = 0;
770 return;
771 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000772
773 htmlDocContentDumpOutput(buf, cur, NULL);
774 xmlOutputBufferFlush(buf);
775 if (buf->conv != NULL) {
776 *size = buf->conv->use;
777 *mem = xmlStrndup(buf->conv->content, *size);
778 } else {
779 *size = buf->buffer->use;
780 *mem = xmlStrndup(buf->buffer->content, *size);
781 }
782 (void)xmlOutputBufferClose(buf);
Owen Taylor3473f882001-02-23 17:55:21 +0000783}
784
785
786/************************************************************************
787 * *
788 * Dumping HTML tree content to an I/O output buffer *
789 * *
790 ************************************************************************/
791
Daniel Veillard95d845f2001-06-13 13:48:46 +0000792void
793htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
794 const char *encoding, int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000795/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000796 * htmlDtdDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000797 * @buf: the HTML buffer output
798 * @doc: the document
799 * @encoding: the encoding string
800 *
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000801 * TODO: check whether encoding is needed
802 *
Owen Taylor3473f882001-02-23 17:55:21 +0000803 * Dump the HTML document DTD, if any.
804 */
805static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000806htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000807 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000808 xmlDtdPtr cur = doc->intSubset;
809
810 if (cur == NULL) {
811 xmlGenericError(xmlGenericErrorContext,
812 "htmlDtdDump : no internal subset\n");
813 return;
814 }
815 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
816 xmlOutputBufferWriteString(buf, (const char *)cur->name);
817 if (cur->ExternalID != NULL) {
818 xmlOutputBufferWriteString(buf, " PUBLIC ");
819 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
820 if (cur->SystemID != NULL) {
821 xmlOutputBufferWriteString(buf, " ");
822 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
823 }
824 } else if (cur->SystemID != NULL) {
825 xmlOutputBufferWriteString(buf, " SYSTEM ");
826 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
827 }
828 xmlOutputBufferWriteString(buf, ">\n");
829}
830
831/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000832 * htmlAttrDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000833 * @buf: the HTML buffer output
834 * @doc: the document
835 * @cur: the attribute pointer
836 * @encoding: the encoding string
837 *
838 * Dump an HTML attribute
839 */
840static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000841htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000842 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000843 xmlChar *value;
844
Daniel Veillardeca60d02001-06-13 07:45:41 +0000845 /*
846 * TODO: The html output method should not escape a & character
847 * occurring in an attribute value immediately followed by
848 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
849 */
850
Owen Taylor3473f882001-02-23 17:55:21 +0000851 if (cur == NULL) {
852 xmlGenericError(xmlGenericErrorContext,
853 "htmlAttrDump : property == NULL\n");
854 return;
855 }
856 xmlOutputBufferWriteString(buf, " ");
857 xmlOutputBufferWriteString(buf, (const char *)cur->name);
858 if (cur->children != NULL) {
859 value = xmlNodeListGetString(doc, cur->children, 0);
860 if (value) {
861 xmlOutputBufferWriteString(buf, "=");
862 xmlBufferWriteQuotedString(buf->buffer, value);
863 xmlFree(value);
864 } else {
865 xmlOutputBufferWriteString(buf, "=\"\"");
866 }
867 }
868}
869
870/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000871 * htmlAttrListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000872 * @buf: the HTML buffer output
873 * @doc: the document
874 * @cur: the first attribute pointer
875 * @encoding: the encoding string
876 *
877 * Dump a list of HTML attributes
878 */
879static void
880htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
881 if (cur == NULL) {
882 xmlGenericError(xmlGenericErrorContext,
883 "htmlAttrListDump : property == NULL\n");
884 return;
885 }
886 while (cur != NULL) {
887 htmlAttrDumpOutput(buf, doc, cur, encoding);
888 cur = cur->next;
889 }
890}
891
892
893void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
894 xmlNodePtr cur, const char *encoding);
895
896/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000897 * htmlNodeListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000898 * @buf: the HTML buffer output
899 * @doc: the document
900 * @cur: the first node
901 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000902 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000903 *
904 * Dump an HTML node list, recursive behaviour,children are printed too.
905 */
906static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000907htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
908 xmlNodePtr cur, const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000909 if (cur == NULL) {
910 xmlGenericError(xmlGenericErrorContext,
911 "htmlNodeListDump : node == NULL\n");
912 return;
913 }
914 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000915 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000916 cur = cur->next;
917 }
918}
919
920/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000921 * htmlNodeDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000922 * @buf: the HTML buffer output
923 * @doc: the document
924 * @cur: the current node
925 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000926 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000927 *
928 * Dump an HTML node, recursive behaviour,children are printed too.
929 */
930void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000931htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
932 xmlNodePtr cur, const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000933 htmlElemDescPtr info;
934
935 if (cur == NULL) {
936 xmlGenericError(xmlGenericErrorContext,
937 "htmlNodeDump : node == NULL\n");
938 return;
939 }
940 /*
941 * Special cases.
942 */
943 if (cur->type == XML_DTD_NODE)
944 return;
945 if (cur->type == XML_HTML_DOCUMENT_NODE) {
946 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
947 return;
948 }
949 if (cur->type == HTML_TEXT_NODE) {
950 if (cur->content != NULL) {
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000951 if (((cur->name == xmlStringText) ||
952 (cur->name != xmlStringTextNoenc)) &&
953 ((cur->parent == NULL) ||
954 (!xmlStrEqual(cur->parent->name, BAD_CAST "script")))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000955 xmlChar *buffer;
956
957#ifndef XML_USE_BUFFER_CONTENT
958 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
959#else
960 buffer = xmlEncodeEntitiesReentrant(doc,
961 xmlBufferContent(cur->content));
962#endif
963 if (buffer != NULL) {
964 xmlOutputBufferWriteString(buf, (const char *)buffer);
965 xmlFree(buffer);
966 }
967 } else {
968 xmlOutputBufferWriteString(buf, (const char *)cur->content);
969 }
970 }
971 return;
972 }
973 if (cur->type == HTML_COMMENT_NODE) {
974 if (cur->content != NULL) {
975 xmlOutputBufferWriteString(buf, "<!--");
976#ifndef XML_USE_BUFFER_CONTENT
977 xmlOutputBufferWriteString(buf, (const char *)cur->content);
978#else
979 xmlOutputBufferWriteString(buf, (const char *)
980 xmlBufferContent(cur->content));
981#endif
982 xmlOutputBufferWriteString(buf, "-->");
983 }
984 return;
985 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000986 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000987 if (cur->name == NULL)
988 return;
989 xmlOutputBufferWriteString(buf, "<?");
990 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000991 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000992 xmlOutputBufferWriteString(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000993#ifndef XML_USE_BUFFER_CONTENT
994 xmlOutputBufferWriteString(buf, (const char *)cur->content);
995#else
996 xmlOutputBufferWriteString(buf, (const char *)
997 xmlBufferContent(cur->content));
998#endif
Daniel Veillard7533cc82001-04-24 15:52:00 +0000999 }
Daniel Veillard5146f202001-04-25 10:29:44 +00001000 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +00001001 return;
1002 }
Owen Taylor3473f882001-02-23 17:55:21 +00001003 if (cur->type == HTML_ENTITY_REF_NODE) {
1004 xmlOutputBufferWriteString(buf, "&");
1005 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1006 xmlOutputBufferWriteString(buf, ";");
1007 return;
1008 }
1009 if (cur->type == HTML_PRESERVE_NODE) {
1010 if (cur->content != NULL) {
1011#ifndef XML_USE_BUFFER_CONTENT
1012 xmlOutputBufferWriteString(buf, (const char *)cur->content);
1013#else
1014 xmlOutputBufferWriteString(buf, (const char *)
1015 xmlBufferContent(cur->content));
1016#endif
1017 }
1018 return;
1019 }
1020
1021 /*
Daniel Veillard1ed3f882001-04-18 09:45:35 +00001022 * Get specific HTML info for taht node.
Owen Taylor3473f882001-02-23 17:55:21 +00001023 */
1024 info = htmlTagLookup(cur->name);
1025
1026 xmlOutputBufferWriteString(buf, "<");
1027 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1028 if (cur->properties != NULL)
1029 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
1030
1031 if ((info != NULL) && (info->empty)) {
1032 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +00001033 if ((format) && (!info->isinline) && (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001034 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +00001035 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1036 (cur->parent != NULL) &&
1037 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001038 xmlOutputBufferWriteString(buf, "\n");
1039 }
1040 return;
1041 }
1042 if ((cur->content == NULL) && (cur->children == NULL)) {
1043 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +00001044 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
1045 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001046 xmlOutputBufferWriteString(buf, ">");
1047 } else {
1048 xmlOutputBufferWriteString(buf, "></");
1049 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1050 xmlOutputBufferWriteString(buf, ">");
1051 }
Daniel Veillard02bb1702001-06-13 21:11:59 +00001052 if ((format) && (cur->next != NULL) &&
1053 (info != NULL) && (!info->isinline)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001054 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +00001055 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1056 (cur->parent != NULL) &&
1057 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001058 xmlOutputBufferWriteString(buf, "\n");
1059 }
1060 return;
1061 }
1062 xmlOutputBufferWriteString(buf, ">");
1063 if (cur->content != NULL) {
1064 /*
1065 * Uses the OutputBuffer property to automatically convert
1066 * invalids to charrefs
1067 */
1068
1069#ifndef XML_USE_BUFFER_CONTENT
1070 xmlOutputBufferWriteString(buf, (const char *) cur->content);
1071#else
1072 xmlOutputBufferWriteString(buf,
1073 (const char *) xmlBufferContent(cur->content));
1074#endif
1075 }
1076 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +00001077 if ((format) && (info != NULL) && (!info->isinline) &&
1078 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001079 (cur->children->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001080 (cur->children != cur->last) &&
1081 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001082 xmlOutputBufferWriteString(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +00001083 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +00001084 if ((format) && (info != NULL) && (!info->isinline) &&
1085 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001086 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001087 (cur->children != cur->last) &&
1088 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001089 xmlOutputBufferWriteString(buf, "\n");
1090 }
Owen Taylor3473f882001-02-23 17:55:21 +00001091 xmlOutputBufferWriteString(buf, "</");
1092 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1093 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +00001094 if ((format) && (info != NULL) && (!info->isinline) &&
1095 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001096 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001097 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1098 (cur->parent != NULL) &&
1099 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001100 xmlOutputBufferWriteString(buf, "\n");
1101 }
1102}
1103
1104/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001105 * htmlNodeDumpOutput:
1106 * @buf: the HTML buffer output
1107 * @doc: the document
1108 * @cur: the current node
1109 * @encoding: the encoding string
1110 *
1111 * Dump an HTML node, recursive behaviour,children are printed too,
1112 * and formatting returns/spaces are added.
1113 */
1114void
1115htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
1116 xmlNodePtr cur, const char *encoding) {
1117 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
1118}
1119
1120/**
1121 * htmlDocContentDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +00001122 * @buf: the HTML buffer output
1123 * @cur: the document
1124 * @encoding: the encoding string
1125 *
1126 * Dump an HTML document.
1127 */
1128void
Daniel Veillard95d845f2001-06-13 13:48:46 +00001129htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1130 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001131 int type;
1132
1133 /*
1134 * force to output the stuff as HTML, especially for entities
1135 */
1136 type = cur->type;
1137 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard4dd93462001-04-02 15:16:19 +00001138 if (cur->intSubset != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001139 htmlDtdDumpOutput(buf, cur, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001140 }
1141 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +00001142 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001143 }
1144 xmlOutputBufferWriteString(buf, "\n");
1145 cur->type = (xmlElementType) type;
1146}
1147
Daniel Veillard95d845f2001-06-13 13:48:46 +00001148/**
1149 * htmlDocContentDumpOutput:
1150 * @buf: the HTML buffer output
1151 * @cur: the document
1152 * @encoding: the encoding string
1153 *
1154 * Dump an HTML document. Formating return/spaces are added.
1155 */
1156void
1157htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1158 const char *encoding) {
1159 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1160}
1161
Owen Taylor3473f882001-02-23 17:55:21 +00001162/************************************************************************
1163 * *
1164 * Saving functions front-ends *
1165 * *
1166 ************************************************************************/
1167
1168/**
1169 * htmlDocDump:
1170 * @f: the FILE*
1171 * @cur: the document
1172 *
1173 * Dump an HTML document to an open FILE.
1174 *
1175 * returns: the number of byte written or -1 in case of failure.
1176 */
1177int
1178htmlDocDump(FILE *f, xmlDocPtr cur) {
1179 xmlOutputBufferPtr buf;
1180 xmlCharEncodingHandlerPtr handler = NULL;
1181 const char *encoding;
1182 int ret;
1183
1184 if (cur == NULL) {
1185#ifdef DEBUG_TREE
1186 xmlGenericError(xmlGenericErrorContext,
1187 "htmlDocDump : document == NULL\n");
1188#endif
1189 return(-1);
1190 }
1191
1192 encoding = (const char *) htmlGetMetaEncoding(cur);
1193
1194 if (encoding != NULL) {
1195 xmlCharEncoding enc;
1196
1197 enc = xmlParseCharEncoding(encoding);
1198 if (enc != cur->charset) {
1199 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1200 /*
1201 * Not supported yet
1202 */
1203 return(-1);
1204 }
1205
1206 handler = xmlFindCharEncodingHandler(encoding);
1207 if (handler == NULL)
1208 return(-1);
1209 }
1210 }
1211
1212 /*
1213 * Fallback to HTML or ASCII when the encoding is unspecified
1214 */
1215 if (handler == NULL)
1216 handler = xmlFindCharEncodingHandler("HTML");
1217 if (handler == NULL)
1218 handler = xmlFindCharEncodingHandler("ascii");
1219
1220 buf = xmlOutputBufferCreateFile(f, handler);
1221 if (buf == NULL) return(-1);
1222 htmlDocContentDumpOutput(buf, cur, NULL);
1223
1224 ret = xmlOutputBufferClose(buf);
1225 return(ret);
1226}
1227
1228/**
1229 * htmlSaveFile:
1230 * @filename: the filename (or URL)
1231 * @cur: the document
1232 *
1233 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1234 * used.
1235 * returns: the number of byte written or -1 in case of failure.
1236 */
1237int
1238htmlSaveFile(const char *filename, xmlDocPtr cur) {
1239 xmlOutputBufferPtr buf;
1240 xmlCharEncodingHandlerPtr handler = NULL;
1241 const char *encoding;
1242 int ret;
1243
1244 encoding = (const char *) htmlGetMetaEncoding(cur);
1245
1246 if (encoding != NULL) {
1247 xmlCharEncoding enc;
1248
1249 enc = xmlParseCharEncoding(encoding);
1250 if (enc != cur->charset) {
1251 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1252 /*
1253 * Not supported yet
1254 */
1255 return(-1);
1256 }
1257
1258 handler = xmlFindCharEncodingHandler(encoding);
1259 if (handler == NULL)
1260 return(-1);
1261 }
1262 }
1263
1264 /*
1265 * Fallback to HTML or ASCII when the encoding is unspecified
1266 */
1267 if (handler == NULL)
1268 handler = xmlFindCharEncodingHandler("HTML");
1269 if (handler == NULL)
1270 handler = xmlFindCharEncodingHandler("ascii");
1271
1272 /*
1273 * save the content to a temp buffer.
1274 */
1275 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1276 if (buf == NULL) return(0);
1277
1278 htmlDocContentDumpOutput(buf, cur, NULL);
1279
1280 ret = xmlOutputBufferClose(buf);
1281 return(ret);
1282}
1283
1284/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001285 * htmlSaveFileFormat:
Owen Taylor3473f882001-02-23 17:55:21 +00001286 * @filename: the filename
1287 * @cur: the document
Daniel Veillard95d845f2001-06-13 13:48:46 +00001288 * @format: should formatting spaces been added
1289 * @encoding: the document encoding
Owen Taylor3473f882001-02-23 17:55:21 +00001290 *
1291 * Dump an HTML document to a file using a given encoding.
1292 *
1293 * returns: the number of byte written or -1 in case of failure.
1294 */
1295int
Daniel Veillard95d845f2001-06-13 13:48:46 +00001296htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1297 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001298 xmlOutputBufferPtr buf;
1299 xmlCharEncodingHandlerPtr handler = NULL;
1300 int ret;
1301
1302 if (encoding != NULL) {
1303 xmlCharEncoding enc;
1304
1305 enc = xmlParseCharEncoding(encoding);
1306 if (enc != cur->charset) {
1307 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1308 /*
1309 * Not supported yet
1310 */
1311 return(-1);
1312 }
1313
1314 handler = xmlFindCharEncodingHandler(encoding);
1315 if (handler == NULL)
1316 return(-1);
1317 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1318 }
Daniel Veillard4dd93462001-04-02 15:16:19 +00001319 } else {
1320 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
Owen Taylor3473f882001-02-23 17:55:21 +00001321 }
1322
1323 /*
1324 * Fallback to HTML or ASCII when the encoding is unspecified
1325 */
1326 if (handler == NULL)
1327 handler = xmlFindCharEncodingHandler("HTML");
1328 if (handler == NULL)
1329 handler = xmlFindCharEncodingHandler("ascii");
1330
1331 /*
1332 * save the content to a temp buffer.
1333 */
1334 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1335 if (buf == NULL) return(0);
1336
Daniel Veillard95d845f2001-06-13 13:48:46 +00001337 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001338
1339 ret = xmlOutputBufferClose(buf);
1340 return(ret);
1341}
Daniel Veillard95d845f2001-06-13 13:48:46 +00001342
1343/**
1344 * htmlSaveFileEnc:
1345 * @filename: the filename
1346 * @cur: the document
1347 * @encoding: the document encoding
1348 *
1349 * Dump an HTML document to a file using a given encoding
1350 * and formatting returns/spaces are added.
1351 *
1352 * returns: the number of byte written or -1 in case of failure.
1353 */
1354int
1355htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1356 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1357}
1358
Owen Taylor3473f882001-02-23 17:55:21 +00001359#endif /* LIBXML_HTML_ENABLED */