blob: 14c4260b4caa8ebb320f9ae7f0ee350a788dd09b [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * HTMLtree.c : implementation of access functions for an HTML tree.
3 *
4 * See Copyright for the status of this software.
5 *
6 * Daniel.Veillard@w3.org
7 */
8
9
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000011#ifdef LIBXML_HTML_ENABLED
12
Owen Taylor3473f882001-02-23 17:55:21 +000013#ifdef HAVE_CTYPE_H
14#include <ctype.h>
15#endif
16#ifdef HAVE_STDLIB_H
17#include <stdlib.h>
18#endif
19
20#include <libxml/xmlmemory.h>
21#include <libxml/HTMLparser.h>
22#include <libxml/HTMLtree.h>
23#include <libxml/entities.h>
24#include <libxml/valid.h>
25#include <libxml/xmlerror.h>
26#include <libxml/parserInternals.h>
27
28/************************************************************************
29 * *
30 * Getting/Setting encoding meta tags *
31 * *
32 ************************************************************************/
33
34/**
35 * htmlGetMetaEncoding:
36 * @doc: the document
37 *
38 * Encoding definition lookup in the Meta tags
39 *
40 * Returns the current encoding as flagged in the HTML source
41 */
42const xmlChar *
43htmlGetMetaEncoding(htmlDocPtr doc) {
44 htmlNodePtr cur;
45 const xmlChar *content;
46 const xmlChar *encoding;
47
48 if (doc == NULL)
49 return(NULL);
50 cur = doc->children;
51
52 /*
53 * Search the html
54 */
55 while (cur != NULL) {
56 if (cur->name != NULL) {
57 if (xmlStrEqual(cur->name, BAD_CAST"html"))
58 break;
59 if (xmlStrEqual(cur->name, BAD_CAST"head"))
60 goto found_head;
61 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
62 goto found_meta;
63 }
64 cur = cur->next;
65 }
66 if (cur == NULL)
67 return(NULL);
68 cur = cur->children;
69
70 /*
71 * Search the head
72 */
73 while (cur != NULL) {
74 if (cur->name != NULL) {
75 if (xmlStrEqual(cur->name, BAD_CAST"head"))
76 break;
77 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
78 goto found_meta;
79 }
80 cur = cur->next;
81 }
82 if (cur == NULL)
83 return(NULL);
84found_head:
85 cur = cur->children;
86
87 /*
88 * Search the meta elements
89 */
90found_meta:
91 while (cur != NULL) {
92 if (cur->name != NULL) {
93 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
94 xmlAttrPtr attr = cur->properties;
95 int http;
96 const xmlChar *value;
97
98 content = NULL;
99 http = 0;
100 while (attr != NULL) {
101 if ((attr->children != NULL) &&
102 (attr->children->type == XML_TEXT_NODE) &&
103 (attr->children->next == NULL)) {
104#ifndef XML_USE_BUFFER_CONTENT
105 value = attr->children->content;
106#else
107 value = xmlBufferContent(attr->children->content);
108#endif
109 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
110 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
111 http = 1;
112 else if ((value != NULL)
113 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
114 content = value;
115 if ((http != 0) && (content != NULL))
116 goto found_content;
117 }
118 attr = attr->next;
119 }
120 }
121 }
122 cur = cur->next;
123 }
124 return(NULL);
125
126found_content:
127 encoding = xmlStrstr(content, BAD_CAST"charset=");
128 if (encoding == NULL)
129 encoding = xmlStrstr(content, BAD_CAST"Charset=");
130 if (encoding == NULL)
131 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
132 if (encoding != NULL) {
133 encoding += 8;
134 } else {
135 encoding = xmlStrstr(content, BAD_CAST"charset =");
136 if (encoding == NULL)
137 encoding = xmlStrstr(content, BAD_CAST"Charset =");
138 if (encoding == NULL)
139 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
140 if (encoding != NULL)
141 encoding += 9;
142 }
143 if (encoding != NULL) {
144 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
145 }
146 return(encoding);
147}
148
149/**
150 * htmlSetMetaEncoding:
151 * @doc: the document
152 * @encoding: the encoding string
153 *
154 * Sets the current encoding in the Meta tags
155 * NOTE: this will not change the document content encoding, just
156 * the META flag associated.
157 *
158 * Returns 0 in case of success and -1 in case of error
159 */
160int
161htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
162 htmlNodePtr cur, meta;
163 const xmlChar *content;
164 char newcontent[100];
165
166
167 if (doc == NULL)
168 return(-1);
169
170 if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000171 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
172 encoding);
Owen Taylor3473f882001-02-23 17:55:21 +0000173 newcontent[sizeof(newcontent) - 1] = 0;
174 }
175
176 cur = doc->children;
177
178 /*
179 * Search the html
180 */
181 while (cur != NULL) {
182 if (cur->name != NULL) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000183 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
184 break;
185 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
186 goto found_head;
187 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
188 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000189 }
190 cur = cur->next;
191 }
192 if (cur == NULL)
193 return(-1);
194 cur = cur->children;
195
196 /*
197 * Search the head
198 */
199 while (cur != NULL) {
200 if (cur->name != NULL) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000201 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
202 break;
203 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
204 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000205 }
206 cur = cur->next;
207 }
208 if (cur == NULL)
209 return(-1);
210found_head:
211 if (cur->children == NULL) {
212 if (encoding == NULL)
213 return(0);
214 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
215 xmlAddChild(cur, meta);
Owen Taylor3473f882001-02-23 17:55:21 +0000216 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000217 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Owen Taylor3473f882001-02-23 17:55:21 +0000218 return(0);
219 }
220 cur = cur->children;
221
222found_meta:
223 if (encoding != NULL) {
224 /*
225 * Create a new Meta element with the right aatributes
226 */
227
228 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
229 xmlAddPrevSibling(cur, meta);
Owen Taylor3473f882001-02-23 17:55:21 +0000230 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000231 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Owen Taylor3473f882001-02-23 17:55:21 +0000232 }
233
234 /*
235 * Search and destroy all the remaining the meta elements carrying
236 * encoding informations
237 */
238 while (cur != NULL) {
239 if (cur->name != NULL) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000240 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000241 xmlAttrPtr attr = cur->properties;
242 int http;
243 const xmlChar *value;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000244 int same_charset;
Owen Taylor3473f882001-02-23 17:55:21 +0000245
246 content = NULL;
247 http = 0;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000248 same_charset = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000249 while (attr != NULL) {
250 if ((attr->children != NULL) &&
251 (attr->children->type == XML_TEXT_NODE) &&
252 (attr->children->next == NULL)) {
253#ifndef XML_USE_BUFFER_CONTENT
254 value = attr->children->content;
255#else
256 value = xmlBufferContent(attr->children->content);
257#endif
258 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
259 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
260 http = 1;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000261 else
262 {
263 if ((value != NULL) &&
264 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
265 content = value;
266 else
267 if ((!xmlStrcasecmp(attr->name, BAD_CAST"charset"))
268 && (!xmlStrcasecmp(value, encoding)))
269 same_charset = 1;
270 }
271 if ((http != 0) && (content != NULL) && (same_charset != 0))
Owen Taylor3473f882001-02-23 17:55:21 +0000272 break;
273 }
274 attr = attr->next;
275 }
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000276 if ((http != 0) && (content != NULL) && (same_charset != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000277 meta = cur;
278 cur = cur->next;
279 xmlUnlinkNode(meta);
280 xmlFreeNode(meta);
281 continue;
282 }
283
284 }
285 }
286 cur = cur->next;
287 }
288 return(0);
289}
290
291/************************************************************************
292 * *
293 * Dumping HTML tree content to a simple buffer *
294 * *
295 ************************************************************************/
296
297static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000298htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000299
300/**
301 * htmlDtdDump:
302 * @buf: the HTML buffer output
303 * @doc: the document
304 *
305 * Dump the HTML document DTD, if any.
306 */
307static void
308htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
309 xmlDtdPtr cur = doc->intSubset;
310
311 if (cur == NULL) {
312 xmlGenericError(xmlGenericErrorContext,
313 "htmlDtdDump : no internal subset\n");
314 return;
315 }
316 xmlBufferWriteChar(buf, "<!DOCTYPE ");
317 xmlBufferWriteCHAR(buf, cur->name);
318 if (cur->ExternalID != NULL) {
319 xmlBufferWriteChar(buf, " PUBLIC ");
320 xmlBufferWriteQuotedString(buf, cur->ExternalID);
321 if (cur->SystemID != NULL) {
322 xmlBufferWriteChar(buf, " ");
323 xmlBufferWriteQuotedString(buf, cur->SystemID);
324 }
325 } else if (cur->SystemID != NULL) {
326 xmlBufferWriteChar(buf, " SYSTEM ");
327 xmlBufferWriteQuotedString(buf, cur->SystemID);
328 }
329 xmlBufferWriteChar(buf, ">\n");
330}
331
332/**
333 * htmlAttrDump:
334 * @buf: the HTML buffer output
335 * @doc: the document
336 * @cur: the attribute pointer
337 *
338 * Dump an HTML attribute
339 */
340static void
341htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
342 xmlChar *value;
343
Daniel Veillardeca60d02001-06-13 07:45:41 +0000344 /*
345 * TODO: The html output method should not escape a & character
346 * occurring in an attribute value immediately followed by
347 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
348 */
349
Owen Taylor3473f882001-02-23 17:55:21 +0000350 if (cur == NULL) {
351 xmlGenericError(xmlGenericErrorContext,
352 "htmlAttrDump : property == NULL\n");
353 return;
354 }
355 xmlBufferWriteChar(buf, " ");
356 xmlBufferWriteCHAR(buf, cur->name);
357 if (cur->children != NULL) {
358 value = xmlNodeListGetString(doc, cur->children, 0);
359 if (value) {
360 xmlBufferWriteChar(buf, "=");
361 xmlBufferWriteQuotedString(buf, value);
362 xmlFree(value);
363 } else {
364 xmlBufferWriteChar(buf, "=\"\"");
365 }
366 }
367}
368
369/**
370 * htmlAttrListDump:
371 * @buf: the HTML buffer output
372 * @doc: the document
373 * @cur: the first attribute pointer
374 *
375 * Dump a list of HTML attributes
376 */
377static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000378htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, int format) {
379 int i = 0;
380
Owen Taylor3473f882001-02-23 17:55:21 +0000381 if (cur == NULL) {
382 xmlGenericError(xmlGenericErrorContext,
383 "htmlAttrListDump : property == NULL\n");
384 return;
385 }
386 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000387 i++;
388 if ((format) && (i >= 5)) {
389 i = 0;
390 xmlBufferWriteChar(buf, "\n");
391 }
Owen Taylor3473f882001-02-23 17:55:21 +0000392 htmlAttrDump(buf, doc, cur);
393 cur = cur->next;
394 }
395}
396
Daniel Veillard95d845f2001-06-13 13:48:46 +0000397static void
398htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000399
Owen Taylor3473f882001-02-23 17:55:21 +0000400/**
401 * htmlNodeListDump:
402 * @buf: the HTML buffer output
403 * @doc: the document
404 * @cur: the first node
405 *
406 * Dump an HTML node list, recursive behaviour,children are printed too.
407 */
408static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000409htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000410 if (cur == NULL) {
411 xmlGenericError(xmlGenericErrorContext,
412 "htmlNodeListDump : node == NULL\n");
413 return;
414 }
415 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000416 htmlNodeDumpFormat(buf, doc, cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000417 cur = cur->next;
418 }
419}
420
421/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000422 * htmlNodeDumpFormat:
Owen Taylor3473f882001-02-23 17:55:21 +0000423 * @buf: the HTML buffer output
424 * @doc: the document
425 * @cur: the current node
Daniel Veillard95d845f2001-06-13 13:48:46 +0000426 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000427 *
428 * Dump an HTML node, recursive behaviour,children are printed too.
429 */
430void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000431htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
432 int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000433 htmlElemDescPtr info;
434
435 if (cur == NULL) {
436 xmlGenericError(xmlGenericErrorContext,
437 "htmlNodeDump : node == NULL\n");
438 return;
439 }
440 /*
441 * Special cases.
442 */
443 if (cur->type == XML_DTD_NODE)
444 return;
445 if (cur->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000446 htmlDocContentDump(buf, (xmlDocPtr) cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000447 return;
448 }
449 if (cur->type == HTML_TEXT_NODE) {
450 if (cur->content != NULL) {
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000451 if (((cur->name == xmlStringText) ||
452 (cur->name != xmlStringTextNoenc)) &&
453 ((cur->parent == NULL) ||
454 (!xmlStrEqual(cur->parent->name, BAD_CAST "script")))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000455 xmlChar *buffer;
456
457#ifndef XML_USE_BUFFER_CONTENT
458 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
459#else
460 buffer = xmlEncodeEntitiesReentrant(doc,
461 xmlBufferContent(cur->content));
462#endif
463 if (buffer != NULL) {
464 xmlBufferWriteCHAR(buf, buffer);
465 xmlFree(buffer);
466 }
467 } else {
468 xmlBufferWriteCHAR(buf, cur->content);
469 }
470 }
471 return;
472 }
473 if (cur->type == HTML_COMMENT_NODE) {
474 if (cur->content != NULL) {
475 xmlBufferWriteChar(buf, "<!--");
476#ifndef XML_USE_BUFFER_CONTENT
477 xmlBufferWriteCHAR(buf, cur->content);
478#else
479 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
480#endif
481 xmlBufferWriteChar(buf, "-->");
482 }
483 return;
484 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000485 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000486 if (cur->name == NULL)
487 return;
488 xmlBufferWriteChar(buf, "<?");
489 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000490 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000491 xmlBufferWriteChar(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000492#ifndef XML_USE_BUFFER_CONTENT
493 xmlBufferWriteCHAR(buf, cur->content);
494#else
495 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
496#endif
Daniel Veillard7533cc82001-04-24 15:52:00 +0000497 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000498 xmlBufferWriteChar(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000499 return;
500 }
Owen Taylor3473f882001-02-23 17:55:21 +0000501 if (cur->type == HTML_ENTITY_REF_NODE) {
502 xmlBufferWriteChar(buf, "&");
503 xmlBufferWriteCHAR(buf, cur->name);
504 xmlBufferWriteChar(buf, ";");
505 return;
506 }
Daniel Veillard083c2662001-05-08 08:27:14 +0000507 if (cur->type == HTML_PRESERVE_NODE) {
508 if (cur->content != NULL) {
509#ifndef XML_USE_BUFFER_CONTENT
510 xmlBufferWriteCHAR(buf, cur->content);
511#else
512 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
513#endif
514 }
515 return;
516 }
Owen Taylor3473f882001-02-23 17:55:21 +0000517
518 /*
Daniel Veillard083c2662001-05-08 08:27:14 +0000519 * Get specific HTML info for taht node.
Owen Taylor3473f882001-02-23 17:55:21 +0000520 */
521 info = htmlTagLookup(cur->name);
522
523 xmlBufferWriteChar(buf, "<");
524 xmlBufferWriteCHAR(buf, cur->name);
525 if (cur->properties != NULL)
Daniel Veillard95d845f2001-06-13 13:48:46 +0000526 htmlAttrListDump(buf, doc, cur->properties, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000527
528 if ((info != NULL) && (info->empty)) {
529 xmlBufferWriteChar(buf, ">");
530 if (cur->next != NULL) {
531 if ((cur->next->type != HTML_TEXT_NODE) &&
532 (cur->next->type != HTML_ENTITY_REF_NODE))
533 xmlBufferWriteChar(buf, "\n");
534 }
535 return;
536 }
537 if ((cur->content == NULL) && (cur->children == NULL)) {
Daniel Veillard083c2662001-05-08 08:27:14 +0000538 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +0000539 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
540 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000541 xmlBufferWriteChar(buf, ">");
Daniel Veillard083c2662001-05-08 08:27:14 +0000542 } else {
Owen Taylor3473f882001-02-23 17:55:21 +0000543 xmlBufferWriteChar(buf, "></");
544 xmlBufferWriteCHAR(buf, cur->name);
545 xmlBufferWriteChar(buf, ">");
546 }
547 if (cur->next != NULL) {
548 if ((cur->next->type != HTML_TEXT_NODE) &&
549 (cur->next->type != HTML_ENTITY_REF_NODE))
550 xmlBufferWriteChar(buf, "\n");
551 }
552 return;
553 }
554 xmlBufferWriteChar(buf, ">");
555 if (cur->content != NULL) {
556 xmlChar *buffer;
557
558#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard083c2662001-05-08 08:27:14 +0000559 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000560#else
Daniel Veillard083c2662001-05-08 08:27:14 +0000561 buffer = xmlEncodeEntitiesReentrant(doc,
562 xmlBufferContent(cur->content));
Owen Taylor3473f882001-02-23 17:55:21 +0000563#endif
564 if (buffer != NULL) {
565 xmlBufferWriteCHAR(buf, buffer);
566 xmlFree(buffer);
567 }
568 }
569 if (cur->children != NULL) {
570 if ((cur->children->type != HTML_TEXT_NODE) &&
571 (cur->children->type != HTML_ENTITY_REF_NODE) &&
572 (cur->children != cur->last))
573 xmlBufferWriteChar(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +0000574 htmlNodeListDump(buf, doc, cur->children, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000575 if ((cur->last->type != HTML_TEXT_NODE) &&
576 (cur->last->type != HTML_ENTITY_REF_NODE) &&
577 (cur->children != cur->last))
578 xmlBufferWriteChar(buf, "\n");
579 }
Owen Taylor3473f882001-02-23 17:55:21 +0000580 xmlBufferWriteChar(buf, "</");
581 xmlBufferWriteCHAR(buf, cur->name);
582 xmlBufferWriteChar(buf, ">");
Owen Taylor3473f882001-02-23 17:55:21 +0000583 if (cur->next != NULL) {
584 if ((cur->next->type != HTML_TEXT_NODE) &&
585 (cur->next->type != HTML_ENTITY_REF_NODE))
586 xmlBufferWriteChar(buf, "\n");
587 }
588}
589
590/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000591 * htmlNodeDump:
592 * @buf: the HTML buffer output
593 * @doc: the document
594 * @cur: the current node
595 *
596 * Dump an HTML node, recursive behaviour,children are printed too,
597 * and formatting returns are added.
598 */
599void
600htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
601 htmlNodeDumpFormat(buf, doc, cur, 1);
602}
603
604/**
605 * htmlNodeDumpFileFormat:
606 * @out: the FILE pointer
607 * @doc: the document
608 * @cur: the current node
609 * @encoding: the document encoding
610 * @format: should formatting spaces been added
611 *
612 * Dump an HTML node, recursive behaviour,children are printed too.
613 *
614 * TODO: handle the encoding not used yet
615 */
616void
617htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc, xmlNodePtr cur,
618 const char *encoding ATTRIBUTE_UNUSED, int format) {
619 xmlBufferPtr buf;
620
621 buf = xmlBufferCreate();
622 if (buf == NULL) return;
623 htmlNodeDumpFormat(buf, doc, cur, format);
624 xmlBufferDump(out, buf);
625 xmlBufferFree(buf);
626}
627
628/**
Owen Taylor3473f882001-02-23 17:55:21 +0000629 * htmlNodeDumpFile:
630 * @out: the FILE pointer
631 * @doc: the document
632 * @cur: the current node
633 *
Daniel Veillard95d845f2001-06-13 13:48:46 +0000634 * Dump an HTML node, recursive behaviour,children are printed too,
635 * and formatting returns are added.
Owen Taylor3473f882001-02-23 17:55:21 +0000636 */
637void
638htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000639 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000640}
641
642/**
643 * htmlDocContentDump:
644 * @buf: the HTML buffer output
645 * @cur: the document
646 *
647 * Dump an HTML document.
648 */
649static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000650htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000651 int type;
652
653 /*
654 * force to output the stuff as HTML, especially for entities
655 */
656 type = cur->type;
657 cur->type = XML_HTML_DOCUMENT_NODE;
658 if (cur->intSubset != NULL)
659 htmlDtdDump(buf, cur);
660 else {
661 /* Default to HTML-4.0 transitionnal @@@@ */
662 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
663
664 }
665 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000666 htmlNodeListDump(buf, cur, cur->children, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000667 }
668 xmlBufferWriteChar(buf, "\n");
669 cur->type = (xmlElementType) type;
670}
671
672/**
673 * htmlDocDumpMemory:
674 * @cur: the document
675 * @mem: OUT: the memory pointer
Daniel Veillard2d703722001-05-30 18:32:34 +0000676 * @size: OUT: the memory length
Owen Taylor3473f882001-02-23 17:55:21 +0000677 *
678 * Dump an HTML document in memory and return the xmlChar * and it's size.
679 * It's up to the caller to free the memory.
680 */
681void
682htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard2d703722001-05-30 18:32:34 +0000683 xmlOutputBufferPtr buf;
684 xmlCharEncodingHandlerPtr handler = NULL;
685 const char *encoding;
Owen Taylor3473f882001-02-23 17:55:21 +0000686
687 if (cur == NULL) {
688#ifdef DEBUG_TREE
689 xmlGenericError(xmlGenericErrorContext,
Daniel Veillard2d703722001-05-30 18:32:34 +0000690 "htmlDocDumpMemory : document == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000691#endif
692 *mem = NULL;
693 *size = 0;
694 return;
695 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000696
697 encoding = (const char *) htmlGetMetaEncoding(cur);
698
699 if (encoding != NULL) {
700 xmlCharEncoding enc;
701
702 enc = xmlParseCharEncoding(encoding);
703 if (enc != cur->charset) {
704 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
705 /*
706 * Not supported yet
707 */
708 *mem = NULL;
709 *size = 0;
710 return;
711 }
712
713 handler = xmlFindCharEncodingHandler(encoding);
714 if (handler == NULL) {
715 *mem = NULL;
716 *size = 0;
717 return;
718 }
719 }
720 }
721
722 /*
723 * Fallback to HTML or ASCII when the encoding is unspecified
724 */
725 if (handler == NULL)
726 handler = xmlFindCharEncodingHandler("HTML");
727 if (handler == NULL)
728 handler = xmlFindCharEncodingHandler("ascii");
729
730 buf = xmlAllocOutputBuffer(handler);
Owen Taylor3473f882001-02-23 17:55:21 +0000731 if (buf == NULL) {
732 *mem = NULL;
733 *size = 0;
734 return;
735 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000736
737 htmlDocContentDumpOutput(buf, cur, NULL);
738 xmlOutputBufferFlush(buf);
739 if (buf->conv != NULL) {
740 *size = buf->conv->use;
741 *mem = xmlStrndup(buf->conv->content, *size);
742 } else {
743 *size = buf->buffer->use;
744 *mem = xmlStrndup(buf->buffer->content, *size);
745 }
746 (void)xmlOutputBufferClose(buf);
Owen Taylor3473f882001-02-23 17:55:21 +0000747}
748
749
750/************************************************************************
751 * *
752 * Dumping HTML tree content to an I/O output buffer *
753 * *
754 ************************************************************************/
755
Daniel Veillard95d845f2001-06-13 13:48:46 +0000756void
757htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
758 const char *encoding, int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000759/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000760 * htmlDtdDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000761 * @buf: the HTML buffer output
762 * @doc: the document
763 * @encoding: the encoding string
764 *
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000765 * TODO: check whether encoding is needed
766 *
Owen Taylor3473f882001-02-23 17:55:21 +0000767 * Dump the HTML document DTD, if any.
768 */
769static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000770htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000771 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000772 xmlDtdPtr cur = doc->intSubset;
773
774 if (cur == NULL) {
775 xmlGenericError(xmlGenericErrorContext,
776 "htmlDtdDump : no internal subset\n");
777 return;
778 }
779 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
780 xmlOutputBufferWriteString(buf, (const char *)cur->name);
781 if (cur->ExternalID != NULL) {
782 xmlOutputBufferWriteString(buf, " PUBLIC ");
783 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
784 if (cur->SystemID != NULL) {
785 xmlOutputBufferWriteString(buf, " ");
786 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
787 }
788 } else if (cur->SystemID != NULL) {
789 xmlOutputBufferWriteString(buf, " SYSTEM ");
790 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
791 }
792 xmlOutputBufferWriteString(buf, ">\n");
793}
794
795/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000796 * htmlAttrDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000797 * @buf: the HTML buffer output
798 * @doc: the document
799 * @cur: the attribute pointer
800 * @encoding: the encoding string
801 *
802 * Dump an HTML attribute
803 */
804static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000805htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000806 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000807 xmlChar *value;
808
Daniel Veillardeca60d02001-06-13 07:45:41 +0000809 /*
810 * TODO: The html output method should not escape a & character
811 * occurring in an attribute value immediately followed by
812 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
813 */
814
Owen Taylor3473f882001-02-23 17:55:21 +0000815 if (cur == NULL) {
816 xmlGenericError(xmlGenericErrorContext,
817 "htmlAttrDump : property == NULL\n");
818 return;
819 }
820 xmlOutputBufferWriteString(buf, " ");
821 xmlOutputBufferWriteString(buf, (const char *)cur->name);
822 if (cur->children != NULL) {
823 value = xmlNodeListGetString(doc, cur->children, 0);
824 if (value) {
825 xmlOutputBufferWriteString(buf, "=");
826 xmlBufferWriteQuotedString(buf->buffer, value);
827 xmlFree(value);
828 } else {
829 xmlOutputBufferWriteString(buf, "=\"\"");
830 }
831 }
832}
833
834/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000835 * htmlAttrListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000836 * @buf: the HTML buffer output
837 * @doc: the document
838 * @cur: the first attribute pointer
839 * @encoding: the encoding string
840 *
841 * Dump a list of HTML attributes
842 */
843static void
844htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
845 if (cur == NULL) {
846 xmlGenericError(xmlGenericErrorContext,
847 "htmlAttrListDump : property == NULL\n");
848 return;
849 }
850 while (cur != NULL) {
851 htmlAttrDumpOutput(buf, doc, cur, encoding);
852 cur = cur->next;
853 }
854}
855
856
857void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
858 xmlNodePtr cur, const char *encoding);
859
Daniel Veillard95d845f2001-06-13 13:48:46 +0000860void htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
861 xmlNodePtr cur, const char *encoding, int format);
862
Owen Taylor3473f882001-02-23 17:55:21 +0000863/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000864 * htmlNodeListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000865 * @buf: the HTML buffer output
866 * @doc: the document
867 * @cur: the first node
868 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000869 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000870 *
871 * Dump an HTML node list, recursive behaviour,children are printed too.
872 */
873static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000874htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
875 xmlNodePtr cur, const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000876 if (cur == NULL) {
877 xmlGenericError(xmlGenericErrorContext,
878 "htmlNodeListDump : node == NULL\n");
879 return;
880 }
881 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000882 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000883 cur = cur->next;
884 }
885}
886
887/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000888 * htmlNodeDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000889 * @buf: the HTML buffer output
890 * @doc: the document
891 * @cur: the current node
892 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000893 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000894 *
895 * Dump an HTML node, recursive behaviour,children are printed too.
896 */
897void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000898htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
899 xmlNodePtr cur, const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000900 htmlElemDescPtr info;
901
902 if (cur == NULL) {
903 xmlGenericError(xmlGenericErrorContext,
904 "htmlNodeDump : node == NULL\n");
905 return;
906 }
907 /*
908 * Special cases.
909 */
910 if (cur->type == XML_DTD_NODE)
911 return;
912 if (cur->type == XML_HTML_DOCUMENT_NODE) {
913 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
914 return;
915 }
916 if (cur->type == HTML_TEXT_NODE) {
917 if (cur->content != NULL) {
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000918 if (((cur->name == xmlStringText) ||
919 (cur->name != xmlStringTextNoenc)) &&
920 ((cur->parent == NULL) ||
921 (!xmlStrEqual(cur->parent->name, BAD_CAST "script")))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000922 xmlChar *buffer;
923
924#ifndef XML_USE_BUFFER_CONTENT
925 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
926#else
927 buffer = xmlEncodeEntitiesReentrant(doc,
928 xmlBufferContent(cur->content));
929#endif
930 if (buffer != NULL) {
931 xmlOutputBufferWriteString(buf, (const char *)buffer);
932 xmlFree(buffer);
933 }
934 } else {
935 xmlOutputBufferWriteString(buf, (const char *)cur->content);
936 }
937 }
938 return;
939 }
940 if (cur->type == HTML_COMMENT_NODE) {
941 if (cur->content != NULL) {
942 xmlOutputBufferWriteString(buf, "<!--");
943#ifndef XML_USE_BUFFER_CONTENT
944 xmlOutputBufferWriteString(buf, (const char *)cur->content);
945#else
946 xmlOutputBufferWriteString(buf, (const char *)
947 xmlBufferContent(cur->content));
948#endif
949 xmlOutputBufferWriteString(buf, "-->");
950 }
951 return;
952 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000953 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000954 if (cur->name == NULL)
955 return;
956 xmlOutputBufferWriteString(buf, "<?");
957 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000958 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000959 xmlOutputBufferWriteString(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000960#ifndef XML_USE_BUFFER_CONTENT
961 xmlOutputBufferWriteString(buf, (const char *)cur->content);
962#else
963 xmlOutputBufferWriteString(buf, (const char *)
964 xmlBufferContent(cur->content));
965#endif
Daniel Veillard7533cc82001-04-24 15:52:00 +0000966 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000967 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000968 return;
969 }
Owen Taylor3473f882001-02-23 17:55:21 +0000970 if (cur->type == HTML_ENTITY_REF_NODE) {
971 xmlOutputBufferWriteString(buf, "&");
972 xmlOutputBufferWriteString(buf, (const char *)cur->name);
973 xmlOutputBufferWriteString(buf, ";");
974 return;
975 }
976 if (cur->type == HTML_PRESERVE_NODE) {
977 if (cur->content != NULL) {
978#ifndef XML_USE_BUFFER_CONTENT
979 xmlOutputBufferWriteString(buf, (const char *)cur->content);
980#else
981 xmlOutputBufferWriteString(buf, (const char *)
982 xmlBufferContent(cur->content));
983#endif
984 }
985 return;
986 }
987
988 /*
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000989 * Get specific HTML info for taht node.
Owen Taylor3473f882001-02-23 17:55:21 +0000990 */
991 info = htmlTagLookup(cur->name);
992
993 xmlOutputBufferWriteString(buf, "<");
994 xmlOutputBufferWriteString(buf, (const char *)cur->name);
995 if (cur->properties != NULL)
996 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
997
998 if ((info != NULL) && (info->empty)) {
999 xmlOutputBufferWriteString(buf, ">");
1000 if (cur->next != NULL) {
1001 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +00001002 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1003 (cur->parent != NULL) &&
1004 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001005 xmlOutputBufferWriteString(buf, "\n");
1006 }
1007 return;
1008 }
1009 if ((cur->content == NULL) && (cur->children == NULL)) {
1010 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +00001011 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
1012 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001013 xmlOutputBufferWriteString(buf, ">");
1014 } else {
1015 xmlOutputBufferWriteString(buf, "></");
1016 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1017 xmlOutputBufferWriteString(buf, ">");
1018 }
1019 if (cur->next != NULL) {
1020 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +00001021 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1022 (cur->parent != NULL) &&
1023 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001024 xmlOutputBufferWriteString(buf, "\n");
1025 }
1026 return;
1027 }
1028 xmlOutputBufferWriteString(buf, ">");
1029 if (cur->content != NULL) {
1030 /*
1031 * Uses the OutputBuffer property to automatically convert
1032 * invalids to charrefs
1033 */
1034
1035#ifndef XML_USE_BUFFER_CONTENT
1036 xmlOutputBufferWriteString(buf, (const char *) cur->content);
1037#else
1038 xmlOutputBufferWriteString(buf,
1039 (const char *) xmlBufferContent(cur->content));
1040#endif
1041 }
1042 if (cur->children != NULL) {
1043 if ((cur->children->type != HTML_TEXT_NODE) &&
1044 (cur->children->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001045 (cur->children != cur->last) &&
1046 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001047 xmlOutputBufferWriteString(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +00001048 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001049 if ((cur->last->type != HTML_TEXT_NODE) &&
1050 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001051 (cur->children != cur->last) &&
1052 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001053 xmlOutputBufferWriteString(buf, "\n");
1054 }
Owen Taylor3473f882001-02-23 17:55:21 +00001055 xmlOutputBufferWriteString(buf, "</");
1056 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1057 xmlOutputBufferWriteString(buf, ">");
Owen Taylor3473f882001-02-23 17:55:21 +00001058 if (cur->next != NULL) {
1059 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001060 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1061 (cur->parent != NULL) &&
1062 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001063 xmlOutputBufferWriteString(buf, "\n");
1064 }
1065}
1066
1067/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001068 * htmlNodeDumpOutput:
1069 * @buf: the HTML buffer output
1070 * @doc: the document
1071 * @cur: the current node
1072 * @encoding: the encoding string
1073 *
1074 * Dump an HTML node, recursive behaviour,children are printed too,
1075 * and formatting returns/spaces are added.
1076 */
1077void
1078htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
1079 xmlNodePtr cur, const char *encoding) {
1080 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
1081}
1082
1083/**
1084 * htmlDocContentDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +00001085 * @buf: the HTML buffer output
1086 * @cur: the document
1087 * @encoding: the encoding string
1088 *
1089 * Dump an HTML document.
1090 */
1091void
Daniel Veillard95d845f2001-06-13 13:48:46 +00001092htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1093 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001094 int type;
1095
1096 /*
1097 * force to output the stuff as HTML, especially for entities
1098 */
1099 type = cur->type;
1100 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard4dd93462001-04-02 15:16:19 +00001101 if (cur->intSubset != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001102 htmlDtdDumpOutput(buf, cur, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001103 }
1104 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +00001105 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001106 }
1107 xmlOutputBufferWriteString(buf, "\n");
1108 cur->type = (xmlElementType) type;
1109}
1110
Daniel Veillard95d845f2001-06-13 13:48:46 +00001111/**
1112 * htmlDocContentDumpOutput:
1113 * @buf: the HTML buffer output
1114 * @cur: the document
1115 * @encoding: the encoding string
1116 *
1117 * Dump an HTML document. Formating return/spaces are added.
1118 */
1119void
1120htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1121 const char *encoding) {
1122 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1123}
1124
Owen Taylor3473f882001-02-23 17:55:21 +00001125/************************************************************************
1126 * *
1127 * Saving functions front-ends *
1128 * *
1129 ************************************************************************/
1130
1131/**
1132 * htmlDocDump:
1133 * @f: the FILE*
1134 * @cur: the document
1135 *
1136 * Dump an HTML document to an open FILE.
1137 *
1138 * returns: the number of byte written or -1 in case of failure.
1139 */
1140int
1141htmlDocDump(FILE *f, xmlDocPtr cur) {
1142 xmlOutputBufferPtr buf;
1143 xmlCharEncodingHandlerPtr handler = NULL;
1144 const char *encoding;
1145 int ret;
1146
1147 if (cur == NULL) {
1148#ifdef DEBUG_TREE
1149 xmlGenericError(xmlGenericErrorContext,
1150 "htmlDocDump : document == NULL\n");
1151#endif
1152 return(-1);
1153 }
1154
1155 encoding = (const char *) htmlGetMetaEncoding(cur);
1156
1157 if (encoding != NULL) {
1158 xmlCharEncoding enc;
1159
1160 enc = xmlParseCharEncoding(encoding);
1161 if (enc != cur->charset) {
1162 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1163 /*
1164 * Not supported yet
1165 */
1166 return(-1);
1167 }
1168
1169 handler = xmlFindCharEncodingHandler(encoding);
1170 if (handler == NULL)
1171 return(-1);
1172 }
1173 }
1174
1175 /*
1176 * Fallback to HTML or ASCII when the encoding is unspecified
1177 */
1178 if (handler == NULL)
1179 handler = xmlFindCharEncodingHandler("HTML");
1180 if (handler == NULL)
1181 handler = xmlFindCharEncodingHandler("ascii");
1182
1183 buf = xmlOutputBufferCreateFile(f, handler);
1184 if (buf == NULL) return(-1);
1185 htmlDocContentDumpOutput(buf, cur, NULL);
1186
1187 ret = xmlOutputBufferClose(buf);
1188 return(ret);
1189}
1190
1191/**
1192 * htmlSaveFile:
1193 * @filename: the filename (or URL)
1194 * @cur: the document
1195 *
1196 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1197 * used.
1198 * returns: the number of byte written or -1 in case of failure.
1199 */
1200int
1201htmlSaveFile(const char *filename, xmlDocPtr cur) {
1202 xmlOutputBufferPtr buf;
1203 xmlCharEncodingHandlerPtr handler = NULL;
1204 const char *encoding;
1205 int ret;
1206
1207 encoding = (const char *) htmlGetMetaEncoding(cur);
1208
1209 if (encoding != NULL) {
1210 xmlCharEncoding enc;
1211
1212 enc = xmlParseCharEncoding(encoding);
1213 if (enc != cur->charset) {
1214 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1215 /*
1216 * Not supported yet
1217 */
1218 return(-1);
1219 }
1220
1221 handler = xmlFindCharEncodingHandler(encoding);
1222 if (handler == NULL)
1223 return(-1);
1224 }
1225 }
1226
1227 /*
1228 * Fallback to HTML or ASCII when the encoding is unspecified
1229 */
1230 if (handler == NULL)
1231 handler = xmlFindCharEncodingHandler("HTML");
1232 if (handler == NULL)
1233 handler = xmlFindCharEncodingHandler("ascii");
1234
1235 /*
1236 * save the content to a temp buffer.
1237 */
1238 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1239 if (buf == NULL) return(0);
1240
1241 htmlDocContentDumpOutput(buf, cur, NULL);
1242
1243 ret = xmlOutputBufferClose(buf);
1244 return(ret);
1245}
1246
1247/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001248 * htmlSaveFileFormat:
Owen Taylor3473f882001-02-23 17:55:21 +00001249 * @filename: the filename
1250 * @cur: the document
Daniel Veillard95d845f2001-06-13 13:48:46 +00001251 * @format: should formatting spaces been added
1252 * @encoding: the document encoding
Owen Taylor3473f882001-02-23 17:55:21 +00001253 *
1254 * Dump an HTML document to a file using a given encoding.
1255 *
1256 * returns: the number of byte written or -1 in case of failure.
1257 */
1258int
Daniel Veillard95d845f2001-06-13 13:48:46 +00001259htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1260 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001261 xmlOutputBufferPtr buf;
1262 xmlCharEncodingHandlerPtr handler = NULL;
1263 int ret;
1264
1265 if (encoding != NULL) {
1266 xmlCharEncoding enc;
1267
1268 enc = xmlParseCharEncoding(encoding);
1269 if (enc != cur->charset) {
1270 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1271 /*
1272 * Not supported yet
1273 */
1274 return(-1);
1275 }
1276
1277 handler = xmlFindCharEncodingHandler(encoding);
1278 if (handler == NULL)
1279 return(-1);
1280 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1281 }
Daniel Veillard4dd93462001-04-02 15:16:19 +00001282 } else {
1283 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
Owen Taylor3473f882001-02-23 17:55:21 +00001284 }
1285
1286 /*
1287 * Fallback to HTML or ASCII when the encoding is unspecified
1288 */
1289 if (handler == NULL)
1290 handler = xmlFindCharEncodingHandler("HTML");
1291 if (handler == NULL)
1292 handler = xmlFindCharEncodingHandler("ascii");
1293
1294 /*
1295 * save the content to a temp buffer.
1296 */
1297 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1298 if (buf == NULL) return(0);
1299
Daniel Veillard95d845f2001-06-13 13:48:46 +00001300 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001301
1302 ret = xmlOutputBufferClose(buf);
1303 return(ret);
1304}
Daniel Veillard95d845f2001-06-13 13:48:46 +00001305
1306/**
1307 * htmlSaveFileEnc:
1308 * @filename: the filename
1309 * @cur: the document
1310 * @encoding: the document encoding
1311 *
1312 * Dump an HTML document to a file using a given encoding
1313 * and formatting returns/spaces are added.
1314 *
1315 * returns: the number of byte written or -1 in case of failure.
1316 */
1317int
1318htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1319 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1320}
1321
Owen Taylor3473f882001-02-23 17:55:21 +00001322#endif /* LIBXML_HTML_ENABLED */