blob: 8c6354a50b1273f46154215c132e223beb7c8fd4 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
3 *
4 * See Copyright for the status of this software.
5 *
6 * Daniel.Veillard@w3.org
7 */
8
9
10#ifdef WIN32
11#include "win32config.h"
12#else
13#include "config.h"
14#endif
15
16#include <libxml/xmlversion.h>
17#ifdef LIBXML_HTML_ENABLED
18
19#include <stdio.h>
20#include <string.h> /* for memset() only ! */
21
22#ifdef HAVE_CTYPE_H
23#include <ctype.h>
24#endif
25#ifdef HAVE_STDLIB_H
26#include <stdlib.h>
27#endif
28
29#include <libxml/xmlmemory.h>
30#include <libxml/HTMLparser.h>
31#include <libxml/HTMLtree.h>
32#include <libxml/entities.h>
33#include <libxml/valid.h>
34#include <libxml/xmlerror.h>
35#include <libxml/parserInternals.h>
36
37/************************************************************************
38 * *
39 * Getting/Setting encoding meta tags *
40 * *
41 ************************************************************************/
42
43/**
44 * htmlGetMetaEncoding:
45 * @doc: the document
46 *
47 * Encoding definition lookup in the Meta tags
48 *
49 * Returns the current encoding as flagged in the HTML source
50 */
51const xmlChar *
52htmlGetMetaEncoding(htmlDocPtr doc) {
53 htmlNodePtr cur;
54 const xmlChar *content;
55 const xmlChar *encoding;
56
57 if (doc == NULL)
58 return(NULL);
59 cur = doc->children;
60
61 /*
62 * Search the html
63 */
64 while (cur != NULL) {
65 if (cur->name != NULL) {
66 if (xmlStrEqual(cur->name, BAD_CAST"html"))
67 break;
68 if (xmlStrEqual(cur->name, BAD_CAST"head"))
69 goto found_head;
70 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
71 goto found_meta;
72 }
73 cur = cur->next;
74 }
75 if (cur == NULL)
76 return(NULL);
77 cur = cur->children;
78
79 /*
80 * Search the head
81 */
82 while (cur != NULL) {
83 if (cur->name != NULL) {
84 if (xmlStrEqual(cur->name, BAD_CAST"head"))
85 break;
86 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
87 goto found_meta;
88 }
89 cur = cur->next;
90 }
91 if (cur == NULL)
92 return(NULL);
93found_head:
94 cur = cur->children;
95
96 /*
97 * Search the meta elements
98 */
99found_meta:
100 while (cur != NULL) {
101 if (cur->name != NULL) {
102 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
103 xmlAttrPtr attr = cur->properties;
104 int http;
105 const xmlChar *value;
106
107 content = NULL;
108 http = 0;
109 while (attr != NULL) {
110 if ((attr->children != NULL) &&
111 (attr->children->type == XML_TEXT_NODE) &&
112 (attr->children->next == NULL)) {
113#ifndef XML_USE_BUFFER_CONTENT
114 value = attr->children->content;
115#else
116 value = xmlBufferContent(attr->children->content);
117#endif
118 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
119 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
120 http = 1;
121 else if ((value != NULL)
122 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
123 content = value;
124 if ((http != 0) && (content != NULL))
125 goto found_content;
126 }
127 attr = attr->next;
128 }
129 }
130 }
131 cur = cur->next;
132 }
133 return(NULL);
134
135found_content:
136 encoding = xmlStrstr(content, BAD_CAST"charset=");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"Charset=");
139 if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
141 if (encoding != NULL) {
142 encoding += 8;
143 } else {
144 encoding = xmlStrstr(content, BAD_CAST"charset =");
145 if (encoding == NULL)
146 encoding = xmlStrstr(content, BAD_CAST"Charset =");
147 if (encoding == NULL)
148 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
149 if (encoding != NULL)
150 encoding += 9;
151 }
152 if (encoding != NULL) {
153 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
154 }
155 return(encoding);
156}
157
158/**
159 * htmlSetMetaEncoding:
160 * @doc: the document
161 * @encoding: the encoding string
162 *
163 * Sets the current encoding in the Meta tags
164 * NOTE: this will not change the document content encoding, just
165 * the META flag associated.
166 *
167 * Returns 0 in case of success and -1 in case of error
168 */
169int
170htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
171 htmlNodePtr cur, meta;
172 const xmlChar *content;
173 char newcontent[100];
174
175
176 if (doc == NULL)
177 return(-1);
178
179 if (encoding != NULL) {
180#ifdef HAVE_SNPRINTF
181 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
182 encoding);
183#else
184 sprintf(newcontent, "text/html; charset=%s", encoding);
185#endif
186 newcontent[sizeof(newcontent) - 1] = 0;
187 }
188
189 cur = doc->children;
190
191 /*
192 * Search the html
193 */
194 while (cur != NULL) {
195 if (cur->name != NULL) {
196 if (xmlStrEqual(cur->name, BAD_CAST"html"))
197 break;
198 if (xmlStrEqual(cur->name, BAD_CAST"body")) {
199 if (encoding == NULL)
200 return(0);
201 meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL);
202 xmlAddPrevSibling(cur, meta);
203 cur = meta;
204 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
205 xmlAddChild(cur, meta);
206 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
207 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
208 return(0);
209 }
210 if (xmlStrEqual(cur->name, BAD_CAST"head"))
211 goto found_head;
212 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
213 goto found_meta;
214 }
215 cur = cur->next;
216 }
217 if (cur == NULL)
218 return(-1);
219 cur = cur->children;
220
221 /*
222 * Search the head
223 */
224 while (cur != NULL) {
225 if (cur->name != NULL) {
226 if (xmlStrEqual(cur->name, BAD_CAST"head"))
227 break;
228 if (xmlStrEqual(cur->name, BAD_CAST"body")) {
229 if (encoding == NULL)
230 return(0);
231 meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL);
232 xmlAddPrevSibling(cur, meta);
233 cur = meta;
234 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
235 xmlAddChild(cur, meta);
236 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
237 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
238 return(0);
239 }
240 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
241 goto found_meta;
242 }
243 cur = cur->next;
244 }
245 if (cur == NULL)
246 return(-1);
247found_head:
248 if (cur->children == NULL) {
249 if (encoding == NULL)
250 return(0);
251 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
252 xmlAddChild(cur, meta);
253 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
254 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
255 return(0);
256 }
257 cur = cur->children;
258
259found_meta:
260 if (encoding != NULL) {
261 /*
262 * Create a new Meta element with the right aatributes
263 */
264
265 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
266 xmlAddPrevSibling(cur, meta);
267 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
268 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
269 }
270
271 /*
272 * Search and destroy all the remaining the meta elements carrying
273 * encoding informations
274 */
275 while (cur != NULL) {
276 if (cur->name != NULL) {
277 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
278 xmlAttrPtr attr = cur->properties;
279 int http;
280 const xmlChar *value;
281
282 content = NULL;
283 http = 0;
284 while (attr != NULL) {
285 if ((attr->children != NULL) &&
286 (attr->children->type == XML_TEXT_NODE) &&
287 (attr->children->next == NULL)) {
288#ifndef XML_USE_BUFFER_CONTENT
289 value = attr->children->content;
290#else
291 value = xmlBufferContent(attr->children->content);
292#endif
293 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
294 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
295 http = 1;
296 else if ((value != NULL)
297 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
298 content = value;
299 if ((http != 0) && (content != NULL))
300 break;
301 }
302 attr = attr->next;
303 }
304 if ((http != 0) && (content != NULL)) {
305 meta = cur;
306 cur = cur->next;
307 xmlUnlinkNode(meta);
308 xmlFreeNode(meta);
309 continue;
310 }
311
312 }
313 }
314 cur = cur->next;
315 }
316 return(0);
317}
318
319/************************************************************************
320 * *
321 * Dumping HTML tree content to a simple buffer *
322 * *
323 ************************************************************************/
324
/*
 * Forward declaration: htmlNodeDump() calls htmlDocContentDump() when it
 * is handed a document node, but the definition only comes further down.
 */
static void
htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
327
328/**
329 * htmlDtdDump:
330 * @buf: the HTML buffer output
331 * @doc: the document
332 *
333 * Dump the HTML document DTD, if any.
334 */
335static void
336htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
337 xmlDtdPtr cur = doc->intSubset;
338
339 if (cur == NULL) {
340 xmlGenericError(xmlGenericErrorContext,
341 "htmlDtdDump : no internal subset\n");
342 return;
343 }
344 xmlBufferWriteChar(buf, "<!DOCTYPE ");
345 xmlBufferWriteCHAR(buf, cur->name);
346 if (cur->ExternalID != NULL) {
347 xmlBufferWriteChar(buf, " PUBLIC ");
348 xmlBufferWriteQuotedString(buf, cur->ExternalID);
349 if (cur->SystemID != NULL) {
350 xmlBufferWriteChar(buf, " ");
351 xmlBufferWriteQuotedString(buf, cur->SystemID);
352 }
353 } else if (cur->SystemID != NULL) {
354 xmlBufferWriteChar(buf, " SYSTEM ");
355 xmlBufferWriteQuotedString(buf, cur->SystemID);
356 }
357 xmlBufferWriteChar(buf, ">\n");
358}
359
360/**
361 * htmlAttrDump:
362 * @buf: the HTML buffer output
363 * @doc: the document
364 * @cur: the attribute pointer
365 *
366 * Dump an HTML attribute
367 */
368static void
369htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
370 xmlChar *value;
371
372 if (cur == NULL) {
373 xmlGenericError(xmlGenericErrorContext,
374 "htmlAttrDump : property == NULL\n");
375 return;
376 }
377 xmlBufferWriteChar(buf, " ");
378 xmlBufferWriteCHAR(buf, cur->name);
379 if (cur->children != NULL) {
380 value = xmlNodeListGetString(doc, cur->children, 0);
381 if (value) {
382 xmlBufferWriteChar(buf, "=");
383 xmlBufferWriteQuotedString(buf, value);
384 xmlFree(value);
385 } else {
386 xmlBufferWriteChar(buf, "=\"\"");
387 }
388 }
389}
390
391/**
392 * htmlAttrListDump:
393 * @buf: the HTML buffer output
394 * @doc: the document
395 * @cur: the first attribute pointer
396 *
397 * Dump a list of HTML attributes
398 */
399static void
400htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
401 if (cur == NULL) {
402 xmlGenericError(xmlGenericErrorContext,
403 "htmlAttrListDump : property == NULL\n");
404 return;
405 }
406 while (cur != NULL) {
407 htmlAttrDump(buf, doc, cur);
408 cur = cur->next;
409 }
410}
411
412
413void
414htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
415/**
416 * htmlNodeListDump:
417 * @buf: the HTML buffer output
418 * @doc: the document
419 * @cur: the first node
420 *
421 * Dump an HTML node list, recursive behaviour,children are printed too.
422 */
423static void
424htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
425 if (cur == NULL) {
426 xmlGenericError(xmlGenericErrorContext,
427 "htmlNodeListDump : node == NULL\n");
428 return;
429 }
430 while (cur != NULL) {
431 htmlNodeDump(buf, doc, cur);
432 cur = cur->next;
433 }
434}
435
436/**
437 * htmlNodeDump:
438 * @buf: the HTML buffer output
439 * @doc: the document
440 * @cur: the current node
441 *
442 * Dump an HTML node, recursive behaviour,children are printed too.
443 */
444void
445htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
446 htmlElemDescPtr info;
447
448 if (cur == NULL) {
449 xmlGenericError(xmlGenericErrorContext,
450 "htmlNodeDump : node == NULL\n");
451 return;
452 }
453 /*
454 * Special cases.
455 */
456 if (cur->type == XML_DTD_NODE)
457 return;
458 if (cur->type == XML_HTML_DOCUMENT_NODE) {
459 htmlDocContentDump(buf, (xmlDocPtr) cur);
460 return;
461 }
462 if (cur->type == HTML_TEXT_NODE) {
463 if (cur->content != NULL) {
464 if ((cur->name == xmlStringText) ||
465 (cur->name != xmlStringTextNoenc)) {
466 xmlChar *buffer;
467
468#ifndef XML_USE_BUFFER_CONTENT
469 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
470#else
471 buffer = xmlEncodeEntitiesReentrant(doc,
472 xmlBufferContent(cur->content));
473#endif
474 if (buffer != NULL) {
475 xmlBufferWriteCHAR(buf, buffer);
476 xmlFree(buffer);
477 }
478 } else {
479 xmlBufferWriteCHAR(buf, cur->content);
480 }
481 }
482 return;
483 }
484 if (cur->type == HTML_COMMENT_NODE) {
485 if (cur->content != NULL) {
486 xmlBufferWriteChar(buf, "<!--");
487#ifndef XML_USE_BUFFER_CONTENT
488 xmlBufferWriteCHAR(buf, cur->content);
489#else
490 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
491#endif
492 xmlBufferWriteChar(buf, "-->");
493 }
494 return;
495 }
496 if (cur->type == HTML_ENTITY_REF_NODE) {
497 xmlBufferWriteChar(buf, "&");
498 xmlBufferWriteCHAR(buf, cur->name);
499 xmlBufferWriteChar(buf, ";");
500 return;
501 }
502
503 /*
504 * Get specific HTmL info for taht node.
505 */
506 info = htmlTagLookup(cur->name);
507
508 xmlBufferWriteChar(buf, "<");
509 xmlBufferWriteCHAR(buf, cur->name);
510 if (cur->properties != NULL)
511 htmlAttrListDump(buf, doc, cur->properties);
512
513 if ((info != NULL) && (info->empty)) {
514 xmlBufferWriteChar(buf, ">");
515 if (cur->next != NULL) {
516 if ((cur->next->type != HTML_TEXT_NODE) &&
517 (cur->next->type != HTML_ENTITY_REF_NODE))
518 xmlBufferWriteChar(buf, "\n");
519 }
520 return;
521 }
522 if ((cur->content == NULL) && (cur->children == NULL)) {
523 if ((info != NULL) && (info->endTag != 0))
524 xmlBufferWriteChar(buf, ">");
525 else {
526 xmlBufferWriteChar(buf, "></");
527 xmlBufferWriteCHAR(buf, cur->name);
528 xmlBufferWriteChar(buf, ">");
529 }
530 if (cur->next != NULL) {
531 if ((cur->next->type != HTML_TEXT_NODE) &&
532 (cur->next->type != HTML_ENTITY_REF_NODE))
533 xmlBufferWriteChar(buf, "\n");
534 }
535 return;
536 }
537 xmlBufferWriteChar(buf, ">");
538 if (cur->content != NULL) {
539 xmlChar *buffer;
540
541#ifndef XML_USE_BUFFER_CONTENT
542 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
543#else
544 buffer = xmlEncodeEntitiesReentrant(doc,
545 xmlBufferContent(cur->content));
546#endif
547 if (buffer != NULL) {
548 xmlBufferWriteCHAR(buf, buffer);
549 xmlFree(buffer);
550 }
551 }
552 if (cur->children != NULL) {
553 if ((cur->children->type != HTML_TEXT_NODE) &&
554 (cur->children->type != HTML_ENTITY_REF_NODE) &&
555 (cur->children != cur->last))
556 xmlBufferWriteChar(buf, "\n");
557 htmlNodeListDump(buf, doc, cur->children);
558 if ((cur->last->type != HTML_TEXT_NODE) &&
559 (cur->last->type != HTML_ENTITY_REF_NODE) &&
560 (cur->children != cur->last))
561 xmlBufferWriteChar(buf, "\n");
562 }
563 if (!htmlIsAutoClosed(doc, cur)) {
564 xmlBufferWriteChar(buf, "</");
565 xmlBufferWriteCHAR(buf, cur->name);
566 xmlBufferWriteChar(buf, ">");
567 }
568#if 0
569 if (!htmlIsAutoClosed(doc, cur)) {
570 xmlBufferWriteChar(buf, "</");
571 xmlBufferWriteCHAR(buf, cur->name);
572 xmlBufferWriteChar(buf, ">");
573 }
574#else
575 xmlBufferWriteChar(buf, "</");
576 xmlBufferWriteCHAR(buf, cur->name);
577 xmlBufferWriteChar(buf, ">");
578#endif
579 if (cur->next != NULL) {
580 if ((cur->next->type != HTML_TEXT_NODE) &&
581 (cur->next->type != HTML_ENTITY_REF_NODE))
582 xmlBufferWriteChar(buf, "\n");
583 }
584}
585
586/**
587 * htmlNodeDumpFile:
588 * @out: the FILE pointer
589 * @doc: the document
590 * @cur: the current node
591 *
592 * Dump an HTML node, recursive behaviour,children are printed too.
593 */
594void
595htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
596 xmlBufferPtr buf;
597
598 buf = xmlBufferCreate();
599 if (buf == NULL) return;
600 htmlNodeDump(buf, doc, cur);
601 xmlBufferDump(out, buf);
602 xmlBufferFree(buf);
603}
604
605/**
606 * htmlDocContentDump:
607 * @buf: the HTML buffer output
608 * @cur: the document
609 *
610 * Dump an HTML document.
611 */
612static void
613htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
614 int type;
615
616 /*
617 * force to output the stuff as HTML, especially for entities
618 */
619 type = cur->type;
620 cur->type = XML_HTML_DOCUMENT_NODE;
621 if (cur->intSubset != NULL)
622 htmlDtdDump(buf, cur);
623 else {
624 /* Default to HTML-4.0 transitionnal @@@@ */
625 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
626
627 }
628 if (cur->children != NULL) {
629 htmlNodeListDump(buf, cur, cur->children);
630 }
631 xmlBufferWriteChar(buf, "\n");
632 cur->type = (xmlElementType) type;
633}
634
635/**
636 * htmlDocDumpMemory:
637 * @cur: the document
638 * @mem: OUT: the memory pointer
639 * @size: OUT: the memory lenght
640 *
641 * Dump an HTML document in memory and return the xmlChar * and it's size.
642 * It's up to the caller to free the memory.
643 */
644void
645htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
646 xmlBufferPtr buf;
647
648 if (cur == NULL) {
649#ifdef DEBUG_TREE
650 xmlGenericError(xmlGenericErrorContext,
651 "htmlxmlDocDumpMemory : document == NULL\n");
652#endif
653 *mem = NULL;
654 *size = 0;
655 return;
656 }
657 buf = xmlBufferCreate();
658 if (buf == NULL) {
659 *mem = NULL;
660 *size = 0;
661 return;
662 }
663 htmlDocContentDump(buf, cur);
664 *mem = buf->content;
665 *size = buf->use;
666 memset(buf, -1, sizeof(xmlBuffer));
667 xmlFree(buf);
668}
669
670
671/************************************************************************
672 * *
673 * Dumping HTML tree content to an I/O output buffer *
674 * *
675 ************************************************************************/
676
677/**
678 * htmlDtdDump:
679 * @buf: the HTML buffer output
680 * @doc: the document
681 * @encoding: the encoding string
682 *
683 * Dump the HTML document DTD, if any.
684 */
685static void
686htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, const char *encoding) {
687 xmlDtdPtr cur = doc->intSubset;
688
689 if (cur == NULL) {
690 xmlGenericError(xmlGenericErrorContext,
691 "htmlDtdDump : no internal subset\n");
692 return;
693 }
694 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
695 xmlOutputBufferWriteString(buf, (const char *)cur->name);
696 if (cur->ExternalID != NULL) {
697 xmlOutputBufferWriteString(buf, " PUBLIC ");
698 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
699 if (cur->SystemID != NULL) {
700 xmlOutputBufferWriteString(buf, " ");
701 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
702 }
703 } else if (cur->SystemID != NULL) {
704 xmlOutputBufferWriteString(buf, " SYSTEM ");
705 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
706 }
707 xmlOutputBufferWriteString(buf, ">\n");
708}
709
710/**
711 * htmlAttrDump:
712 * @buf: the HTML buffer output
713 * @doc: the document
714 * @cur: the attribute pointer
715 * @encoding: the encoding string
716 *
717 * Dump an HTML attribute
718 */
719static void
720htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
721 xmlChar *value;
722
723 if (cur == NULL) {
724 xmlGenericError(xmlGenericErrorContext,
725 "htmlAttrDump : property == NULL\n");
726 return;
727 }
728 xmlOutputBufferWriteString(buf, " ");
729 xmlOutputBufferWriteString(buf, (const char *)cur->name);
730 if (cur->children != NULL) {
731 value = xmlNodeListGetString(doc, cur->children, 0);
732 if (value) {
733 xmlOutputBufferWriteString(buf, "=");
734 xmlBufferWriteQuotedString(buf->buffer, value);
735 xmlFree(value);
736 } else {
737 xmlOutputBufferWriteString(buf, "=\"\"");
738 }
739 }
740}
741
742/**
743 * htmlAttrListDump:
744 * @buf: the HTML buffer output
745 * @doc: the document
746 * @cur: the first attribute pointer
747 * @encoding: the encoding string
748 *
749 * Dump a list of HTML attributes
750 */
751static void
752htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
753 if (cur == NULL) {
754 xmlGenericError(xmlGenericErrorContext,
755 "htmlAttrListDump : property == NULL\n");
756 return;
757 }
758 while (cur != NULL) {
759 htmlAttrDumpOutput(buf, doc, cur, encoding);
760 cur = cur->next;
761 }
762}
763
764
765void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
766 xmlNodePtr cur, const char *encoding);
767
768/**
769 * htmlNodeListDump:
770 * @buf: the HTML buffer output
771 * @doc: the document
772 * @cur: the first node
773 * @encoding: the encoding string
774 *
775 * Dump an HTML node list, recursive behaviour,children are printed too.
776 */
777static void
778htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
779 if (cur == NULL) {
780 xmlGenericError(xmlGenericErrorContext,
781 "htmlNodeListDump : node == NULL\n");
782 return;
783 }
784 while (cur != NULL) {
785 htmlNodeDumpOutput(buf, doc, cur, encoding);
786 cur = cur->next;
787 }
788}
789
790/**
791 * htmlNodeDumpOutput:
792 * @buf: the HTML buffer output
793 * @doc: the document
794 * @cur: the current node
795 * @encoding: the encoding string
796 *
797 * Dump an HTML node, recursive behaviour,children are printed too.
798 */
799void
800htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
801 htmlElemDescPtr info;
802
803 if (cur == NULL) {
804 xmlGenericError(xmlGenericErrorContext,
805 "htmlNodeDump : node == NULL\n");
806 return;
807 }
808 /*
809 * Special cases.
810 */
811 if (cur->type == XML_DTD_NODE)
812 return;
813 if (cur->type == XML_HTML_DOCUMENT_NODE) {
814 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
815 return;
816 }
817 if (cur->type == HTML_TEXT_NODE) {
818 if (cur->content != NULL) {
819 if ((cur->name == xmlStringText) ||
820 (cur->name != xmlStringTextNoenc)) {
821 xmlChar *buffer;
822
823#ifndef XML_USE_BUFFER_CONTENT
824 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
825#else
826 buffer = xmlEncodeEntitiesReentrant(doc,
827 xmlBufferContent(cur->content));
828#endif
829 if (buffer != NULL) {
830 xmlOutputBufferWriteString(buf, (const char *)buffer);
831 xmlFree(buffer);
832 }
833 } else {
834 xmlOutputBufferWriteString(buf, (const char *)cur->content);
835 }
836 }
837 return;
838 }
839 if (cur->type == HTML_COMMENT_NODE) {
840 if (cur->content != NULL) {
841 xmlOutputBufferWriteString(buf, "<!--");
842#ifndef XML_USE_BUFFER_CONTENT
843 xmlOutputBufferWriteString(buf, (const char *)cur->content);
844#else
845 xmlOutputBufferWriteString(buf, (const char *)
846 xmlBufferContent(cur->content));
847#endif
848 xmlOutputBufferWriteString(buf, "-->");
849 }
850 return;
851 }
852 if (cur->type == HTML_ENTITY_REF_NODE) {
853 xmlOutputBufferWriteString(buf, "&");
854 xmlOutputBufferWriteString(buf, (const char *)cur->name);
855 xmlOutputBufferWriteString(buf, ";");
856 return;
857 }
858 if (cur->type == HTML_PRESERVE_NODE) {
859 if (cur->content != NULL) {
860#ifndef XML_USE_BUFFER_CONTENT
861 xmlOutputBufferWriteString(buf, (const char *)cur->content);
862#else
863 xmlOutputBufferWriteString(buf, (const char *)
864 xmlBufferContent(cur->content));
865#endif
866 }
867 return;
868 }
869
870 /*
871 * Get specific HTmL info for taht node.
872 */
873 info = htmlTagLookup(cur->name);
874
875 xmlOutputBufferWriteString(buf, "<");
876 xmlOutputBufferWriteString(buf, (const char *)cur->name);
877 if (cur->properties != NULL)
878 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
879
880 if ((info != NULL) && (info->empty)) {
881 xmlOutputBufferWriteString(buf, ">");
882 if (cur->next != NULL) {
883 if ((cur->next->type != HTML_TEXT_NODE) &&
884 (cur->next->type != HTML_ENTITY_REF_NODE))
885 xmlOutputBufferWriteString(buf, "\n");
886 }
887 return;
888 }
889 if ((cur->content == NULL) && (cur->children == NULL)) {
890 if ((info != NULL) && (info->saveEndTag != 0) &&
891 (strcmp(info->name, "html")) && (strcmp(info->name, "body"))) {
892 xmlOutputBufferWriteString(buf, ">");
893 } else {
894 xmlOutputBufferWriteString(buf, "></");
895 xmlOutputBufferWriteString(buf, (const char *)cur->name);
896 xmlOutputBufferWriteString(buf, ">");
897 }
898 if (cur->next != NULL) {
899 if ((cur->next->type != HTML_TEXT_NODE) &&
900 (cur->next->type != HTML_ENTITY_REF_NODE))
901 xmlOutputBufferWriteString(buf, "\n");
902 }
903 return;
904 }
905 xmlOutputBufferWriteString(buf, ">");
906 if (cur->content != NULL) {
907 /*
908 * Uses the OutputBuffer property to automatically convert
909 * invalids to charrefs
910 */
911
912#ifndef XML_USE_BUFFER_CONTENT
913 xmlOutputBufferWriteString(buf, (const char *) cur->content);
914#else
915 xmlOutputBufferWriteString(buf,
916 (const char *) xmlBufferContent(cur->content));
917#endif
918 }
919 if (cur->children != NULL) {
920 if ((cur->children->type != HTML_TEXT_NODE) &&
921 (cur->children->type != HTML_ENTITY_REF_NODE) &&
922 (cur->children != cur->last))
923 xmlOutputBufferWriteString(buf, "\n");
924 htmlNodeListDumpOutput(buf, doc, cur->children, encoding);
925 if ((cur->last->type != HTML_TEXT_NODE) &&
926 (cur->last->type != HTML_ENTITY_REF_NODE) &&
927 (cur->children != cur->last))
928 xmlOutputBufferWriteString(buf, "\n");
929 }
930#if 0
931 if (!htmlIsAutoClosed(doc, cur)) {
932 xmlOutputBufferWriteString(buf, "</");
933 xmlOutputBufferWriteString(buf, (const char *)cur->name);
934 xmlOutputBufferWriteString(buf, ">");
935 }
936#else
937 xmlOutputBufferWriteString(buf, "</");
938 xmlOutputBufferWriteString(buf, (const char *)cur->name);
939 xmlOutputBufferWriteString(buf, ">");
940#endif
941 if (cur->next != NULL) {
942 if ((cur->next->type != HTML_TEXT_NODE) &&
943 (cur->next->type != HTML_ENTITY_REF_NODE))
944 xmlOutputBufferWriteString(buf, "\n");
945 }
946}
947
948/**
949 * htmlDocContentDump:
950 * @buf: the HTML buffer output
951 * @cur: the document
952 * @encoding: the encoding string
953 *
954 * Dump an HTML document.
955 */
956void
957htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding) {
958 int type;
959
960 /*
961 * force to output the stuff as HTML, especially for entities
962 */
963 type = cur->type;
964 cur->type = XML_HTML_DOCUMENT_NODE;
965 if (cur->intSubset != NULL)
966 htmlDtdDumpOutput(buf, cur, NULL);
967 else {
968 /* Default to HTML-4.0 transitionnal @@@@ */
969 xmlOutputBufferWriteString(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">\n");
970
971 }
972 if (cur->children != NULL) {
973 htmlNodeListDumpOutput(buf, cur, cur->children, encoding);
974 }
975 xmlOutputBufferWriteString(buf, "\n");
976 cur->type = (xmlElementType) type;
977}
978
979
980/************************************************************************
981 * *
982 * Saving functions front-ends *
983 * *
984 ************************************************************************/
985
986/**
987 * htmlDocDump:
988 * @f: the FILE*
989 * @cur: the document
990 *
991 * Dump an HTML document to an open FILE.
992 *
993 * returns: the number of byte written or -1 in case of failure.
994 */
995int
996htmlDocDump(FILE *f, xmlDocPtr cur) {
997 xmlOutputBufferPtr buf;
998 xmlCharEncodingHandlerPtr handler = NULL;
999 const char *encoding;
1000 int ret;
1001
1002 if (cur == NULL) {
1003#ifdef DEBUG_TREE
1004 xmlGenericError(xmlGenericErrorContext,
1005 "htmlDocDump : document == NULL\n");
1006#endif
1007 return(-1);
1008 }
1009
1010 encoding = (const char *) htmlGetMetaEncoding(cur);
1011
1012 if (encoding != NULL) {
1013 xmlCharEncoding enc;
1014
1015 enc = xmlParseCharEncoding(encoding);
1016 if (enc != cur->charset) {
1017 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1018 /*
1019 * Not supported yet
1020 */
1021 return(-1);
1022 }
1023
1024 handler = xmlFindCharEncodingHandler(encoding);
1025 if (handler == NULL)
1026 return(-1);
1027 }
1028 }
1029
1030 /*
1031 * Fallback to HTML or ASCII when the encoding is unspecified
1032 */
1033 if (handler == NULL)
1034 handler = xmlFindCharEncodingHandler("HTML");
1035 if (handler == NULL)
1036 handler = xmlFindCharEncodingHandler("ascii");
1037
1038 buf = xmlOutputBufferCreateFile(f, handler);
1039 if (buf == NULL) return(-1);
1040 htmlDocContentDumpOutput(buf, cur, NULL);
1041
1042 ret = xmlOutputBufferClose(buf);
1043 return(ret);
1044}
1045
1046/**
1047 * htmlSaveFile:
1048 * @filename: the filename (or URL)
1049 * @cur: the document
1050 *
1051 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1052 * used.
1053 * returns: the number of byte written or -1 in case of failure.
1054 */
1055int
1056htmlSaveFile(const char *filename, xmlDocPtr cur) {
1057 xmlOutputBufferPtr buf;
1058 xmlCharEncodingHandlerPtr handler = NULL;
1059 const char *encoding;
1060 int ret;
1061
1062 encoding = (const char *) htmlGetMetaEncoding(cur);
1063
1064 if (encoding != NULL) {
1065 xmlCharEncoding enc;
1066
1067 enc = xmlParseCharEncoding(encoding);
1068 if (enc != cur->charset) {
1069 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1070 /*
1071 * Not supported yet
1072 */
1073 return(-1);
1074 }
1075
1076 handler = xmlFindCharEncodingHandler(encoding);
1077 if (handler == NULL)
1078 return(-1);
1079 }
1080 }
1081
1082 /*
1083 * Fallback to HTML or ASCII when the encoding is unspecified
1084 */
1085 if (handler == NULL)
1086 handler = xmlFindCharEncodingHandler("HTML");
1087 if (handler == NULL)
1088 handler = xmlFindCharEncodingHandler("ascii");
1089
1090 /*
1091 * save the content to a temp buffer.
1092 */
1093 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1094 if (buf == NULL) return(0);
1095
1096 htmlDocContentDumpOutput(buf, cur, NULL);
1097
1098 ret = xmlOutputBufferClose(buf);
1099 return(ret);
1100}
1101
1102/**
1103 * htmlSaveFileEnc:
1104 * @filename: the filename
1105 * @cur: the document
1106 *
1107 * Dump an HTML document to a file using a given encoding.
1108 *
1109 * returns: the number of byte written or -1 in case of failure.
1110 */
1111int
1112htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1113 xmlOutputBufferPtr buf;
1114 xmlCharEncodingHandlerPtr handler = NULL;
1115 int ret;
1116
1117 if (encoding != NULL) {
1118 xmlCharEncoding enc;
1119
1120 enc = xmlParseCharEncoding(encoding);
1121 if (enc != cur->charset) {
1122 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1123 /*
1124 * Not supported yet
1125 */
1126 return(-1);
1127 }
1128
1129 handler = xmlFindCharEncodingHandler(encoding);
1130 if (handler == NULL)
1131 return(-1);
1132 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1133 }
1134 }
1135
1136 /*
1137 * Fallback to HTML or ASCII when the encoding is unspecified
1138 */
1139 if (handler == NULL)
1140 handler = xmlFindCharEncodingHandler("HTML");
1141 if (handler == NULL)
1142 handler = xmlFindCharEncodingHandler("ascii");
1143
1144 /*
1145 * save the content to a temp buffer.
1146 */
1147 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1148 if (buf == NULL) return(0);
1149
1150 htmlDocContentDumpOutput(buf, cur, encoding);
1151
1152 ret = xmlOutputBufferClose(buf);
1153 return(ret);
1154}
1155#endif /* LIBXML_HTML_ENABLED */