blob: a542a579edd3f115f77f14de4e683b85bf35b217 [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
9
10#ifdef WIN32
11#include "win32config.h"
12#else
13#include "config.h"
14#endif
15
16#include <libxml/xmlversion.h>
17#ifdef LIBXML_HTML_ENABLED
18
19#include <stdio.h>
20#include <string.h> /* for memset() only ! */
21
22#ifdef HAVE_CTYPE_H
23#include <ctype.h>
24#endif
25#ifdef HAVE_STDLIB_H
26#include <stdlib.h>
27#endif
28
29#include <libxml/xmlmemory.h>
30#include <libxml/HTMLparser.h>
31#include <libxml/HTMLtree.h>
32#include <libxml/entities.h>
33#include <libxml/valid.h>
34#include <libxml/xmlerror.h>
35#include <libxml/parserInternals.h>
36
/************************************************************************
 *                                                                      *
 *              When running GCC in vacuum cleaner mode                 *
 *                                                                      *
 ************************************************************************/
42
/*
 * UNUSED marks a parameter or variable that is intentionally not used.
 * It expands to the GCC "unused" attribute when __GNUC__ is defined and
 * to nothing otherwise.
 */
#if defined(__GNUC__)
#  define UNUSED __attribute__((__unused__))
#else
#  define UNUSED
#endif
48
/************************************************************************
 *                                                                      *
 *              Getting/Setting encoding meta tags                      *
 *                                                                      *
 ************************************************************************/
54
55/**
56 * htmlGetMetaEncoding:
57 * @doc: the document
58 *
59 * Encoding definition lookup in the Meta tags
60 *
61 * Returns the current encoding as flagged in the HTML source
62 */
63const xmlChar *
64htmlGetMetaEncoding(htmlDocPtr doc) {
65 htmlNodePtr cur;
66 const xmlChar *content;
67 const xmlChar *encoding;
68
69 if (doc == NULL)
70 return(NULL);
71 cur = doc->children;
72
73 /*
74 * Search the html
75 */
76 while (cur != NULL) {
77 if (cur->name != NULL) {
78 if (xmlStrEqual(cur->name, BAD_CAST"html"))
79 break;
80 if (xmlStrEqual(cur->name, BAD_CAST"head"))
81 goto found_head;
82 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
83 goto found_meta;
84 }
85 cur = cur->next;
86 }
87 if (cur == NULL)
88 return(NULL);
89 cur = cur->children;
90
91 /*
92 * Search the head
93 */
94 while (cur != NULL) {
95 if (cur->name != NULL) {
96 if (xmlStrEqual(cur->name, BAD_CAST"head"))
97 break;
98 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
99 goto found_meta;
100 }
101 cur = cur->next;
102 }
103 if (cur == NULL)
104 return(NULL);
105found_head:
106 cur = cur->children;
107
108 /*
109 * Search the meta elements
110 */
111found_meta:
112 while (cur != NULL) {
113 if (cur->name != NULL) {
114 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
115 xmlAttrPtr attr = cur->properties;
116 int http;
117 const xmlChar *value;
118
119 content = NULL;
120 http = 0;
121 while (attr != NULL) {
122 if ((attr->children != NULL) &&
123 (attr->children->type == XML_TEXT_NODE) &&
124 (attr->children->next == NULL)) {
125#ifndef XML_USE_BUFFER_CONTENT
126 value = attr->children->content;
127#else
128 value = xmlBufferContent(attr->children->content);
129#endif
130 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
131 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
132 http = 1;
133 else if ((value != NULL)
134 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
135 content = value;
136 if ((http != 0) && (content != NULL))
137 goto found_content;
138 }
139 attr = attr->next;
140 }
141 }
142 }
143 cur = cur->next;
144 }
145 return(NULL);
146
147found_content:
148 encoding = xmlStrstr(content, BAD_CAST"charset=");
149 if (encoding == NULL)
150 encoding = xmlStrstr(content, BAD_CAST"Charset=");
151 if (encoding == NULL)
152 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
153 if (encoding != NULL) {
154 encoding += 8;
155 } else {
156 encoding = xmlStrstr(content, BAD_CAST"charset =");
157 if (encoding == NULL)
158 encoding = xmlStrstr(content, BAD_CAST"Charset =");
159 if (encoding == NULL)
160 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
161 if (encoding != NULL)
162 encoding += 9;
163 }
164 if (encoding != NULL) {
165 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
166 }
167 return(encoding);
168}
169
170/**
171 * htmlSetMetaEncoding:
172 * @doc: the document
173 * @encoding: the encoding string
174 *
175 * Sets the current encoding in the Meta tags
176 * NOTE: this will not change the document content encoding, just
177 * the META flag associated.
178 *
179 * Returns 0 in case of success and -1 in case of error
180 */
181int
182htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
183 htmlNodePtr cur, meta;
184 const xmlChar *content;
185 char newcontent[100];
186
187
188 if (doc == NULL)
189 return(-1);
190
191 if (encoding != NULL) {
192#ifdef HAVE_SNPRINTF
193 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
194 encoding);
195#else
196 sprintf(newcontent, "text/html; charset=%s", encoding);
197#endif
198 newcontent[sizeof(newcontent) - 1] = 0;
199 }
200
201 cur = doc->children;
202
203 /*
204 * Search the html
205 */
206 while (cur != NULL) {
207 if (cur->name != NULL) {
208 if (xmlStrEqual(cur->name, BAD_CAST"html"))
209 break;
210 if (xmlStrEqual(cur->name, BAD_CAST"body")) {
211 if (encoding == NULL)
212 return(0);
213 meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL);
214 xmlAddPrevSibling(cur, meta);
215 cur = meta;
216 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
217 xmlAddChild(cur, meta);
218 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
219 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
220 return(0);
221 }
222 if (xmlStrEqual(cur->name, BAD_CAST"head"))
223 goto found_head;
224 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
225 goto found_meta;
226 }
227 cur = cur->next;
228 }
229 if (cur == NULL)
230 return(-1);
231 cur = cur->children;
232
233 /*
234 * Search the head
235 */
236 while (cur != NULL) {
237 if (cur->name != NULL) {
238 if (xmlStrEqual(cur->name, BAD_CAST"head"))
239 break;
240 if (xmlStrEqual(cur->name, BAD_CAST"body")) {
241 if (encoding == NULL)
242 return(0);
243 meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL);
244 xmlAddPrevSibling(cur, meta);
245 cur = meta;
246 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
247 xmlAddChild(cur, meta);
248 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
249 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
250 return(0);
251 }
252 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
253 goto found_meta;
254 }
255 cur = cur->next;
256 }
257 if (cur == NULL)
258 return(-1);
259found_head:
260 if (cur->children == NULL) {
261 if (encoding == NULL)
262 return(0);
263 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
264 xmlAddChild(cur, meta);
265 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
266 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
267 return(0);
268 }
269 cur = cur->children;
270
271found_meta:
272 if (encoding != NULL) {
273 /*
274 * Create a new Meta element with the right aatributes
275 */
276
277 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
278 xmlAddPrevSibling(cur, meta);
279 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
280 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
281 }
282
283 /*
284 * Search and destroy all the remaining the meta elements carrying
285 * encoding informations
286 */
287 while (cur != NULL) {
288 if (cur->name != NULL) {
289 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
290 xmlAttrPtr attr = cur->properties;
291 int http;
292 const xmlChar *value;
293
294 content = NULL;
295 http = 0;
296 while (attr != NULL) {
297 if ((attr->children != NULL) &&
298 (attr->children->type == XML_TEXT_NODE) &&
299 (attr->children->next == NULL)) {
300#ifndef XML_USE_BUFFER_CONTENT
301 value = attr->children->content;
302#else
303 value = xmlBufferContent(attr->children->content);
304#endif
305 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
306 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
307 http = 1;
308 else if ((value != NULL)
309 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
310 content = value;
311 if ((http != 0) && (content != NULL))
312 break;
313 }
314 attr = attr->next;
315 }
316 if ((http != 0) && (content != NULL)) {
317 meta = cur;
318 cur = cur->next;
319 xmlUnlinkNode(meta);
320 xmlFreeNode(meta);
321 continue;
322 }
323
324 }
325 }
326 cur = cur->next;
327 }
328 return(0);
329}
330
/************************************************************************
 *                                                                      *
 *              Dumping HTML tree content to a simple buffer            *
 *                                                                      *
 ************************************************************************/
336
337static void
338htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
339
340/**
341 * htmlDtdDump:
342 * @buf: the HTML buffer output
343 * @doc: the document
344 *
345 * Dump the HTML document DTD, if any.
346 */
347static void
348htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
349 xmlDtdPtr cur = doc->intSubset;
350
351 if (cur == NULL) {
352 xmlGenericError(xmlGenericErrorContext,
353 "htmlDtdDump : no internal subset\n");
354 return;
355 }
356 xmlBufferWriteChar(buf, "<!DOCTYPE ");
357 xmlBufferWriteCHAR(buf, cur->name);
358 if (cur->ExternalID != NULL) {
359 xmlBufferWriteChar(buf, " PUBLIC ");
360 xmlBufferWriteQuotedString(buf, cur->ExternalID);
361 if (cur->SystemID != NULL) {
362 xmlBufferWriteChar(buf, " ");
363 xmlBufferWriteQuotedString(buf, cur->SystemID);
364 }
365 } else if (cur->SystemID != NULL) {
366 xmlBufferWriteChar(buf, " SYSTEM ");
367 xmlBufferWriteQuotedString(buf, cur->SystemID);
368 }
369 xmlBufferWriteChar(buf, ">\n");
370}
371
372/**
373 * htmlAttrDump:
374 * @buf: the HTML buffer output
375 * @doc: the document
376 * @cur: the attribute pointer
377 *
378 * Dump an HTML attribute
379 */
380static void
381htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
382 xmlChar *value;
383
384 if (cur == NULL) {
385 xmlGenericError(xmlGenericErrorContext,
386 "htmlAttrDump : property == NULL\n");
387 return;
388 }
389 xmlBufferWriteChar(buf, " ");
390 xmlBufferWriteCHAR(buf, cur->name);
391 if (cur->children != NULL) {
392 value = xmlNodeListGetString(doc, cur->children, 0);
393 if (value) {
394 xmlBufferWriteChar(buf, "=");
395 xmlBufferWriteQuotedString(buf, value);
396 xmlFree(value);
397 } else {
398 xmlBufferWriteChar(buf, "=\"\"");
399 }
400 }
401}
402
403/**
404 * htmlAttrListDump:
405 * @buf: the HTML buffer output
406 * @doc: the document
407 * @cur: the first attribute pointer
408 *
409 * Dump a list of HTML attributes
410 */
411static void
412htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
413 if (cur == NULL) {
414 xmlGenericError(xmlGenericErrorContext,
415 "htmlAttrListDump : property == NULL\n");
416 return;
417 }
418 while (cur != NULL) {
419 htmlAttrDump(buf, doc, cur);
420 cur = cur->next;
421 }
422}
423
424
425void
426htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
427/**
428 * htmlNodeListDump:
429 * @buf: the HTML buffer output
430 * @doc: the document
431 * @cur: the first node
432 *
433 * Dump an HTML node list, recursive behaviour,children are printed too.
434 */
435static void
436htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
437 if (cur == NULL) {
438 xmlGenericError(xmlGenericErrorContext,
439 "htmlNodeListDump : node == NULL\n");
440 return;
441 }
442 while (cur != NULL) {
443 htmlNodeDump(buf, doc, cur);
444 cur = cur->next;
445 }
446}
447
448/**
449 * htmlNodeDump:
450 * @buf: the HTML buffer output
451 * @doc: the document
452 * @cur: the current node
453 *
454 * Dump an HTML node, recursive behaviour,children are printed too.
455 */
456void
457htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
458 htmlElemDescPtr info;
459
460 if (cur == NULL) {
461 xmlGenericError(xmlGenericErrorContext,
462 "htmlNodeDump : node == NULL\n");
463 return;
464 }
465 /*
466 * Special cases.
467 */
468 if (cur->type == XML_DTD_NODE)
469 return;
470 if (cur->type == XML_HTML_DOCUMENT_NODE) {
471 htmlDocContentDump(buf, (xmlDocPtr) cur);
472 return;
473 }
474 if (cur->type == HTML_TEXT_NODE) {
475 if (cur->content != NULL) {
476 if ((cur->name == xmlStringText) ||
477 (cur->name != xmlStringTextNoenc)) {
478 xmlChar *buffer;
479
480#ifndef XML_USE_BUFFER_CONTENT
481 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
482#else
483 buffer = xmlEncodeEntitiesReentrant(doc,
484 xmlBufferContent(cur->content));
485#endif
486 if (buffer != NULL) {
487 xmlBufferWriteCHAR(buf, buffer);
488 xmlFree(buffer);
489 }
490 } else {
491 xmlBufferWriteCHAR(buf, cur->content);
492 }
493 }
494 return;
495 }
496 if (cur->type == HTML_COMMENT_NODE) {
497 if (cur->content != NULL) {
498 xmlBufferWriteChar(buf, "<!--");
499#ifndef XML_USE_BUFFER_CONTENT
500 xmlBufferWriteCHAR(buf, cur->content);
501#else
502 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
503#endif
504 xmlBufferWriteChar(buf, "-->");
505 }
506 return;
507 }
508 if (cur->type == HTML_ENTITY_REF_NODE) {
509 xmlBufferWriteChar(buf, "&");
510 xmlBufferWriteCHAR(buf, cur->name);
511 xmlBufferWriteChar(buf, ";");
512 return;
513 }
514
515 /*
516 * Get specific HTmL info for taht node.
517 */
518 info = htmlTagLookup(cur->name);
519
520 xmlBufferWriteChar(buf, "<");
521 xmlBufferWriteCHAR(buf, cur->name);
522 if (cur->properties != NULL)
523 htmlAttrListDump(buf, doc, cur->properties);
524
525 if ((info != NULL) && (info->empty)) {
526 xmlBufferWriteChar(buf, ">");
527 if (cur->next != NULL) {
528 if ((cur->next->type != HTML_TEXT_NODE) &&
529 (cur->next->type != HTML_ENTITY_REF_NODE))
530 xmlBufferWriteChar(buf, "\n");
531 }
532 return;
533 }
534 if ((cur->content == NULL) && (cur->children == NULL)) {
535 if ((info != NULL) && (info->endTag != 0))
536 xmlBufferWriteChar(buf, ">");
537 else {
538 xmlBufferWriteChar(buf, "></");
539 xmlBufferWriteCHAR(buf, cur->name);
540 xmlBufferWriteChar(buf, ">");
541 }
542 if (cur->next != NULL) {
543 if ((cur->next->type != HTML_TEXT_NODE) &&
544 (cur->next->type != HTML_ENTITY_REF_NODE))
545 xmlBufferWriteChar(buf, "\n");
546 }
547 return;
548 }
549 xmlBufferWriteChar(buf, ">");
550 if (cur->content != NULL) {
551 xmlChar *buffer;
552
553#ifndef XML_USE_BUFFER_CONTENT
554 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
555#else
556 buffer = xmlEncodeEntitiesReentrant(doc,
557 xmlBufferContent(cur->content));
558#endif
559 if (buffer != NULL) {
560 xmlBufferWriteCHAR(buf, buffer);
561 xmlFree(buffer);
562 }
563 }
564 if (cur->children != NULL) {
565 if ((cur->children->type != HTML_TEXT_NODE) &&
566 (cur->children->type != HTML_ENTITY_REF_NODE) &&
567 (cur->children != cur->last))
568 xmlBufferWriteChar(buf, "\n");
569 htmlNodeListDump(buf, doc, cur->children);
570 if ((cur->last->type != HTML_TEXT_NODE) &&
571 (cur->last->type != HTML_ENTITY_REF_NODE) &&
572 (cur->children != cur->last))
573 xmlBufferWriteChar(buf, "\n");
574 }
575 if (!htmlIsAutoClosed(doc, cur)) {
576 xmlBufferWriteChar(buf, "</");
577 xmlBufferWriteCHAR(buf, cur->name);
578 xmlBufferWriteChar(buf, ">");
579 }
580#if 0
581 if (!htmlIsAutoClosed(doc, cur)) {
582 xmlBufferWriteChar(buf, "</");
583 xmlBufferWriteCHAR(buf, cur->name);
584 xmlBufferWriteChar(buf, ">");
585 }
586#else
587 xmlBufferWriteChar(buf, "</");
588 xmlBufferWriteCHAR(buf, cur->name);
589 xmlBufferWriteChar(buf, ">");
590#endif
591 if (cur->next != NULL) {
592 if ((cur->next->type != HTML_TEXT_NODE) &&
593 (cur->next->type != HTML_ENTITY_REF_NODE))
594 xmlBufferWriteChar(buf, "\n");
595 }
596}
597
598/**
599 * htmlNodeDumpFile:
600 * @out: the FILE pointer
601 * @doc: the document
602 * @cur: the current node
603 *
604 * Dump an HTML node, recursive behaviour,children are printed too.
605 */
606void
607htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
608 xmlBufferPtr buf;
609
610 buf = xmlBufferCreate();
611 if (buf == NULL) return;
612 htmlNodeDump(buf, doc, cur);
613 xmlBufferDump(out, buf);
614 xmlBufferFree(buf);
615}
616
617/**
618 * htmlDocContentDump:
619 * @buf: the HTML buffer output
620 * @cur: the document
621 *
622 * Dump an HTML document.
623 */
624static void
625htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
626 int type;
627
628 /*
629 * force to output the stuff as HTML, especially for entities
630 */
631 type = cur->type;
632 cur->type = XML_HTML_DOCUMENT_NODE;
633 if (cur->intSubset != NULL)
634 htmlDtdDump(buf, cur);
635 else {
636 /* Default to HTML-4.0 transitionnal @@@@ */
637 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
638
639 }
640 if (cur->children != NULL) {
641 htmlNodeListDump(buf, cur, cur->children);
642 }
643 xmlBufferWriteChar(buf, "\n");
644 cur->type = (xmlElementType) type;
645}
646
647/**
648 * htmlDocDumpMemory:
649 * @cur: the document
650 * @mem: OUT: the memory pointer
651 * @size: OUT: the memory lenght
652 *
653 * Dump an HTML document in memory and return the xmlChar * and it's size.
654 * It's up to the caller to free the memory.
655 */
656void
657htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
658 xmlBufferPtr buf;
659
660 if (cur == NULL) {
661#ifdef DEBUG_TREE
662 xmlGenericError(xmlGenericErrorContext,
663 "htmlxmlDocDumpMemory : document == NULL\n");
664#endif
665 *mem = NULL;
666 *size = 0;
667 return;
668 }
669 buf = xmlBufferCreate();
670 if (buf == NULL) {
671 *mem = NULL;
672 *size = 0;
673 return;
674 }
675 htmlDocContentDump(buf, cur);
676 *mem = buf->content;
677 *size = buf->use;
Daniel Veillard48b2f892001-02-25 16:11:03 +0000678 MEM_CLEANUP(buf, sizeof(xmlBuffer));
Owen Taylor3473f882001-02-23 17:55:21 +0000679 xmlFree(buf);
680}
681
682
/************************************************************************
 *                                                                      *
 *         Dumping HTML tree content to an I/O output buffer            *
 *                                                                      *
 ************************************************************************/
688
689/**
690 * htmlDtdDump:
691 * @buf: the HTML buffer output
692 * @doc: the document
693 * @encoding: the encoding string
694 *
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000695 * TODO: check whether encoding is needed
696 *
Owen Taylor3473f882001-02-23 17:55:21 +0000697 * Dump the HTML document DTD, if any.
698 */
699static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000700htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
701 const char *encoding UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000702 xmlDtdPtr cur = doc->intSubset;
703
704 if (cur == NULL) {
705 xmlGenericError(xmlGenericErrorContext,
706 "htmlDtdDump : no internal subset\n");
707 return;
708 }
709 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
710 xmlOutputBufferWriteString(buf, (const char *)cur->name);
711 if (cur->ExternalID != NULL) {
712 xmlOutputBufferWriteString(buf, " PUBLIC ");
713 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
714 if (cur->SystemID != NULL) {
715 xmlOutputBufferWriteString(buf, " ");
716 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
717 }
718 } else if (cur->SystemID != NULL) {
719 xmlOutputBufferWriteString(buf, " SYSTEM ");
720 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
721 }
722 xmlOutputBufferWriteString(buf, ">\n");
723}
724
725/**
726 * htmlAttrDump:
727 * @buf: the HTML buffer output
728 * @doc: the document
729 * @cur: the attribute pointer
730 * @encoding: the encoding string
731 *
732 * Dump an HTML attribute
733 */
734static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000735htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
736 const char *encoding UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000737 xmlChar *value;
738
739 if (cur == NULL) {
740 xmlGenericError(xmlGenericErrorContext,
741 "htmlAttrDump : property == NULL\n");
742 return;
743 }
744 xmlOutputBufferWriteString(buf, " ");
745 xmlOutputBufferWriteString(buf, (const char *)cur->name);
746 if (cur->children != NULL) {
747 value = xmlNodeListGetString(doc, cur->children, 0);
748 if (value) {
749 xmlOutputBufferWriteString(buf, "=");
750 xmlBufferWriteQuotedString(buf->buffer, value);
751 xmlFree(value);
752 } else {
753 xmlOutputBufferWriteString(buf, "=\"\"");
754 }
755 }
756}
757
758/**
759 * htmlAttrListDump:
760 * @buf: the HTML buffer output
761 * @doc: the document
762 * @cur: the first attribute pointer
763 * @encoding: the encoding string
764 *
765 * Dump a list of HTML attributes
766 */
767static void
768htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
769 if (cur == NULL) {
770 xmlGenericError(xmlGenericErrorContext,
771 "htmlAttrListDump : property == NULL\n");
772 return;
773 }
774 while (cur != NULL) {
775 htmlAttrDumpOutput(buf, doc, cur, encoding);
776 cur = cur->next;
777 }
778}
779
780
781void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
782 xmlNodePtr cur, const char *encoding);
783
784/**
785 * htmlNodeListDump:
786 * @buf: the HTML buffer output
787 * @doc: the document
788 * @cur: the first node
789 * @encoding: the encoding string
790 *
791 * Dump an HTML node list, recursive behaviour,children are printed too.
792 */
793static void
794htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
795 if (cur == NULL) {
796 xmlGenericError(xmlGenericErrorContext,
797 "htmlNodeListDump : node == NULL\n");
798 return;
799 }
800 while (cur != NULL) {
801 htmlNodeDumpOutput(buf, doc, cur, encoding);
802 cur = cur->next;
803 }
804}
805
806/**
807 * htmlNodeDumpOutput:
808 * @buf: the HTML buffer output
809 * @doc: the document
810 * @cur: the current node
811 * @encoding: the encoding string
812 *
813 * Dump an HTML node, recursive behaviour,children are printed too.
814 */
815void
816htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
817 htmlElemDescPtr info;
818
819 if (cur == NULL) {
820 xmlGenericError(xmlGenericErrorContext,
821 "htmlNodeDump : node == NULL\n");
822 return;
823 }
824 /*
825 * Special cases.
826 */
827 if (cur->type == XML_DTD_NODE)
828 return;
829 if (cur->type == XML_HTML_DOCUMENT_NODE) {
830 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
831 return;
832 }
833 if (cur->type == HTML_TEXT_NODE) {
834 if (cur->content != NULL) {
835 if ((cur->name == xmlStringText) ||
836 (cur->name != xmlStringTextNoenc)) {
837 xmlChar *buffer;
838
839#ifndef XML_USE_BUFFER_CONTENT
840 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
841#else
842 buffer = xmlEncodeEntitiesReentrant(doc,
843 xmlBufferContent(cur->content));
844#endif
845 if (buffer != NULL) {
846 xmlOutputBufferWriteString(buf, (const char *)buffer);
847 xmlFree(buffer);
848 }
849 } else {
850 xmlOutputBufferWriteString(buf, (const char *)cur->content);
851 }
852 }
853 return;
854 }
855 if (cur->type == HTML_COMMENT_NODE) {
856 if (cur->content != NULL) {
857 xmlOutputBufferWriteString(buf, "<!--");
858#ifndef XML_USE_BUFFER_CONTENT
859 xmlOutputBufferWriteString(buf, (const char *)cur->content);
860#else
861 xmlOutputBufferWriteString(buf, (const char *)
862 xmlBufferContent(cur->content));
863#endif
864 xmlOutputBufferWriteString(buf, "-->");
865 }
866 return;
867 }
868 if (cur->type == HTML_ENTITY_REF_NODE) {
869 xmlOutputBufferWriteString(buf, "&");
870 xmlOutputBufferWriteString(buf, (const char *)cur->name);
871 xmlOutputBufferWriteString(buf, ";");
872 return;
873 }
874 if (cur->type == HTML_PRESERVE_NODE) {
875 if (cur->content != NULL) {
876#ifndef XML_USE_BUFFER_CONTENT
877 xmlOutputBufferWriteString(buf, (const char *)cur->content);
878#else
879 xmlOutputBufferWriteString(buf, (const char *)
880 xmlBufferContent(cur->content));
881#endif
882 }
883 return;
884 }
885
886 /*
887 * Get specific HTmL info for taht node.
888 */
889 info = htmlTagLookup(cur->name);
890
891 xmlOutputBufferWriteString(buf, "<");
892 xmlOutputBufferWriteString(buf, (const char *)cur->name);
893 if (cur->properties != NULL)
894 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
895
896 if ((info != NULL) && (info->empty)) {
897 xmlOutputBufferWriteString(buf, ">");
898 if (cur->next != NULL) {
899 if ((cur->next->type != HTML_TEXT_NODE) &&
900 (cur->next->type != HTML_ENTITY_REF_NODE))
901 xmlOutputBufferWriteString(buf, "\n");
902 }
903 return;
904 }
905 if ((cur->content == NULL) && (cur->children == NULL)) {
906 if ((info != NULL) && (info->saveEndTag != 0) &&
907 (strcmp(info->name, "html")) && (strcmp(info->name, "body"))) {
908 xmlOutputBufferWriteString(buf, ">");
909 } else {
910 xmlOutputBufferWriteString(buf, "></");
911 xmlOutputBufferWriteString(buf, (const char *)cur->name);
912 xmlOutputBufferWriteString(buf, ">");
913 }
914 if (cur->next != NULL) {
915 if ((cur->next->type != HTML_TEXT_NODE) &&
916 (cur->next->type != HTML_ENTITY_REF_NODE))
917 xmlOutputBufferWriteString(buf, "\n");
918 }
919 return;
920 }
921 xmlOutputBufferWriteString(buf, ">");
922 if (cur->content != NULL) {
923 /*
924 * Uses the OutputBuffer property to automatically convert
925 * invalids to charrefs
926 */
927
928#ifndef XML_USE_BUFFER_CONTENT
929 xmlOutputBufferWriteString(buf, (const char *) cur->content);
930#else
931 xmlOutputBufferWriteString(buf,
932 (const char *) xmlBufferContent(cur->content));
933#endif
934 }
935 if (cur->children != NULL) {
936 if ((cur->children->type != HTML_TEXT_NODE) &&
937 (cur->children->type != HTML_ENTITY_REF_NODE) &&
938 (cur->children != cur->last))
939 xmlOutputBufferWriteString(buf, "\n");
940 htmlNodeListDumpOutput(buf, doc, cur->children, encoding);
941 if ((cur->last->type != HTML_TEXT_NODE) &&
942 (cur->last->type != HTML_ENTITY_REF_NODE) &&
943 (cur->children != cur->last))
944 xmlOutputBufferWriteString(buf, "\n");
945 }
946#if 0
947 if (!htmlIsAutoClosed(doc, cur)) {
948 xmlOutputBufferWriteString(buf, "</");
949 xmlOutputBufferWriteString(buf, (const char *)cur->name);
950 xmlOutputBufferWriteString(buf, ">");
951 }
952#else
953 xmlOutputBufferWriteString(buf, "</");
954 xmlOutputBufferWriteString(buf, (const char *)cur->name);
955 xmlOutputBufferWriteString(buf, ">");
956#endif
957 if (cur->next != NULL) {
958 if ((cur->next->type != HTML_TEXT_NODE) &&
959 (cur->next->type != HTML_ENTITY_REF_NODE))
960 xmlOutputBufferWriteString(buf, "\n");
961 }
962}
963
964/**
965 * htmlDocContentDump:
966 * @buf: the HTML buffer output
967 * @cur: the document
968 * @encoding: the encoding string
969 *
970 * Dump an HTML document.
971 */
972void
973htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding) {
974 int type;
975
976 /*
977 * force to output the stuff as HTML, especially for entities
978 */
979 type = cur->type;
980 cur->type = XML_HTML_DOCUMENT_NODE;
981 if (cur->intSubset != NULL)
982 htmlDtdDumpOutput(buf, cur, NULL);
983 else {
984 /* Default to HTML-4.0 transitionnal @@@@ */
985 xmlOutputBufferWriteString(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">\n");
986
987 }
988 if (cur->children != NULL) {
989 htmlNodeListDumpOutput(buf, cur, cur->children, encoding);
990 }
991 xmlOutputBufferWriteString(buf, "\n");
992 cur->type = (xmlElementType) type;
993}
994
995
/************************************************************************
 *                                                                      *
 *              Saving functions front-ends                             *
 *                                                                      *
 ************************************************************************/
1001
1002/**
1003 * htmlDocDump:
1004 * @f: the FILE*
1005 * @cur: the document
1006 *
1007 * Dump an HTML document to an open FILE.
1008 *
1009 * returns: the number of byte written or -1 in case of failure.
1010 */
1011int
1012htmlDocDump(FILE *f, xmlDocPtr cur) {
1013 xmlOutputBufferPtr buf;
1014 xmlCharEncodingHandlerPtr handler = NULL;
1015 const char *encoding;
1016 int ret;
1017
1018 if (cur == NULL) {
1019#ifdef DEBUG_TREE
1020 xmlGenericError(xmlGenericErrorContext,
1021 "htmlDocDump : document == NULL\n");
1022#endif
1023 return(-1);
1024 }
1025
1026 encoding = (const char *) htmlGetMetaEncoding(cur);
1027
1028 if (encoding != NULL) {
1029 xmlCharEncoding enc;
1030
1031 enc = xmlParseCharEncoding(encoding);
1032 if (enc != cur->charset) {
1033 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1034 /*
1035 * Not supported yet
1036 */
1037 return(-1);
1038 }
1039
1040 handler = xmlFindCharEncodingHandler(encoding);
1041 if (handler == NULL)
1042 return(-1);
1043 }
1044 }
1045
1046 /*
1047 * Fallback to HTML or ASCII when the encoding is unspecified
1048 */
1049 if (handler == NULL)
1050 handler = xmlFindCharEncodingHandler("HTML");
1051 if (handler == NULL)
1052 handler = xmlFindCharEncodingHandler("ascii");
1053
1054 buf = xmlOutputBufferCreateFile(f, handler);
1055 if (buf == NULL) return(-1);
1056 htmlDocContentDumpOutput(buf, cur, NULL);
1057
1058 ret = xmlOutputBufferClose(buf);
1059 return(ret);
1060}
1061
1062/**
1063 * htmlSaveFile:
1064 * @filename: the filename (or URL)
1065 * @cur: the document
1066 *
1067 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1068 * used.
1069 * returns: the number of byte written or -1 in case of failure.
1070 */
1071int
1072htmlSaveFile(const char *filename, xmlDocPtr cur) {
1073 xmlOutputBufferPtr buf;
1074 xmlCharEncodingHandlerPtr handler = NULL;
1075 const char *encoding;
1076 int ret;
1077
1078 encoding = (const char *) htmlGetMetaEncoding(cur);
1079
1080 if (encoding != NULL) {
1081 xmlCharEncoding enc;
1082
1083 enc = xmlParseCharEncoding(encoding);
1084 if (enc != cur->charset) {
1085 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1086 /*
1087 * Not supported yet
1088 */
1089 return(-1);
1090 }
1091
1092 handler = xmlFindCharEncodingHandler(encoding);
1093 if (handler == NULL)
1094 return(-1);
1095 }
1096 }
1097
1098 /*
1099 * Fallback to HTML or ASCII when the encoding is unspecified
1100 */
1101 if (handler == NULL)
1102 handler = xmlFindCharEncodingHandler("HTML");
1103 if (handler == NULL)
1104 handler = xmlFindCharEncodingHandler("ascii");
1105
1106 /*
1107 * save the content to a temp buffer.
1108 */
1109 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1110 if (buf == NULL) return(0);
1111
1112 htmlDocContentDumpOutput(buf, cur, NULL);
1113
1114 ret = xmlOutputBufferClose(buf);
1115 return(ret);
1116}
1117
1118/**
1119 * htmlSaveFileEnc:
1120 * @filename: the filename
1121 * @cur: the document
1122 *
1123 * Dump an HTML document to a file using a given encoding.
1124 *
1125 * returns: the number of byte written or -1 in case of failure.
1126 */
1127int
1128htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1129 xmlOutputBufferPtr buf;
1130 xmlCharEncodingHandlerPtr handler = NULL;
1131 int ret;
1132
1133 if (encoding != NULL) {
1134 xmlCharEncoding enc;
1135
1136 enc = xmlParseCharEncoding(encoding);
1137 if (enc != cur->charset) {
1138 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1139 /*
1140 * Not supported yet
1141 */
1142 return(-1);
1143 }
1144
1145 handler = xmlFindCharEncodingHandler(encoding);
1146 if (handler == NULL)
1147 return(-1);
1148 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1149 }
1150 }
1151
1152 /*
1153 * Fallback to HTML or ASCII when the encoding is unspecified
1154 */
1155 if (handler == NULL)
1156 handler = xmlFindCharEncodingHandler("HTML");
1157 if (handler == NULL)
1158 handler = xmlFindCharEncodingHandler("ascii");
1159
1160 /*
1161 * save the content to a temp buffer.
1162 */
1163 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1164 if (buf == NULL) return(0);
1165
1166 htmlDocContentDumpOutput(buf, cur, encoding);
1167
1168 ret = xmlOutputBufferClose(buf);
1169 return(ret);
1170}
1171#endif /* LIBXML_HTML_ENABLED */