blob: d62072ae0551d11f0ccf450c4869d6dd108e87fe [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * HTMLtree.c : implementation of access functions for an HTML tree.
3 *
4 * See Copyright for the status of this software.
5 *
6 * Daniel.Veillard@w3.org
7 */
8
9
10#ifdef WIN32
11#include "win32config.h"
12#else
13#include "config.h"
14#endif
15
16#include <libxml/xmlversion.h>
17#ifdef LIBXML_HTML_ENABLED
18
19#include <stdio.h>
20#include <string.h> /* for memset() only ! */
21
22#ifdef HAVE_CTYPE_H
23#include <ctype.h>
24#endif
25#ifdef HAVE_STDLIB_H
26#include <stdlib.h>
27#endif
28
29#include <libxml/xmlmemory.h>
30#include <libxml/HTMLparser.h>
31#include <libxml/HTMLtree.h>
32#include <libxml/entities.h>
33#include <libxml/valid.h>
34#include <libxml/xmlerror.h>
35#include <libxml/parserInternals.h>
36
37/************************************************************************
38 * *
39 * Getting/Setting encoding meta tags *
40 * *
41 ************************************************************************/
42
43/**
44 * htmlGetMetaEncoding:
45 * @doc: the document
46 *
47 * Encoding definition lookup in the Meta tags
48 *
49 * Returns the current encoding as flagged in the HTML source
50 */
51const xmlChar *
52htmlGetMetaEncoding(htmlDocPtr doc) {
53 htmlNodePtr cur;
54 const xmlChar *content;
55 const xmlChar *encoding;
56
57 if (doc == NULL)
58 return(NULL);
59 cur = doc->children;
60
61 /*
62 * Search the html
63 */
64 while (cur != NULL) {
65 if (cur->name != NULL) {
66 if (xmlStrEqual(cur->name, BAD_CAST"html"))
67 break;
68 if (xmlStrEqual(cur->name, BAD_CAST"head"))
69 goto found_head;
70 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
71 goto found_meta;
72 }
73 cur = cur->next;
74 }
75 if (cur == NULL)
76 return(NULL);
77 cur = cur->children;
78
79 /*
80 * Search the head
81 */
82 while (cur != NULL) {
83 if (cur->name != NULL) {
84 if (xmlStrEqual(cur->name, BAD_CAST"head"))
85 break;
86 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
87 goto found_meta;
88 }
89 cur = cur->next;
90 }
91 if (cur == NULL)
92 return(NULL);
93found_head:
94 cur = cur->children;
95
96 /*
97 * Search the meta elements
98 */
99found_meta:
100 while (cur != NULL) {
101 if (cur->name != NULL) {
102 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
103 xmlAttrPtr attr = cur->properties;
104 int http;
105 const xmlChar *value;
106
107 content = NULL;
108 http = 0;
109 while (attr != NULL) {
110 if ((attr->children != NULL) &&
111 (attr->children->type == XML_TEXT_NODE) &&
112 (attr->children->next == NULL)) {
113#ifndef XML_USE_BUFFER_CONTENT
114 value = attr->children->content;
115#else
116 value = xmlBufferContent(attr->children->content);
117#endif
118 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
119 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
120 http = 1;
121 else if ((value != NULL)
122 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
123 content = value;
124 if ((http != 0) && (content != NULL))
125 goto found_content;
126 }
127 attr = attr->next;
128 }
129 }
130 }
131 cur = cur->next;
132 }
133 return(NULL);
134
135found_content:
136 encoding = xmlStrstr(content, BAD_CAST"charset=");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"Charset=");
139 if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
141 if (encoding != NULL) {
142 encoding += 8;
143 } else {
144 encoding = xmlStrstr(content, BAD_CAST"charset =");
145 if (encoding == NULL)
146 encoding = xmlStrstr(content, BAD_CAST"Charset =");
147 if (encoding == NULL)
148 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
149 if (encoding != NULL)
150 encoding += 9;
151 }
152 if (encoding != NULL) {
153 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
154 }
155 return(encoding);
156}
157
158/**
159 * htmlSetMetaEncoding:
160 * @doc: the document
161 * @encoding: the encoding string
162 *
163 * Sets the current encoding in the Meta tags
164 * NOTE: this will not change the document content encoding, just
165 * the META flag associated.
166 *
167 * Returns 0 in case of success and -1 in case of error
168 */
169int
170htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
171 htmlNodePtr cur, meta;
172 const xmlChar *content;
173 char newcontent[100];
174
175
176 if (doc == NULL)
177 return(-1);
178
179 if (encoding != NULL) {
180#ifdef HAVE_SNPRINTF
181 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
182 encoding);
183#else
184 sprintf(newcontent, "text/html; charset=%s", encoding);
185#endif
186 newcontent[sizeof(newcontent) - 1] = 0;
187 }
188
189 cur = doc->children;
190
191 /*
192 * Search the html
193 */
194 while (cur != NULL) {
195 if (cur->name != NULL) {
196 if (xmlStrEqual(cur->name, BAD_CAST"html"))
197 break;
198 if (xmlStrEqual(cur->name, BAD_CAST"body")) {
199 if (encoding == NULL)
200 return(0);
201 meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL);
202 xmlAddPrevSibling(cur, meta);
203 cur = meta;
204 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
205 xmlAddChild(cur, meta);
206 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
207 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
208 return(0);
209 }
210 if (xmlStrEqual(cur->name, BAD_CAST"head"))
211 goto found_head;
212 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
213 goto found_meta;
214 }
215 cur = cur->next;
216 }
217 if (cur == NULL)
218 return(-1);
219 cur = cur->children;
220
221 /*
222 * Search the head
223 */
224 while (cur != NULL) {
225 if (cur->name != NULL) {
226 if (xmlStrEqual(cur->name, BAD_CAST"head"))
227 break;
228 if (xmlStrEqual(cur->name, BAD_CAST"body")) {
229 if (encoding == NULL)
230 return(0);
231 meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL);
232 xmlAddPrevSibling(cur, meta);
233 cur = meta;
234 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
235 xmlAddChild(cur, meta);
236 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
237 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
238 return(0);
239 }
240 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
241 goto found_meta;
242 }
243 cur = cur->next;
244 }
245 if (cur == NULL)
246 return(-1);
247found_head:
248 if (cur->children == NULL) {
249 if (encoding == NULL)
250 return(0);
251 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
252 xmlAddChild(cur, meta);
253 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
254 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
255 return(0);
256 }
257 cur = cur->children;
258
259found_meta:
260 if (encoding != NULL) {
261 /*
262 * Create a new Meta element with the right aatributes
263 */
264
265 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
266 xmlAddPrevSibling(cur, meta);
267 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
268 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
269 }
270
271 /*
272 * Search and destroy all the remaining the meta elements carrying
273 * encoding informations
274 */
275 while (cur != NULL) {
276 if (cur->name != NULL) {
277 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
278 xmlAttrPtr attr = cur->properties;
279 int http;
280 const xmlChar *value;
281
282 content = NULL;
283 http = 0;
284 while (attr != NULL) {
285 if ((attr->children != NULL) &&
286 (attr->children->type == XML_TEXT_NODE) &&
287 (attr->children->next == NULL)) {
288#ifndef XML_USE_BUFFER_CONTENT
289 value = attr->children->content;
290#else
291 value = xmlBufferContent(attr->children->content);
292#endif
293 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
294 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
295 http = 1;
296 else if ((value != NULL)
297 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
298 content = value;
299 if ((http != 0) && (content != NULL))
300 break;
301 }
302 attr = attr->next;
303 }
304 if ((http != 0) && (content != NULL)) {
305 meta = cur;
306 cur = cur->next;
307 xmlUnlinkNode(meta);
308 xmlFreeNode(meta);
309 continue;
310 }
311
312 }
313 }
314 cur = cur->next;
315 }
316 return(0);
317}
318
319/************************************************************************
320 * *
321 * Dumping HTML tree content to a simple buffer *
322 * *
323 ************************************************************************/
324
325static void
326htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
327
328/**
329 * htmlDtdDump:
330 * @buf: the HTML buffer output
331 * @doc: the document
332 *
333 * Dump the HTML document DTD, if any.
334 */
335static void
336htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
337 xmlDtdPtr cur = doc->intSubset;
338
339 if (cur == NULL) {
340 xmlGenericError(xmlGenericErrorContext,
341 "htmlDtdDump : no internal subset\n");
342 return;
343 }
344 xmlBufferWriteChar(buf, "<!DOCTYPE ");
345 xmlBufferWriteCHAR(buf, cur->name);
346 if (cur->ExternalID != NULL) {
347 xmlBufferWriteChar(buf, " PUBLIC ");
348 xmlBufferWriteQuotedString(buf, cur->ExternalID);
349 if (cur->SystemID != NULL) {
350 xmlBufferWriteChar(buf, " ");
351 xmlBufferWriteQuotedString(buf, cur->SystemID);
352 }
353 } else if (cur->SystemID != NULL) {
354 xmlBufferWriteChar(buf, " SYSTEM ");
355 xmlBufferWriteQuotedString(buf, cur->SystemID);
356 }
357 xmlBufferWriteChar(buf, ">\n");
358}
359
360/**
361 * htmlAttrDump:
362 * @buf: the HTML buffer output
363 * @doc: the document
364 * @cur: the attribute pointer
365 *
366 * Dump an HTML attribute
367 */
368static void
369htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
370 xmlChar *value;
371
372 if (cur == NULL) {
373 xmlGenericError(xmlGenericErrorContext,
374 "htmlAttrDump : property == NULL\n");
375 return;
376 }
377 xmlBufferWriteChar(buf, " ");
378 xmlBufferWriteCHAR(buf, cur->name);
379 if (cur->children != NULL) {
380 value = xmlNodeListGetString(doc, cur->children, 0);
381 if (value) {
382 xmlBufferWriteChar(buf, "=");
383 xmlBufferWriteQuotedString(buf, value);
384 xmlFree(value);
385 } else {
386 xmlBufferWriteChar(buf, "=\"\"");
387 }
388 }
389}
390
391/**
392 * htmlAttrListDump:
393 * @buf: the HTML buffer output
394 * @doc: the document
395 * @cur: the first attribute pointer
396 *
397 * Dump a list of HTML attributes
398 */
399static void
400htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
401 if (cur == NULL) {
402 xmlGenericError(xmlGenericErrorContext,
403 "htmlAttrListDump : property == NULL\n");
404 return;
405 }
406 while (cur != NULL) {
407 htmlAttrDump(buf, doc, cur);
408 cur = cur->next;
409 }
410}
411
412
413void
414htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
415/**
416 * htmlNodeListDump:
417 * @buf: the HTML buffer output
418 * @doc: the document
419 * @cur: the first node
420 *
421 * Dump an HTML node list, recursive behaviour,children are printed too.
422 */
423static void
424htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
425 if (cur == NULL) {
426 xmlGenericError(xmlGenericErrorContext,
427 "htmlNodeListDump : node == NULL\n");
428 return;
429 }
430 while (cur != NULL) {
431 htmlNodeDump(buf, doc, cur);
432 cur = cur->next;
433 }
434}
435
436/**
437 * htmlNodeDump:
438 * @buf: the HTML buffer output
439 * @doc: the document
440 * @cur: the current node
441 *
442 * Dump an HTML node, recursive behaviour,children are printed too.
443 */
444void
445htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
446 htmlElemDescPtr info;
447
448 if (cur == NULL) {
449 xmlGenericError(xmlGenericErrorContext,
450 "htmlNodeDump : node == NULL\n");
451 return;
452 }
453 /*
454 * Special cases.
455 */
456 if (cur->type == XML_DTD_NODE)
457 return;
458 if (cur->type == XML_HTML_DOCUMENT_NODE) {
459 htmlDocContentDump(buf, (xmlDocPtr) cur);
460 return;
461 }
462 if (cur->type == HTML_TEXT_NODE) {
463 if (cur->content != NULL) {
464 if ((cur->name == xmlStringText) ||
465 (cur->name != xmlStringTextNoenc)) {
466 xmlChar *buffer;
467
468#ifndef XML_USE_BUFFER_CONTENT
469 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
470#else
471 buffer = xmlEncodeEntitiesReentrant(doc,
472 xmlBufferContent(cur->content));
473#endif
474 if (buffer != NULL) {
475 xmlBufferWriteCHAR(buf, buffer);
476 xmlFree(buffer);
477 }
478 } else {
479 xmlBufferWriteCHAR(buf, cur->content);
480 }
481 }
482 return;
483 }
484 if (cur->type == HTML_COMMENT_NODE) {
485 if (cur->content != NULL) {
486 xmlBufferWriteChar(buf, "<!--");
487#ifndef XML_USE_BUFFER_CONTENT
488 xmlBufferWriteCHAR(buf, cur->content);
489#else
490 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
491#endif
492 xmlBufferWriteChar(buf, "-->");
493 }
494 return;
495 }
496 if (cur->type == HTML_ENTITY_REF_NODE) {
497 xmlBufferWriteChar(buf, "&");
498 xmlBufferWriteCHAR(buf, cur->name);
499 xmlBufferWriteChar(buf, ";");
500 return;
501 }
502
503 /*
504 * Get specific HTmL info for taht node.
505 */
506 info = htmlTagLookup(cur->name);
507
508 xmlBufferWriteChar(buf, "<");
509 xmlBufferWriteCHAR(buf, cur->name);
510 if (cur->properties != NULL)
511 htmlAttrListDump(buf, doc, cur->properties);
512
513 if ((info != NULL) && (info->empty)) {
514 xmlBufferWriteChar(buf, ">");
515 if (cur->next != NULL) {
516 if ((cur->next->type != HTML_TEXT_NODE) &&
517 (cur->next->type != HTML_ENTITY_REF_NODE))
518 xmlBufferWriteChar(buf, "\n");
519 }
520 return;
521 }
522 if ((cur->content == NULL) && (cur->children == NULL)) {
523 if ((info != NULL) && (info->endTag != 0))
524 xmlBufferWriteChar(buf, ">");
525 else {
526 xmlBufferWriteChar(buf, "></");
527 xmlBufferWriteCHAR(buf, cur->name);
528 xmlBufferWriteChar(buf, ">");
529 }
530 if (cur->next != NULL) {
531 if ((cur->next->type != HTML_TEXT_NODE) &&
532 (cur->next->type != HTML_ENTITY_REF_NODE))
533 xmlBufferWriteChar(buf, "\n");
534 }
535 return;
536 }
537 xmlBufferWriteChar(buf, ">");
538 if (cur->content != NULL) {
539 xmlChar *buffer;
540
541#ifndef XML_USE_BUFFER_CONTENT
542 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
543#else
544 buffer = xmlEncodeEntitiesReentrant(doc,
545 xmlBufferContent(cur->content));
546#endif
547 if (buffer != NULL) {
548 xmlBufferWriteCHAR(buf, buffer);
549 xmlFree(buffer);
550 }
551 }
552 if (cur->children != NULL) {
553 if ((cur->children->type != HTML_TEXT_NODE) &&
554 (cur->children->type != HTML_ENTITY_REF_NODE) &&
555 (cur->children != cur->last))
556 xmlBufferWriteChar(buf, "\n");
557 htmlNodeListDump(buf, doc, cur->children);
558 if ((cur->last->type != HTML_TEXT_NODE) &&
559 (cur->last->type != HTML_ENTITY_REF_NODE) &&
560 (cur->children != cur->last))
561 xmlBufferWriteChar(buf, "\n");
562 }
563 if (!htmlIsAutoClosed(doc, cur)) {
564 xmlBufferWriteChar(buf, "</");
565 xmlBufferWriteCHAR(buf, cur->name);
566 xmlBufferWriteChar(buf, ">");
567 }
568#if 0
569 if (!htmlIsAutoClosed(doc, cur)) {
570 xmlBufferWriteChar(buf, "</");
571 xmlBufferWriteCHAR(buf, cur->name);
572 xmlBufferWriteChar(buf, ">");
573 }
574#else
575 xmlBufferWriteChar(buf, "</");
576 xmlBufferWriteCHAR(buf, cur->name);
577 xmlBufferWriteChar(buf, ">");
578#endif
579 if (cur->next != NULL) {
580 if ((cur->next->type != HTML_TEXT_NODE) &&
581 (cur->next->type != HTML_ENTITY_REF_NODE))
582 xmlBufferWriteChar(buf, "\n");
583 }
584}
585
586/**
587 * htmlNodeDumpFile:
588 * @out: the FILE pointer
589 * @doc: the document
590 * @cur: the current node
591 *
592 * Dump an HTML node, recursive behaviour,children are printed too.
593 */
594void
595htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
596 xmlBufferPtr buf;
597
598 buf = xmlBufferCreate();
599 if (buf == NULL) return;
600 htmlNodeDump(buf, doc, cur);
601 xmlBufferDump(out, buf);
602 xmlBufferFree(buf);
603}
604
605/**
606 * htmlDocContentDump:
607 * @buf: the HTML buffer output
608 * @cur: the document
609 *
610 * Dump an HTML document.
611 */
612static void
613htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
614 int type;
615
616 /*
617 * force to output the stuff as HTML, especially for entities
618 */
619 type = cur->type;
620 cur->type = XML_HTML_DOCUMENT_NODE;
621 if (cur->intSubset != NULL)
622 htmlDtdDump(buf, cur);
623 else {
624 /* Default to HTML-4.0 transitionnal @@@@ */
625 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
626
627 }
628 if (cur->children != NULL) {
629 htmlNodeListDump(buf, cur, cur->children);
630 }
631 xmlBufferWriteChar(buf, "\n");
632 cur->type = (xmlElementType) type;
633}
634
635/**
636 * htmlDocDumpMemory:
637 * @cur: the document
638 * @mem: OUT: the memory pointer
639 * @size: OUT: the memory lenght
640 *
641 * Dump an HTML document in memory and return the xmlChar * and it's size.
642 * It's up to the caller to free the memory.
643 */
644void
645htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
646 xmlBufferPtr buf;
647
648 if (cur == NULL) {
649#ifdef DEBUG_TREE
650 xmlGenericError(xmlGenericErrorContext,
651 "htmlxmlDocDumpMemory : document == NULL\n");
652#endif
653 *mem = NULL;
654 *size = 0;
655 return;
656 }
657 buf = xmlBufferCreate();
658 if (buf == NULL) {
659 *mem = NULL;
660 *size = 0;
661 return;
662 }
663 htmlDocContentDump(buf, cur);
664 *mem = buf->content;
665 *size = buf->use;
Daniel Veillard48b2f892001-02-25 16:11:03 +0000666 MEM_CLEANUP(buf, sizeof(xmlBuffer));
Owen Taylor3473f882001-02-23 17:55:21 +0000667 xmlFree(buf);
668}
669
670
671/************************************************************************
672 * *
673 * Dumping HTML tree content to an I/O output buffer *
674 * *
675 ************************************************************************/
676
677/**
678 * htmlDtdDump:
679 * @buf: the HTML buffer output
680 * @doc: the document
681 * @encoding: the encoding string
682 *
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000683 * TODO: check whether encoding is needed
684 *
Owen Taylor3473f882001-02-23 17:55:21 +0000685 * Dump the HTML document DTD, if any.
686 */
687static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000688htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000689 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000690 xmlDtdPtr cur = doc->intSubset;
691
692 if (cur == NULL) {
693 xmlGenericError(xmlGenericErrorContext,
694 "htmlDtdDump : no internal subset\n");
695 return;
696 }
697 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
698 xmlOutputBufferWriteString(buf, (const char *)cur->name);
699 if (cur->ExternalID != NULL) {
700 xmlOutputBufferWriteString(buf, " PUBLIC ");
701 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
702 if (cur->SystemID != NULL) {
703 xmlOutputBufferWriteString(buf, " ");
704 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
705 }
706 } else if (cur->SystemID != NULL) {
707 xmlOutputBufferWriteString(buf, " SYSTEM ");
708 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
709 }
710 xmlOutputBufferWriteString(buf, ">\n");
711}
712
713/**
714 * htmlAttrDump:
715 * @buf: the HTML buffer output
716 * @doc: the document
717 * @cur: the attribute pointer
718 * @encoding: the encoding string
719 *
720 * Dump an HTML attribute
721 */
722static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000723htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000724 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000725 xmlChar *value;
726
727 if (cur == NULL) {
728 xmlGenericError(xmlGenericErrorContext,
729 "htmlAttrDump : property == NULL\n");
730 return;
731 }
732 xmlOutputBufferWriteString(buf, " ");
733 xmlOutputBufferWriteString(buf, (const char *)cur->name);
734 if (cur->children != NULL) {
735 value = xmlNodeListGetString(doc, cur->children, 0);
736 if (value) {
737 xmlOutputBufferWriteString(buf, "=");
738 xmlBufferWriteQuotedString(buf->buffer, value);
739 xmlFree(value);
740 } else {
741 xmlOutputBufferWriteString(buf, "=\"\"");
742 }
743 }
744}
745
746/**
747 * htmlAttrListDump:
748 * @buf: the HTML buffer output
749 * @doc: the document
750 * @cur: the first attribute pointer
751 * @encoding: the encoding string
752 *
753 * Dump a list of HTML attributes
754 */
755static void
756htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
757 if (cur == NULL) {
758 xmlGenericError(xmlGenericErrorContext,
759 "htmlAttrListDump : property == NULL\n");
760 return;
761 }
762 while (cur != NULL) {
763 htmlAttrDumpOutput(buf, doc, cur, encoding);
764 cur = cur->next;
765 }
766}
767
768
769void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
770 xmlNodePtr cur, const char *encoding);
771
772/**
773 * htmlNodeListDump:
774 * @buf: the HTML buffer output
775 * @doc: the document
776 * @cur: the first node
777 * @encoding: the encoding string
778 *
779 * Dump an HTML node list, recursive behaviour,children are printed too.
780 */
781static void
782htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
783 if (cur == NULL) {
784 xmlGenericError(xmlGenericErrorContext,
785 "htmlNodeListDump : node == NULL\n");
786 return;
787 }
788 while (cur != NULL) {
789 htmlNodeDumpOutput(buf, doc, cur, encoding);
790 cur = cur->next;
791 }
792}
793
794/**
795 * htmlNodeDumpOutput:
796 * @buf: the HTML buffer output
797 * @doc: the document
798 * @cur: the current node
799 * @encoding: the encoding string
800 *
801 * Dump an HTML node, recursive behaviour,children are printed too.
802 */
803void
804htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
805 htmlElemDescPtr info;
806
807 if (cur == NULL) {
808 xmlGenericError(xmlGenericErrorContext,
809 "htmlNodeDump : node == NULL\n");
810 return;
811 }
812 /*
813 * Special cases.
814 */
815 if (cur->type == XML_DTD_NODE)
816 return;
817 if (cur->type == XML_HTML_DOCUMENT_NODE) {
818 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
819 return;
820 }
821 if (cur->type == HTML_TEXT_NODE) {
822 if (cur->content != NULL) {
823 if ((cur->name == xmlStringText) ||
824 (cur->name != xmlStringTextNoenc)) {
825 xmlChar *buffer;
826
827#ifndef XML_USE_BUFFER_CONTENT
828 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
829#else
830 buffer = xmlEncodeEntitiesReentrant(doc,
831 xmlBufferContent(cur->content));
832#endif
833 if (buffer != NULL) {
834 xmlOutputBufferWriteString(buf, (const char *)buffer);
835 xmlFree(buffer);
836 }
837 } else {
838 xmlOutputBufferWriteString(buf, (const char *)cur->content);
839 }
840 }
841 return;
842 }
843 if (cur->type == HTML_COMMENT_NODE) {
844 if (cur->content != NULL) {
845 xmlOutputBufferWriteString(buf, "<!--");
846#ifndef XML_USE_BUFFER_CONTENT
847 xmlOutputBufferWriteString(buf, (const char *)cur->content);
848#else
849 xmlOutputBufferWriteString(buf, (const char *)
850 xmlBufferContent(cur->content));
851#endif
852 xmlOutputBufferWriteString(buf, "-->");
853 }
854 return;
855 }
856 if (cur->type == HTML_ENTITY_REF_NODE) {
857 xmlOutputBufferWriteString(buf, "&");
858 xmlOutputBufferWriteString(buf, (const char *)cur->name);
859 xmlOutputBufferWriteString(buf, ";");
860 return;
861 }
862 if (cur->type == HTML_PRESERVE_NODE) {
863 if (cur->content != NULL) {
864#ifndef XML_USE_BUFFER_CONTENT
865 xmlOutputBufferWriteString(buf, (const char *)cur->content);
866#else
867 xmlOutputBufferWriteString(buf, (const char *)
868 xmlBufferContent(cur->content));
869#endif
870 }
871 return;
872 }
873
874 /*
875 * Get specific HTmL info for taht node.
876 */
877 info = htmlTagLookup(cur->name);
878
879 xmlOutputBufferWriteString(buf, "<");
880 xmlOutputBufferWriteString(buf, (const char *)cur->name);
881 if (cur->properties != NULL)
882 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
883
884 if ((info != NULL) && (info->empty)) {
885 xmlOutputBufferWriteString(buf, ">");
886 if (cur->next != NULL) {
887 if ((cur->next->type != HTML_TEXT_NODE) &&
888 (cur->next->type != HTML_ENTITY_REF_NODE))
889 xmlOutputBufferWriteString(buf, "\n");
890 }
891 return;
892 }
893 if ((cur->content == NULL) && (cur->children == NULL)) {
894 if ((info != NULL) && (info->saveEndTag != 0) &&
895 (strcmp(info->name, "html")) && (strcmp(info->name, "body"))) {
896 xmlOutputBufferWriteString(buf, ">");
897 } else {
898 xmlOutputBufferWriteString(buf, "></");
899 xmlOutputBufferWriteString(buf, (const char *)cur->name);
900 xmlOutputBufferWriteString(buf, ">");
901 }
902 if (cur->next != NULL) {
903 if ((cur->next->type != HTML_TEXT_NODE) &&
904 (cur->next->type != HTML_ENTITY_REF_NODE))
905 xmlOutputBufferWriteString(buf, "\n");
906 }
907 return;
908 }
909 xmlOutputBufferWriteString(buf, ">");
910 if (cur->content != NULL) {
911 /*
912 * Uses the OutputBuffer property to automatically convert
913 * invalids to charrefs
914 */
915
916#ifndef XML_USE_BUFFER_CONTENT
917 xmlOutputBufferWriteString(buf, (const char *) cur->content);
918#else
919 xmlOutputBufferWriteString(buf,
920 (const char *) xmlBufferContent(cur->content));
921#endif
922 }
923 if (cur->children != NULL) {
924 if ((cur->children->type != HTML_TEXT_NODE) &&
925 (cur->children->type != HTML_ENTITY_REF_NODE) &&
926 (cur->children != cur->last))
927 xmlOutputBufferWriteString(buf, "\n");
928 htmlNodeListDumpOutput(buf, doc, cur->children, encoding);
929 if ((cur->last->type != HTML_TEXT_NODE) &&
930 (cur->last->type != HTML_ENTITY_REF_NODE) &&
931 (cur->children != cur->last))
932 xmlOutputBufferWriteString(buf, "\n");
933 }
934#if 0
935 if (!htmlIsAutoClosed(doc, cur)) {
936 xmlOutputBufferWriteString(buf, "</");
937 xmlOutputBufferWriteString(buf, (const char *)cur->name);
938 xmlOutputBufferWriteString(buf, ">");
939 }
940#else
941 xmlOutputBufferWriteString(buf, "</");
942 xmlOutputBufferWriteString(buf, (const char *)cur->name);
943 xmlOutputBufferWriteString(buf, ">");
944#endif
945 if (cur->next != NULL) {
946 if ((cur->next->type != HTML_TEXT_NODE) &&
947 (cur->next->type != HTML_ENTITY_REF_NODE))
948 xmlOutputBufferWriteString(buf, "\n");
949 }
950}
951
952/**
953 * htmlDocContentDump:
954 * @buf: the HTML buffer output
955 * @cur: the document
956 * @encoding: the encoding string
957 *
958 * Dump an HTML document.
959 */
960void
961htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding) {
962 int type;
963
964 /*
965 * force to output the stuff as HTML, especially for entities
966 */
967 type = cur->type;
968 cur->type = XML_HTML_DOCUMENT_NODE;
969 if (cur->intSubset != NULL)
970 htmlDtdDumpOutput(buf, cur, NULL);
971 else {
972 /* Default to HTML-4.0 transitionnal @@@@ */
973 xmlOutputBufferWriteString(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">\n");
974
975 }
976 if (cur->children != NULL) {
977 htmlNodeListDumpOutput(buf, cur, cur->children, encoding);
978 }
979 xmlOutputBufferWriteString(buf, "\n");
980 cur->type = (xmlElementType) type;
981}
982
983
984/************************************************************************
985 * *
986 * Saving functions front-ends *
987 * *
988 ************************************************************************/
989
990/**
991 * htmlDocDump:
992 * @f: the FILE*
993 * @cur: the document
994 *
995 * Dump an HTML document to an open FILE.
996 *
997 * returns: the number of byte written or -1 in case of failure.
998 */
999int
1000htmlDocDump(FILE *f, xmlDocPtr cur) {
1001 xmlOutputBufferPtr buf;
1002 xmlCharEncodingHandlerPtr handler = NULL;
1003 const char *encoding;
1004 int ret;
1005
1006 if (cur == NULL) {
1007#ifdef DEBUG_TREE
1008 xmlGenericError(xmlGenericErrorContext,
1009 "htmlDocDump : document == NULL\n");
1010#endif
1011 return(-1);
1012 }
1013
1014 encoding = (const char *) htmlGetMetaEncoding(cur);
1015
1016 if (encoding != NULL) {
1017 xmlCharEncoding enc;
1018
1019 enc = xmlParseCharEncoding(encoding);
1020 if (enc != cur->charset) {
1021 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1022 /*
1023 * Not supported yet
1024 */
1025 return(-1);
1026 }
1027
1028 handler = xmlFindCharEncodingHandler(encoding);
1029 if (handler == NULL)
1030 return(-1);
1031 }
1032 }
1033
1034 /*
1035 * Fallback to HTML or ASCII when the encoding is unspecified
1036 */
1037 if (handler == NULL)
1038 handler = xmlFindCharEncodingHandler("HTML");
1039 if (handler == NULL)
1040 handler = xmlFindCharEncodingHandler("ascii");
1041
1042 buf = xmlOutputBufferCreateFile(f, handler);
1043 if (buf == NULL) return(-1);
1044 htmlDocContentDumpOutput(buf, cur, NULL);
1045
1046 ret = xmlOutputBufferClose(buf);
1047 return(ret);
1048}
1049
1050/**
1051 * htmlSaveFile:
1052 * @filename: the filename (or URL)
1053 * @cur: the document
1054 *
1055 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1056 * used.
1057 * returns: the number of byte written or -1 in case of failure.
1058 */
1059int
1060htmlSaveFile(const char *filename, xmlDocPtr cur) {
1061 xmlOutputBufferPtr buf;
1062 xmlCharEncodingHandlerPtr handler = NULL;
1063 const char *encoding;
1064 int ret;
1065
1066 encoding = (const char *) htmlGetMetaEncoding(cur);
1067
1068 if (encoding != NULL) {
1069 xmlCharEncoding enc;
1070
1071 enc = xmlParseCharEncoding(encoding);
1072 if (enc != cur->charset) {
1073 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1074 /*
1075 * Not supported yet
1076 */
1077 return(-1);
1078 }
1079
1080 handler = xmlFindCharEncodingHandler(encoding);
1081 if (handler == NULL)
1082 return(-1);
1083 }
1084 }
1085
1086 /*
1087 * Fallback to HTML or ASCII when the encoding is unspecified
1088 */
1089 if (handler == NULL)
1090 handler = xmlFindCharEncodingHandler("HTML");
1091 if (handler == NULL)
1092 handler = xmlFindCharEncodingHandler("ascii");
1093
1094 /*
1095 * save the content to a temp buffer.
1096 */
1097 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1098 if (buf == NULL) return(0);
1099
1100 htmlDocContentDumpOutput(buf, cur, NULL);
1101
1102 ret = xmlOutputBufferClose(buf);
1103 return(ret);
1104}
1105
1106/**
1107 * htmlSaveFileEnc:
1108 * @filename: the filename
1109 * @cur: the document
1110 *
1111 * Dump an HTML document to a file using a given encoding.
1112 *
1113 * returns: the number of byte written or -1 in case of failure.
1114 */
1115int
1116htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1117 xmlOutputBufferPtr buf;
1118 xmlCharEncodingHandlerPtr handler = NULL;
1119 int ret;
1120
1121 if (encoding != NULL) {
1122 xmlCharEncoding enc;
1123
1124 enc = xmlParseCharEncoding(encoding);
1125 if (enc != cur->charset) {
1126 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1127 /*
1128 * Not supported yet
1129 */
1130 return(-1);
1131 }
1132
1133 handler = xmlFindCharEncodingHandler(encoding);
1134 if (handler == NULL)
1135 return(-1);
1136 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1137 }
1138 }
1139
1140 /*
1141 * Fallback to HTML or ASCII when the encoding is unspecified
1142 */
1143 if (handler == NULL)
1144 handler = xmlFindCharEncodingHandler("HTML");
1145 if (handler == NULL)
1146 handler = xmlFindCharEncodingHandler("ascii");
1147
1148 /*
1149 * save the content to a temp buffer.
1150 */
1151 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1152 if (buf == NULL) return(0);
1153
1154 htmlDocContentDumpOutput(buf, cur, encoding);
1155
1156 ret = xmlOutputBufferClose(buf);
1157 return(ret);
1158}
1159#endif /* LIBXML_HTML_ENABLED */