blob: 66ccdca3403a9c517ea161700de6331d2b059db5 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
3 *
4 * See Copyright for the status of this software.
5 *
6 * Daniel.Veillard@w3.org
7 */
8
9
10#ifdef WIN32
11#include "win32config.h"
12#else
13#include "config.h"
14#endif
15
16#include <libxml/xmlversion.h>
17#ifdef LIBXML_HTML_ENABLED
18
19#include <stdio.h>
20#include <string.h> /* for memset() only ! */
21
22#ifdef HAVE_CTYPE_H
23#include <ctype.h>
24#endif
25#ifdef HAVE_STDLIB_H
26#include <stdlib.h>
27#endif
28
29#include <libxml/xmlmemory.h>
30#include <libxml/HTMLparser.h>
31#include <libxml/HTMLtree.h>
32#include <libxml/entities.h>
33#include <libxml/valid.h>
34#include <libxml/xmlerror.h>
35#include <libxml/parserInternals.h>
36
37/************************************************************************
38 * *
39 * Getting/Setting encoding meta tags *
40 * *
41 ************************************************************************/
42
43/**
44 * htmlGetMetaEncoding:
45 * @doc: the document
46 *
47 * Encoding definition lookup in the Meta tags
48 *
49 * Returns the current encoding as flagged in the HTML source
50 */
51const xmlChar *
52htmlGetMetaEncoding(htmlDocPtr doc) {
53 htmlNodePtr cur;
54 const xmlChar *content;
55 const xmlChar *encoding;
56
57 if (doc == NULL)
58 return(NULL);
59 cur = doc->children;
60
61 /*
62 * Search the html
63 */
64 while (cur != NULL) {
65 if (cur->name != NULL) {
66 if (xmlStrEqual(cur->name, BAD_CAST"html"))
67 break;
68 if (xmlStrEqual(cur->name, BAD_CAST"head"))
69 goto found_head;
70 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
71 goto found_meta;
72 }
73 cur = cur->next;
74 }
75 if (cur == NULL)
76 return(NULL);
77 cur = cur->children;
78
79 /*
80 * Search the head
81 */
82 while (cur != NULL) {
83 if (cur->name != NULL) {
84 if (xmlStrEqual(cur->name, BAD_CAST"head"))
85 break;
86 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
87 goto found_meta;
88 }
89 cur = cur->next;
90 }
91 if (cur == NULL)
92 return(NULL);
93found_head:
94 cur = cur->children;
95
96 /*
97 * Search the meta elements
98 */
99found_meta:
100 while (cur != NULL) {
101 if (cur->name != NULL) {
102 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
103 xmlAttrPtr attr = cur->properties;
104 int http;
105 const xmlChar *value;
106
107 content = NULL;
108 http = 0;
109 while (attr != NULL) {
110 if ((attr->children != NULL) &&
111 (attr->children->type == XML_TEXT_NODE) &&
112 (attr->children->next == NULL)) {
113#ifndef XML_USE_BUFFER_CONTENT
114 value = attr->children->content;
115#else
116 value = xmlBufferContent(attr->children->content);
117#endif
118 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
119 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
120 http = 1;
121 else if ((value != NULL)
122 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
123 content = value;
124 if ((http != 0) && (content != NULL))
125 goto found_content;
126 }
127 attr = attr->next;
128 }
129 }
130 }
131 cur = cur->next;
132 }
133 return(NULL);
134
135found_content:
136 encoding = xmlStrstr(content, BAD_CAST"charset=");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"Charset=");
139 if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
141 if (encoding != NULL) {
142 encoding += 8;
143 } else {
144 encoding = xmlStrstr(content, BAD_CAST"charset =");
145 if (encoding == NULL)
146 encoding = xmlStrstr(content, BAD_CAST"Charset =");
147 if (encoding == NULL)
148 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
149 if (encoding != NULL)
150 encoding += 9;
151 }
152 if (encoding != NULL) {
153 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
154 }
155 return(encoding);
156}
157
158/**
159 * htmlSetMetaEncoding:
160 * @doc: the document
161 * @encoding: the encoding string
162 *
163 * Sets the current encoding in the Meta tags
164 * NOTE: this will not change the document content encoding, just
165 * the META flag associated.
166 *
167 * Returns 0 in case of success and -1 in case of error
168 */
169int
170htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
171 htmlNodePtr cur, meta;
172 const xmlChar *content;
173 char newcontent[100];
174
175
176 if (doc == NULL)
177 return(-1);
178
179 if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000180 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
181 encoding);
Owen Taylor3473f882001-02-23 17:55:21 +0000182 newcontent[sizeof(newcontent) - 1] = 0;
183 }
184
185 cur = doc->children;
186
187 /*
188 * Search the html
189 */
190 while (cur != NULL) {
191 if (cur->name != NULL) {
192 if (xmlStrEqual(cur->name, BAD_CAST"html"))
193 break;
194 if (xmlStrEqual(cur->name, BAD_CAST"body")) {
195 if (encoding == NULL)
196 return(0);
197 meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL);
198 xmlAddPrevSibling(cur, meta);
199 cur = meta;
200 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
201 xmlAddChild(cur, meta);
202 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
203 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
204 return(0);
205 }
206 if (xmlStrEqual(cur->name, BAD_CAST"head"))
207 goto found_head;
208 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
209 goto found_meta;
210 }
211 cur = cur->next;
212 }
213 if (cur == NULL)
214 return(-1);
215 cur = cur->children;
216
217 /*
218 * Search the head
219 */
220 while (cur != NULL) {
221 if (cur->name != NULL) {
222 if (xmlStrEqual(cur->name, BAD_CAST"head"))
223 break;
224 if (xmlStrEqual(cur->name, BAD_CAST"body")) {
225 if (encoding == NULL)
226 return(0);
227 meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL);
228 xmlAddPrevSibling(cur, meta);
229 cur = meta;
230 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
231 xmlAddChild(cur, meta);
232 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
233 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
234 return(0);
235 }
236 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
237 goto found_meta;
238 }
239 cur = cur->next;
240 }
241 if (cur == NULL)
242 return(-1);
243found_head:
244 if (cur->children == NULL) {
245 if (encoding == NULL)
246 return(0);
247 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
248 xmlAddChild(cur, meta);
249 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
250 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
251 return(0);
252 }
253 cur = cur->children;
254
255found_meta:
256 if (encoding != NULL) {
257 /*
258 * Create a new Meta element with the right aatributes
259 */
260
261 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
262 xmlAddPrevSibling(cur, meta);
263 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
264 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
265 }
266
267 /*
268 * Search and destroy all the remaining the meta elements carrying
269 * encoding informations
270 */
271 while (cur != NULL) {
272 if (cur->name != NULL) {
273 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
274 xmlAttrPtr attr = cur->properties;
275 int http;
276 const xmlChar *value;
277
278 content = NULL;
279 http = 0;
280 while (attr != NULL) {
281 if ((attr->children != NULL) &&
282 (attr->children->type == XML_TEXT_NODE) &&
283 (attr->children->next == NULL)) {
284#ifndef XML_USE_BUFFER_CONTENT
285 value = attr->children->content;
286#else
287 value = xmlBufferContent(attr->children->content);
288#endif
289 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
290 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
291 http = 1;
292 else if ((value != NULL)
293 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
294 content = value;
295 if ((http != 0) && (content != NULL))
296 break;
297 }
298 attr = attr->next;
299 }
300 if ((http != 0) && (content != NULL)) {
301 meta = cur;
302 cur = cur->next;
303 xmlUnlinkNode(meta);
304 xmlFreeNode(meta);
305 continue;
306 }
307
308 }
309 }
310 cur = cur->next;
311 }
312 return(0);
313}
314
315/************************************************************************
316 * *
317 * Dumping HTML tree content to a simple buffer *
318 * *
319 ************************************************************************/
320
321static void
322htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
323
324/**
325 * htmlDtdDump:
326 * @buf: the HTML buffer output
327 * @doc: the document
328 *
329 * Dump the HTML document DTD, if any.
330 */
331static void
332htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
333 xmlDtdPtr cur = doc->intSubset;
334
335 if (cur == NULL) {
336 xmlGenericError(xmlGenericErrorContext,
337 "htmlDtdDump : no internal subset\n");
338 return;
339 }
340 xmlBufferWriteChar(buf, "<!DOCTYPE ");
341 xmlBufferWriteCHAR(buf, cur->name);
342 if (cur->ExternalID != NULL) {
343 xmlBufferWriteChar(buf, " PUBLIC ");
344 xmlBufferWriteQuotedString(buf, cur->ExternalID);
345 if (cur->SystemID != NULL) {
346 xmlBufferWriteChar(buf, " ");
347 xmlBufferWriteQuotedString(buf, cur->SystemID);
348 }
349 } else if (cur->SystemID != NULL) {
350 xmlBufferWriteChar(buf, " SYSTEM ");
351 xmlBufferWriteQuotedString(buf, cur->SystemID);
352 }
353 xmlBufferWriteChar(buf, ">\n");
354}
355
356/**
357 * htmlAttrDump:
358 * @buf: the HTML buffer output
359 * @doc: the document
360 * @cur: the attribute pointer
361 *
362 * Dump an HTML attribute
363 */
364static void
365htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
366 xmlChar *value;
367
368 if (cur == NULL) {
369 xmlGenericError(xmlGenericErrorContext,
370 "htmlAttrDump : property == NULL\n");
371 return;
372 }
373 xmlBufferWriteChar(buf, " ");
374 xmlBufferWriteCHAR(buf, cur->name);
375 if (cur->children != NULL) {
376 value = xmlNodeListGetString(doc, cur->children, 0);
377 if (value) {
378 xmlBufferWriteChar(buf, "=");
379 xmlBufferWriteQuotedString(buf, value);
380 xmlFree(value);
381 } else {
382 xmlBufferWriteChar(buf, "=\"\"");
383 }
384 }
385}
386
387/**
388 * htmlAttrListDump:
389 * @buf: the HTML buffer output
390 * @doc: the document
391 * @cur: the first attribute pointer
392 *
393 * Dump a list of HTML attributes
394 */
395static void
396htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
397 if (cur == NULL) {
398 xmlGenericError(xmlGenericErrorContext,
399 "htmlAttrListDump : property == NULL\n");
400 return;
401 }
402 while (cur != NULL) {
403 htmlAttrDump(buf, doc, cur);
404 cur = cur->next;
405 }
406}
407
408
409void
410htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
411/**
412 * htmlNodeListDump:
413 * @buf: the HTML buffer output
414 * @doc: the document
415 * @cur: the first node
416 *
417 * Dump an HTML node list, recursive behaviour,children are printed too.
418 */
419static void
420htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
421 if (cur == NULL) {
422 xmlGenericError(xmlGenericErrorContext,
423 "htmlNodeListDump : node == NULL\n");
424 return;
425 }
426 while (cur != NULL) {
427 htmlNodeDump(buf, doc, cur);
428 cur = cur->next;
429 }
430}
431
432/**
433 * htmlNodeDump:
434 * @buf: the HTML buffer output
435 * @doc: the document
436 * @cur: the current node
437 *
438 * Dump an HTML node, recursive behaviour,children are printed too.
439 */
440void
441htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
442 htmlElemDescPtr info;
443
444 if (cur == NULL) {
445 xmlGenericError(xmlGenericErrorContext,
446 "htmlNodeDump : node == NULL\n");
447 return;
448 }
449 /*
450 * Special cases.
451 */
452 if (cur->type == XML_DTD_NODE)
453 return;
454 if (cur->type == XML_HTML_DOCUMENT_NODE) {
455 htmlDocContentDump(buf, (xmlDocPtr) cur);
456 return;
457 }
458 if (cur->type == HTML_TEXT_NODE) {
459 if (cur->content != NULL) {
460 if ((cur->name == xmlStringText) ||
461 (cur->name != xmlStringTextNoenc)) {
462 xmlChar *buffer;
463
464#ifndef XML_USE_BUFFER_CONTENT
465 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
466#else
467 buffer = xmlEncodeEntitiesReentrant(doc,
468 xmlBufferContent(cur->content));
469#endif
470 if (buffer != NULL) {
471 xmlBufferWriteCHAR(buf, buffer);
472 xmlFree(buffer);
473 }
474 } else {
475 xmlBufferWriteCHAR(buf, cur->content);
476 }
477 }
478 return;
479 }
480 if (cur->type == HTML_COMMENT_NODE) {
481 if (cur->content != NULL) {
482 xmlBufferWriteChar(buf, "<!--");
483#ifndef XML_USE_BUFFER_CONTENT
484 xmlBufferWriteCHAR(buf, cur->content);
485#else
486 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
487#endif
488 xmlBufferWriteChar(buf, "-->");
489 }
490 return;
491 }
492 if (cur->type == HTML_ENTITY_REF_NODE) {
493 xmlBufferWriteChar(buf, "&");
494 xmlBufferWriteCHAR(buf, cur->name);
495 xmlBufferWriteChar(buf, ";");
496 return;
497 }
498
499 /*
500 * Get specific HTmL info for taht node.
501 */
502 info = htmlTagLookup(cur->name);
503
504 xmlBufferWriteChar(buf, "<");
505 xmlBufferWriteCHAR(buf, cur->name);
506 if (cur->properties != NULL)
507 htmlAttrListDump(buf, doc, cur->properties);
508
509 if ((info != NULL) && (info->empty)) {
510 xmlBufferWriteChar(buf, ">");
511 if (cur->next != NULL) {
512 if ((cur->next->type != HTML_TEXT_NODE) &&
513 (cur->next->type != HTML_ENTITY_REF_NODE))
514 xmlBufferWriteChar(buf, "\n");
515 }
516 return;
517 }
518 if ((cur->content == NULL) && (cur->children == NULL)) {
519 if ((info != NULL) && (info->endTag != 0))
520 xmlBufferWriteChar(buf, ">");
521 else {
522 xmlBufferWriteChar(buf, "></");
523 xmlBufferWriteCHAR(buf, cur->name);
524 xmlBufferWriteChar(buf, ">");
525 }
526 if (cur->next != NULL) {
527 if ((cur->next->type != HTML_TEXT_NODE) &&
528 (cur->next->type != HTML_ENTITY_REF_NODE))
529 xmlBufferWriteChar(buf, "\n");
530 }
531 return;
532 }
533 xmlBufferWriteChar(buf, ">");
534 if (cur->content != NULL) {
535 xmlChar *buffer;
536
537#ifndef XML_USE_BUFFER_CONTENT
538 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
539#else
540 buffer = xmlEncodeEntitiesReentrant(doc,
541 xmlBufferContent(cur->content));
542#endif
543 if (buffer != NULL) {
544 xmlBufferWriteCHAR(buf, buffer);
545 xmlFree(buffer);
546 }
547 }
548 if (cur->children != NULL) {
549 if ((cur->children->type != HTML_TEXT_NODE) &&
550 (cur->children->type != HTML_ENTITY_REF_NODE) &&
551 (cur->children != cur->last))
552 xmlBufferWriteChar(buf, "\n");
553 htmlNodeListDump(buf, doc, cur->children);
554 if ((cur->last->type != HTML_TEXT_NODE) &&
555 (cur->last->type != HTML_ENTITY_REF_NODE) &&
556 (cur->children != cur->last))
557 xmlBufferWriteChar(buf, "\n");
558 }
559 if (!htmlIsAutoClosed(doc, cur)) {
560 xmlBufferWriteChar(buf, "</");
561 xmlBufferWriteCHAR(buf, cur->name);
562 xmlBufferWriteChar(buf, ">");
563 }
564#if 0
565 if (!htmlIsAutoClosed(doc, cur)) {
566 xmlBufferWriteChar(buf, "</");
567 xmlBufferWriteCHAR(buf, cur->name);
568 xmlBufferWriteChar(buf, ">");
569 }
570#else
571 xmlBufferWriteChar(buf, "</");
572 xmlBufferWriteCHAR(buf, cur->name);
573 xmlBufferWriteChar(buf, ">");
574#endif
575 if (cur->next != NULL) {
576 if ((cur->next->type != HTML_TEXT_NODE) &&
577 (cur->next->type != HTML_ENTITY_REF_NODE))
578 xmlBufferWriteChar(buf, "\n");
579 }
580}
581
582/**
583 * htmlNodeDumpFile:
584 * @out: the FILE pointer
585 * @doc: the document
586 * @cur: the current node
587 *
588 * Dump an HTML node, recursive behaviour,children are printed too.
589 */
590void
591htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
592 xmlBufferPtr buf;
593
594 buf = xmlBufferCreate();
595 if (buf == NULL) return;
596 htmlNodeDump(buf, doc, cur);
597 xmlBufferDump(out, buf);
598 xmlBufferFree(buf);
599}
600
601/**
602 * htmlDocContentDump:
603 * @buf: the HTML buffer output
604 * @cur: the document
605 *
606 * Dump an HTML document.
607 */
608static void
609htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
610 int type;
611
612 /*
613 * force to output the stuff as HTML, especially for entities
614 */
615 type = cur->type;
616 cur->type = XML_HTML_DOCUMENT_NODE;
617 if (cur->intSubset != NULL)
618 htmlDtdDump(buf, cur);
619 else {
620 /* Default to HTML-4.0 transitionnal @@@@ */
621 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
622
623 }
624 if (cur->children != NULL) {
625 htmlNodeListDump(buf, cur, cur->children);
626 }
627 xmlBufferWriteChar(buf, "\n");
628 cur->type = (xmlElementType) type;
629}
630
631/**
632 * htmlDocDumpMemory:
633 * @cur: the document
634 * @mem: OUT: the memory pointer
635 * @size: OUT: the memory lenght
636 *
637 * Dump an HTML document in memory and return the xmlChar * and it's size.
638 * It's up to the caller to free the memory.
639 */
640void
641htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
642 xmlBufferPtr buf;
643
644 if (cur == NULL) {
645#ifdef DEBUG_TREE
646 xmlGenericError(xmlGenericErrorContext,
647 "htmlxmlDocDumpMemory : document == NULL\n");
648#endif
649 *mem = NULL;
650 *size = 0;
651 return;
652 }
653 buf = xmlBufferCreate();
654 if (buf == NULL) {
655 *mem = NULL;
656 *size = 0;
657 return;
658 }
659 htmlDocContentDump(buf, cur);
660 *mem = buf->content;
661 *size = buf->use;
Owen Taylor3473f882001-02-23 17:55:21 +0000662 xmlFree(buf);
663}
664
665
666/************************************************************************
667 * *
668 * Dumping HTML tree content to an I/O output buffer *
669 * *
670 ************************************************************************/
671
672/**
673 * htmlDtdDump:
674 * @buf: the HTML buffer output
675 * @doc: the document
676 * @encoding: the encoding string
677 *
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000678 * TODO: check whether encoding is needed
679 *
Owen Taylor3473f882001-02-23 17:55:21 +0000680 * Dump the HTML document DTD, if any.
681 */
682static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000683htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000684 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000685 xmlDtdPtr cur = doc->intSubset;
686
687 if (cur == NULL) {
688 xmlGenericError(xmlGenericErrorContext,
689 "htmlDtdDump : no internal subset\n");
690 return;
691 }
692 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
693 xmlOutputBufferWriteString(buf, (const char *)cur->name);
694 if (cur->ExternalID != NULL) {
695 xmlOutputBufferWriteString(buf, " PUBLIC ");
696 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
697 if (cur->SystemID != NULL) {
698 xmlOutputBufferWriteString(buf, " ");
699 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
700 }
701 } else if (cur->SystemID != NULL) {
702 xmlOutputBufferWriteString(buf, " SYSTEM ");
703 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
704 }
705 xmlOutputBufferWriteString(buf, ">\n");
706}
707
708/**
709 * htmlAttrDump:
710 * @buf: the HTML buffer output
711 * @doc: the document
712 * @cur: the attribute pointer
713 * @encoding: the encoding string
714 *
715 * Dump an HTML attribute
716 */
717static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000718htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000719 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000720 xmlChar *value;
721
722 if (cur == NULL) {
723 xmlGenericError(xmlGenericErrorContext,
724 "htmlAttrDump : property == NULL\n");
725 return;
726 }
727 xmlOutputBufferWriteString(buf, " ");
728 xmlOutputBufferWriteString(buf, (const char *)cur->name);
729 if (cur->children != NULL) {
730 value = xmlNodeListGetString(doc, cur->children, 0);
731 if (value) {
732 xmlOutputBufferWriteString(buf, "=");
733 xmlBufferWriteQuotedString(buf->buffer, value);
734 xmlFree(value);
735 } else {
736 xmlOutputBufferWriteString(buf, "=\"\"");
737 }
738 }
739}
740
741/**
742 * htmlAttrListDump:
743 * @buf: the HTML buffer output
744 * @doc: the document
745 * @cur: the first attribute pointer
746 * @encoding: the encoding string
747 *
748 * Dump a list of HTML attributes
749 */
750static void
751htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
752 if (cur == NULL) {
753 xmlGenericError(xmlGenericErrorContext,
754 "htmlAttrListDump : property == NULL\n");
755 return;
756 }
757 while (cur != NULL) {
758 htmlAttrDumpOutput(buf, doc, cur, encoding);
759 cur = cur->next;
760 }
761}
762
763
764void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
765 xmlNodePtr cur, const char *encoding);
766
767/**
768 * htmlNodeListDump:
769 * @buf: the HTML buffer output
770 * @doc: the document
771 * @cur: the first node
772 * @encoding: the encoding string
773 *
774 * Dump an HTML node list, recursive behaviour,children are printed too.
775 */
776static void
777htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
778 if (cur == NULL) {
779 xmlGenericError(xmlGenericErrorContext,
780 "htmlNodeListDump : node == NULL\n");
781 return;
782 }
783 while (cur != NULL) {
784 htmlNodeDumpOutput(buf, doc, cur, encoding);
785 cur = cur->next;
786 }
787}
788
789/**
790 * htmlNodeDumpOutput:
791 * @buf: the HTML buffer output
792 * @doc: the document
793 * @cur: the current node
794 * @encoding: the encoding string
795 *
796 * Dump an HTML node, recursive behaviour,children are printed too.
797 */
798void
799htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
800 htmlElemDescPtr info;
801
802 if (cur == NULL) {
803 xmlGenericError(xmlGenericErrorContext,
804 "htmlNodeDump : node == NULL\n");
805 return;
806 }
807 /*
808 * Special cases.
809 */
810 if (cur->type == XML_DTD_NODE)
811 return;
812 if (cur->type == XML_HTML_DOCUMENT_NODE) {
813 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
814 return;
815 }
816 if (cur->type == HTML_TEXT_NODE) {
817 if (cur->content != NULL) {
818 if ((cur->name == xmlStringText) ||
819 (cur->name != xmlStringTextNoenc)) {
820 xmlChar *buffer;
821
822#ifndef XML_USE_BUFFER_CONTENT
823 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
824#else
825 buffer = xmlEncodeEntitiesReentrant(doc,
826 xmlBufferContent(cur->content));
827#endif
828 if (buffer != NULL) {
829 xmlOutputBufferWriteString(buf, (const char *)buffer);
830 xmlFree(buffer);
831 }
832 } else {
833 xmlOutputBufferWriteString(buf, (const char *)cur->content);
834 }
835 }
836 return;
837 }
838 if (cur->type == HTML_COMMENT_NODE) {
839 if (cur->content != NULL) {
840 xmlOutputBufferWriteString(buf, "<!--");
841#ifndef XML_USE_BUFFER_CONTENT
842 xmlOutputBufferWriteString(buf, (const char *)cur->content);
843#else
844 xmlOutputBufferWriteString(buf, (const char *)
845 xmlBufferContent(cur->content));
846#endif
847 xmlOutputBufferWriteString(buf, "-->");
848 }
849 return;
850 }
851 if (cur->type == HTML_ENTITY_REF_NODE) {
852 xmlOutputBufferWriteString(buf, "&");
853 xmlOutputBufferWriteString(buf, (const char *)cur->name);
854 xmlOutputBufferWriteString(buf, ";");
855 return;
856 }
857 if (cur->type == HTML_PRESERVE_NODE) {
858 if (cur->content != NULL) {
859#ifndef XML_USE_BUFFER_CONTENT
860 xmlOutputBufferWriteString(buf, (const char *)cur->content);
861#else
862 xmlOutputBufferWriteString(buf, (const char *)
863 xmlBufferContent(cur->content));
864#endif
865 }
866 return;
867 }
868
869 /*
870 * Get specific HTmL info for taht node.
871 */
872 info = htmlTagLookup(cur->name);
873
874 xmlOutputBufferWriteString(buf, "<");
875 xmlOutputBufferWriteString(buf, (const char *)cur->name);
876 if (cur->properties != NULL)
877 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
878
879 if ((info != NULL) && (info->empty)) {
880 xmlOutputBufferWriteString(buf, ">");
881 if (cur->next != NULL) {
882 if ((cur->next->type != HTML_TEXT_NODE) &&
883 (cur->next->type != HTML_ENTITY_REF_NODE))
884 xmlOutputBufferWriteString(buf, "\n");
885 }
886 return;
887 }
888 if ((cur->content == NULL) && (cur->children == NULL)) {
889 if ((info != NULL) && (info->saveEndTag != 0) &&
890 (strcmp(info->name, "html")) && (strcmp(info->name, "body"))) {
891 xmlOutputBufferWriteString(buf, ">");
892 } else {
893 xmlOutputBufferWriteString(buf, "></");
894 xmlOutputBufferWriteString(buf, (const char *)cur->name);
895 xmlOutputBufferWriteString(buf, ">");
896 }
897 if (cur->next != NULL) {
898 if ((cur->next->type != HTML_TEXT_NODE) &&
899 (cur->next->type != HTML_ENTITY_REF_NODE))
900 xmlOutputBufferWriteString(buf, "\n");
901 }
902 return;
903 }
904 xmlOutputBufferWriteString(buf, ">");
905 if (cur->content != NULL) {
906 /*
907 * Uses the OutputBuffer property to automatically convert
908 * invalids to charrefs
909 */
910
911#ifndef XML_USE_BUFFER_CONTENT
912 xmlOutputBufferWriteString(buf, (const char *) cur->content);
913#else
914 xmlOutputBufferWriteString(buf,
915 (const char *) xmlBufferContent(cur->content));
916#endif
917 }
918 if (cur->children != NULL) {
919 if ((cur->children->type != HTML_TEXT_NODE) &&
920 (cur->children->type != HTML_ENTITY_REF_NODE) &&
921 (cur->children != cur->last))
922 xmlOutputBufferWriteString(buf, "\n");
923 htmlNodeListDumpOutput(buf, doc, cur->children, encoding);
924 if ((cur->last->type != HTML_TEXT_NODE) &&
925 (cur->last->type != HTML_ENTITY_REF_NODE) &&
926 (cur->children != cur->last))
927 xmlOutputBufferWriteString(buf, "\n");
928 }
929#if 0
930 if (!htmlIsAutoClosed(doc, cur)) {
931 xmlOutputBufferWriteString(buf, "</");
932 xmlOutputBufferWriteString(buf, (const char *)cur->name);
933 xmlOutputBufferWriteString(buf, ">");
934 }
935#else
936 xmlOutputBufferWriteString(buf, "</");
937 xmlOutputBufferWriteString(buf, (const char *)cur->name);
938 xmlOutputBufferWriteString(buf, ">");
939#endif
940 if (cur->next != NULL) {
941 if ((cur->next->type != HTML_TEXT_NODE) &&
942 (cur->next->type != HTML_ENTITY_REF_NODE))
943 xmlOutputBufferWriteString(buf, "\n");
944 }
945}
946
947/**
948 * htmlDocContentDump:
949 * @buf: the HTML buffer output
950 * @cur: the document
951 * @encoding: the encoding string
952 *
953 * Dump an HTML document.
954 */
955void
956htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding) {
957 int type;
958
959 /*
960 * force to output the stuff as HTML, especially for entities
961 */
962 type = cur->type;
963 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard4dd93462001-04-02 15:16:19 +0000964 if (cur->intSubset != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000965 htmlDtdDumpOutput(buf, cur, NULL);
Daniel Veillard4dd93462001-04-02 15:16:19 +0000966#if 0
967 /* Disabled for XSLT output */
968 } else {
Owen Taylor3473f882001-02-23 17:55:21 +0000969 /* Default to HTML-4.0 transitionnal @@@@ */
970 xmlOutputBufferWriteString(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">\n");
971
Daniel Veillard4dd93462001-04-02 15:16:19 +0000972#endif
Owen Taylor3473f882001-02-23 17:55:21 +0000973 }
974 if (cur->children != NULL) {
975 htmlNodeListDumpOutput(buf, cur, cur->children, encoding);
976 }
977 xmlOutputBufferWriteString(buf, "\n");
978 cur->type = (xmlElementType) type;
979}
980
Owen Taylor3473f882001-02-23 17:55:21 +0000981/************************************************************************
982 * *
983 * Saving functions front-ends *
984 * *
985 ************************************************************************/
986
987/**
988 * htmlDocDump:
989 * @f: the FILE*
990 * @cur: the document
991 *
992 * Dump an HTML document to an open FILE.
993 *
994 * returns: the number of byte written or -1 in case of failure.
995 */
996int
997htmlDocDump(FILE *f, xmlDocPtr cur) {
998 xmlOutputBufferPtr buf;
999 xmlCharEncodingHandlerPtr handler = NULL;
1000 const char *encoding;
1001 int ret;
1002
1003 if (cur == NULL) {
1004#ifdef DEBUG_TREE
1005 xmlGenericError(xmlGenericErrorContext,
1006 "htmlDocDump : document == NULL\n");
1007#endif
1008 return(-1);
1009 }
1010
1011 encoding = (const char *) htmlGetMetaEncoding(cur);
1012
1013 if (encoding != NULL) {
1014 xmlCharEncoding enc;
1015
1016 enc = xmlParseCharEncoding(encoding);
1017 if (enc != cur->charset) {
1018 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1019 /*
1020 * Not supported yet
1021 */
1022 return(-1);
1023 }
1024
1025 handler = xmlFindCharEncodingHandler(encoding);
1026 if (handler == NULL)
1027 return(-1);
1028 }
1029 }
1030
1031 /*
1032 * Fallback to HTML or ASCII when the encoding is unspecified
1033 */
1034 if (handler == NULL)
1035 handler = xmlFindCharEncodingHandler("HTML");
1036 if (handler == NULL)
1037 handler = xmlFindCharEncodingHandler("ascii");
1038
1039 buf = xmlOutputBufferCreateFile(f, handler);
1040 if (buf == NULL) return(-1);
1041 htmlDocContentDumpOutput(buf, cur, NULL);
1042
1043 ret = xmlOutputBufferClose(buf);
1044 return(ret);
1045}
1046
1047/**
1048 * htmlSaveFile:
1049 * @filename: the filename (or URL)
1050 * @cur: the document
1051 *
1052 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1053 * used.
1054 * returns: the number of byte written or -1 in case of failure.
1055 */
1056int
1057htmlSaveFile(const char *filename, xmlDocPtr cur) {
1058 xmlOutputBufferPtr buf;
1059 xmlCharEncodingHandlerPtr handler = NULL;
1060 const char *encoding;
1061 int ret;
1062
1063 encoding = (const char *) htmlGetMetaEncoding(cur);
1064
1065 if (encoding != NULL) {
1066 xmlCharEncoding enc;
1067
1068 enc = xmlParseCharEncoding(encoding);
1069 if (enc != cur->charset) {
1070 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1071 /*
1072 * Not supported yet
1073 */
1074 return(-1);
1075 }
1076
1077 handler = xmlFindCharEncodingHandler(encoding);
1078 if (handler == NULL)
1079 return(-1);
1080 }
1081 }
1082
1083 /*
1084 * Fallback to HTML or ASCII when the encoding is unspecified
1085 */
1086 if (handler == NULL)
1087 handler = xmlFindCharEncodingHandler("HTML");
1088 if (handler == NULL)
1089 handler = xmlFindCharEncodingHandler("ascii");
1090
1091 /*
1092 * save the content to a temp buffer.
1093 */
1094 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1095 if (buf == NULL) return(0);
1096
1097 htmlDocContentDumpOutput(buf, cur, NULL);
1098
1099 ret = xmlOutputBufferClose(buf);
1100 return(ret);
1101}
1102
1103/**
1104 * htmlSaveFileEnc:
1105 * @filename: the filename
1106 * @cur: the document
1107 *
1108 * Dump an HTML document to a file using a given encoding.
1109 *
1110 * returns: the number of byte written or -1 in case of failure.
1111 */
1112int
1113htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1114 xmlOutputBufferPtr buf;
1115 xmlCharEncodingHandlerPtr handler = NULL;
1116 int ret;
1117
1118 if (encoding != NULL) {
1119 xmlCharEncoding enc;
1120
1121 enc = xmlParseCharEncoding(encoding);
1122 if (enc != cur->charset) {
1123 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1124 /*
1125 * Not supported yet
1126 */
1127 return(-1);
1128 }
1129
1130 handler = xmlFindCharEncodingHandler(encoding);
1131 if (handler == NULL)
1132 return(-1);
1133 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1134 }
Daniel Veillard4dd93462001-04-02 15:16:19 +00001135 } else {
1136 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
Owen Taylor3473f882001-02-23 17:55:21 +00001137 }
1138
1139 /*
1140 * Fallback to HTML or ASCII when the encoding is unspecified
1141 */
1142 if (handler == NULL)
1143 handler = xmlFindCharEncodingHandler("HTML");
1144 if (handler == NULL)
1145 handler = xmlFindCharEncodingHandler("ascii");
1146
1147 /*
1148 * save the content to a temp buffer.
1149 */
1150 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1151 if (buf == NULL) return(0);
1152
1153 htmlDocContentDumpOutput(buf, cur, encoding);
1154
1155 ret = xmlOutputBufferClose(buf);
1156 return(ret);
1157}
1158#endif /* LIBXML_HTML_ENABLED */