blob: 2d3b8a496ac6b534363298ef1a28598fa924035f [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
Daniel Veillard7f7d1111999-09-22 09:46:25 +00009
Daniel Veillard3c558c31999-12-22 11:30:41 +000010#ifdef WIN32
11#include "win32config.h"
12#else
Daniel Veillard167b5091999-07-07 04:19:20 +000013#include "config.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000014#endif
Daniel Veillard361d8452000-04-03 19:48:13 +000015
16#include "xmlversion.h"
17#ifdef LIBXML_HTML_ENABLED
18
Daniel Veillard167b5091999-07-07 04:19:20 +000019#include <stdio.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000020#include <string.h> /* for memset() only ! */
21
Daniel Veillard7f7d1111999-09-22 09:46:25 +000022#ifdef HAVE_CTYPE_H
23#include <ctype.h>
24#endif
25#ifdef HAVE_STDLIB_H
26#include <stdlib.h>
27#endif
28
Daniel Veillard361d8452000-04-03 19:48:13 +000029#include <libxml/xmlmemory.h>
30#include <libxml/HTMLparser.h>
31#include <libxml/HTMLtree.h>
32#include <libxml/entities.h>
33#include <libxml/valid.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000034
Daniel Veillard32bc74e2000-07-14 14:49:25 +000035/************************************************************************
36 * *
37 * Getting/Setting encoding meta tags *
38 * *
39 ************************************************************************/
40
41/**
42 * htmlGetMetaEncoding:
43 * @doc: the document
44 *
45 * Encoding definition lookup in the Meta tags
46 *
47 * Returns the current encoding as flagged in the HTML source
48 */
49const xmlChar *
50htmlGetMetaEncoding(htmlDocPtr doc) {
51 htmlNodePtr cur;
52 const xmlChar *content;
53 const xmlChar *encoding;
54
55 if (doc == NULL)
56 return(NULL);
57 cur = doc->children;
58
59 /*
60 * Search the html
61 */
62 while (cur != NULL) {
63 if (cur->name != NULL) {
64 if (!xmlStrcmp(cur->name, BAD_CAST"html"))
65 break;
66 if (!xmlStrcmp(cur->name, BAD_CAST"head"))
67 goto found_head;
68 if (!xmlStrcmp(cur->name, BAD_CAST"meta"))
69 goto found_meta;
70 }
71 cur = cur->next;
72 }
73 if (cur == NULL)
74 return(NULL);
75 cur = cur->children;
76
77 /*
78 * Search the head
79 */
80 while (cur != NULL) {
81 if (cur->name != NULL) {
82 if (!xmlStrcmp(cur->name, BAD_CAST"head"))
83 break;
84 if (!xmlStrcmp(cur->name, BAD_CAST"meta"))
85 goto found_meta;
86 }
87 cur = cur->next;
88 }
89 if (cur == NULL)
90 return(NULL);
91found_head:
92 cur = cur->children;
93
94 /*
95 * Search the meta elements
96 */
97found_meta:
98 while (cur != NULL) {
99 if (cur->name != NULL) {
100 if (!xmlStrcmp(cur->name, BAD_CAST"meta")) {
101 xmlAttrPtr attr = cur->properties;
102 int http;
103 const xmlChar *value;
104
105 content = NULL;
106 http = 0;
107 while (attr != NULL) {
108 if ((attr->children != NULL) &&
109 (attr->children->type == XML_TEXT_NODE) &&
110 (attr->children->next == NULL)) {
111#ifndef XML_USE_BUFFER_CONTENT
112 value = attr->children->content;
113#else
114 value = xmlBufferContent(attr->children->content);
115#endif
116 if (((!xmlStrcmp(attr->name, BAD_CAST"http-equiv")) ||
117 (!xmlStrcmp(attr->name, BAD_CAST"Http-Equiv")) ||
118 (!xmlStrcmp(attr->name, BAD_CAST"HTTP-EQUIV"))) &&
119 ((!xmlStrcmp(value, BAD_CAST"Content-Type")) ||
120 (!xmlStrcmp(value, BAD_CAST"content-type")) ||
121 (!xmlStrcmp(value, BAD_CAST"CONTENT-TYPE"))))
122 http = 1;
123 else if ((value != NULL) &&
124 ((!xmlStrcmp(attr->name, BAD_CAST"content")) ||
125 (!xmlStrcmp(attr->name, BAD_CAST"Content")) ||
126 (!xmlStrcmp(attr->name, BAD_CAST"CONTENT"))))
127 content = value;
128 if ((http != 0) && (content != NULL))
129 goto found_content;
130 }
131 attr = attr->next;
132 }
133 }
134 }
135 cur = cur->next;
136 }
137 return(NULL);
138
139found_content:
140 encoding = xmlStrstr(content, BAD_CAST"charset=");
141 if (encoding == NULL)
142 encoding = xmlStrstr(content, BAD_CAST"Charset=");
143 if (encoding == NULL)
144 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
145 if (encoding != NULL) {
146 encoding += 8;
147 } else {
148 encoding = xmlStrstr(content, BAD_CAST"charset =");
149 if (encoding == NULL)
150 encoding = xmlStrstr(content, BAD_CAST"Charset =");
151 if (encoding == NULL)
152 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
153 if (encoding != NULL)
154 encoding += 9;
155 }
156 if (encoding != NULL) {
157 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
158 }
159 return(encoding);
160}
161
162/**
163 * htmlSetMetaEncoding:
164 * @doc: the document
165 * @encoding: the encoding string
166 *
167 * Sets the current encoding in the Meta tags
168 * NOTE: this will not change the document content encoding, just
169 * the META flag associated.
170 *
171 * Returns 0 in case of success and -1 in case of error
172 */
173int
174htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
175 htmlNodePtr cur, meta;
176 const xmlChar *content;
177 char newcontent[100];
178
179
180 if (doc == NULL)
181 return(-1);
182
183 if (encoding != NULL) {
Daniel Veillard39c7d712000-09-10 16:14:55 +0000184#ifdef HAVE_SNPRINTF
185 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
186 encoding);
187#else
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000188 sprintf(newcontent, "text/html; charset=%s", encoding);
Daniel Veillard39c7d712000-09-10 16:14:55 +0000189#endif
190 newcontent[sizeof(newcontent) - 1] = 0;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000191 }
192
193 cur = doc->children;
194
195 /*
196 * Search the html
197 */
198 while (cur != NULL) {
199 if (cur->name != NULL) {
200 if (!xmlStrcmp(cur->name, BAD_CAST"html"))
201 break;
202 if (!xmlStrcmp(cur->name, BAD_CAST"body")) {
203 if (encoding == NULL)
204 return(0);
205 meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL);
206 xmlAddPrevSibling(cur, meta);
207 cur = meta;
208 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
209 xmlAddChild(cur, meta);
210 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
211 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
212 return(0);
213 }
214 if (!xmlStrcmp(cur->name, BAD_CAST"head"))
215 goto found_head;
216 if (!xmlStrcmp(cur->name, BAD_CAST"meta"))
217 goto found_meta;
218 }
219 cur = cur->next;
220 }
221 if (cur == NULL)
222 return(-1);
223 cur = cur->children;
224
225 /*
226 * Search the head
227 */
228 while (cur != NULL) {
229 if (cur->name != NULL) {
230 if (!xmlStrcmp(cur->name, BAD_CAST"head"))
231 break;
232 if (!xmlStrcmp(cur->name, BAD_CAST"body")) {
233 if (encoding == NULL)
234 return(0);
235 meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL);
236 xmlAddPrevSibling(cur, meta);
237 cur = meta;
238 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
239 xmlAddChild(cur, meta);
240 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
241 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
242 return(0);
243 }
244 if (!xmlStrcmp(cur->name, BAD_CAST"meta"))
245 goto found_meta;
246 }
247 cur = cur->next;
248 }
249 if (cur == NULL)
250 return(-1);
251found_head:
252 if (cur->children == NULL) {
253 if (encoding == NULL)
254 return(0);
255 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
256 xmlAddChild(cur, meta);
257 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
258 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
259 return(0);
260 }
261 cur = cur->children;
262
263found_meta:
264 if (encoding != NULL) {
265 /*
266 * Create a new Meta element with the right aatributes
267 */
268
269 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
270 xmlAddPrevSibling(cur, meta);
271 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
272 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
273 }
274
275 /*
276 * Search and destroy all the remaining the meta elements carrying
277 * encoding informations
278 */
279 while (cur != NULL) {
280 if (cur->name != NULL) {
281 if (!xmlStrcmp(cur->name, BAD_CAST"meta")) {
282 xmlAttrPtr attr = cur->properties;
283 int http;
284 const xmlChar *value;
285
286 content = NULL;
287 http = 0;
288 while (attr != NULL) {
289 if ((attr->children != NULL) &&
290 (attr->children->type == XML_TEXT_NODE) &&
291 (attr->children->next == NULL)) {
292#ifndef XML_USE_BUFFER_CONTENT
293 value = attr->children->content;
294#else
295 value = xmlBufferContent(attr->children->content);
296#endif
297 if (((!xmlStrcmp(attr->name, BAD_CAST"http-equiv")) ||
298 (!xmlStrcmp(attr->name, BAD_CAST"Http-Equiv")) ||
299 (!xmlStrcmp(attr->name, BAD_CAST"HTTP-EQUIV"))) &&
300 ((!xmlStrcmp(value, BAD_CAST"Content-Type")) ||
301 (!xmlStrcmp(value, BAD_CAST"content-type")) ||
302 (!xmlStrcmp(value, BAD_CAST"CONTENT-TYPE"))))
303 http = 1;
304 else if ((value != NULL) &&
305 ((!xmlStrcmp(attr->name, BAD_CAST"content")) ||
306 (!xmlStrcmp(attr->name, BAD_CAST"Content")) ||
307 (!xmlStrcmp(attr->name, BAD_CAST"CONTENT"))))
308 content = value;
309 if ((http != 0) && (content != NULL))
310 break;
311 }
312 attr = attr->next;
313 }
314 if ((http != 0) && (content != NULL)) {
315 meta = cur;
316 cur = cur->next;
317 xmlUnlinkNode(meta);
318 xmlFreeNode(meta);
319 continue;
320 }
321
322 }
323 }
324 cur = cur->next;
325 }
326 return(0);
327}
328
329/************************************************************************
330 * *
331 * Dumping HTML tree content to a simple buffer *
332 * *
333 ************************************************************************/
334
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000335static void
336htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
337
Daniel Veillard167b5091999-07-07 04:19:20 +0000338/**
339 * htmlDtdDump:
340 * @buf: the HTML buffer output
341 * @doc: the document
342 *
343 * Dump the HTML document DTD, if any.
344 */
345static void
346htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
347 xmlDtdPtr cur = doc->intSubset;
348
349 if (cur == NULL) {
350 fprintf(stderr, "htmlDtdDump : no internal subset\n");
351 return;
352 }
353 xmlBufferWriteChar(buf, "<!DOCTYPE ");
354 xmlBufferWriteCHAR(buf, cur->name);
355 if (cur->ExternalID != NULL) {
356 xmlBufferWriteChar(buf, " PUBLIC ");
357 xmlBufferWriteQuotedString(buf, cur->ExternalID);
Daniel Veillard1566d3a1999-07-15 14:24:29 +0000358 if (cur->SystemID != NULL) {
359 xmlBufferWriteChar(buf, " ");
360 xmlBufferWriteQuotedString(buf, cur->SystemID);
361 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000362 } else if (cur->SystemID != NULL) {
363 xmlBufferWriteChar(buf, " SYSTEM ");
364 xmlBufferWriteQuotedString(buf, cur->SystemID);
365 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000366 xmlBufferWriteChar(buf, ">\n");
367}
368
369/**
370 * htmlAttrDump:
371 * @buf: the HTML buffer output
372 * @doc: the document
373 * @cur: the attribute pointer
374 *
375 * Dump an HTML attribute
376 */
377static void
378htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000379 xmlChar *value;
Daniel Veillard167b5091999-07-07 04:19:20 +0000380
381 if (cur == NULL) {
382 fprintf(stderr, "htmlAttrDump : property == NULL\n");
383 return;
384 }
385 xmlBufferWriteChar(buf, " ");
386 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillardbe803962000-06-28 23:40:59 +0000387 if (cur->children != NULL) {
388 value = xmlNodeListGetString(doc, cur->children, 0);
389 if (value) {
390 xmlBufferWriteChar(buf, "=");
391 xmlBufferWriteQuotedString(buf, value);
392 xmlFree(value);
393 } else {
394 xmlBufferWriteChar(buf, "=\"\"");
395 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000396 }
397}
398
399/**
400 * htmlAttrListDump:
401 * @buf: the HTML buffer output
402 * @doc: the document
403 * @cur: the first attribute pointer
404 *
405 * Dump a list of HTML attributes
406 */
407static void
408htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
409 if (cur == NULL) {
410 fprintf(stderr, "htmlAttrListDump : property == NULL\n");
411 return;
412 }
413 while (cur != NULL) {
414 htmlAttrDump(buf, doc, cur);
415 cur = cur->next;
416 }
417}
418
419
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000420void
Daniel Veillard82150d81999-07-07 07:32:15 +0000421htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000422/**
423 * htmlNodeListDump:
424 * @buf: the HTML buffer output
425 * @doc: the document
426 * @cur: the first node
Daniel Veillard167b5091999-07-07 04:19:20 +0000427 *
428 * Dump an HTML node list, recursive behaviour,children are printed too.
429 */
430static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000431htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000432 if (cur == NULL) {
433 fprintf(stderr, "htmlNodeListDump : node == NULL\n");
434 return;
435 }
436 while (cur != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000437 htmlNodeDump(buf, doc, cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000438 cur = cur->next;
439 }
440}
441
442/**
443 * htmlNodeDump:
444 * @buf: the HTML buffer output
445 * @doc: the document
446 * @cur: the current node
Daniel Veillard167b5091999-07-07 04:19:20 +0000447 *
448 * Dump an HTML node, recursive behaviour,children are printed too.
449 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000450void
Daniel Veillard82150d81999-07-07 07:32:15 +0000451htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000452 htmlElemDescPtr info;
Daniel Veillard167b5091999-07-07 04:19:20 +0000453
454 if (cur == NULL) {
455 fprintf(stderr, "htmlNodeDump : node == NULL\n");
456 return;
457 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000458 /*
459 * Special cases.
460 */
Daniel Veillardd83eb822000-06-30 18:39:56 +0000461 if (cur->type == XML_DTD_NODE)
462 return;
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000463 if (cur->type == XML_HTML_DOCUMENT_NODE) {
464 htmlDocContentDump(buf, (xmlDocPtr) cur);
465 return;
466 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000467 if (cur->type == HTML_TEXT_NODE) {
468 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000469 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000470
Daniel Veillardd293fd11999-12-01 09:51:45 +0000471#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000472 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000473#else
474 buffer = xmlEncodeEntitiesReentrant(doc,
475 xmlBufferContent(cur->content));
476#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000477 if (buffer != NULL) {
478 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000479 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000480 }
481 }
482 return;
483 }
484 if (cur->type == HTML_COMMENT_NODE) {
485 if (cur->content != NULL) {
486 xmlBufferWriteChar(buf, "<!--");
Daniel Veillardd293fd11999-12-01 09:51:45 +0000487#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000488 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000489#else
490 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
491#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000492 xmlBufferWriteChar(buf, "-->");
493 }
494 return;
495 }
496 if (cur->type == HTML_ENTITY_REF_NODE) {
497 xmlBufferWriteChar(buf, "&");
498 xmlBufferWriteCHAR(buf, cur->name);
499 xmlBufferWriteChar(buf, ";");
500 return;
501 }
502
Daniel Veillard82150d81999-07-07 07:32:15 +0000503 /*
504 * Get specific HTmL info for taht node.
505 */
506 info = htmlTagLookup(cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000507
Daniel Veillard82150d81999-07-07 07:32:15 +0000508 xmlBufferWriteChar(buf, "<");
Daniel Veillard167b5091999-07-07 04:19:20 +0000509 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000510 if (cur->properties != NULL)
511 htmlAttrListDump(buf, doc, cur->properties);
512
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000513 if ((info != NULL) && (info->empty)) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000514 xmlBufferWriteChar(buf, ">");
515 if (cur->next != NULL) {
516 if ((cur->next->type != HTML_TEXT_NODE) &&
517 (cur->next->type != HTML_ENTITY_REF_NODE))
518 xmlBufferWriteChar(buf, "\n");
519 }
520 return;
521 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000522 if ((cur->content == NULL) && (cur->children == NULL)) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000523 if ((info != NULL) && (info->endTag != 0))
Daniel Veillard82150d81999-07-07 07:32:15 +0000524 xmlBufferWriteChar(buf, ">");
525 else {
526 xmlBufferWriteChar(buf, "></");
527 xmlBufferWriteCHAR(buf, cur->name);
528 xmlBufferWriteChar(buf, ">");
529 }
530 if (cur->next != NULL) {
531 if ((cur->next->type != HTML_TEXT_NODE) &&
532 (cur->next->type != HTML_ENTITY_REF_NODE))
533 xmlBufferWriteChar(buf, "\n");
534 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000535 return;
536 }
537 xmlBufferWriteChar(buf, ">");
538 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000539 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000540
Daniel Veillardd293fd11999-12-01 09:51:45 +0000541#ifndef XML_USE_BUFFER_CONTENT
542 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
543#else
544 buffer = xmlEncodeEntitiesReentrant(doc,
545 xmlBufferContent(cur->content));
546#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000547 if (buffer != NULL) {
548 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000549 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000550 }
551 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000552 if (cur->children != NULL) {
553 if ((cur->children->type != HTML_TEXT_NODE) &&
554 (cur->children->type != HTML_ENTITY_REF_NODE) &&
555 (cur->children != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000556 xmlBufferWriteChar(buf, "\n");
Daniel Veillardcf461992000-03-14 18:30:20 +0000557 htmlNodeListDump(buf, doc, cur->children);
Daniel Veillard82150d81999-07-07 07:32:15 +0000558 if ((cur->last->type != HTML_TEXT_NODE) &&
Chris Lahey6dff2141999-12-01 09:51:45 +0000559 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardcf461992000-03-14 18:30:20 +0000560 (cur->children != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000561 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000562 }
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000563 if (!htmlIsAutoClosed(doc, cur)) {
564 xmlBufferWriteChar(buf, "</");
565 xmlBufferWriteCHAR(buf, cur->name);
566 xmlBufferWriteChar(buf, ">");
567 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000568 if (cur->next != NULL) {
569 if ((cur->next->type != HTML_TEXT_NODE) &&
570 (cur->next->type != HTML_ENTITY_REF_NODE))
571 xmlBufferWriteChar(buf, "\n");
572 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000573}
574
575/**
Daniel Veillard5feb8492000-02-02 17:15:36 +0000576 * htmlNodeDumpFile:
577 * @out: the FILE pointer
578 * @doc: the document
579 * @cur: the current node
580 *
581 * Dump an HTML node, recursive behaviour,children are printed too.
582 */
583void
584htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
585 xmlBufferPtr buf;
586
587 buf = xmlBufferCreate();
588 if (buf == NULL) return;
589 htmlNodeDump(buf, doc, cur);
590 xmlBufferDump(out, buf);
591 xmlBufferFree(buf);
592}
593
594/**
Daniel Veillard167b5091999-07-07 04:19:20 +0000595 * htmlDocContentDump:
596 * @buf: the HTML buffer output
597 * @cur: the document
598 *
599 * Dump an HTML document.
600 */
601static void
602htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000603 int type;
604
605 /*
606 * force to output the stuff as HTML, especially for entities
607 */
608 type = cur->type;
609 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard167b5091999-07-07 04:19:20 +0000610 if (cur->intSubset != NULL)
611 htmlDtdDump(buf, cur);
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000612 else {
613 /* Default to HTML-4.0 transitionnal @@@@ */
614 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
615
616 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000617 if (cur->children != NULL) {
618 htmlNodeListDump(buf, cur, cur->children);
Daniel Veillard167b5091999-07-07 04:19:20 +0000619 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000620 xmlBufferWriteChar(buf, "\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000621 cur->type = (xmlElementType) type;
Daniel Veillard167b5091999-07-07 04:19:20 +0000622}
623
624/**
625 * htmlDocDumpMemory:
626 * @cur: the document
627 * @mem: OUT: the memory pointer
628 * @size: OUT: the memory lenght
629 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000630 * Dump an HTML document in memory and return the xmlChar * and it's size.
Daniel Veillard167b5091999-07-07 04:19:20 +0000631 * It's up to the caller to free the memory.
632 */
633void
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000634htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000635 xmlBufferPtr buf;
636
637 if (cur == NULL) {
638#ifdef DEBUG_TREE
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000639 fprintf(stderr, "htmlxmlDocDumpMemory : document == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000640#endif
641 *mem = NULL;
642 *size = 0;
643 return;
644 }
645 buf = xmlBufferCreate();
646 if (buf == NULL) {
647 *mem = NULL;
648 *size = 0;
649 return;
650 }
651 htmlDocContentDump(buf, cur);
652 *mem = buf->content;
653 *size = buf->use;
654 memset(buf, -1, sizeof(xmlBuffer));
Daniel Veillard6454aec1999-09-02 22:04:43 +0000655 xmlFree(buf);
Daniel Veillard167b5091999-07-07 04:19:20 +0000656}
657
658
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000659/************************************************************************
660 * *
661 * Dumping HTML tree content to an I/O output buffer *
662 * *
663 ************************************************************************/
664
665static void
666htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding);
667
668/**
669 * htmlDtdDump:
670 * @buf: the HTML buffer output
671 * @doc: the document
672 *
673 * Dump the HTML document DTD, if any.
674 */
675static void
676htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, const char *encoding) {
677 xmlDtdPtr cur = doc->intSubset;
678
679 if (cur == NULL) {
680 fprintf(stderr, "htmlDtdDump : no internal subset\n");
681 return;
682 }
683 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
684 xmlOutputBufferWriteString(buf, (const char *)cur->name);
685 if (cur->ExternalID != NULL) {
686 xmlOutputBufferWriteString(buf, " PUBLIC ");
687 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
688 if (cur->SystemID != NULL) {
689 xmlOutputBufferWriteString(buf, " ");
690 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
691 }
692 } else if (cur->SystemID != NULL) {
693 xmlOutputBufferWriteString(buf, " SYSTEM ");
694 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
695 }
696 xmlOutputBufferWriteString(buf, ">\n");
697}
698
699/**
700 * htmlAttrDump:
701 * @buf: the HTML buffer output
702 * @doc: the document
703 * @cur: the attribute pointer
704 *
705 * Dump an HTML attribute
706 */
707static void
708htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
709 xmlChar *value;
710
711 if (cur == NULL) {
712 fprintf(stderr, "htmlAttrDump : property == NULL\n");
713 return;
714 }
715 xmlOutputBufferWriteString(buf, " ");
716 xmlOutputBufferWriteString(buf, (const char *)cur->name);
717 if (cur->children != NULL) {
718 value = xmlNodeListGetString(doc, cur->children, 0);
719 if (value) {
720 xmlOutputBufferWriteString(buf, "=");
721 xmlBufferWriteQuotedString(buf->buffer, value);
722 xmlFree(value);
723 } else {
724 xmlOutputBufferWriteString(buf, "=\"\"");
725 }
726 }
727}
728
729/**
730 * htmlAttrListDump:
731 * @buf: the HTML buffer output
732 * @doc: the document
733 * @cur: the first attribute pointer
734 *
735 * Dump a list of HTML attributes
736 */
737static void
738htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
739 if (cur == NULL) {
740 fprintf(stderr, "htmlAttrListDump : property == NULL\n");
741 return;
742 }
743 while (cur != NULL) {
744 htmlAttrDumpOutput(buf, doc, cur, encoding);
745 cur = cur->next;
746 }
747}
748
749
750void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
751 xmlNodePtr cur, const char *encoding);
752
753/**
754 * htmlNodeListDump:
755 * @buf: the HTML buffer output
756 * @doc: the document
757 * @cur: the first node
758 *
759 * Dump an HTML node list, recursive behaviour,children are printed too.
760 */
761static void
762htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
763 if (cur == NULL) {
764 fprintf(stderr, "htmlNodeListDump : node == NULL\n");
765 return;
766 }
767 while (cur != NULL) {
768 htmlNodeDumpOutput(buf, doc, cur, encoding);
769 cur = cur->next;
770 }
771}
772
773/**
774 * htmlNodeDump:
775 * @buf: the HTML buffer output
776 * @doc: the document
777 * @cur: the current node
778 *
779 * Dump an HTML node, recursive behaviour,children are printed too.
780 */
781void
782htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
783 htmlElemDescPtr info;
784
785 if (cur == NULL) {
786 fprintf(stderr, "htmlNodeDump : node == NULL\n");
787 return;
788 }
789 /*
790 * Special cases.
791 */
792 if (cur->type == XML_DTD_NODE)
793 return;
794 if (cur->type == XML_HTML_DOCUMENT_NODE) {
795 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
796 return;
797 }
798 if (cur->type == HTML_TEXT_NODE) {
799 if (cur->content != NULL) {
800 xmlChar *buffer;
801
802#ifndef XML_USE_BUFFER_CONTENT
803 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
804#else
805 buffer = xmlEncodeEntitiesReentrant(doc,
806 xmlBufferContent(cur->content));
807#endif
808 if (buffer != NULL) {
809 xmlOutputBufferWriteString(buf, (const char *)buffer);
810 xmlFree(buffer);
811 }
812 }
813 return;
814 }
815 if (cur->type == HTML_COMMENT_NODE) {
816 if (cur->content != NULL) {
817 xmlOutputBufferWriteString(buf, "<!--");
818#ifndef XML_USE_BUFFER_CONTENT
819 xmlOutputBufferWriteString(buf, (const char *)cur->content);
820#else
821 xmlOutputBufferWriteString(buf, xmlBufferContent(cur->content));
822#endif
823 xmlOutputBufferWriteString(buf, "-->");
824 }
825 return;
826 }
827 if (cur->type == HTML_ENTITY_REF_NODE) {
828 xmlOutputBufferWriteString(buf, "&");
829 xmlOutputBufferWriteString(buf, (const char *)cur->name);
830 xmlOutputBufferWriteString(buf, ";");
831 return;
832 }
833
834 /*
835 * Get specific HTmL info for taht node.
836 */
837 info = htmlTagLookup(cur->name);
838
839 xmlOutputBufferWriteString(buf, "<");
840 xmlOutputBufferWriteString(buf, (const char *)cur->name);
841 if (cur->properties != NULL)
842 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
843
844 if ((info != NULL) && (info->empty)) {
845 xmlOutputBufferWriteString(buf, ">");
846 if (cur->next != NULL) {
847 if ((cur->next->type != HTML_TEXT_NODE) &&
848 (cur->next->type != HTML_ENTITY_REF_NODE))
849 xmlOutputBufferWriteString(buf, "\n");
850 }
851 return;
852 }
853 if ((cur->content == NULL) && (cur->children == NULL)) {
854 if ((info != NULL) && (info->endTag != 0))
855 xmlOutputBufferWriteString(buf, ">");
856 else {
857 xmlOutputBufferWriteString(buf, "></");
858 xmlOutputBufferWriteString(buf, (const char *)cur->name);
859 xmlOutputBufferWriteString(buf, ">");
860 }
861 if (cur->next != NULL) {
862 if ((cur->next->type != HTML_TEXT_NODE) &&
863 (cur->next->type != HTML_ENTITY_REF_NODE))
864 xmlOutputBufferWriteString(buf, "\n");
865 }
866 return;
867 }
868 xmlOutputBufferWriteString(buf, ">");
869 if (cur->content != NULL) {
870#if 0
871 xmlChar *buffer;
872
873#ifndef XML_USE_BUFFER_CONTENT
874 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
875#else
876 buffer = xmlEncodeEntitiesReentrant(doc,
877 xmlBufferContent(cur->content));
878#endif
879 if (buffer != NULL) {
880 xmlOutputBufferWriteString(buf, buffer);
881 xmlFree(buffer);
882 }
883#else
884 /*
885 * Uses the OutputBuffer property to automatically convert
886 * invalids to charrefs
887 */
888
889#ifndef XML_USE_BUFFER_CONTENT
890 xmlOutputBufferWriteString(buf, (const char *) cur->content);
891#else
892 xmlOutputBufferWriteString(buf,
893 (const char *) xmlBufferContent(cur->content));
894#endif
895#endif
896 }
897 if (cur->children != NULL) {
898 if ((cur->children->type != HTML_TEXT_NODE) &&
899 (cur->children->type != HTML_ENTITY_REF_NODE) &&
900 (cur->children != cur->last))
901 xmlOutputBufferWriteString(buf, "\n");
902 htmlNodeListDumpOutput(buf, doc, cur->children, encoding);
903 if ((cur->last->type != HTML_TEXT_NODE) &&
904 (cur->last->type != HTML_ENTITY_REF_NODE) &&
905 (cur->children != cur->last))
906 xmlOutputBufferWriteString(buf, "\n");
907 }
908 if (!htmlIsAutoClosed(doc, cur)) {
909 xmlOutputBufferWriteString(buf, "</");
910 xmlOutputBufferWriteString(buf, (const char *)cur->name);
911 xmlOutputBufferWriteString(buf, ">");
912 }
913 if (cur->next != NULL) {
914 if ((cur->next->type != HTML_TEXT_NODE) &&
915 (cur->next->type != HTML_ENTITY_REF_NODE))
916 xmlOutputBufferWriteString(buf, "\n");
917 }
918}
919
920/**
921 * htmlDocContentDump:
922 * @buf: the HTML buffer output
923 * @cur: the document
924 *
925 * Dump an HTML document.
926 */
927static void
928htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding) {
929 int type;
930
931 /*
932 * force to output the stuff as HTML, especially for entities
933 */
934 type = cur->type;
935 cur->type = XML_HTML_DOCUMENT_NODE;
936 if (cur->intSubset != NULL)
937 htmlDtdDumpOutput(buf, cur, NULL);
938 else {
939 /* Default to HTML-4.0 transitionnal @@@@ */
940 xmlOutputBufferWriteString(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
941
942 }
943 if (cur->children != NULL) {
944 htmlNodeListDumpOutput(buf, cur, cur->children, encoding);
945 }
946 xmlOutputBufferWriteString(buf, "\n");
947 cur->type = (xmlElementType) type;
948}
949
950
951/************************************************************************
952 * *
953 * Saving functions front-ends *
954 * *
955 ************************************************************************/
956
Daniel Veillard167b5091999-07-07 04:19:20 +0000957/**
958 * htmlDocDump:
959 * @f: the FILE*
960 * @cur: the document
961 *
962 * Dump an HTML document to an open FILE.
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000963 *
964 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard167b5091999-07-07 04:19:20 +0000965 */
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000966int
Daniel Veillard167b5091999-07-07 04:19:20 +0000967htmlDocDump(FILE *f, xmlDocPtr cur) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000968 xmlOutputBufferPtr buf;
969 xmlCharEncodingHandlerPtr handler = NULL;
970 const char *encoding;
971 int ret;
Daniel Veillard167b5091999-07-07 04:19:20 +0000972
973 if (cur == NULL) {
974#ifdef DEBUG_TREE
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000975 fprintf(stderr, "htmlDocDump : document == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000976#endif
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000977 return(-1);
Daniel Veillard167b5091999-07-07 04:19:20 +0000978 }
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000979
980 encoding = (const char *) htmlGetMetaEncoding(cur);
981
982 if (encoding != NULL) {
983 xmlCharEncoding enc;
984
985 enc = xmlParseCharEncoding(encoding);
986 if (enc != cur->charset) {
987 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
988 /*
989 * Not supported yet
990 */
991 return(-1);
992 }
993
994 handler = xmlFindCharEncodingHandler(encoding);
995 if (handler == NULL)
996 return(-1);
997 }
998 }
999
1000 /*
1001 * Fallback to HTML or ASCII when the encoding is unspecified
1002 */
1003 if (handler == NULL)
1004 handler = xmlFindCharEncodingHandler("HTML");
1005 if (handler == NULL)
1006 handler = xmlFindCharEncodingHandler("ascii");
1007
1008 buf = xmlOutputBufferCreateFile(f, handler);
1009 if (buf == NULL) return(-1);
1010 htmlDocContentDumpOutput(buf, cur, NULL);
1011
1012 ret = xmlOutputBufferClose(buf);
1013 return(ret);
Daniel Veillard167b5091999-07-07 04:19:20 +00001014}
1015
1016/**
1017 * htmlSaveFile:
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001018 * @filename: the filename (or URL)
Daniel Veillard167b5091999-07-07 04:19:20 +00001019 * @cur: the document
1020 *
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001021 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1022 * used.
Daniel Veillard167b5091999-07-07 04:19:20 +00001023 * returns: the number of byte written or -1 in case of failure.
1024 */
1025int
1026htmlSaveFile(const char *filename, xmlDocPtr cur) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001027 xmlOutputBufferPtr buf;
1028 xmlCharEncodingHandlerPtr handler = NULL;
1029 const char *encoding;
Daniel Veillard167b5091999-07-07 04:19:20 +00001030 int ret;
1031
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001032 encoding = (const char *) htmlGetMetaEncoding(cur);
1033
1034 if (encoding != NULL) {
1035 xmlCharEncoding enc;
1036
1037 enc = xmlParseCharEncoding(encoding);
1038 if (enc != cur->charset) {
1039 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1040 /*
1041 * Not supported yet
1042 */
1043 return(-1);
1044 }
1045
1046 handler = xmlFindCharEncodingHandler(encoding);
1047 if (handler == NULL)
1048 return(-1);
1049 }
1050 }
1051
1052 /*
1053 * Fallback to HTML or ASCII when the encoding is unspecified
1054 */
1055 if (handler == NULL)
1056 handler = xmlFindCharEncodingHandler("HTML");
1057 if (handler == NULL)
1058 handler = xmlFindCharEncodingHandler("ascii");
1059
Daniel Veillard167b5091999-07-07 04:19:20 +00001060 /*
1061 * save the content to a temp buffer.
1062 */
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001063 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
Daniel Veillard167b5091999-07-07 04:19:20 +00001064 if (buf == NULL) return(0);
Daniel Veillard167b5091999-07-07 04:19:20 +00001065
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001066 htmlDocContentDumpOutput(buf, cur, NULL);
Daniel Veillard167b5091999-07-07 04:19:20 +00001067
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001068 ret = xmlOutputBufferClose(buf);
1069 return(ret);
Daniel Veillard167b5091999-07-07 04:19:20 +00001070}
1071
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001072/**
1073 * htmlSaveFileEnc:
1074 * @filename: the filename
1075 * @cur: the document
1076 *
1077 * Dump an HTML document to a file using a given encoding.
1078 *
1079 * returns: the number of byte written or -1 in case of failure.
1080 */
1081int
1082htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1083 xmlOutputBufferPtr buf;
1084 xmlCharEncodingHandlerPtr handler = NULL;
1085 int ret;
1086
1087 if (encoding != NULL) {
1088 xmlCharEncoding enc;
1089
1090 enc = xmlParseCharEncoding(encoding);
1091 if (enc != cur->charset) {
1092 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1093 /*
1094 * Not supported yet
1095 */
1096 return(-1);
1097 }
1098
1099 handler = xmlFindCharEncodingHandler(encoding);
1100 if (handler == NULL)
1101 return(-1);
1102 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1103 }
1104 }
1105
1106 /*
1107 * Fallback to HTML or ASCII when the encoding is unspecified
1108 */
1109 if (handler == NULL)
1110 handler = xmlFindCharEncodingHandler("HTML");
1111 if (handler == NULL)
1112 handler = xmlFindCharEncodingHandler("ascii");
1113
1114 /*
1115 * save the content to a temp buffer.
1116 */
1117 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1118 if (buf == NULL) return(0);
1119
1120 htmlDocContentDumpOutput(buf, cur, encoding);
1121
1122 ret = xmlOutputBufferClose(buf);
1123 return(ret);
1124}
Daniel Veillard361d8452000-04-03 19:48:13 +00001125#endif /* LIBXML_HTML_ENABLED */