blob: d8c5dc6fe87e6b92a47676b4a27d8c40aedd426a [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
Daniel Veillard7f7d1111999-09-22 09:46:25 +00009
Daniel Veillard3c558c31999-12-22 11:30:41 +000010#ifdef WIN32
11#include "win32config.h"
12#else
Daniel Veillard167b5091999-07-07 04:19:20 +000013#include "config.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000014#endif
Daniel Veillard361d8452000-04-03 19:48:13 +000015
16#include "xmlversion.h"
17#ifdef LIBXML_HTML_ENABLED
18
Daniel Veillard167b5091999-07-07 04:19:20 +000019#include <stdio.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000020#include <string.h> /* for memset() only ! */
21
Daniel Veillard7f7d1111999-09-22 09:46:25 +000022#ifdef HAVE_CTYPE_H
23#include <ctype.h>
24#endif
25#ifdef HAVE_STDLIB_H
26#include <stdlib.h>
27#endif
28
Daniel Veillard361d8452000-04-03 19:48:13 +000029#include <libxml/xmlmemory.h>
30#include <libxml/HTMLparser.h>
31#include <libxml/HTMLtree.h>
32#include <libxml/entities.h>
33#include <libxml/valid.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000034
Daniel Veillard32bc74e2000-07-14 14:49:25 +000035/************************************************************************
36 * *
37 * Getting/Setting encoding meta tags *
38 * *
39 ************************************************************************/
40
41/**
42 * htmlGetMetaEncoding:
43 * @doc: the document
44 *
45 * Encoding definition lookup in the Meta tags
46 *
47 * Returns the current encoding as flagged in the HTML source
48 */
49const xmlChar *
50htmlGetMetaEncoding(htmlDocPtr doc) {
51 htmlNodePtr cur;
52 const xmlChar *content;
53 const xmlChar *encoding;
54
55 if (doc == NULL)
56 return(NULL);
57 cur = doc->children;
58
59 /*
60 * Search the html
61 */
62 while (cur != NULL) {
63 if (cur->name != NULL) {
64 if (!xmlStrcmp(cur->name, BAD_CAST"html"))
65 break;
66 if (!xmlStrcmp(cur->name, BAD_CAST"head"))
67 goto found_head;
68 if (!xmlStrcmp(cur->name, BAD_CAST"meta"))
69 goto found_meta;
70 }
71 cur = cur->next;
72 }
73 if (cur == NULL)
74 return(NULL);
75 cur = cur->children;
76
77 /*
78 * Search the head
79 */
80 while (cur != NULL) {
81 if (cur->name != NULL) {
82 if (!xmlStrcmp(cur->name, BAD_CAST"head"))
83 break;
84 if (!xmlStrcmp(cur->name, BAD_CAST"meta"))
85 goto found_meta;
86 }
87 cur = cur->next;
88 }
89 if (cur == NULL)
90 return(NULL);
91found_head:
92 cur = cur->children;
93
94 /*
95 * Search the meta elements
96 */
97found_meta:
98 while (cur != NULL) {
99 if (cur->name != NULL) {
100 if (!xmlStrcmp(cur->name, BAD_CAST"meta")) {
101 xmlAttrPtr attr = cur->properties;
102 int http;
103 const xmlChar *value;
104
105 content = NULL;
106 http = 0;
107 while (attr != NULL) {
108 if ((attr->children != NULL) &&
109 (attr->children->type == XML_TEXT_NODE) &&
110 (attr->children->next == NULL)) {
111#ifndef XML_USE_BUFFER_CONTENT
112 value = attr->children->content;
113#else
114 value = xmlBufferContent(attr->children->content);
115#endif
116 if (((!xmlStrcmp(attr->name, BAD_CAST"http-equiv")) ||
117 (!xmlStrcmp(attr->name, BAD_CAST"Http-Equiv")) ||
118 (!xmlStrcmp(attr->name, BAD_CAST"HTTP-EQUIV"))) &&
119 ((!xmlStrcmp(value, BAD_CAST"Content-Type")) ||
120 (!xmlStrcmp(value, BAD_CAST"content-type")) ||
121 (!xmlStrcmp(value, BAD_CAST"CONTENT-TYPE"))))
122 http = 1;
123 else if ((value != NULL) &&
124 ((!xmlStrcmp(attr->name, BAD_CAST"content")) ||
125 (!xmlStrcmp(attr->name, BAD_CAST"Content")) ||
126 (!xmlStrcmp(attr->name, BAD_CAST"CONTENT"))))
127 content = value;
128 if ((http != 0) && (content != NULL))
129 goto found_content;
130 }
131 attr = attr->next;
132 }
133 }
134 }
135 cur = cur->next;
136 }
137 return(NULL);
138
139found_content:
140 encoding = xmlStrstr(content, BAD_CAST"charset=");
141 if (encoding == NULL)
142 encoding = xmlStrstr(content, BAD_CAST"Charset=");
143 if (encoding == NULL)
144 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
145 if (encoding != NULL) {
146 encoding += 8;
147 } else {
148 encoding = xmlStrstr(content, BAD_CAST"charset =");
149 if (encoding == NULL)
150 encoding = xmlStrstr(content, BAD_CAST"Charset =");
151 if (encoding == NULL)
152 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
153 if (encoding != NULL)
154 encoding += 9;
155 }
156 if (encoding != NULL) {
157 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
158 }
159 return(encoding);
160}
161
162/**
163 * htmlSetMetaEncoding:
164 * @doc: the document
165 * @encoding: the encoding string
166 *
167 * Sets the current encoding in the Meta tags
168 * NOTE: this will not change the document content encoding, just
169 * the META flag associated.
170 *
171 * Returns 0 in case of success and -1 in case of error
172 */
173int
174htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
175 htmlNodePtr cur, meta;
176 const xmlChar *content;
177 char newcontent[100];
178
179
180 if (doc == NULL)
181 return(-1);
182
183 if (encoding != NULL) {
184#ifndef HAVE_SNPRINTF
185 sprintf(newcontent, "text/html; charset=%s", encoding);
186#else /* HAVE_SNPRINTF */
187 snprintf(newcontent, 99, "text/html; charset=%s", encoding);
188#endif /* HAVE_SNPRINTF */
189 newcontent[99] = 0;
190 }
191
192 cur = doc->children;
193
194 /*
195 * Search the html
196 */
197 while (cur != NULL) {
198 if (cur->name != NULL) {
199 if (!xmlStrcmp(cur->name, BAD_CAST"html"))
200 break;
201 if (!xmlStrcmp(cur->name, BAD_CAST"body")) {
202 if (encoding == NULL)
203 return(0);
204 meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL);
205 xmlAddPrevSibling(cur, meta);
206 cur = meta;
207 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
208 xmlAddChild(cur, meta);
209 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
210 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
211 return(0);
212 }
213 if (!xmlStrcmp(cur->name, BAD_CAST"head"))
214 goto found_head;
215 if (!xmlStrcmp(cur->name, BAD_CAST"meta"))
216 goto found_meta;
217 }
218 cur = cur->next;
219 }
220 if (cur == NULL)
221 return(-1);
222 cur = cur->children;
223
224 /*
225 * Search the head
226 */
227 while (cur != NULL) {
228 if (cur->name != NULL) {
229 if (!xmlStrcmp(cur->name, BAD_CAST"head"))
230 break;
231 if (!xmlStrcmp(cur->name, BAD_CAST"body")) {
232 if (encoding == NULL)
233 return(0);
234 meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL);
235 xmlAddPrevSibling(cur, meta);
236 cur = meta;
237 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
238 xmlAddChild(cur, meta);
239 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
240 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
241 return(0);
242 }
243 if (!xmlStrcmp(cur->name, BAD_CAST"meta"))
244 goto found_meta;
245 }
246 cur = cur->next;
247 }
248 if (cur == NULL)
249 return(-1);
250found_head:
251 if (cur->children == NULL) {
252 if (encoding == NULL)
253 return(0);
254 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
255 xmlAddChild(cur, meta);
256 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
257 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
258 return(0);
259 }
260 cur = cur->children;
261
262found_meta:
263 if (encoding != NULL) {
264 /*
265 * Create a new Meta element with the right aatributes
266 */
267
268 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
269 xmlAddPrevSibling(cur, meta);
270 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
271 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
272 }
273
274 /*
275 * Search and destroy all the remaining the meta elements carrying
276 * encoding informations
277 */
278 while (cur != NULL) {
279 if (cur->name != NULL) {
280 if (!xmlStrcmp(cur->name, BAD_CAST"meta")) {
281 xmlAttrPtr attr = cur->properties;
282 int http;
283 const xmlChar *value;
284
285 content = NULL;
286 http = 0;
287 while (attr != NULL) {
288 if ((attr->children != NULL) &&
289 (attr->children->type == XML_TEXT_NODE) &&
290 (attr->children->next == NULL)) {
291#ifndef XML_USE_BUFFER_CONTENT
292 value = attr->children->content;
293#else
294 value = xmlBufferContent(attr->children->content);
295#endif
296 if (((!xmlStrcmp(attr->name, BAD_CAST"http-equiv")) ||
297 (!xmlStrcmp(attr->name, BAD_CAST"Http-Equiv")) ||
298 (!xmlStrcmp(attr->name, BAD_CAST"HTTP-EQUIV"))) &&
299 ((!xmlStrcmp(value, BAD_CAST"Content-Type")) ||
300 (!xmlStrcmp(value, BAD_CAST"content-type")) ||
301 (!xmlStrcmp(value, BAD_CAST"CONTENT-TYPE"))))
302 http = 1;
303 else if ((value != NULL) &&
304 ((!xmlStrcmp(attr->name, BAD_CAST"content")) ||
305 (!xmlStrcmp(attr->name, BAD_CAST"Content")) ||
306 (!xmlStrcmp(attr->name, BAD_CAST"CONTENT"))))
307 content = value;
308 if ((http != 0) && (content != NULL))
309 break;
310 }
311 attr = attr->next;
312 }
313 if ((http != 0) && (content != NULL)) {
314 meta = cur;
315 cur = cur->next;
316 xmlUnlinkNode(meta);
317 xmlFreeNode(meta);
318 continue;
319 }
320
321 }
322 }
323 cur = cur->next;
324 }
325 return(0);
326}
327
328/************************************************************************
329 * *
330 * Dumping HTML tree content to a simple buffer *
331 * *
332 ************************************************************************/
333
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000334static void
335htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
336
Daniel Veillard167b5091999-07-07 04:19:20 +0000337/**
338 * htmlDtdDump:
339 * @buf: the HTML buffer output
340 * @doc: the document
341 *
342 * Dump the HTML document DTD, if any.
343 */
344static void
345htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
346 xmlDtdPtr cur = doc->intSubset;
347
348 if (cur == NULL) {
349 fprintf(stderr, "htmlDtdDump : no internal subset\n");
350 return;
351 }
352 xmlBufferWriteChar(buf, "<!DOCTYPE ");
353 xmlBufferWriteCHAR(buf, cur->name);
354 if (cur->ExternalID != NULL) {
355 xmlBufferWriteChar(buf, " PUBLIC ");
356 xmlBufferWriteQuotedString(buf, cur->ExternalID);
Daniel Veillard1566d3a1999-07-15 14:24:29 +0000357 if (cur->SystemID != NULL) {
358 xmlBufferWriteChar(buf, " ");
359 xmlBufferWriteQuotedString(buf, cur->SystemID);
360 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000361 } else if (cur->SystemID != NULL) {
362 xmlBufferWriteChar(buf, " SYSTEM ");
363 xmlBufferWriteQuotedString(buf, cur->SystemID);
364 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000365 xmlBufferWriteChar(buf, ">\n");
366}
367
368/**
369 * htmlAttrDump:
370 * @buf: the HTML buffer output
371 * @doc: the document
372 * @cur: the attribute pointer
373 *
374 * Dump an HTML attribute
375 */
376static void
377htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000378 xmlChar *value;
Daniel Veillard167b5091999-07-07 04:19:20 +0000379
380 if (cur == NULL) {
381 fprintf(stderr, "htmlAttrDump : property == NULL\n");
382 return;
383 }
384 xmlBufferWriteChar(buf, " ");
385 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillardbe803962000-06-28 23:40:59 +0000386 if (cur->children != NULL) {
387 value = xmlNodeListGetString(doc, cur->children, 0);
388 if (value) {
389 xmlBufferWriteChar(buf, "=");
390 xmlBufferWriteQuotedString(buf, value);
391 xmlFree(value);
392 } else {
393 xmlBufferWriteChar(buf, "=\"\"");
394 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000395 }
396}
397
398/**
399 * htmlAttrListDump:
400 * @buf: the HTML buffer output
401 * @doc: the document
402 * @cur: the first attribute pointer
403 *
404 * Dump a list of HTML attributes
405 */
406static void
407htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
408 if (cur == NULL) {
409 fprintf(stderr, "htmlAttrListDump : property == NULL\n");
410 return;
411 }
412 while (cur != NULL) {
413 htmlAttrDump(buf, doc, cur);
414 cur = cur->next;
415 }
416}
417
418
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000419void
Daniel Veillard82150d81999-07-07 07:32:15 +0000420htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000421/**
422 * htmlNodeListDump:
423 * @buf: the HTML buffer output
424 * @doc: the document
425 * @cur: the first node
Daniel Veillard167b5091999-07-07 04:19:20 +0000426 *
427 * Dump an HTML node list, recursive behaviour,children are printed too.
428 */
429static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000430htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000431 if (cur == NULL) {
432 fprintf(stderr, "htmlNodeListDump : node == NULL\n");
433 return;
434 }
435 while (cur != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000436 htmlNodeDump(buf, doc, cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000437 cur = cur->next;
438 }
439}
440
441/**
442 * htmlNodeDump:
443 * @buf: the HTML buffer output
444 * @doc: the document
445 * @cur: the current node
Daniel Veillard167b5091999-07-07 04:19:20 +0000446 *
447 * Dump an HTML node, recursive behaviour,children are printed too.
448 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000449void
Daniel Veillard82150d81999-07-07 07:32:15 +0000450htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000451 htmlElemDescPtr info;
Daniel Veillard167b5091999-07-07 04:19:20 +0000452
453 if (cur == NULL) {
454 fprintf(stderr, "htmlNodeDump : node == NULL\n");
455 return;
456 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000457 /*
458 * Special cases.
459 */
Daniel Veillardd83eb822000-06-30 18:39:56 +0000460 if (cur->type == XML_DTD_NODE)
461 return;
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000462 if (cur->type == XML_HTML_DOCUMENT_NODE) {
463 htmlDocContentDump(buf, (xmlDocPtr) cur);
464 return;
465 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000466 if (cur->type == HTML_TEXT_NODE) {
467 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000468 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000469
Daniel Veillardd293fd11999-12-01 09:51:45 +0000470#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000471 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000472#else
473 buffer = xmlEncodeEntitiesReentrant(doc,
474 xmlBufferContent(cur->content));
475#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000476 if (buffer != NULL) {
477 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000478 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000479 }
480 }
481 return;
482 }
483 if (cur->type == HTML_COMMENT_NODE) {
484 if (cur->content != NULL) {
485 xmlBufferWriteChar(buf, "<!--");
Daniel Veillardd293fd11999-12-01 09:51:45 +0000486#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000487 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000488#else
489 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
490#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000491 xmlBufferWriteChar(buf, "-->");
492 }
493 return;
494 }
495 if (cur->type == HTML_ENTITY_REF_NODE) {
496 xmlBufferWriteChar(buf, "&");
497 xmlBufferWriteCHAR(buf, cur->name);
498 xmlBufferWriteChar(buf, ";");
499 return;
500 }
501
Daniel Veillard82150d81999-07-07 07:32:15 +0000502 /*
503 * Get specific HTmL info for taht node.
504 */
505 info = htmlTagLookup(cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000506
Daniel Veillard82150d81999-07-07 07:32:15 +0000507 xmlBufferWriteChar(buf, "<");
Daniel Veillard167b5091999-07-07 04:19:20 +0000508 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000509 if (cur->properties != NULL)
510 htmlAttrListDump(buf, doc, cur->properties);
511
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000512 if ((info != NULL) && (info->empty)) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000513 xmlBufferWriteChar(buf, ">");
514 if (cur->next != NULL) {
515 if ((cur->next->type != HTML_TEXT_NODE) &&
516 (cur->next->type != HTML_ENTITY_REF_NODE))
517 xmlBufferWriteChar(buf, "\n");
518 }
519 return;
520 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000521 if ((cur->content == NULL) && (cur->children == NULL)) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000522 if ((info != NULL) && (info->endTag != 0))
Daniel Veillard82150d81999-07-07 07:32:15 +0000523 xmlBufferWriteChar(buf, ">");
524 else {
525 xmlBufferWriteChar(buf, "></");
526 xmlBufferWriteCHAR(buf, cur->name);
527 xmlBufferWriteChar(buf, ">");
528 }
529 if (cur->next != NULL) {
530 if ((cur->next->type != HTML_TEXT_NODE) &&
531 (cur->next->type != HTML_ENTITY_REF_NODE))
532 xmlBufferWriteChar(buf, "\n");
533 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000534 return;
535 }
536 xmlBufferWriteChar(buf, ">");
537 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000538 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000539
Daniel Veillardd293fd11999-12-01 09:51:45 +0000540#ifndef XML_USE_BUFFER_CONTENT
541 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
542#else
543 buffer = xmlEncodeEntitiesReentrant(doc,
544 xmlBufferContent(cur->content));
545#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000546 if (buffer != NULL) {
547 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000548 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000549 }
550 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000551 if (cur->children != NULL) {
552 if ((cur->children->type != HTML_TEXT_NODE) &&
553 (cur->children->type != HTML_ENTITY_REF_NODE) &&
554 (cur->children != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000555 xmlBufferWriteChar(buf, "\n");
Daniel Veillardcf461992000-03-14 18:30:20 +0000556 htmlNodeListDump(buf, doc, cur->children);
Daniel Veillard82150d81999-07-07 07:32:15 +0000557 if ((cur->last->type != HTML_TEXT_NODE) &&
Chris Lahey6dff2141999-12-01 09:51:45 +0000558 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardcf461992000-03-14 18:30:20 +0000559 (cur->children != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000560 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000561 }
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000562 if (!htmlIsAutoClosed(doc, cur)) {
563 xmlBufferWriteChar(buf, "</");
564 xmlBufferWriteCHAR(buf, cur->name);
565 xmlBufferWriteChar(buf, ">");
566 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000567 if (cur->next != NULL) {
568 if ((cur->next->type != HTML_TEXT_NODE) &&
569 (cur->next->type != HTML_ENTITY_REF_NODE))
570 xmlBufferWriteChar(buf, "\n");
571 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000572}
573
574/**
Daniel Veillard5feb8492000-02-02 17:15:36 +0000575 * htmlNodeDumpFile:
576 * @out: the FILE pointer
577 * @doc: the document
578 * @cur: the current node
579 *
580 * Dump an HTML node, recursive behaviour,children are printed too.
581 */
582void
583htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
584 xmlBufferPtr buf;
585
586 buf = xmlBufferCreate();
587 if (buf == NULL) return;
588 htmlNodeDump(buf, doc, cur);
589 xmlBufferDump(out, buf);
590 xmlBufferFree(buf);
591}
592
593/**
Daniel Veillard167b5091999-07-07 04:19:20 +0000594 * htmlDocContentDump:
595 * @buf: the HTML buffer output
596 * @cur: the document
597 *
598 * Dump an HTML document.
599 */
600static void
601htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000602 int type;
603
604 /*
605 * force to output the stuff as HTML, especially for entities
606 */
607 type = cur->type;
608 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard167b5091999-07-07 04:19:20 +0000609 if (cur->intSubset != NULL)
610 htmlDtdDump(buf, cur);
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000611 else {
612 /* Default to HTML-4.0 transitionnal @@@@ */
613 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
614
615 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000616 if (cur->children != NULL) {
617 htmlNodeListDump(buf, cur, cur->children);
Daniel Veillard167b5091999-07-07 04:19:20 +0000618 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000619 xmlBufferWriteChar(buf, "\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000620 cur->type = (xmlElementType) type;
Daniel Veillard167b5091999-07-07 04:19:20 +0000621}
622
623/**
624 * htmlDocDumpMemory:
625 * @cur: the document
626 * @mem: OUT: the memory pointer
627 * @size: OUT: the memory lenght
628 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000629 * Dump an HTML document in memory and return the xmlChar * and it's size.
Daniel Veillard167b5091999-07-07 04:19:20 +0000630 * It's up to the caller to free the memory.
631 */
632void
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000633htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000634 xmlBufferPtr buf;
635
636 if (cur == NULL) {
637#ifdef DEBUG_TREE
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000638 fprintf(stderr, "htmlxmlDocDumpMemory : document == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000639#endif
640 *mem = NULL;
641 *size = 0;
642 return;
643 }
644 buf = xmlBufferCreate();
645 if (buf == NULL) {
646 *mem = NULL;
647 *size = 0;
648 return;
649 }
650 htmlDocContentDump(buf, cur);
651 *mem = buf->content;
652 *size = buf->use;
653 memset(buf, -1, sizeof(xmlBuffer));
Daniel Veillard6454aec1999-09-02 22:04:43 +0000654 xmlFree(buf);
Daniel Veillard167b5091999-07-07 04:19:20 +0000655}
656
657
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000658/************************************************************************
659 * *
660 * Dumping HTML tree content to an I/O output buffer *
661 * *
662 ************************************************************************/
663
664static void
665htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding);
666
667/**
668 * htmlDtdDump:
669 * @buf: the HTML buffer output
670 * @doc: the document
671 *
672 * Dump the HTML document DTD, if any.
673 */
674static void
675htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, const char *encoding) {
676 xmlDtdPtr cur = doc->intSubset;
677
678 if (cur == NULL) {
679 fprintf(stderr, "htmlDtdDump : no internal subset\n");
680 return;
681 }
682 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
683 xmlOutputBufferWriteString(buf, (const char *)cur->name);
684 if (cur->ExternalID != NULL) {
685 xmlOutputBufferWriteString(buf, " PUBLIC ");
686 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
687 if (cur->SystemID != NULL) {
688 xmlOutputBufferWriteString(buf, " ");
689 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
690 }
691 } else if (cur->SystemID != NULL) {
692 xmlOutputBufferWriteString(buf, " SYSTEM ");
693 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
694 }
695 xmlOutputBufferWriteString(buf, ">\n");
696}
697
698/**
699 * htmlAttrDump:
700 * @buf: the HTML buffer output
701 * @doc: the document
702 * @cur: the attribute pointer
703 *
704 * Dump an HTML attribute
705 */
706static void
707htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
708 xmlChar *value;
709
710 if (cur == NULL) {
711 fprintf(stderr, "htmlAttrDump : property == NULL\n");
712 return;
713 }
714 xmlOutputBufferWriteString(buf, " ");
715 xmlOutputBufferWriteString(buf, (const char *)cur->name);
716 if (cur->children != NULL) {
717 value = xmlNodeListGetString(doc, cur->children, 0);
718 if (value) {
719 xmlOutputBufferWriteString(buf, "=");
720 xmlBufferWriteQuotedString(buf->buffer, value);
721 xmlFree(value);
722 } else {
723 xmlOutputBufferWriteString(buf, "=\"\"");
724 }
725 }
726}
727
728/**
729 * htmlAttrListDump:
730 * @buf: the HTML buffer output
731 * @doc: the document
732 * @cur: the first attribute pointer
733 *
734 * Dump a list of HTML attributes
735 */
736static void
737htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
738 if (cur == NULL) {
739 fprintf(stderr, "htmlAttrListDump : property == NULL\n");
740 return;
741 }
742 while (cur != NULL) {
743 htmlAttrDumpOutput(buf, doc, cur, encoding);
744 cur = cur->next;
745 }
746}
747
748
749void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
750 xmlNodePtr cur, const char *encoding);
751
752/**
753 * htmlNodeListDump:
754 * @buf: the HTML buffer output
755 * @doc: the document
756 * @cur: the first node
757 *
758 * Dump an HTML node list, recursive behaviour,children are printed too.
759 */
760static void
761htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
762 if (cur == NULL) {
763 fprintf(stderr, "htmlNodeListDump : node == NULL\n");
764 return;
765 }
766 while (cur != NULL) {
767 htmlNodeDumpOutput(buf, doc, cur, encoding);
768 cur = cur->next;
769 }
770}
771
772/**
773 * htmlNodeDump:
774 * @buf: the HTML buffer output
775 * @doc: the document
776 * @cur: the current node
777 *
778 * Dump an HTML node, recursive behaviour,children are printed too.
779 */
780void
781htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
782 htmlElemDescPtr info;
783
784 if (cur == NULL) {
785 fprintf(stderr, "htmlNodeDump : node == NULL\n");
786 return;
787 }
788 /*
789 * Special cases.
790 */
791 if (cur->type == XML_DTD_NODE)
792 return;
793 if (cur->type == XML_HTML_DOCUMENT_NODE) {
794 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
795 return;
796 }
797 if (cur->type == HTML_TEXT_NODE) {
798 if (cur->content != NULL) {
799 xmlChar *buffer;
800
801#ifndef XML_USE_BUFFER_CONTENT
802 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
803#else
804 buffer = xmlEncodeEntitiesReentrant(doc,
805 xmlBufferContent(cur->content));
806#endif
807 if (buffer != NULL) {
808 xmlOutputBufferWriteString(buf, (const char *)buffer);
809 xmlFree(buffer);
810 }
811 }
812 return;
813 }
814 if (cur->type == HTML_COMMENT_NODE) {
815 if (cur->content != NULL) {
816 xmlOutputBufferWriteString(buf, "<!--");
817#ifndef XML_USE_BUFFER_CONTENT
818 xmlOutputBufferWriteString(buf, (const char *)cur->content);
819#else
820 xmlOutputBufferWriteString(buf, xmlBufferContent(cur->content));
821#endif
822 xmlOutputBufferWriteString(buf, "-->");
823 }
824 return;
825 }
826 if (cur->type == HTML_ENTITY_REF_NODE) {
827 xmlOutputBufferWriteString(buf, "&");
828 xmlOutputBufferWriteString(buf, (const char *)cur->name);
829 xmlOutputBufferWriteString(buf, ";");
830 return;
831 }
832
833 /*
834 * Get specific HTmL info for taht node.
835 */
836 info = htmlTagLookup(cur->name);
837
838 xmlOutputBufferWriteString(buf, "<");
839 xmlOutputBufferWriteString(buf, (const char *)cur->name);
840 if (cur->properties != NULL)
841 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
842
843 if ((info != NULL) && (info->empty)) {
844 xmlOutputBufferWriteString(buf, ">");
845 if (cur->next != NULL) {
846 if ((cur->next->type != HTML_TEXT_NODE) &&
847 (cur->next->type != HTML_ENTITY_REF_NODE))
848 xmlOutputBufferWriteString(buf, "\n");
849 }
850 return;
851 }
852 if ((cur->content == NULL) && (cur->children == NULL)) {
853 if ((info != NULL) && (info->endTag != 0))
854 xmlOutputBufferWriteString(buf, ">");
855 else {
856 xmlOutputBufferWriteString(buf, "></");
857 xmlOutputBufferWriteString(buf, (const char *)cur->name);
858 xmlOutputBufferWriteString(buf, ">");
859 }
860 if (cur->next != NULL) {
861 if ((cur->next->type != HTML_TEXT_NODE) &&
862 (cur->next->type != HTML_ENTITY_REF_NODE))
863 xmlOutputBufferWriteString(buf, "\n");
864 }
865 return;
866 }
867 xmlOutputBufferWriteString(buf, ">");
868 if (cur->content != NULL) {
869#if 0
870 xmlChar *buffer;
871
872#ifndef XML_USE_BUFFER_CONTENT
873 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
874#else
875 buffer = xmlEncodeEntitiesReentrant(doc,
876 xmlBufferContent(cur->content));
877#endif
878 if (buffer != NULL) {
879 xmlOutputBufferWriteString(buf, buffer);
880 xmlFree(buffer);
881 }
882#else
883 /*
884 * Uses the OutputBuffer property to automatically convert
885 * invalids to charrefs
886 */
887
888#ifndef XML_USE_BUFFER_CONTENT
889 xmlOutputBufferWriteString(buf, (const char *) cur->content);
890#else
891 xmlOutputBufferWriteString(buf,
892 (const char *) xmlBufferContent(cur->content));
893#endif
894#endif
895 }
896 if (cur->children != NULL) {
897 if ((cur->children->type != HTML_TEXT_NODE) &&
898 (cur->children->type != HTML_ENTITY_REF_NODE) &&
899 (cur->children != cur->last))
900 xmlOutputBufferWriteString(buf, "\n");
901 htmlNodeListDumpOutput(buf, doc, cur->children, encoding);
902 if ((cur->last->type != HTML_TEXT_NODE) &&
903 (cur->last->type != HTML_ENTITY_REF_NODE) &&
904 (cur->children != cur->last))
905 xmlOutputBufferWriteString(buf, "\n");
906 }
907 if (!htmlIsAutoClosed(doc, cur)) {
908 xmlOutputBufferWriteString(buf, "</");
909 xmlOutputBufferWriteString(buf, (const char *)cur->name);
910 xmlOutputBufferWriteString(buf, ">");
911 }
912 if (cur->next != NULL) {
913 if ((cur->next->type != HTML_TEXT_NODE) &&
914 (cur->next->type != HTML_ENTITY_REF_NODE))
915 xmlOutputBufferWriteString(buf, "\n");
916 }
917}
918
919/**
920 * htmlDocContentDump:
921 * @buf: the HTML buffer output
922 * @cur: the document
923 *
924 * Dump an HTML document.
925 */
926static void
927htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding) {
928 int type;
929
930 /*
931 * force to output the stuff as HTML, especially for entities
932 */
933 type = cur->type;
934 cur->type = XML_HTML_DOCUMENT_NODE;
935 if (cur->intSubset != NULL)
936 htmlDtdDumpOutput(buf, cur, NULL);
937 else {
938 /* Default to HTML-4.0 transitionnal @@@@ */
939 xmlOutputBufferWriteString(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
940
941 }
942 if (cur->children != NULL) {
943 htmlNodeListDumpOutput(buf, cur, cur->children, encoding);
944 }
945 xmlOutputBufferWriteString(buf, "\n");
946 cur->type = (xmlElementType) type;
947}
948
949
950/************************************************************************
951 * *
952 * Saving functions front-ends *
953 * *
954 ************************************************************************/
955
Daniel Veillard167b5091999-07-07 04:19:20 +0000956/**
957 * htmlDocDump:
958 * @f: the FILE*
959 * @cur: the document
960 *
961 * Dump an HTML document to an open FILE.
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000962 *
963 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard167b5091999-07-07 04:19:20 +0000964 */
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000965int
Daniel Veillard167b5091999-07-07 04:19:20 +0000966htmlDocDump(FILE *f, xmlDocPtr cur) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000967 xmlOutputBufferPtr buf;
968 xmlCharEncodingHandlerPtr handler = NULL;
969 const char *encoding;
970 int ret;
Daniel Veillard167b5091999-07-07 04:19:20 +0000971
972 if (cur == NULL) {
973#ifdef DEBUG_TREE
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000974 fprintf(stderr, "htmlDocDump : document == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000975#endif
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000976 return(-1);
Daniel Veillard167b5091999-07-07 04:19:20 +0000977 }
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000978
979 encoding = (const char *) htmlGetMetaEncoding(cur);
980
981 if (encoding != NULL) {
982 xmlCharEncoding enc;
983
984 enc = xmlParseCharEncoding(encoding);
985 if (enc != cur->charset) {
986 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
987 /*
988 * Not supported yet
989 */
990 return(-1);
991 }
992
993 handler = xmlFindCharEncodingHandler(encoding);
994 if (handler == NULL)
995 return(-1);
996 }
997 }
998
999 /*
1000 * Fallback to HTML or ASCII when the encoding is unspecified
1001 */
1002 if (handler == NULL)
1003 handler = xmlFindCharEncodingHandler("HTML");
1004 if (handler == NULL)
1005 handler = xmlFindCharEncodingHandler("ascii");
1006
1007 buf = xmlOutputBufferCreateFile(f, handler);
1008 if (buf == NULL) return(-1);
1009 htmlDocContentDumpOutput(buf, cur, NULL);
1010
1011 ret = xmlOutputBufferClose(buf);
1012 return(ret);
Daniel Veillard167b5091999-07-07 04:19:20 +00001013}
1014
1015/**
1016 * htmlSaveFile:
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001017 * @filename: the filename (or URL)
Daniel Veillard167b5091999-07-07 04:19:20 +00001018 * @cur: the document
1019 *
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001020 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1021 * used.
Daniel Veillard167b5091999-07-07 04:19:20 +00001022 * returns: the number of byte written or -1 in case of failure.
1023 */
1024int
1025htmlSaveFile(const char *filename, xmlDocPtr cur) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001026 xmlOutputBufferPtr buf;
1027 xmlCharEncodingHandlerPtr handler = NULL;
1028 const char *encoding;
Daniel Veillard167b5091999-07-07 04:19:20 +00001029 int ret;
1030
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001031 encoding = (const char *) htmlGetMetaEncoding(cur);
1032
1033 if (encoding != NULL) {
1034 xmlCharEncoding enc;
1035
1036 enc = xmlParseCharEncoding(encoding);
1037 if (enc != cur->charset) {
1038 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1039 /*
1040 * Not supported yet
1041 */
1042 return(-1);
1043 }
1044
1045 handler = xmlFindCharEncodingHandler(encoding);
1046 if (handler == NULL)
1047 return(-1);
1048 }
1049 }
1050
1051 /*
1052 * Fallback to HTML or ASCII when the encoding is unspecified
1053 */
1054 if (handler == NULL)
1055 handler = xmlFindCharEncodingHandler("HTML");
1056 if (handler == NULL)
1057 handler = xmlFindCharEncodingHandler("ascii");
1058
Daniel Veillard167b5091999-07-07 04:19:20 +00001059 /*
1060 * save the content to a temp buffer.
1061 */
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001062 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
Daniel Veillard167b5091999-07-07 04:19:20 +00001063 if (buf == NULL) return(0);
Daniel Veillard167b5091999-07-07 04:19:20 +00001064
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001065 htmlDocContentDumpOutput(buf, cur, NULL);
Daniel Veillard167b5091999-07-07 04:19:20 +00001066
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001067 ret = xmlOutputBufferClose(buf);
1068 return(ret);
Daniel Veillard167b5091999-07-07 04:19:20 +00001069}
1070
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001071/**
1072 * htmlSaveFileEnc:
1073 * @filename: the filename
1074 * @cur: the document
1075 *
1076 * Dump an HTML document to a file using a given encoding.
1077 *
1078 * returns: the number of byte written or -1 in case of failure.
1079 */
1080int
1081htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1082 xmlOutputBufferPtr buf;
1083 xmlCharEncodingHandlerPtr handler = NULL;
1084 int ret;
1085
1086 if (encoding != NULL) {
1087 xmlCharEncoding enc;
1088
1089 enc = xmlParseCharEncoding(encoding);
1090 if (enc != cur->charset) {
1091 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1092 /*
1093 * Not supported yet
1094 */
1095 return(-1);
1096 }
1097
1098 handler = xmlFindCharEncodingHandler(encoding);
1099 if (handler == NULL)
1100 return(-1);
1101 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1102 }
1103 }
1104
1105 /*
1106 * Fallback to HTML or ASCII when the encoding is unspecified
1107 */
1108 if (handler == NULL)
1109 handler = xmlFindCharEncodingHandler("HTML");
1110 if (handler == NULL)
1111 handler = xmlFindCharEncodingHandler("ascii");
1112
1113 /*
1114 * save the content to a temp buffer.
1115 */
1116 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1117 if (buf == NULL) return(0);
1118
1119 htmlDocContentDumpOutput(buf, cur, encoding);
1120
1121 ret = xmlOutputBufferClose(buf);
1122 return(ret);
1123}
Daniel Veillard361d8452000-04-03 19:48:13 +00001124#endif /* LIBXML_HTML_ENABLED */