blob: f57f65a0b1d9d1e26bfc7193cb63faedaa51fb71 [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access function for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
8
9
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000011#ifdef LIBXML_HTML_ENABLED
12
Owen Taylor3473f882001-02-23 17:55:21 +000013#ifdef HAVE_CTYPE_H
14#include <ctype.h>
15#endif
16#ifdef HAVE_STDLIB_H
17#include <stdlib.h>
18#endif
19
20#include <libxml/xmlmemory.h>
21#include <libxml/HTMLparser.h>
22#include <libxml/HTMLtree.h>
23#include <libxml/entities.h>
24#include <libxml/valid.h>
25#include <libxml/xmlerror.h>
26#include <libxml/parserInternals.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000027#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000028
/************************************************************************
 *                                                                      *
 *              Getting/Setting encoding meta tags                      *
 *                                                                      *
 ************************************************************************/
34
35/**
36 * htmlGetMetaEncoding:
37 * @doc: the document
38 *
39 * Encoding definition lookup in the Meta tags
40 *
41 * Returns the current encoding as flagged in the HTML source
42 */
43const xmlChar *
44htmlGetMetaEncoding(htmlDocPtr doc) {
45 htmlNodePtr cur;
46 const xmlChar *content;
47 const xmlChar *encoding;
48
49 if (doc == NULL)
50 return(NULL);
51 cur = doc->children;
52
53 /*
54 * Search the html
55 */
56 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000057 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000058 if (xmlStrEqual(cur->name, BAD_CAST"html"))
59 break;
60 if (xmlStrEqual(cur->name, BAD_CAST"head"))
61 goto found_head;
62 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
63 goto found_meta;
64 }
65 cur = cur->next;
66 }
67 if (cur == NULL)
68 return(NULL);
69 cur = cur->children;
70
71 /*
72 * Search the head
73 */
74 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000075 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000076 if (xmlStrEqual(cur->name, BAD_CAST"head"))
77 break;
78 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
79 goto found_meta;
80 }
81 cur = cur->next;
82 }
83 if (cur == NULL)
84 return(NULL);
85found_head:
86 cur = cur->children;
87
88 /*
89 * Search the meta elements
90 */
91found_meta:
92 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000093 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000094 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
95 xmlAttrPtr attr = cur->properties;
96 int http;
97 const xmlChar *value;
98
99 content = NULL;
100 http = 0;
101 while (attr != NULL) {
102 if ((attr->children != NULL) &&
103 (attr->children->type == XML_TEXT_NODE) &&
104 (attr->children->next == NULL)) {
105#ifndef XML_USE_BUFFER_CONTENT
106 value = attr->children->content;
107#else
108 value = xmlBufferContent(attr->children->content);
109#endif
110 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
111 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
112 http = 1;
113 else if ((value != NULL)
114 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
115 content = value;
116 if ((http != 0) && (content != NULL))
117 goto found_content;
118 }
119 attr = attr->next;
120 }
121 }
122 }
123 cur = cur->next;
124 }
125 return(NULL);
126
127found_content:
128 encoding = xmlStrstr(content, BAD_CAST"charset=");
129 if (encoding == NULL)
130 encoding = xmlStrstr(content, BAD_CAST"Charset=");
131 if (encoding == NULL)
132 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
133 if (encoding != NULL) {
134 encoding += 8;
135 } else {
136 encoding = xmlStrstr(content, BAD_CAST"charset =");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"Charset =");
139 if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
141 if (encoding != NULL)
142 encoding += 9;
143 }
144 if (encoding != NULL) {
145 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
146 }
147 return(encoding);
148}
149
150/**
151 * htmlSetMetaEncoding:
152 * @doc: the document
153 * @encoding: the encoding string
154 *
155 * Sets the current encoding in the Meta tags
156 * NOTE: this will not change the document content encoding, just
157 * the META flag associated.
158 *
159 * Returns 0 in case of success and -1 in case of error
160 */
161int
162htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
163 htmlNodePtr cur, meta;
164 const xmlChar *content;
165 char newcontent[100];
166
167
168 if (doc == NULL)
169 return(-1);
170
171 if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000172 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
173 encoding);
Owen Taylor3473f882001-02-23 17:55:21 +0000174 newcontent[sizeof(newcontent) - 1] = 0;
175 }
176
177 cur = doc->children;
178
179 /*
180 * Search the html
181 */
182 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000183 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000184 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
185 break;
186 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
187 goto found_head;
188 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
189 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000190 }
191 cur = cur->next;
192 }
193 if (cur == NULL)
194 return(-1);
195 cur = cur->children;
196
197 /*
198 * Search the head
199 */
200 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000201 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000202 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
203 break;
204 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
205 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000206 }
207 cur = cur->next;
208 }
209 if (cur == NULL)
210 return(-1);
211found_head:
212 if (cur->children == NULL) {
213 if (encoding == NULL)
214 return(0);
215 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
216 xmlAddChild(cur, meta);
Owen Taylor3473f882001-02-23 17:55:21 +0000217 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000218 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Owen Taylor3473f882001-02-23 17:55:21 +0000219 return(0);
220 }
221 cur = cur->children;
222
223found_meta:
224 if (encoding != NULL) {
225 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000226 * Create a new Meta element with the right attributes
Owen Taylor3473f882001-02-23 17:55:21 +0000227 */
228
229 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
230 xmlAddPrevSibling(cur, meta);
Owen Taylor3473f882001-02-23 17:55:21 +0000231 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000232 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Owen Taylor3473f882001-02-23 17:55:21 +0000233 }
234
235 /*
236 * Search and destroy all the remaining the meta elements carrying
237 * encoding informations
238 */
239 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000240 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000241 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000242 xmlAttrPtr attr = cur->properties;
243 int http;
244 const xmlChar *value;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000245 int same_charset;
Owen Taylor3473f882001-02-23 17:55:21 +0000246
247 content = NULL;
248 http = 0;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000249 same_charset = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000250 while (attr != NULL) {
251 if ((attr->children != NULL) &&
252 (attr->children->type == XML_TEXT_NODE) &&
253 (attr->children->next == NULL)) {
254#ifndef XML_USE_BUFFER_CONTENT
255 value = attr->children->content;
256#else
257 value = xmlBufferContent(attr->children->content);
258#endif
259 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
260 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
261 http = 1;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000262 else
263 {
264 if ((value != NULL) &&
265 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
266 content = value;
267 else
268 if ((!xmlStrcasecmp(attr->name, BAD_CAST"charset"))
269 && (!xmlStrcasecmp(value, encoding)))
270 same_charset = 1;
271 }
272 if ((http != 0) && (content != NULL) && (same_charset != 0))
Owen Taylor3473f882001-02-23 17:55:21 +0000273 break;
274 }
275 attr = attr->next;
276 }
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000277 if ((http != 0) && (content != NULL) && (same_charset != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000278 meta = cur;
279 cur = cur->next;
280 xmlUnlinkNode(meta);
281 xmlFreeNode(meta);
282 continue;
283 }
284
285 }
286 }
287 cur = cur->next;
288 }
289 return(0);
290}
291
/************************************************************************
 *                                                                      *
 *              Dumping HTML tree content to a simple buffer            *
 *                                                                      *
 ************************************************************************/
297
298static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000299htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format);
Daniel Veillard86fd5a72001-12-13 14:55:21 +0000300static void
301htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
302 int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000303
304/**
305 * htmlDtdDump:
306 * @buf: the HTML buffer output
307 * @doc: the document
308 *
309 * Dump the HTML document DTD, if any.
310 */
311static void
312htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
313 xmlDtdPtr cur = doc->intSubset;
314
315 if (cur == NULL) {
316 xmlGenericError(xmlGenericErrorContext,
317 "htmlDtdDump : no internal subset\n");
318 return;
319 }
320 xmlBufferWriteChar(buf, "<!DOCTYPE ");
321 xmlBufferWriteCHAR(buf, cur->name);
322 if (cur->ExternalID != NULL) {
323 xmlBufferWriteChar(buf, " PUBLIC ");
324 xmlBufferWriteQuotedString(buf, cur->ExternalID);
325 if (cur->SystemID != NULL) {
326 xmlBufferWriteChar(buf, " ");
327 xmlBufferWriteQuotedString(buf, cur->SystemID);
328 }
329 } else if (cur->SystemID != NULL) {
330 xmlBufferWriteChar(buf, " SYSTEM ");
331 xmlBufferWriteQuotedString(buf, cur->SystemID);
332 }
333 xmlBufferWriteChar(buf, ">\n");
334}
335
336/**
337 * htmlAttrDump:
338 * @buf: the HTML buffer output
339 * @doc: the document
340 * @cur: the attribute pointer
341 *
342 * Dump an HTML attribute
343 */
344static void
345htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
346 xmlChar *value;
347
Daniel Veillardeca60d02001-06-13 07:45:41 +0000348 /*
349 * TODO: The html output method should not escape a & character
350 * occurring in an attribute value immediately followed by
351 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
352 */
353
Owen Taylor3473f882001-02-23 17:55:21 +0000354 if (cur == NULL) {
355 xmlGenericError(xmlGenericErrorContext,
356 "htmlAttrDump : property == NULL\n");
357 return;
358 }
359 xmlBufferWriteChar(buf, " ");
360 xmlBufferWriteCHAR(buf, cur->name);
361 if (cur->children != NULL) {
362 value = xmlNodeListGetString(doc, cur->children, 0);
363 if (value) {
364 xmlBufferWriteChar(buf, "=");
365 xmlBufferWriteQuotedString(buf, value);
366 xmlFree(value);
367 } else {
368 xmlBufferWriteChar(buf, "=\"\"");
369 }
370 }
371}
372
373/**
374 * htmlAttrListDump:
375 * @buf: the HTML buffer output
376 * @doc: the document
377 * @cur: the first attribute pointer
378 *
379 * Dump a list of HTML attributes
380 */
381static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000382htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, int format) {
383 int i = 0;
384
Owen Taylor3473f882001-02-23 17:55:21 +0000385 if (cur == NULL) {
386 xmlGenericError(xmlGenericErrorContext,
387 "htmlAttrListDump : property == NULL\n");
388 return;
389 }
390 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000391 i++;
392 if ((format) && (i >= 5)) {
393 i = 0;
394 xmlBufferWriteChar(buf, "\n");
395 }
Owen Taylor3473f882001-02-23 17:55:21 +0000396 htmlAttrDump(buf, doc, cur);
397 cur = cur->next;
398 }
399}
400
Owen Taylor3473f882001-02-23 17:55:21 +0000401/**
402 * htmlNodeListDump:
403 * @buf: the HTML buffer output
404 * @doc: the document
405 * @cur: the first node
406 *
407 * Dump an HTML node list, recursive behaviour,children are printed too.
408 */
409static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000410htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000411 if (cur == NULL) {
412 xmlGenericError(xmlGenericErrorContext,
413 "htmlNodeListDump : node == NULL\n");
414 return;
415 }
416 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000417 htmlNodeDumpFormat(buf, doc, cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000418 cur = cur->next;
419 }
420}
421
422/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000423 * htmlNodeDumpFormat:
Owen Taylor3473f882001-02-23 17:55:21 +0000424 * @buf: the HTML buffer output
425 * @doc: the document
426 * @cur: the current node
Daniel Veillard95d845f2001-06-13 13:48:46 +0000427 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000428 *
429 * Dump an HTML node, recursive behaviour,children are printed too.
430 */
Daniel Veillard86fd5a72001-12-13 14:55:21 +0000431static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000432htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
433 int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000434 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000435
436 if (cur == NULL) {
437 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000438 "htmlNodeDumpFormat : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000439 return;
440 }
441 /*
442 * Special cases.
443 */
444 if (cur->type == XML_DTD_NODE)
445 return;
446 if (cur->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000447 htmlDocContentDump(buf, (xmlDocPtr) cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000448 return;
449 }
450 if (cur->type == HTML_TEXT_NODE) {
451 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000452 if (((cur->name == (const xmlChar *)xmlStringText) ||
453 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000454 ((cur->parent == NULL) ||
455 (!xmlStrEqual(cur->parent->name, BAD_CAST "script")))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000456 xmlChar *buffer;
457
458#ifndef XML_USE_BUFFER_CONTENT
459 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
460#else
461 buffer = xmlEncodeEntitiesReentrant(doc,
462 xmlBufferContent(cur->content));
463#endif
464 if (buffer != NULL) {
465 xmlBufferWriteCHAR(buf, buffer);
466 xmlFree(buffer);
467 }
468 } else {
469 xmlBufferWriteCHAR(buf, cur->content);
470 }
471 }
472 return;
473 }
474 if (cur->type == HTML_COMMENT_NODE) {
475 if (cur->content != NULL) {
476 xmlBufferWriteChar(buf, "<!--");
477#ifndef XML_USE_BUFFER_CONTENT
478 xmlBufferWriteCHAR(buf, cur->content);
479#else
480 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
481#endif
482 xmlBufferWriteChar(buf, "-->");
483 }
484 return;
485 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000486 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000487 if (cur->name == NULL)
488 return;
489 xmlBufferWriteChar(buf, "<?");
490 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000491 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000492 xmlBufferWriteChar(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000493#ifndef XML_USE_BUFFER_CONTENT
494 xmlBufferWriteCHAR(buf, cur->content);
495#else
496 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
497#endif
Daniel Veillard7533cc82001-04-24 15:52:00 +0000498 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000499 xmlBufferWriteChar(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000500 return;
501 }
Owen Taylor3473f882001-02-23 17:55:21 +0000502 if (cur->type == HTML_ENTITY_REF_NODE) {
503 xmlBufferWriteChar(buf, "&");
504 xmlBufferWriteCHAR(buf, cur->name);
505 xmlBufferWriteChar(buf, ";");
506 return;
507 }
Daniel Veillard083c2662001-05-08 08:27:14 +0000508 if (cur->type == HTML_PRESERVE_NODE) {
509 if (cur->content != NULL) {
510#ifndef XML_USE_BUFFER_CONTENT
511 xmlBufferWriteCHAR(buf, cur->content);
512#else
513 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
514#endif
515 }
516 return;
517 }
Owen Taylor3473f882001-02-23 17:55:21 +0000518
519 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000520 * Get specific HTML info for that node.
Owen Taylor3473f882001-02-23 17:55:21 +0000521 */
522 info = htmlTagLookup(cur->name);
523
524 xmlBufferWriteChar(buf, "<");
525 xmlBufferWriteCHAR(buf, cur->name);
526 if (cur->properties != NULL)
Daniel Veillard95d845f2001-06-13 13:48:46 +0000527 htmlAttrListDump(buf, doc, cur->properties, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000528
529 if ((info != NULL) && (info->empty)) {
530 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000531 if ((format) && (info != NULL) && (!info->isinline) &&
532 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000533 if ((cur->next->type != HTML_TEXT_NODE) &&
534 (cur->next->type != HTML_ENTITY_REF_NODE))
535 xmlBufferWriteChar(buf, "\n");
536 }
537 return;
538 }
Daniel Veillard7db37732001-07-12 01:20:08 +0000539 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
540 (cur->children == NULL)) {
Daniel Veillard083c2662001-05-08 08:27:14 +0000541 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +0000542 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
543 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000544 xmlBufferWriteChar(buf, ">");
Daniel Veillard083c2662001-05-08 08:27:14 +0000545 } else {
Owen Taylor3473f882001-02-23 17:55:21 +0000546 xmlBufferWriteChar(buf, "></");
547 xmlBufferWriteCHAR(buf, cur->name);
548 xmlBufferWriteChar(buf, ">");
549 }
Daniel Veillard02bb1702001-06-13 21:11:59 +0000550 if ((format) && (info != NULL) && (!info->isinline) &&
551 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000552 if ((cur->next->type != HTML_TEXT_NODE) &&
553 (cur->next->type != HTML_ENTITY_REF_NODE))
554 xmlBufferWriteChar(buf, "\n");
555 }
556 return;
557 }
558 xmlBufferWriteChar(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +0000559 if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000560 xmlChar *buffer;
561
562#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard083c2662001-05-08 08:27:14 +0000563 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000564#else
Daniel Veillard083c2662001-05-08 08:27:14 +0000565 buffer = xmlEncodeEntitiesReentrant(doc,
566 xmlBufferContent(cur->content));
Owen Taylor3473f882001-02-23 17:55:21 +0000567#endif
568 if (buffer != NULL) {
569 xmlBufferWriteCHAR(buf, buffer);
570 xmlFree(buffer);
571 }
572 }
573 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +0000574 if ((format) && (info != NULL) && (!info->isinline) &&
575 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000576 (cur->children->type != HTML_ENTITY_REF_NODE) &&
577 (cur->children != cur->last))
578 xmlBufferWriteChar(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +0000579 htmlNodeListDump(buf, doc, cur->children, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +0000580 if ((format) && (info != NULL) && (!info->isinline) &&
581 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000582 (cur->last->type != HTML_ENTITY_REF_NODE) &&
583 (cur->children != cur->last))
584 xmlBufferWriteChar(buf, "\n");
585 }
Owen Taylor3473f882001-02-23 17:55:21 +0000586 xmlBufferWriteChar(buf, "</");
587 xmlBufferWriteCHAR(buf, cur->name);
588 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000589 if ((format) && (info != NULL) && (!info->isinline) &&
590 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000591 if ((cur->next->type != HTML_TEXT_NODE) &&
592 (cur->next->type != HTML_ENTITY_REF_NODE))
593 xmlBufferWriteChar(buf, "\n");
594 }
595}
596
597/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000598 * htmlNodeDump:
599 * @buf: the HTML buffer output
600 * @doc: the document
601 * @cur: the current node
602 *
603 * Dump an HTML node, recursive behaviour,children are printed too,
604 * and formatting returns are added.
605 */
606void
607htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
608 htmlNodeDumpFormat(buf, doc, cur, 1);
609}
610
611/**
612 * htmlNodeDumpFileFormat:
613 * @out: the FILE pointer
614 * @doc: the document
615 * @cur: the current node
616 * @encoding: the document encoding
617 * @format: should formatting spaces been added
618 *
619 * Dump an HTML node, recursive behaviour,children are printed too.
620 *
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000621 * TODO: if encoding == NULL try to save in the doc encoding
622 *
623 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard95d845f2001-06-13 13:48:46 +0000624 */
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000625int
626htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
627 xmlNodePtr cur, const char *encoding, int format) {
628 xmlOutputBufferPtr buf;
629 xmlCharEncodingHandlerPtr handler = NULL;
630 int ret;
Daniel Veillard95d845f2001-06-13 13:48:46 +0000631
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000632 if (encoding != NULL) {
633 xmlCharEncoding enc;
634
635 enc = xmlParseCharEncoding(encoding);
636 if (enc != XML_CHAR_ENCODING_UTF8) {
637 handler = xmlFindCharEncodingHandler(encoding);
638 if (handler == NULL)
639 return(-1);
640 }
641 }
642
643 /*
644 * Fallback to HTML or ASCII when the encoding is unspecified
645 */
646 if (handler == NULL)
647 handler = xmlFindCharEncodingHandler("HTML");
648 if (handler == NULL)
649 handler = xmlFindCharEncodingHandler("ascii");
650
651 /*
652 * save the content to a temp buffer.
653 */
654 buf = xmlOutputBufferCreateFile(out, handler);
655 if (buf == NULL) return(0);
656
657 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
658
659 ret = xmlOutputBufferClose(buf);
660 return(ret);
Daniel Veillard95d845f2001-06-13 13:48:46 +0000661}
662
663/**
Owen Taylor3473f882001-02-23 17:55:21 +0000664 * htmlNodeDumpFile:
665 * @out: the FILE pointer
666 * @doc: the document
667 * @cur: the current node
668 *
Daniel Veillard95d845f2001-06-13 13:48:46 +0000669 * Dump an HTML node, recursive behaviour,children are printed too,
670 * and formatting returns are added.
Owen Taylor3473f882001-02-23 17:55:21 +0000671 */
672void
673htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000674 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000675}
676
677/**
678 * htmlDocContentDump:
679 * @buf: the HTML buffer output
680 * @cur: the document
681 *
682 * Dump an HTML document.
683 */
684static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000685htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000686 int type;
687
688 /*
689 * force to output the stuff as HTML, especially for entities
690 */
691 type = cur->type;
692 cur->type = XML_HTML_DOCUMENT_NODE;
693 if (cur->intSubset != NULL)
694 htmlDtdDump(buf, cur);
695 else {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000696 /* Default to HTML-4.0 transitional @@@@ */
Owen Taylor3473f882001-02-23 17:55:21 +0000697 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
698
699 }
700 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000701 htmlNodeListDump(buf, cur, cur->children, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000702 }
703 xmlBufferWriteChar(buf, "\n");
704 cur->type = (xmlElementType) type;
705}
706
707/**
708 * htmlDocDumpMemory:
709 * @cur: the document
710 * @mem: OUT: the memory pointer
Daniel Veillard2d703722001-05-30 18:32:34 +0000711 * @size: OUT: the memory length
Owen Taylor3473f882001-02-23 17:55:21 +0000712 *
713 * Dump an HTML document in memory and return the xmlChar * and it's size.
714 * It's up to the caller to free the memory.
715 */
716void
717htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard2d703722001-05-30 18:32:34 +0000718 xmlOutputBufferPtr buf;
719 xmlCharEncodingHandlerPtr handler = NULL;
720 const char *encoding;
Owen Taylor3473f882001-02-23 17:55:21 +0000721
722 if (cur == NULL) {
723#ifdef DEBUG_TREE
724 xmlGenericError(xmlGenericErrorContext,
Daniel Veillard2d703722001-05-30 18:32:34 +0000725 "htmlDocDumpMemory : document == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000726#endif
727 *mem = NULL;
728 *size = 0;
729 return;
730 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000731
732 encoding = (const char *) htmlGetMetaEncoding(cur);
733
734 if (encoding != NULL) {
735 xmlCharEncoding enc;
736
737 enc = xmlParseCharEncoding(encoding);
738 if (enc != cur->charset) {
739 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
740 /*
741 * Not supported yet
742 */
743 *mem = NULL;
744 *size = 0;
745 return;
746 }
747
748 handler = xmlFindCharEncodingHandler(encoding);
749 if (handler == NULL) {
750 *mem = NULL;
751 *size = 0;
752 return;
753 }
754 }
755 }
756
757 /*
758 * Fallback to HTML or ASCII when the encoding is unspecified
759 */
760 if (handler == NULL)
761 handler = xmlFindCharEncodingHandler("HTML");
762 if (handler == NULL)
763 handler = xmlFindCharEncodingHandler("ascii");
764
765 buf = xmlAllocOutputBuffer(handler);
Owen Taylor3473f882001-02-23 17:55:21 +0000766 if (buf == NULL) {
767 *mem = NULL;
768 *size = 0;
769 return;
770 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000771
772 htmlDocContentDumpOutput(buf, cur, NULL);
773 xmlOutputBufferFlush(buf);
774 if (buf->conv != NULL) {
775 *size = buf->conv->use;
776 *mem = xmlStrndup(buf->conv->content, *size);
777 } else {
778 *size = buf->buffer->use;
779 *mem = xmlStrndup(buf->buffer->content, *size);
780 }
781 (void)xmlOutputBufferClose(buf);
Owen Taylor3473f882001-02-23 17:55:21 +0000782}
783
784
/************************************************************************
 *                                                                      *
 *              Dumping HTML tree content to an I/O output buffer       *
 *                                                                      *
 ************************************************************************/
790
Daniel Veillard95d845f2001-06-13 13:48:46 +0000791void
792htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
793 const char *encoding, int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000794/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000795 * htmlDtdDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000796 * @buf: the HTML buffer output
797 * @doc: the document
798 * @encoding: the encoding string
799 *
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000800 * TODO: check whether encoding is needed
801 *
Owen Taylor3473f882001-02-23 17:55:21 +0000802 * Dump the HTML document DTD, if any.
803 */
804static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000805htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000806 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000807 xmlDtdPtr cur = doc->intSubset;
808
809 if (cur == NULL) {
810 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000811 "htmlDtdDumpOutput : no internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000812 return;
813 }
814 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
815 xmlOutputBufferWriteString(buf, (const char *)cur->name);
816 if (cur->ExternalID != NULL) {
817 xmlOutputBufferWriteString(buf, " PUBLIC ");
818 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
819 if (cur->SystemID != NULL) {
820 xmlOutputBufferWriteString(buf, " ");
821 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
822 }
823 } else if (cur->SystemID != NULL) {
824 xmlOutputBufferWriteString(buf, " SYSTEM ");
825 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
826 }
827 xmlOutputBufferWriteString(buf, ">\n");
828}
829
830/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000831 * htmlAttrDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000832 * @buf: the HTML buffer output
833 * @doc: the document
834 * @cur: the attribute pointer
835 * @encoding: the encoding string
836 *
837 * Dump an HTML attribute
838 */
839static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000840htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000841 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000842 xmlChar *value;
843
Daniel Veillardeca60d02001-06-13 07:45:41 +0000844 /*
845 * TODO: The html output method should not escape a & character
846 * occurring in an attribute value immediately followed by
847 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
848 */
849
Owen Taylor3473f882001-02-23 17:55:21 +0000850 if (cur == NULL) {
851 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000852 "htmlAttrDumpOutput : property == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000853 return;
854 }
855 xmlOutputBufferWriteString(buf, " ");
856 xmlOutputBufferWriteString(buf, (const char *)cur->name);
857 if (cur->children != NULL) {
858 value = xmlNodeListGetString(doc, cur->children, 0);
859 if (value) {
860 xmlOutputBufferWriteString(buf, "=");
861 xmlBufferWriteQuotedString(buf->buffer, value);
862 xmlFree(value);
863 } else {
864 xmlOutputBufferWriteString(buf, "=\"\"");
865 }
866 }
867}
868
869/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000870 * htmlAttrListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000871 * @buf: the HTML buffer output
872 * @doc: the document
873 * @cur: the first attribute pointer
874 * @encoding: the encoding string
875 *
876 * Dump a list of HTML attributes
877 */
878static void
879htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
880 if (cur == NULL) {
881 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000882 "htmlAttrListDumpOutput : property == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000883 return;
884 }
885 while (cur != NULL) {
886 htmlAttrDumpOutput(buf, doc, cur, encoding);
887 cur = cur->next;
888 }
889}
890
891
892void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
893 xmlNodePtr cur, const char *encoding);
894
895/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000896 * htmlNodeListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000897 * @buf: the HTML buffer output
898 * @doc: the document
899 * @cur: the first node
900 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000901 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000902 *
903 * Dump an HTML node list, recursive behaviour,children are printed too.
904 */
905static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000906htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
907 xmlNodePtr cur, const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000908 if (cur == NULL) {
909 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000910 "htmlNodeListDumpOutput : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000911 return;
912 }
913 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000914 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000915 cur = cur->next;
916 }
917}
918
919/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000920 * htmlNodeDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000921 * @buf: the HTML buffer output
922 * @doc: the document
923 * @cur: the current node
924 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000925 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000926 *
927 * Dump an HTML node, recursive behaviour,children are printed too.
928 */
929void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000930htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
931 xmlNodePtr cur, const char *encoding, int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000932 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000933
934 if (cur == NULL) {
935 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000936 "htmlNodeDumpFormatOutput : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000937 return;
938 }
939 /*
940 * Special cases.
941 */
942 if (cur->type == XML_DTD_NODE)
943 return;
944 if (cur->type == XML_HTML_DOCUMENT_NODE) {
945 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
946 return;
947 }
948 if (cur->type == HTML_TEXT_NODE) {
949 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000950 if (((cur->name == (const xmlChar *)xmlStringText) ||
951 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000952 ((cur->parent == NULL) ||
953 (!xmlStrEqual(cur->parent->name, BAD_CAST "script")))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000954 xmlChar *buffer;
955
956#ifndef XML_USE_BUFFER_CONTENT
957 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
958#else
959 buffer = xmlEncodeEntitiesReentrant(doc,
960 xmlBufferContent(cur->content));
961#endif
962 if (buffer != NULL) {
963 xmlOutputBufferWriteString(buf, (const char *)buffer);
964 xmlFree(buffer);
965 }
966 } else {
967 xmlOutputBufferWriteString(buf, (const char *)cur->content);
968 }
969 }
970 return;
971 }
972 if (cur->type == HTML_COMMENT_NODE) {
973 if (cur->content != NULL) {
974 xmlOutputBufferWriteString(buf, "<!--");
975#ifndef XML_USE_BUFFER_CONTENT
976 xmlOutputBufferWriteString(buf, (const char *)cur->content);
977#else
978 xmlOutputBufferWriteString(buf, (const char *)
979 xmlBufferContent(cur->content));
980#endif
981 xmlOutputBufferWriteString(buf, "-->");
982 }
983 return;
984 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000985 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000986 if (cur->name == NULL)
987 return;
988 xmlOutputBufferWriteString(buf, "<?");
989 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000990 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000991 xmlOutputBufferWriteString(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000992#ifndef XML_USE_BUFFER_CONTENT
993 xmlOutputBufferWriteString(buf, (const char *)cur->content);
994#else
995 xmlOutputBufferWriteString(buf, (const char *)
996 xmlBufferContent(cur->content));
997#endif
Daniel Veillard7533cc82001-04-24 15:52:00 +0000998 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000999 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +00001000 return;
1001 }
Owen Taylor3473f882001-02-23 17:55:21 +00001002 if (cur->type == HTML_ENTITY_REF_NODE) {
1003 xmlOutputBufferWriteString(buf, "&");
1004 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1005 xmlOutputBufferWriteString(buf, ";");
1006 return;
1007 }
1008 if (cur->type == HTML_PRESERVE_NODE) {
1009 if (cur->content != NULL) {
1010#ifndef XML_USE_BUFFER_CONTENT
1011 xmlOutputBufferWriteString(buf, (const char *)cur->content);
1012#else
1013 xmlOutputBufferWriteString(buf, (const char *)
1014 xmlBufferContent(cur->content));
1015#endif
1016 }
1017 return;
1018 }
1019
1020 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001021 * Get specific HTML info for that node.
Owen Taylor3473f882001-02-23 17:55:21 +00001022 */
1023 info = htmlTagLookup(cur->name);
1024
1025 xmlOutputBufferWriteString(buf, "<");
1026 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1027 if (cur->properties != NULL)
1028 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
1029
1030 if ((info != NULL) && (info->empty)) {
1031 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +00001032 if ((format) && (!info->isinline) && (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001033 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +00001034 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1035 (cur->parent != NULL) &&
1036 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001037 xmlOutputBufferWriteString(buf, "\n");
1038 }
1039 return;
1040 }
Daniel Veillard7db37732001-07-12 01:20:08 +00001041 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
1042 (cur->children == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001043 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +00001044 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
1045 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001046 xmlOutputBufferWriteString(buf, ">");
1047 } else {
1048 xmlOutputBufferWriteString(buf, "></");
1049 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1050 xmlOutputBufferWriteString(buf, ">");
1051 }
Daniel Veillard02bb1702001-06-13 21:11:59 +00001052 if ((format) && (cur->next != NULL) &&
1053 (info != NULL) && (!info->isinline)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001054 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +00001055 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1056 (cur->parent != NULL) &&
1057 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001058 xmlOutputBufferWriteString(buf, "\n");
1059 }
1060 return;
1061 }
1062 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +00001063 if ((cur->type != XML_ELEMENT_NODE) &&
1064 (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001065 /*
1066 * Uses the OutputBuffer property to automatically convert
1067 * invalids to charrefs
1068 */
1069
1070#ifndef XML_USE_BUFFER_CONTENT
1071 xmlOutputBufferWriteString(buf, (const char *) cur->content);
1072#else
1073 xmlOutputBufferWriteString(buf,
1074 (const char *) xmlBufferContent(cur->content));
1075#endif
1076 }
1077 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +00001078 if ((format) && (info != NULL) && (!info->isinline) &&
1079 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001080 (cur->children->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001081 (cur->children != cur->last) &&
1082 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001083 xmlOutputBufferWriteString(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +00001084 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +00001085 if ((format) && (info != NULL) && (!info->isinline) &&
1086 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001087 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001088 (cur->children != cur->last) &&
1089 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001090 xmlOutputBufferWriteString(buf, "\n");
1091 }
Owen Taylor3473f882001-02-23 17:55:21 +00001092 xmlOutputBufferWriteString(buf, "</");
1093 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1094 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +00001095 if ((format) && (info != NULL) && (!info->isinline) &&
1096 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001097 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001098 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1099 (cur->parent != NULL) &&
1100 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001101 xmlOutputBufferWriteString(buf, "\n");
1102 }
1103}
1104
1105/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001106 * htmlNodeDumpOutput:
1107 * @buf: the HTML buffer output
1108 * @doc: the document
1109 * @cur: the current node
1110 * @encoding: the encoding string
1111 *
1112 * Dump an HTML node, recursive behaviour,children are printed too,
1113 * and formatting returns/spaces are added.
1114 */
1115void
1116htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
1117 xmlNodePtr cur, const char *encoding) {
1118 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
1119}
1120
1121/**
1122 * htmlDocContentDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +00001123 * @buf: the HTML buffer output
1124 * @cur: the document
1125 * @encoding: the encoding string
Daniel Veillard9d06d302002-01-22 18:15:52 +00001126 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +00001127 *
1128 * Dump an HTML document.
1129 */
1130void
Daniel Veillard95d845f2001-06-13 13:48:46 +00001131htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1132 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001133 int type;
1134
1135 /*
1136 * force to output the stuff as HTML, especially for entities
1137 */
1138 type = cur->type;
1139 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard4dd93462001-04-02 15:16:19 +00001140 if (cur->intSubset != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001141 htmlDtdDumpOutput(buf, cur, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001142 }
1143 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +00001144 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001145 }
1146 xmlOutputBufferWriteString(buf, "\n");
1147 cur->type = (xmlElementType) type;
1148}
1149
Daniel Veillard95d845f2001-06-13 13:48:46 +00001150/**
1151 * htmlDocContentDumpOutput:
1152 * @buf: the HTML buffer output
1153 * @cur: the document
1154 * @encoding: the encoding string
1155 *
1156 * Dump an HTML document. Formating return/spaces are added.
1157 */
1158void
1159htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1160 const char *encoding) {
1161 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1162}
1163
/************************************************************************
 *                                                                      *
 *              Saving functions front-ends                             *
 *                                                                      *
 ************************************************************************/
1169
1170/**
1171 * htmlDocDump:
1172 * @f: the FILE*
1173 * @cur: the document
1174 *
1175 * Dump an HTML document to an open FILE.
1176 *
1177 * returns: the number of byte written or -1 in case of failure.
1178 */
1179int
1180htmlDocDump(FILE *f, xmlDocPtr cur) {
1181 xmlOutputBufferPtr buf;
1182 xmlCharEncodingHandlerPtr handler = NULL;
1183 const char *encoding;
1184 int ret;
1185
1186 if (cur == NULL) {
1187#ifdef DEBUG_TREE
1188 xmlGenericError(xmlGenericErrorContext,
1189 "htmlDocDump : document == NULL\n");
1190#endif
1191 return(-1);
1192 }
1193
1194 encoding = (const char *) htmlGetMetaEncoding(cur);
1195
1196 if (encoding != NULL) {
1197 xmlCharEncoding enc;
1198
1199 enc = xmlParseCharEncoding(encoding);
1200 if (enc != cur->charset) {
1201 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1202 /*
1203 * Not supported yet
1204 */
1205 return(-1);
1206 }
1207
1208 handler = xmlFindCharEncodingHandler(encoding);
1209 if (handler == NULL)
1210 return(-1);
1211 }
1212 }
1213
1214 /*
1215 * Fallback to HTML or ASCII when the encoding is unspecified
1216 */
1217 if (handler == NULL)
1218 handler = xmlFindCharEncodingHandler("HTML");
1219 if (handler == NULL)
1220 handler = xmlFindCharEncodingHandler("ascii");
1221
1222 buf = xmlOutputBufferCreateFile(f, handler);
1223 if (buf == NULL) return(-1);
1224 htmlDocContentDumpOutput(buf, cur, NULL);
1225
1226 ret = xmlOutputBufferClose(buf);
1227 return(ret);
1228}
1229
1230/**
1231 * htmlSaveFile:
1232 * @filename: the filename (or URL)
1233 * @cur: the document
1234 *
1235 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1236 * used.
1237 * returns: the number of byte written or -1 in case of failure.
1238 */
1239int
1240htmlSaveFile(const char *filename, xmlDocPtr cur) {
1241 xmlOutputBufferPtr buf;
1242 xmlCharEncodingHandlerPtr handler = NULL;
1243 const char *encoding;
1244 int ret;
1245
1246 encoding = (const char *) htmlGetMetaEncoding(cur);
1247
1248 if (encoding != NULL) {
1249 xmlCharEncoding enc;
1250
1251 enc = xmlParseCharEncoding(encoding);
1252 if (enc != cur->charset) {
1253 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1254 /*
1255 * Not supported yet
1256 */
1257 return(-1);
1258 }
1259
1260 handler = xmlFindCharEncodingHandler(encoding);
1261 if (handler == NULL)
1262 return(-1);
1263 }
1264 }
1265
1266 /*
1267 * Fallback to HTML or ASCII when the encoding is unspecified
1268 */
1269 if (handler == NULL)
1270 handler = xmlFindCharEncodingHandler("HTML");
1271 if (handler == NULL)
1272 handler = xmlFindCharEncodingHandler("ascii");
1273
1274 /*
1275 * save the content to a temp buffer.
1276 */
1277 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1278 if (buf == NULL) return(0);
1279
1280 htmlDocContentDumpOutput(buf, cur, NULL);
1281
1282 ret = xmlOutputBufferClose(buf);
1283 return(ret);
1284}
1285
1286/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001287 * htmlSaveFileFormat:
Owen Taylor3473f882001-02-23 17:55:21 +00001288 * @filename: the filename
1289 * @cur: the document
Daniel Veillard95d845f2001-06-13 13:48:46 +00001290 * @format: should formatting spaces been added
1291 * @encoding: the document encoding
Owen Taylor3473f882001-02-23 17:55:21 +00001292 *
1293 * Dump an HTML document to a file using a given encoding.
1294 *
1295 * returns: the number of byte written or -1 in case of failure.
1296 */
1297int
Daniel Veillard95d845f2001-06-13 13:48:46 +00001298htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1299 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001300 xmlOutputBufferPtr buf;
1301 xmlCharEncodingHandlerPtr handler = NULL;
1302 int ret;
1303
1304 if (encoding != NULL) {
1305 xmlCharEncoding enc;
1306
1307 enc = xmlParseCharEncoding(encoding);
1308 if (enc != cur->charset) {
1309 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1310 /*
1311 * Not supported yet
1312 */
1313 return(-1);
1314 }
1315
1316 handler = xmlFindCharEncodingHandler(encoding);
1317 if (handler == NULL)
1318 return(-1);
1319 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1320 }
Daniel Veillard4dd93462001-04-02 15:16:19 +00001321 } else {
1322 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
Owen Taylor3473f882001-02-23 17:55:21 +00001323 }
1324
1325 /*
1326 * Fallback to HTML or ASCII when the encoding is unspecified
1327 */
1328 if (handler == NULL)
1329 handler = xmlFindCharEncodingHandler("HTML");
1330 if (handler == NULL)
1331 handler = xmlFindCharEncodingHandler("ascii");
1332
1333 /*
1334 * save the content to a temp buffer.
1335 */
1336 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1337 if (buf == NULL) return(0);
1338
Daniel Veillard95d845f2001-06-13 13:48:46 +00001339 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001340
1341 ret = xmlOutputBufferClose(buf);
1342 return(ret);
1343}
Daniel Veillard95d845f2001-06-13 13:48:46 +00001344
1345/**
1346 * htmlSaveFileEnc:
1347 * @filename: the filename
1348 * @cur: the document
1349 * @encoding: the document encoding
1350 *
1351 * Dump an HTML document to a file using a given encoding
1352 * and formatting returns/spaces are added.
1353 *
1354 * returns: the number of byte written or -1 in case of failure.
1355 */
1356int
1357htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1358 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1359}
1360
Owen Taylor3473f882001-02-23 17:55:21 +00001361#endif /* LIBXML_HTML_ENABLED */