blob: c0e005011ea85c9062943cb1a7a47993ca3d0076 [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
8
9
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000011#ifdef LIBXML_HTML_ENABLED
12
Owen Taylor3473f882001-02-23 17:55:21 +000013#ifdef HAVE_CTYPE_H
14#include <ctype.h>
15#endif
16#ifdef HAVE_STDLIB_H
17#include <stdlib.h>
18#endif
19
20#include <libxml/xmlmemory.h>
21#include <libxml/HTMLparser.h>
22#include <libxml/HTMLtree.h>
23#include <libxml/entities.h>
24#include <libxml/valid.h>
25#include <libxml/xmlerror.h>
26#include <libxml/parserInternals.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000027#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000028
/************************************************************************
 *                                                                      *
 *              Getting/Setting encoding meta tags                      *
 *                                                                      *
 ************************************************************************/
34
35/**
36 * htmlGetMetaEncoding:
37 * @doc: the document
38 *
39 * Encoding definition lookup in the Meta tags
40 *
41 * Returns the current encoding as flagged in the HTML source
42 */
43const xmlChar *
44htmlGetMetaEncoding(htmlDocPtr doc) {
45 htmlNodePtr cur;
46 const xmlChar *content;
47 const xmlChar *encoding;
48
49 if (doc == NULL)
50 return(NULL);
51 cur = doc->children;
52
53 /*
54 * Search the html
55 */
56 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000057 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000058 if (xmlStrEqual(cur->name, BAD_CAST"html"))
59 break;
60 if (xmlStrEqual(cur->name, BAD_CAST"head"))
61 goto found_head;
62 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
63 goto found_meta;
64 }
65 cur = cur->next;
66 }
67 if (cur == NULL)
68 return(NULL);
69 cur = cur->children;
70
71 /*
72 * Search the head
73 */
74 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000075 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000076 if (xmlStrEqual(cur->name, BAD_CAST"head"))
77 break;
78 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
79 goto found_meta;
80 }
81 cur = cur->next;
82 }
83 if (cur == NULL)
84 return(NULL);
85found_head:
86 cur = cur->children;
87
88 /*
89 * Search the meta elements
90 */
91found_meta:
92 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000093 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000094 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
95 xmlAttrPtr attr = cur->properties;
96 int http;
97 const xmlChar *value;
98
99 content = NULL;
100 http = 0;
101 while (attr != NULL) {
102 if ((attr->children != NULL) &&
103 (attr->children->type == XML_TEXT_NODE) &&
104 (attr->children->next == NULL)) {
105#ifndef XML_USE_BUFFER_CONTENT
106 value = attr->children->content;
107#else
108 value = xmlBufferContent(attr->children->content);
109#endif
110 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
111 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
112 http = 1;
113 else if ((value != NULL)
114 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
115 content = value;
116 if ((http != 0) && (content != NULL))
117 goto found_content;
118 }
119 attr = attr->next;
120 }
121 }
122 }
123 cur = cur->next;
124 }
125 return(NULL);
126
127found_content:
128 encoding = xmlStrstr(content, BAD_CAST"charset=");
129 if (encoding == NULL)
130 encoding = xmlStrstr(content, BAD_CAST"Charset=");
131 if (encoding == NULL)
132 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
133 if (encoding != NULL) {
134 encoding += 8;
135 } else {
136 encoding = xmlStrstr(content, BAD_CAST"charset =");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"Charset =");
139 if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
141 if (encoding != NULL)
142 encoding += 9;
143 }
144 if (encoding != NULL) {
145 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
146 }
147 return(encoding);
148}
149
150/**
151 * htmlSetMetaEncoding:
152 * @doc: the document
153 * @encoding: the encoding string
154 *
155 * Sets the current encoding in the Meta tags
156 * NOTE: this will not change the document content encoding, just
157 * the META flag associated.
158 *
159 * Returns 0 in case of success and -1 in case of error
160 */
161int
162htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
163 htmlNodePtr cur, meta;
164 const xmlChar *content;
165 char newcontent[100];
166
167
168 if (doc == NULL)
169 return(-1);
170
171 if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000172 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
173 encoding);
Owen Taylor3473f882001-02-23 17:55:21 +0000174 newcontent[sizeof(newcontent) - 1] = 0;
175 }
176
177 cur = doc->children;
178
179 /*
180 * Search the html
181 */
182 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000183 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000184 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
185 break;
186 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
187 goto found_head;
188 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
189 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000190 }
191 cur = cur->next;
192 }
193 if (cur == NULL)
194 return(-1);
195 cur = cur->children;
196
197 /*
198 * Search the head
199 */
200 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000201 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000202 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
203 break;
204 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
205 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000206 }
207 cur = cur->next;
208 }
209 if (cur == NULL)
210 return(-1);
211found_head:
212 if (cur->children == NULL) {
213 if (encoding == NULL)
214 return(0);
215 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
216 xmlAddChild(cur, meta);
Owen Taylor3473f882001-02-23 17:55:21 +0000217 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000218 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Owen Taylor3473f882001-02-23 17:55:21 +0000219 return(0);
220 }
221 cur = cur->children;
222
223found_meta:
224 if (encoding != NULL) {
225 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000226 * Create a new Meta element with the right attributes
Owen Taylor3473f882001-02-23 17:55:21 +0000227 */
228
229 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
230 xmlAddPrevSibling(cur, meta);
Owen Taylor3473f882001-02-23 17:55:21 +0000231 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000232 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Owen Taylor3473f882001-02-23 17:55:21 +0000233 }
234
235 /*
236 * Search and destroy all the remaining the meta elements carrying
237 * encoding informations
238 */
239 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000240 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000241 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000242 xmlAttrPtr attr = cur->properties;
243 int http;
244 const xmlChar *value;
245
246 content = NULL;
247 http = 0;
248 while (attr != NULL) {
249 if ((attr->children != NULL) &&
250 (attr->children->type == XML_TEXT_NODE) &&
251 (attr->children->next == NULL)) {
252#ifndef XML_USE_BUFFER_CONTENT
253 value = attr->children->content;
254#else
255 value = xmlBufferContent(attr->children->content);
256#endif
257 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
258 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
259 http = 1;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000260 else
261 {
262 if ((value != NULL) &&
263 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
264 content = value;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000265 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000266 if ((http != 0) && (content != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +0000267 break;
268 }
269 attr = attr->next;
270 }
Daniel Veillard4e0e2972002-03-06 21:39:42 +0000271 if ((http != 0) && (content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000272 meta = cur;
273 cur = cur->next;
274 xmlUnlinkNode(meta);
275 xmlFreeNode(meta);
276 continue;
277 }
278
279 }
280 }
281 cur = cur->next;
282 }
283 return(0);
284}
285
/************************************************************************
 *                                                                      *
 *              Dumping HTML tree content to a simple buffer            *
 *                                                                      *
 ************************************************************************/
291
292static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000293htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format);
Daniel Veillard86fd5a72001-12-13 14:55:21 +0000294static void
295htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
296 int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000297
298/**
299 * htmlDtdDump:
300 * @buf: the HTML buffer output
301 * @doc: the document
302 *
303 * Dump the HTML document DTD, if any.
304 */
305static void
306htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
307 xmlDtdPtr cur = doc->intSubset;
308
309 if (cur == NULL) {
310 xmlGenericError(xmlGenericErrorContext,
311 "htmlDtdDump : no internal subset\n");
312 return;
313 }
314 xmlBufferWriteChar(buf, "<!DOCTYPE ");
315 xmlBufferWriteCHAR(buf, cur->name);
316 if (cur->ExternalID != NULL) {
317 xmlBufferWriteChar(buf, " PUBLIC ");
318 xmlBufferWriteQuotedString(buf, cur->ExternalID);
319 if (cur->SystemID != NULL) {
320 xmlBufferWriteChar(buf, " ");
321 xmlBufferWriteQuotedString(buf, cur->SystemID);
322 }
323 } else if (cur->SystemID != NULL) {
324 xmlBufferWriteChar(buf, " SYSTEM ");
325 xmlBufferWriteQuotedString(buf, cur->SystemID);
326 }
327 xmlBufferWriteChar(buf, ">\n");
328}
329
330/**
331 * htmlAttrDump:
332 * @buf: the HTML buffer output
333 * @doc: the document
334 * @cur: the attribute pointer
335 *
336 * Dump an HTML attribute
337 */
338static void
339htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
340 xmlChar *value;
341
Daniel Veillardeca60d02001-06-13 07:45:41 +0000342 /*
343 * TODO: The html output method should not escape a & character
344 * occurring in an attribute value immediately followed by
345 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
346 */
347
Owen Taylor3473f882001-02-23 17:55:21 +0000348 if (cur == NULL) {
349 xmlGenericError(xmlGenericErrorContext,
350 "htmlAttrDump : property == NULL\n");
351 return;
352 }
353 xmlBufferWriteChar(buf, " ");
354 xmlBufferWriteCHAR(buf, cur->name);
355 if (cur->children != NULL) {
356 value = xmlNodeListGetString(doc, cur->children, 0);
357 if (value) {
358 xmlBufferWriteChar(buf, "=");
359 xmlBufferWriteQuotedString(buf, value);
360 xmlFree(value);
361 } else {
362 xmlBufferWriteChar(buf, "=\"\"");
363 }
364 }
365}
366
367/**
368 * htmlAttrListDump:
369 * @buf: the HTML buffer output
370 * @doc: the document
371 * @cur: the first attribute pointer
372 *
373 * Dump a list of HTML attributes
374 */
375static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000376htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, int format) {
377 int i = 0;
378
Owen Taylor3473f882001-02-23 17:55:21 +0000379 if (cur == NULL) {
380 xmlGenericError(xmlGenericErrorContext,
381 "htmlAttrListDump : property == NULL\n");
382 return;
383 }
384 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000385 i++;
386 if ((format) && (i >= 5)) {
387 i = 0;
388 xmlBufferWriteChar(buf, "\n");
389 }
Owen Taylor3473f882001-02-23 17:55:21 +0000390 htmlAttrDump(buf, doc, cur);
391 cur = cur->next;
392 }
393}
394
Owen Taylor3473f882001-02-23 17:55:21 +0000395/**
396 * htmlNodeListDump:
397 * @buf: the HTML buffer output
398 * @doc: the document
399 * @cur: the first node
400 *
401 * Dump an HTML node list, recursive behaviour,children are printed too.
402 */
403static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000404htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000405 if (cur == NULL) {
406 xmlGenericError(xmlGenericErrorContext,
407 "htmlNodeListDump : node == NULL\n");
408 return;
409 }
410 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000411 htmlNodeDumpFormat(buf, doc, cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000412 cur = cur->next;
413 }
414}
415
416/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000417 * htmlNodeDumpFormat:
Owen Taylor3473f882001-02-23 17:55:21 +0000418 * @buf: the HTML buffer output
419 * @doc: the document
420 * @cur: the current node
Daniel Veillard95d845f2001-06-13 13:48:46 +0000421 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000422 *
423 * Dump an HTML node, recursive behaviour,children are printed too.
424 */
Daniel Veillard86fd5a72001-12-13 14:55:21 +0000425static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000426htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
427 int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000428 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000429
430 if (cur == NULL) {
431 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000432 "htmlNodeDumpFormat : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000433 return;
434 }
435 /*
436 * Special cases.
437 */
438 if (cur->type == XML_DTD_NODE)
439 return;
440 if (cur->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000441 htmlDocContentDump(buf, (xmlDocPtr) cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000442 return;
443 }
444 if (cur->type == HTML_TEXT_NODE) {
445 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000446 if (((cur->name == (const xmlChar *)xmlStringText) ||
447 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000448 ((cur->parent == NULL) ||
449 (!xmlStrEqual(cur->parent->name, BAD_CAST "script")))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000450 xmlChar *buffer;
451
452#ifndef XML_USE_BUFFER_CONTENT
453 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
454#else
455 buffer = xmlEncodeEntitiesReentrant(doc,
456 xmlBufferContent(cur->content));
457#endif
458 if (buffer != NULL) {
459 xmlBufferWriteCHAR(buf, buffer);
460 xmlFree(buffer);
461 }
462 } else {
463 xmlBufferWriteCHAR(buf, cur->content);
464 }
465 }
466 return;
467 }
468 if (cur->type == HTML_COMMENT_NODE) {
469 if (cur->content != NULL) {
470 xmlBufferWriteChar(buf, "<!--");
471#ifndef XML_USE_BUFFER_CONTENT
472 xmlBufferWriteCHAR(buf, cur->content);
473#else
474 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
475#endif
476 xmlBufferWriteChar(buf, "-->");
477 }
478 return;
479 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000480 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000481 if (cur->name == NULL)
482 return;
483 xmlBufferWriteChar(buf, "<?");
484 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000485 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000486 xmlBufferWriteChar(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000487#ifndef XML_USE_BUFFER_CONTENT
488 xmlBufferWriteCHAR(buf, cur->content);
489#else
490 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
491#endif
Daniel Veillard7533cc82001-04-24 15:52:00 +0000492 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000493 xmlBufferWriteChar(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000494 return;
495 }
Owen Taylor3473f882001-02-23 17:55:21 +0000496 if (cur->type == HTML_ENTITY_REF_NODE) {
497 xmlBufferWriteChar(buf, "&");
498 xmlBufferWriteCHAR(buf, cur->name);
499 xmlBufferWriteChar(buf, ";");
500 return;
501 }
Daniel Veillard083c2662001-05-08 08:27:14 +0000502 if (cur->type == HTML_PRESERVE_NODE) {
503 if (cur->content != NULL) {
504#ifndef XML_USE_BUFFER_CONTENT
505 xmlBufferWriteCHAR(buf, cur->content);
506#else
507 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
508#endif
509 }
510 return;
511 }
Owen Taylor3473f882001-02-23 17:55:21 +0000512
513 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000514 * Get specific HTML info for that node.
Owen Taylor3473f882001-02-23 17:55:21 +0000515 */
516 info = htmlTagLookup(cur->name);
517
518 xmlBufferWriteChar(buf, "<");
519 xmlBufferWriteCHAR(buf, cur->name);
520 if (cur->properties != NULL)
Daniel Veillard95d845f2001-06-13 13:48:46 +0000521 htmlAttrListDump(buf, doc, cur->properties, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000522
523 if ((info != NULL) && (info->empty)) {
524 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000525 if ((format) && (info != NULL) && (!info->isinline) &&
526 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000527 if ((cur->next->type != HTML_TEXT_NODE) &&
528 (cur->next->type != HTML_ENTITY_REF_NODE))
529 xmlBufferWriteChar(buf, "\n");
530 }
531 return;
532 }
Daniel Veillard7db37732001-07-12 01:20:08 +0000533 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
534 (cur->children == NULL)) {
Daniel Veillard083c2662001-05-08 08:27:14 +0000535 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +0000536 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
537 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000538 xmlBufferWriteChar(buf, ">");
Daniel Veillard083c2662001-05-08 08:27:14 +0000539 } else {
Owen Taylor3473f882001-02-23 17:55:21 +0000540 xmlBufferWriteChar(buf, "></");
541 xmlBufferWriteCHAR(buf, cur->name);
542 xmlBufferWriteChar(buf, ">");
543 }
Daniel Veillard02bb1702001-06-13 21:11:59 +0000544 if ((format) && (info != NULL) && (!info->isinline) &&
545 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000546 if ((cur->next->type != HTML_TEXT_NODE) &&
547 (cur->next->type != HTML_ENTITY_REF_NODE))
548 xmlBufferWriteChar(buf, "\n");
549 }
550 return;
551 }
552 xmlBufferWriteChar(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +0000553 if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000554 xmlChar *buffer;
555
556#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard083c2662001-05-08 08:27:14 +0000557 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000558#else
Daniel Veillard083c2662001-05-08 08:27:14 +0000559 buffer = xmlEncodeEntitiesReentrant(doc,
560 xmlBufferContent(cur->content));
Owen Taylor3473f882001-02-23 17:55:21 +0000561#endif
562 if (buffer != NULL) {
563 xmlBufferWriteCHAR(buf, buffer);
564 xmlFree(buffer);
565 }
566 }
567 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +0000568 if ((format) && (info != NULL) && (!info->isinline) &&
569 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000570 (cur->children->type != HTML_ENTITY_REF_NODE) &&
571 (cur->children != cur->last))
572 xmlBufferWriteChar(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +0000573 htmlNodeListDump(buf, doc, cur->children, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +0000574 if ((format) && (info != NULL) && (!info->isinline) &&
575 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000576 (cur->last->type != HTML_ENTITY_REF_NODE) &&
577 (cur->children != cur->last))
578 xmlBufferWriteChar(buf, "\n");
579 }
Owen Taylor3473f882001-02-23 17:55:21 +0000580 xmlBufferWriteChar(buf, "</");
581 xmlBufferWriteCHAR(buf, cur->name);
582 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000583 if ((format) && (info != NULL) && (!info->isinline) &&
584 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000585 if ((cur->next->type != HTML_TEXT_NODE) &&
586 (cur->next->type != HTML_ENTITY_REF_NODE))
587 xmlBufferWriteChar(buf, "\n");
588 }
589}
590
591/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000592 * htmlNodeDump:
593 * @buf: the HTML buffer output
594 * @doc: the document
595 * @cur: the current node
596 *
597 * Dump an HTML node, recursive behaviour,children are printed too,
598 * and formatting returns are added.
599 */
600void
601htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
602 htmlNodeDumpFormat(buf, doc, cur, 1);
603}
604
605/**
606 * htmlNodeDumpFileFormat:
607 * @out: the FILE pointer
608 * @doc: the document
609 * @cur: the current node
610 * @encoding: the document encoding
611 * @format: should formatting spaces been added
612 *
613 * Dump an HTML node, recursive behaviour,children are printed too.
614 *
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000615 * TODO: if encoding == NULL try to save in the doc encoding
616 *
617 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard95d845f2001-06-13 13:48:46 +0000618 */
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000619int
620htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
621 xmlNodePtr cur, const char *encoding, int format) {
622 xmlOutputBufferPtr buf;
623 xmlCharEncodingHandlerPtr handler = NULL;
624 int ret;
Daniel Veillard95d845f2001-06-13 13:48:46 +0000625
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000626 if (encoding != NULL) {
627 xmlCharEncoding enc;
628
629 enc = xmlParseCharEncoding(encoding);
630 if (enc != XML_CHAR_ENCODING_UTF8) {
631 handler = xmlFindCharEncodingHandler(encoding);
632 if (handler == NULL)
633 return(-1);
634 }
635 }
636
637 /*
638 * Fallback to HTML or ASCII when the encoding is unspecified
639 */
640 if (handler == NULL)
641 handler = xmlFindCharEncodingHandler("HTML");
642 if (handler == NULL)
643 handler = xmlFindCharEncodingHandler("ascii");
644
645 /*
646 * save the content to a temp buffer.
647 */
648 buf = xmlOutputBufferCreateFile(out, handler);
649 if (buf == NULL) return(0);
650
651 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
652
653 ret = xmlOutputBufferClose(buf);
654 return(ret);
Daniel Veillard95d845f2001-06-13 13:48:46 +0000655}
656
657/**
Owen Taylor3473f882001-02-23 17:55:21 +0000658 * htmlNodeDumpFile:
659 * @out: the FILE pointer
660 * @doc: the document
661 * @cur: the current node
662 *
Daniel Veillard95d845f2001-06-13 13:48:46 +0000663 * Dump an HTML node, recursive behaviour,children are printed too,
664 * and formatting returns are added.
Owen Taylor3473f882001-02-23 17:55:21 +0000665 */
666void
667htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000668 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000669}
670
671/**
672 * htmlDocContentDump:
673 * @buf: the HTML buffer output
674 * @cur: the document
675 *
676 * Dump an HTML document.
677 */
678static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000679htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000680 int type;
681
682 /*
683 * force to output the stuff as HTML, especially for entities
684 */
685 type = cur->type;
686 cur->type = XML_HTML_DOCUMENT_NODE;
687 if (cur->intSubset != NULL)
688 htmlDtdDump(buf, cur);
689 else {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000690 /* Default to HTML-4.0 transitional @@@@ */
Owen Taylor3473f882001-02-23 17:55:21 +0000691 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
692
693 }
694 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000695 htmlNodeListDump(buf, cur, cur->children, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000696 }
697 xmlBufferWriteChar(buf, "\n");
698 cur->type = (xmlElementType) type;
699}
700
701/**
702 * htmlDocDumpMemory:
703 * @cur: the document
704 * @mem: OUT: the memory pointer
Daniel Veillard2d703722001-05-30 18:32:34 +0000705 * @size: OUT: the memory length
Owen Taylor3473f882001-02-23 17:55:21 +0000706 *
707 * Dump an HTML document in memory and return the xmlChar * and it's size.
708 * It's up to the caller to free the memory.
709 */
710void
711htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard2d703722001-05-30 18:32:34 +0000712 xmlOutputBufferPtr buf;
713 xmlCharEncodingHandlerPtr handler = NULL;
714 const char *encoding;
Owen Taylor3473f882001-02-23 17:55:21 +0000715
716 if (cur == NULL) {
717#ifdef DEBUG_TREE
718 xmlGenericError(xmlGenericErrorContext,
Daniel Veillard2d703722001-05-30 18:32:34 +0000719 "htmlDocDumpMemory : document == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000720#endif
721 *mem = NULL;
722 *size = 0;
723 return;
724 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000725
726 encoding = (const char *) htmlGetMetaEncoding(cur);
727
728 if (encoding != NULL) {
729 xmlCharEncoding enc;
730
731 enc = xmlParseCharEncoding(encoding);
732 if (enc != cur->charset) {
733 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
734 /*
735 * Not supported yet
736 */
737 *mem = NULL;
738 *size = 0;
739 return;
740 }
741
742 handler = xmlFindCharEncodingHandler(encoding);
743 if (handler == NULL) {
744 *mem = NULL;
745 *size = 0;
746 return;
747 }
748 }
749 }
750
751 /*
752 * Fallback to HTML or ASCII when the encoding is unspecified
753 */
754 if (handler == NULL)
755 handler = xmlFindCharEncodingHandler("HTML");
756 if (handler == NULL)
757 handler = xmlFindCharEncodingHandler("ascii");
758
759 buf = xmlAllocOutputBuffer(handler);
Owen Taylor3473f882001-02-23 17:55:21 +0000760 if (buf == NULL) {
761 *mem = NULL;
762 *size = 0;
763 return;
764 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000765
766 htmlDocContentDumpOutput(buf, cur, NULL);
767 xmlOutputBufferFlush(buf);
768 if (buf->conv != NULL) {
769 *size = buf->conv->use;
770 *mem = xmlStrndup(buf->conv->content, *size);
771 } else {
772 *size = buf->buffer->use;
773 *mem = xmlStrndup(buf->buffer->content, *size);
774 }
775 (void)xmlOutputBufferClose(buf);
Owen Taylor3473f882001-02-23 17:55:21 +0000776}
777
778
/************************************************************************
 *                                                                      *
 *              Dumping HTML tree content to an I/O output buffer       *
 *                                                                      *
 ************************************************************************/
784
785/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000786 * htmlDtdDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000787 * @buf: the HTML buffer output
788 * @doc: the document
789 * @encoding: the encoding string
790 *
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000791 * TODO: check whether encoding is needed
792 *
Owen Taylor3473f882001-02-23 17:55:21 +0000793 * Dump the HTML document DTD, if any.
794 */
795static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000796htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000797 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000798 xmlDtdPtr cur = doc->intSubset;
799
800 if (cur == NULL) {
801 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000802 "htmlDtdDumpOutput : no internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000803 return;
804 }
805 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
806 xmlOutputBufferWriteString(buf, (const char *)cur->name);
807 if (cur->ExternalID != NULL) {
808 xmlOutputBufferWriteString(buf, " PUBLIC ");
809 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
810 if (cur->SystemID != NULL) {
811 xmlOutputBufferWriteString(buf, " ");
812 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
813 }
814 } else if (cur->SystemID != NULL) {
815 xmlOutputBufferWriteString(buf, " SYSTEM ");
816 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
817 }
818 xmlOutputBufferWriteString(buf, ">\n");
819}
820
821/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000822 * htmlAttrDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000823 * @buf: the HTML buffer output
824 * @doc: the document
825 * @cur: the attribute pointer
826 * @encoding: the encoding string
827 *
828 * Dump an HTML attribute
829 */
830static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000831htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000832 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000833 xmlChar *value;
834
Daniel Veillardeca60d02001-06-13 07:45:41 +0000835 /*
836 * TODO: The html output method should not escape a & character
837 * occurring in an attribute value immediately followed by
838 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
839 */
840
Owen Taylor3473f882001-02-23 17:55:21 +0000841 if (cur == NULL) {
842 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000843 "htmlAttrDumpOutput : property == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000844 return;
845 }
846 xmlOutputBufferWriteString(buf, " ");
847 xmlOutputBufferWriteString(buf, (const char *)cur->name);
848 if (cur->children != NULL) {
849 value = xmlNodeListGetString(doc, cur->children, 0);
850 if (value) {
851 xmlOutputBufferWriteString(buf, "=");
852 xmlBufferWriteQuotedString(buf->buffer, value);
853 xmlFree(value);
854 } else {
855 xmlOutputBufferWriteString(buf, "=\"\"");
856 }
857 }
858}
859
860/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000861 * htmlAttrListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000862 * @buf: the HTML buffer output
863 * @doc: the document
864 * @cur: the first attribute pointer
865 * @encoding: the encoding string
866 *
867 * Dump a list of HTML attributes
868 */
869static void
870htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
871 if (cur == NULL) {
872 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000873 "htmlAttrListDumpOutput : property == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000874 return;
875 }
876 while (cur != NULL) {
877 htmlAttrDumpOutput(buf, doc, cur, encoding);
878 cur = cur->next;
879 }
880}
881
882
883void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
884 xmlNodePtr cur, const char *encoding);
885
886/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000887 * htmlNodeListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000888 * @buf: the HTML buffer output
889 * @doc: the document
890 * @cur: the first node
891 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000892 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000893 *
894 * Dump an HTML node list, recursive behaviour,children are printed too.
895 */
896static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000897htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
898 xmlNodePtr cur, const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000899 if (cur == NULL) {
900 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000901 "htmlNodeListDumpOutput : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000902 return;
903 }
904 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000905 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000906 cur = cur->next;
907 }
908}
909
910/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000911 * htmlNodeDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000912 * @buf: the HTML buffer output
913 * @doc: the document
914 * @cur: the current node
915 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000916 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000917 *
918 * Dump an HTML node, recursive behaviour,children are printed too.
919 */
920void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000921htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
922 xmlNodePtr cur, const char *encoding, int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000923 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000924
925 if (cur == NULL) {
926 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000927 "htmlNodeDumpFormatOutput : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000928 return;
929 }
930 /*
931 * Special cases.
932 */
933 if (cur->type == XML_DTD_NODE)
934 return;
935 if (cur->type == XML_HTML_DOCUMENT_NODE) {
936 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
937 return;
938 }
939 if (cur->type == HTML_TEXT_NODE) {
940 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000941 if (((cur->name == (const xmlChar *)xmlStringText) ||
942 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000943 ((cur->parent == NULL) ||
944 (!xmlStrEqual(cur->parent->name, BAD_CAST "script")))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000945 xmlChar *buffer;
946
947#ifndef XML_USE_BUFFER_CONTENT
948 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
949#else
950 buffer = xmlEncodeEntitiesReentrant(doc,
951 xmlBufferContent(cur->content));
952#endif
953 if (buffer != NULL) {
954 xmlOutputBufferWriteString(buf, (const char *)buffer);
955 xmlFree(buffer);
956 }
957 } else {
958 xmlOutputBufferWriteString(buf, (const char *)cur->content);
959 }
960 }
961 return;
962 }
963 if (cur->type == HTML_COMMENT_NODE) {
964 if (cur->content != NULL) {
965 xmlOutputBufferWriteString(buf, "<!--");
966#ifndef XML_USE_BUFFER_CONTENT
967 xmlOutputBufferWriteString(buf, (const char *)cur->content);
968#else
969 xmlOutputBufferWriteString(buf, (const char *)
970 xmlBufferContent(cur->content));
971#endif
972 xmlOutputBufferWriteString(buf, "-->");
973 }
974 return;
975 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000976 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000977 if (cur->name == NULL)
978 return;
979 xmlOutputBufferWriteString(buf, "<?");
980 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000981 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000982 xmlOutputBufferWriteString(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000983#ifndef XML_USE_BUFFER_CONTENT
984 xmlOutputBufferWriteString(buf, (const char *)cur->content);
985#else
986 xmlOutputBufferWriteString(buf, (const char *)
987 xmlBufferContent(cur->content));
988#endif
Daniel Veillard7533cc82001-04-24 15:52:00 +0000989 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000990 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000991 return;
992 }
Owen Taylor3473f882001-02-23 17:55:21 +0000993 if (cur->type == HTML_ENTITY_REF_NODE) {
994 xmlOutputBufferWriteString(buf, "&");
995 xmlOutputBufferWriteString(buf, (const char *)cur->name);
996 xmlOutputBufferWriteString(buf, ";");
997 return;
998 }
999 if (cur->type == HTML_PRESERVE_NODE) {
1000 if (cur->content != NULL) {
1001#ifndef XML_USE_BUFFER_CONTENT
1002 xmlOutputBufferWriteString(buf, (const char *)cur->content);
1003#else
1004 xmlOutputBufferWriteString(buf, (const char *)
1005 xmlBufferContent(cur->content));
1006#endif
1007 }
1008 return;
1009 }
1010
1011 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001012 * Get specific HTML info for that node.
Owen Taylor3473f882001-02-23 17:55:21 +00001013 */
1014 info = htmlTagLookup(cur->name);
1015
1016 xmlOutputBufferWriteString(buf, "<");
1017 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1018 if (cur->properties != NULL)
1019 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
1020
1021 if ((info != NULL) && (info->empty)) {
1022 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +00001023 if ((format) && (!info->isinline) && (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001024 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +00001025 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1026 (cur->parent != NULL) &&
1027 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001028 xmlOutputBufferWriteString(buf, "\n");
1029 }
1030 return;
1031 }
Daniel Veillard7db37732001-07-12 01:20:08 +00001032 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
1033 (cur->children == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001034 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +00001035 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
1036 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001037 xmlOutputBufferWriteString(buf, ">");
1038 } else {
1039 xmlOutputBufferWriteString(buf, "></");
1040 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1041 xmlOutputBufferWriteString(buf, ">");
1042 }
Daniel Veillard02bb1702001-06-13 21:11:59 +00001043 if ((format) && (cur->next != NULL) &&
1044 (info != NULL) && (!info->isinline)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001045 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +00001046 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1047 (cur->parent != NULL) &&
1048 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001049 xmlOutputBufferWriteString(buf, "\n");
1050 }
1051 return;
1052 }
1053 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +00001054 if ((cur->type != XML_ELEMENT_NODE) &&
1055 (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001056 /*
1057 * Uses the OutputBuffer property to automatically convert
1058 * invalids to charrefs
1059 */
1060
1061#ifndef XML_USE_BUFFER_CONTENT
1062 xmlOutputBufferWriteString(buf, (const char *) cur->content);
1063#else
1064 xmlOutputBufferWriteString(buf,
1065 (const char *) xmlBufferContent(cur->content));
1066#endif
1067 }
1068 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +00001069 if ((format) && (info != NULL) && (!info->isinline) &&
1070 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001071 (cur->children->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001072 (cur->children != cur->last) &&
1073 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001074 xmlOutputBufferWriteString(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +00001075 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +00001076 if ((format) && (info != NULL) && (!info->isinline) &&
1077 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001078 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001079 (cur->children != cur->last) &&
1080 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001081 xmlOutputBufferWriteString(buf, "\n");
1082 }
Owen Taylor3473f882001-02-23 17:55:21 +00001083 xmlOutputBufferWriteString(buf, "</");
1084 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1085 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +00001086 if ((format) && (info != NULL) && (!info->isinline) &&
1087 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001088 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001089 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1090 (cur->parent != NULL) &&
1091 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001092 xmlOutputBufferWriteString(buf, "\n");
1093 }
1094}
1095
1096/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001097 * htmlNodeDumpOutput:
1098 * @buf: the HTML buffer output
1099 * @doc: the document
1100 * @cur: the current node
1101 * @encoding: the encoding string
1102 *
1103 * Dump an HTML node, recursive behaviour,children are printed too,
1104 * and formatting returns/spaces are added.
1105 */
1106void
1107htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
1108 xmlNodePtr cur, const char *encoding) {
1109 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
1110}
1111
1112/**
1113 * htmlDocContentDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +00001114 * @buf: the HTML buffer output
1115 * @cur: the document
1116 * @encoding: the encoding string
Daniel Veillard9d06d302002-01-22 18:15:52 +00001117 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +00001118 *
1119 * Dump an HTML document.
1120 */
1121void
Daniel Veillard95d845f2001-06-13 13:48:46 +00001122htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1123 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001124 int type;
1125
1126 /*
1127 * force to output the stuff as HTML, especially for entities
1128 */
1129 type = cur->type;
1130 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard4dd93462001-04-02 15:16:19 +00001131 if (cur->intSubset != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001132 htmlDtdDumpOutput(buf, cur, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001133 }
1134 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +00001135 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001136 }
1137 xmlOutputBufferWriteString(buf, "\n");
1138 cur->type = (xmlElementType) type;
1139}
1140
Daniel Veillard95d845f2001-06-13 13:48:46 +00001141/**
1142 * htmlDocContentDumpOutput:
1143 * @buf: the HTML buffer output
1144 * @cur: the document
1145 * @encoding: the encoding string
1146 *
1147 * Dump an HTML document. Formating return/spaces are added.
1148 */
1149void
1150htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1151 const char *encoding) {
1152 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1153}
1154
Owen Taylor3473f882001-02-23 17:55:21 +00001155/************************************************************************
1156 * *
1157 * Saving functions front-ends *
1158 * *
1159 ************************************************************************/
1160
1161/**
1162 * htmlDocDump:
1163 * @f: the FILE*
1164 * @cur: the document
1165 *
1166 * Dump an HTML document to an open FILE.
1167 *
1168 * returns: the number of byte written or -1 in case of failure.
1169 */
1170int
1171htmlDocDump(FILE *f, xmlDocPtr cur) {
1172 xmlOutputBufferPtr buf;
1173 xmlCharEncodingHandlerPtr handler = NULL;
1174 const char *encoding;
1175 int ret;
1176
1177 if (cur == NULL) {
1178#ifdef DEBUG_TREE
1179 xmlGenericError(xmlGenericErrorContext,
1180 "htmlDocDump : document == NULL\n");
1181#endif
1182 return(-1);
1183 }
1184
1185 encoding = (const char *) htmlGetMetaEncoding(cur);
1186
1187 if (encoding != NULL) {
1188 xmlCharEncoding enc;
1189
1190 enc = xmlParseCharEncoding(encoding);
1191 if (enc != cur->charset) {
1192 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1193 /*
1194 * Not supported yet
1195 */
1196 return(-1);
1197 }
1198
1199 handler = xmlFindCharEncodingHandler(encoding);
1200 if (handler == NULL)
1201 return(-1);
1202 }
1203 }
1204
1205 /*
1206 * Fallback to HTML or ASCII when the encoding is unspecified
1207 */
1208 if (handler == NULL)
1209 handler = xmlFindCharEncodingHandler("HTML");
1210 if (handler == NULL)
1211 handler = xmlFindCharEncodingHandler("ascii");
1212
1213 buf = xmlOutputBufferCreateFile(f, handler);
1214 if (buf == NULL) return(-1);
1215 htmlDocContentDumpOutput(buf, cur, NULL);
1216
1217 ret = xmlOutputBufferClose(buf);
1218 return(ret);
1219}
1220
1221/**
1222 * htmlSaveFile:
1223 * @filename: the filename (or URL)
1224 * @cur: the document
1225 *
1226 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1227 * used.
1228 * returns: the number of byte written or -1 in case of failure.
1229 */
1230int
1231htmlSaveFile(const char *filename, xmlDocPtr cur) {
1232 xmlOutputBufferPtr buf;
1233 xmlCharEncodingHandlerPtr handler = NULL;
1234 const char *encoding;
1235 int ret;
1236
1237 encoding = (const char *) htmlGetMetaEncoding(cur);
1238
1239 if (encoding != NULL) {
1240 xmlCharEncoding enc;
1241
1242 enc = xmlParseCharEncoding(encoding);
1243 if (enc != cur->charset) {
1244 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1245 /*
1246 * Not supported yet
1247 */
1248 return(-1);
1249 }
1250
1251 handler = xmlFindCharEncodingHandler(encoding);
1252 if (handler == NULL)
1253 return(-1);
1254 }
1255 }
1256
1257 /*
1258 * Fallback to HTML or ASCII when the encoding is unspecified
1259 */
1260 if (handler == NULL)
1261 handler = xmlFindCharEncodingHandler("HTML");
1262 if (handler == NULL)
1263 handler = xmlFindCharEncodingHandler("ascii");
1264
1265 /*
1266 * save the content to a temp buffer.
1267 */
1268 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1269 if (buf == NULL) return(0);
1270
1271 htmlDocContentDumpOutput(buf, cur, NULL);
1272
1273 ret = xmlOutputBufferClose(buf);
1274 return(ret);
1275}
1276
1277/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001278 * htmlSaveFileFormat:
Owen Taylor3473f882001-02-23 17:55:21 +00001279 * @filename: the filename
1280 * @cur: the document
Daniel Veillard95d845f2001-06-13 13:48:46 +00001281 * @format: should formatting spaces been added
1282 * @encoding: the document encoding
Owen Taylor3473f882001-02-23 17:55:21 +00001283 *
1284 * Dump an HTML document to a file using a given encoding.
1285 *
1286 * returns: the number of byte written or -1 in case of failure.
1287 */
1288int
Daniel Veillard95d845f2001-06-13 13:48:46 +00001289htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1290 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001291 xmlOutputBufferPtr buf;
1292 xmlCharEncodingHandlerPtr handler = NULL;
1293 int ret;
1294
1295 if (encoding != NULL) {
1296 xmlCharEncoding enc;
1297
1298 enc = xmlParseCharEncoding(encoding);
1299 if (enc != cur->charset) {
1300 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1301 /*
1302 * Not supported yet
1303 */
1304 return(-1);
1305 }
1306
1307 handler = xmlFindCharEncodingHandler(encoding);
1308 if (handler == NULL)
1309 return(-1);
1310 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1311 }
Daniel Veillard4dd93462001-04-02 15:16:19 +00001312 } else {
1313 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
Owen Taylor3473f882001-02-23 17:55:21 +00001314 }
1315
1316 /*
1317 * Fallback to HTML or ASCII when the encoding is unspecified
1318 */
1319 if (handler == NULL)
1320 handler = xmlFindCharEncodingHandler("HTML");
1321 if (handler == NULL)
1322 handler = xmlFindCharEncodingHandler("ascii");
1323
1324 /*
1325 * save the content to a temp buffer.
1326 */
1327 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1328 if (buf == NULL) return(0);
1329
Daniel Veillard95d845f2001-06-13 13:48:46 +00001330 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001331
1332 ret = xmlOutputBufferClose(buf);
1333 return(ret);
1334}
Daniel Veillard95d845f2001-06-13 13:48:46 +00001335
1336/**
1337 * htmlSaveFileEnc:
1338 * @filename: the filename
1339 * @cur: the document
1340 * @encoding: the document encoding
1341 *
1342 * Dump an HTML document to a file using a given encoding
1343 * and formatting returns/spaces are added.
1344 *
1345 * returns: the number of byte written or -1 in case of failure.
1346 */
1347int
1348htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1349 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1350}
1351
Owen Taylor3473f882001-02-23 17:55:21 +00001352#endif /* LIBXML_HTML_ENABLED */