/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
8
9
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000011#ifdef LIBXML_HTML_ENABLED
12
Owen Taylor3473f882001-02-23 17:55:21 +000013#ifdef HAVE_CTYPE_H
14#include <ctype.h>
15#endif
16#ifdef HAVE_STDLIB_H
17#include <stdlib.h>
18#endif
19
20#include <libxml/xmlmemory.h>
21#include <libxml/HTMLparser.h>
22#include <libxml/HTMLtree.h>
23#include <libxml/entities.h>
24#include <libxml/valid.h>
25#include <libxml/xmlerror.h>
26#include <libxml/parserInternals.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000027#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000028
/************************************************************************
 *                                                                      *
 *              Getting/Setting encoding meta tags                      *
 *                                                                      *
 ************************************************************************/
34
35/**
36 * htmlGetMetaEncoding:
37 * @doc: the document
38 *
39 * Encoding definition lookup in the Meta tags
40 *
41 * Returns the current encoding as flagged in the HTML source
42 */
43const xmlChar *
44htmlGetMetaEncoding(htmlDocPtr doc) {
45 htmlNodePtr cur;
46 const xmlChar *content;
47 const xmlChar *encoding;
48
49 if (doc == NULL)
50 return(NULL);
51 cur = doc->children;
52
53 /*
54 * Search the html
55 */
56 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000057 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000058 if (xmlStrEqual(cur->name, BAD_CAST"html"))
59 break;
60 if (xmlStrEqual(cur->name, BAD_CAST"head"))
61 goto found_head;
62 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
63 goto found_meta;
64 }
65 cur = cur->next;
66 }
67 if (cur == NULL)
68 return(NULL);
69 cur = cur->children;
70
71 /*
72 * Search the head
73 */
74 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000075 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000076 if (xmlStrEqual(cur->name, BAD_CAST"head"))
77 break;
78 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
79 goto found_meta;
80 }
81 cur = cur->next;
82 }
83 if (cur == NULL)
84 return(NULL);
85found_head:
86 cur = cur->children;
87
88 /*
89 * Search the meta elements
90 */
91found_meta:
92 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +000093 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +000094 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
95 xmlAttrPtr attr = cur->properties;
96 int http;
97 const xmlChar *value;
98
99 content = NULL;
100 http = 0;
101 while (attr != NULL) {
102 if ((attr->children != NULL) &&
103 (attr->children->type == XML_TEXT_NODE) &&
104 (attr->children->next == NULL)) {
105#ifndef XML_USE_BUFFER_CONTENT
106 value = attr->children->content;
107#else
108 value = xmlBufferContent(attr->children->content);
109#endif
110 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
111 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
112 http = 1;
113 else if ((value != NULL)
114 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
115 content = value;
116 if ((http != 0) && (content != NULL))
117 goto found_content;
118 }
119 attr = attr->next;
120 }
121 }
122 }
123 cur = cur->next;
124 }
125 return(NULL);
126
127found_content:
128 encoding = xmlStrstr(content, BAD_CAST"charset=");
129 if (encoding == NULL)
130 encoding = xmlStrstr(content, BAD_CAST"Charset=");
131 if (encoding == NULL)
132 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
133 if (encoding != NULL) {
134 encoding += 8;
135 } else {
136 encoding = xmlStrstr(content, BAD_CAST"charset =");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"Charset =");
139 if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
141 if (encoding != NULL)
142 encoding += 9;
143 }
144 if (encoding != NULL) {
145 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
146 }
147 return(encoding);
148}
149
150/**
151 * htmlSetMetaEncoding:
152 * @doc: the document
153 * @encoding: the encoding string
154 *
155 * Sets the current encoding in the Meta tags
156 * NOTE: this will not change the document content encoding, just
157 * the META flag associated.
158 *
159 * Returns 0 in case of success and -1 in case of error
160 */
161int
162htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
163 htmlNodePtr cur, meta;
164 const xmlChar *content;
165 char newcontent[100];
166
167
168 if (doc == NULL)
169 return(-1);
170
171 if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000172 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
173 encoding);
Owen Taylor3473f882001-02-23 17:55:21 +0000174 newcontent[sizeof(newcontent) - 1] = 0;
175 }
176
177 cur = doc->children;
178
179 /*
180 * Search the html
181 */
182 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000183 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000184 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
185 break;
186 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
187 goto found_head;
188 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
189 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000190 }
191 cur = cur->next;
192 }
193 if (cur == NULL)
194 return(-1);
195 cur = cur->children;
196
197 /*
198 * Search the head
199 */
200 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000201 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000202 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
203 break;
204 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
205 goto found_meta;
Owen Taylor3473f882001-02-23 17:55:21 +0000206 }
207 cur = cur->next;
208 }
209 if (cur == NULL)
210 return(-1);
211found_head:
212 if (cur->children == NULL) {
213 if (encoding == NULL)
214 return(0);
215 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
216 xmlAddChild(cur, meta);
Owen Taylor3473f882001-02-23 17:55:21 +0000217 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000218 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Owen Taylor3473f882001-02-23 17:55:21 +0000219 return(0);
220 }
221 cur = cur->children;
222
223found_meta:
224 if (encoding != NULL) {
225 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000226 * Create a new Meta element with the right attributes
Owen Taylor3473f882001-02-23 17:55:21 +0000227 */
228
229 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
230 xmlAddPrevSibling(cur, meta);
Owen Taylor3473f882001-02-23 17:55:21 +0000231 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000232 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
Owen Taylor3473f882001-02-23 17:55:21 +0000233 }
234
235 /*
236 * Search and destroy all the remaining the meta elements carrying
237 * encoding informations
238 */
239 while (cur != NULL) {
Daniel Veillard5151c062001-10-23 13:10:19 +0000240 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000241 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +0000242 xmlAttrPtr attr = cur->properties;
243 int http;
244 const xmlChar *value;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000245 int same_charset;
Owen Taylor3473f882001-02-23 17:55:21 +0000246
247 content = NULL;
248 http = 0;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000249 same_charset = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000250 while (attr != NULL) {
251 if ((attr->children != NULL) &&
252 (attr->children->type == XML_TEXT_NODE) &&
253 (attr->children->next == NULL)) {
254#ifndef XML_USE_BUFFER_CONTENT
255 value = attr->children->content;
256#else
257 value = xmlBufferContent(attr->children->content);
258#endif
259 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
260 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
261 http = 1;
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000262 else
263 {
264 if ((value != NULL) &&
265 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
266 content = value;
267 else
268 if ((!xmlStrcasecmp(attr->name, BAD_CAST"charset"))
269 && (!xmlStrcasecmp(value, encoding)))
270 same_charset = 1;
271 }
272 if ((http != 0) && (content != NULL) && (same_charset != 0))
Owen Taylor3473f882001-02-23 17:55:21 +0000273 break;
274 }
275 attr = attr->next;
276 }
Daniel Veillard1ed3f882001-04-18 09:45:35 +0000277 if ((http != 0) && (content != NULL) && (same_charset != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000278 meta = cur;
279 cur = cur->next;
280 xmlUnlinkNode(meta);
281 xmlFreeNode(meta);
282 continue;
283 }
284
285 }
286 }
287 cur = cur->next;
288 }
289 return(0);
290}
291
/************************************************************************
 *                                                                      *
 *              Dumping HTML tree content to a simple buffer            *
 *                                                                      *
 ************************************************************************/
297
298static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000299htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format);
Daniel Veillard86fd5a72001-12-13 14:55:21 +0000300static void
301htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
302 int format);
Owen Taylor3473f882001-02-23 17:55:21 +0000303
304/**
305 * htmlDtdDump:
306 * @buf: the HTML buffer output
307 * @doc: the document
308 *
309 * Dump the HTML document DTD, if any.
310 */
311static void
312htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
313 xmlDtdPtr cur = doc->intSubset;
314
315 if (cur == NULL) {
316 xmlGenericError(xmlGenericErrorContext,
317 "htmlDtdDump : no internal subset\n");
318 return;
319 }
320 xmlBufferWriteChar(buf, "<!DOCTYPE ");
321 xmlBufferWriteCHAR(buf, cur->name);
322 if (cur->ExternalID != NULL) {
323 xmlBufferWriteChar(buf, " PUBLIC ");
324 xmlBufferWriteQuotedString(buf, cur->ExternalID);
325 if (cur->SystemID != NULL) {
326 xmlBufferWriteChar(buf, " ");
327 xmlBufferWriteQuotedString(buf, cur->SystemID);
328 }
329 } else if (cur->SystemID != NULL) {
330 xmlBufferWriteChar(buf, " SYSTEM ");
331 xmlBufferWriteQuotedString(buf, cur->SystemID);
332 }
333 xmlBufferWriteChar(buf, ">\n");
334}
335
336/**
337 * htmlAttrDump:
338 * @buf: the HTML buffer output
339 * @doc: the document
340 * @cur: the attribute pointer
341 *
342 * Dump an HTML attribute
343 */
344static void
345htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
346 xmlChar *value;
347
Daniel Veillardeca60d02001-06-13 07:45:41 +0000348 /*
349 * TODO: The html output method should not escape a & character
350 * occurring in an attribute value immediately followed by
351 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
352 */
353
Owen Taylor3473f882001-02-23 17:55:21 +0000354 if (cur == NULL) {
355 xmlGenericError(xmlGenericErrorContext,
356 "htmlAttrDump : property == NULL\n");
357 return;
358 }
359 xmlBufferWriteChar(buf, " ");
360 xmlBufferWriteCHAR(buf, cur->name);
361 if (cur->children != NULL) {
362 value = xmlNodeListGetString(doc, cur->children, 0);
363 if (value) {
364 xmlBufferWriteChar(buf, "=");
365 xmlBufferWriteQuotedString(buf, value);
366 xmlFree(value);
367 } else {
368 xmlBufferWriteChar(buf, "=\"\"");
369 }
370 }
371}
372
373/**
374 * htmlAttrListDump:
375 * @buf: the HTML buffer output
376 * @doc: the document
377 * @cur: the first attribute pointer
378 *
379 * Dump a list of HTML attributes
380 */
381static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000382htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, int format) {
383 int i = 0;
384
Owen Taylor3473f882001-02-23 17:55:21 +0000385 if (cur == NULL) {
386 xmlGenericError(xmlGenericErrorContext,
387 "htmlAttrListDump : property == NULL\n");
388 return;
389 }
390 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000391 i++;
392 if ((format) && (i >= 5)) {
393 i = 0;
394 xmlBufferWriteChar(buf, "\n");
395 }
Owen Taylor3473f882001-02-23 17:55:21 +0000396 htmlAttrDump(buf, doc, cur);
397 cur = cur->next;
398 }
399}
400
Owen Taylor3473f882001-02-23 17:55:21 +0000401/**
402 * htmlNodeListDump:
403 * @buf: the HTML buffer output
404 * @doc: the document
405 * @cur: the first node
406 *
407 * Dump an HTML node list, recursive behaviour,children are printed too.
408 */
409static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000410htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000411 if (cur == NULL) {
412 xmlGenericError(xmlGenericErrorContext,
413 "htmlNodeListDump : node == NULL\n");
414 return;
415 }
416 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000417 htmlNodeDumpFormat(buf, doc, cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000418 cur = cur->next;
419 }
420}
421
422/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000423 * htmlNodeDumpFormat:
Owen Taylor3473f882001-02-23 17:55:21 +0000424 * @buf: the HTML buffer output
425 * @doc: the document
426 * @cur: the current node
Daniel Veillard95d845f2001-06-13 13:48:46 +0000427 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000428 *
429 * Dump an HTML node, recursive behaviour,children are printed too.
430 */
Daniel Veillard86fd5a72001-12-13 14:55:21 +0000431static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000432htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
433 int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000434 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000435
436 if (cur == NULL) {
437 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000438 "htmlNodeDumpFormat : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000439 return;
440 }
441 /*
442 * Special cases.
443 */
444 if (cur->type == XML_DTD_NODE)
445 return;
446 if (cur->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000447 htmlDocContentDump(buf, (xmlDocPtr) cur, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000448 return;
449 }
450 if (cur->type == HTML_TEXT_NODE) {
451 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000452 if (((cur->name == (const xmlChar *)xmlStringText) ||
453 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000454 ((cur->parent == NULL) ||
455 (!xmlStrEqual(cur->parent->name, BAD_CAST "script")))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000456 xmlChar *buffer;
457
458#ifndef XML_USE_BUFFER_CONTENT
459 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
460#else
461 buffer = xmlEncodeEntitiesReentrant(doc,
462 xmlBufferContent(cur->content));
463#endif
464 if (buffer != NULL) {
465 xmlBufferWriteCHAR(buf, buffer);
466 xmlFree(buffer);
467 }
468 } else {
469 xmlBufferWriteCHAR(buf, cur->content);
470 }
471 }
472 return;
473 }
474 if (cur->type == HTML_COMMENT_NODE) {
475 if (cur->content != NULL) {
476 xmlBufferWriteChar(buf, "<!--");
477#ifndef XML_USE_BUFFER_CONTENT
478 xmlBufferWriteCHAR(buf, cur->content);
479#else
480 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
481#endif
482 xmlBufferWriteChar(buf, "-->");
483 }
484 return;
485 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000486 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000487 if (cur->name == NULL)
488 return;
489 xmlBufferWriteChar(buf, "<?");
490 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000491 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000492 xmlBufferWriteChar(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000493#ifndef XML_USE_BUFFER_CONTENT
494 xmlBufferWriteCHAR(buf, cur->content);
495#else
496 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
497#endif
Daniel Veillard7533cc82001-04-24 15:52:00 +0000498 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000499 xmlBufferWriteChar(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000500 return;
501 }
Owen Taylor3473f882001-02-23 17:55:21 +0000502 if (cur->type == HTML_ENTITY_REF_NODE) {
503 xmlBufferWriteChar(buf, "&");
504 xmlBufferWriteCHAR(buf, cur->name);
505 xmlBufferWriteChar(buf, ";");
506 return;
507 }
Daniel Veillard083c2662001-05-08 08:27:14 +0000508 if (cur->type == HTML_PRESERVE_NODE) {
509 if (cur->content != NULL) {
510#ifndef XML_USE_BUFFER_CONTENT
511 xmlBufferWriteCHAR(buf, cur->content);
512#else
513 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
514#endif
515 }
516 return;
517 }
Owen Taylor3473f882001-02-23 17:55:21 +0000518
519 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000520 * Get specific HTML info for that node.
Owen Taylor3473f882001-02-23 17:55:21 +0000521 */
522 info = htmlTagLookup(cur->name);
523
524 xmlBufferWriteChar(buf, "<");
525 xmlBufferWriteCHAR(buf, cur->name);
526 if (cur->properties != NULL)
Daniel Veillard95d845f2001-06-13 13:48:46 +0000527 htmlAttrListDump(buf, doc, cur->properties, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000528
529 if ((info != NULL) && (info->empty)) {
530 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000531 if ((format) && (info != NULL) && (!info->isinline) &&
532 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000533 if ((cur->next->type != HTML_TEXT_NODE) &&
534 (cur->next->type != HTML_ENTITY_REF_NODE))
535 xmlBufferWriteChar(buf, "\n");
536 }
537 return;
538 }
Daniel Veillard7db37732001-07-12 01:20:08 +0000539 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
540 (cur->children == NULL)) {
Daniel Veillard083c2662001-05-08 08:27:14 +0000541 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +0000542 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
543 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000544 xmlBufferWriteChar(buf, ">");
Daniel Veillard083c2662001-05-08 08:27:14 +0000545 } else {
Owen Taylor3473f882001-02-23 17:55:21 +0000546 xmlBufferWriteChar(buf, "></");
547 xmlBufferWriteCHAR(buf, cur->name);
548 xmlBufferWriteChar(buf, ">");
549 }
Daniel Veillard02bb1702001-06-13 21:11:59 +0000550 if ((format) && (info != NULL) && (!info->isinline) &&
551 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000552 if ((cur->next->type != HTML_TEXT_NODE) &&
553 (cur->next->type != HTML_ENTITY_REF_NODE))
554 xmlBufferWriteChar(buf, "\n");
555 }
556 return;
557 }
558 xmlBufferWriteChar(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +0000559 if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000560 xmlChar *buffer;
561
562#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard083c2662001-05-08 08:27:14 +0000563 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Owen Taylor3473f882001-02-23 17:55:21 +0000564#else
Daniel Veillard083c2662001-05-08 08:27:14 +0000565 buffer = xmlEncodeEntitiesReentrant(doc,
566 xmlBufferContent(cur->content));
Owen Taylor3473f882001-02-23 17:55:21 +0000567#endif
568 if (buffer != NULL) {
569 xmlBufferWriteCHAR(buf, buffer);
570 xmlFree(buffer);
571 }
572 }
573 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +0000574 if ((format) && (info != NULL) && (!info->isinline) &&
575 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000576 (cur->children->type != HTML_ENTITY_REF_NODE) &&
577 (cur->children != cur->last))
578 xmlBufferWriteChar(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +0000579 htmlNodeListDump(buf, doc, cur->children, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +0000580 if ((format) && (info != NULL) && (!info->isinline) &&
581 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000582 (cur->last->type != HTML_ENTITY_REF_NODE) &&
583 (cur->children != cur->last))
584 xmlBufferWriteChar(buf, "\n");
585 }
Owen Taylor3473f882001-02-23 17:55:21 +0000586 xmlBufferWriteChar(buf, "</");
587 xmlBufferWriteCHAR(buf, cur->name);
588 xmlBufferWriteChar(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +0000589 if ((format) && (info != NULL) && (!info->isinline) &&
590 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000591 if ((cur->next->type != HTML_TEXT_NODE) &&
592 (cur->next->type != HTML_ENTITY_REF_NODE))
593 xmlBufferWriteChar(buf, "\n");
594 }
595}
596
597/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000598 * htmlNodeDump:
599 * @buf: the HTML buffer output
600 * @doc: the document
601 * @cur: the current node
602 *
603 * Dump an HTML node, recursive behaviour,children are printed too,
604 * and formatting returns are added.
605 */
606void
607htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
608 htmlNodeDumpFormat(buf, doc, cur, 1);
609}
610
611/**
612 * htmlNodeDumpFileFormat:
613 * @out: the FILE pointer
614 * @doc: the document
615 * @cur: the current node
616 * @encoding: the document encoding
617 * @format: should formatting spaces been added
618 *
619 * Dump an HTML node, recursive behaviour,children are printed too.
620 *
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000621 * TODO: if encoding == NULL try to save in the doc encoding
622 *
623 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard95d845f2001-06-13 13:48:46 +0000624 */
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000625int
626htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
627 xmlNodePtr cur, const char *encoding, int format) {
628 xmlOutputBufferPtr buf;
629 xmlCharEncodingHandlerPtr handler = NULL;
630 int ret;
Daniel Veillard95d845f2001-06-13 13:48:46 +0000631
Daniel Veillardc4f631d2001-06-14 11:11:59 +0000632 if (encoding != NULL) {
633 xmlCharEncoding enc;
634
635 enc = xmlParseCharEncoding(encoding);
636 if (enc != XML_CHAR_ENCODING_UTF8) {
637 handler = xmlFindCharEncodingHandler(encoding);
638 if (handler == NULL)
639 return(-1);
640 }
641 }
642
643 /*
644 * Fallback to HTML or ASCII when the encoding is unspecified
645 */
646 if (handler == NULL)
647 handler = xmlFindCharEncodingHandler("HTML");
648 if (handler == NULL)
649 handler = xmlFindCharEncodingHandler("ascii");
650
651 /*
652 * save the content to a temp buffer.
653 */
654 buf = xmlOutputBufferCreateFile(out, handler);
655 if (buf == NULL) return(0);
656
657 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
658
659 ret = xmlOutputBufferClose(buf);
660 return(ret);
Daniel Veillard95d845f2001-06-13 13:48:46 +0000661}
662
663/**
Owen Taylor3473f882001-02-23 17:55:21 +0000664 * htmlNodeDumpFile:
665 * @out: the FILE pointer
666 * @doc: the document
667 * @cur: the current node
668 *
Daniel Veillard95d845f2001-06-13 13:48:46 +0000669 * Dump an HTML node, recursive behaviour,children are printed too,
670 * and formatting returns are added.
Owen Taylor3473f882001-02-23 17:55:21 +0000671 */
672void
673htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000674 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
Owen Taylor3473f882001-02-23 17:55:21 +0000675}
676
677/**
678 * htmlDocContentDump:
679 * @buf: the HTML buffer output
680 * @cur: the document
681 *
682 * Dump an HTML document.
683 */
684static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000685htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000686 int type;
687
688 /*
689 * force to output the stuff as HTML, especially for entities
690 */
691 type = cur->type;
692 cur->type = XML_HTML_DOCUMENT_NODE;
693 if (cur->intSubset != NULL)
694 htmlDtdDump(buf, cur);
695 else {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000696 /* Default to HTML-4.0 transitional @@@@ */
Owen Taylor3473f882001-02-23 17:55:21 +0000697 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
698
699 }
700 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000701 htmlNodeListDump(buf, cur, cur->children, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000702 }
703 xmlBufferWriteChar(buf, "\n");
704 cur->type = (xmlElementType) type;
705}
706
707/**
708 * htmlDocDumpMemory:
709 * @cur: the document
710 * @mem: OUT: the memory pointer
Daniel Veillard2d703722001-05-30 18:32:34 +0000711 * @size: OUT: the memory length
Owen Taylor3473f882001-02-23 17:55:21 +0000712 *
713 * Dump an HTML document in memory and return the xmlChar * and it's size.
714 * It's up to the caller to free the memory.
715 */
716void
717htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard2d703722001-05-30 18:32:34 +0000718 xmlOutputBufferPtr buf;
719 xmlCharEncodingHandlerPtr handler = NULL;
720 const char *encoding;
Owen Taylor3473f882001-02-23 17:55:21 +0000721
722 if (cur == NULL) {
723#ifdef DEBUG_TREE
724 xmlGenericError(xmlGenericErrorContext,
Daniel Veillard2d703722001-05-30 18:32:34 +0000725 "htmlDocDumpMemory : document == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000726#endif
727 *mem = NULL;
728 *size = 0;
729 return;
730 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000731
732 encoding = (const char *) htmlGetMetaEncoding(cur);
733
734 if (encoding != NULL) {
735 xmlCharEncoding enc;
736
737 enc = xmlParseCharEncoding(encoding);
738 if (enc != cur->charset) {
739 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
740 /*
741 * Not supported yet
742 */
743 *mem = NULL;
744 *size = 0;
745 return;
746 }
747
748 handler = xmlFindCharEncodingHandler(encoding);
749 if (handler == NULL) {
750 *mem = NULL;
751 *size = 0;
752 return;
753 }
754 }
755 }
756
757 /*
758 * Fallback to HTML or ASCII when the encoding is unspecified
759 */
760 if (handler == NULL)
761 handler = xmlFindCharEncodingHandler("HTML");
762 if (handler == NULL)
763 handler = xmlFindCharEncodingHandler("ascii");
764
765 buf = xmlAllocOutputBuffer(handler);
Owen Taylor3473f882001-02-23 17:55:21 +0000766 if (buf == NULL) {
767 *mem = NULL;
768 *size = 0;
769 return;
770 }
Daniel Veillard2d703722001-05-30 18:32:34 +0000771
772 htmlDocContentDumpOutput(buf, cur, NULL);
773 xmlOutputBufferFlush(buf);
774 if (buf->conv != NULL) {
775 *size = buf->conv->use;
776 *mem = xmlStrndup(buf->conv->content, *size);
777 } else {
778 *size = buf->buffer->use;
779 *mem = xmlStrndup(buf->buffer->content, *size);
780 }
781 (void)xmlOutputBufferClose(buf);
Owen Taylor3473f882001-02-23 17:55:21 +0000782}
783
784
/************************************************************************
 *                                                                      *
 *              Dumping HTML tree content to an I/O output buffer       *
 *                                                                      *
 ************************************************************************/
790
791/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000792 * htmlDtdDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000793 * @buf: the HTML buffer output
794 * @doc: the document
795 * @encoding: the encoding string
796 *
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000797 * TODO: check whether encoding is needed
798 *
Owen Taylor3473f882001-02-23 17:55:21 +0000799 * Dump the HTML document DTD, if any.
800 */
801static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000802htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000803 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000804 xmlDtdPtr cur = doc->intSubset;
805
806 if (cur == NULL) {
807 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000808 "htmlDtdDumpOutput : no internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000809 return;
810 }
811 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
812 xmlOutputBufferWriteString(buf, (const char *)cur->name);
813 if (cur->ExternalID != NULL) {
814 xmlOutputBufferWriteString(buf, " PUBLIC ");
815 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
816 if (cur->SystemID != NULL) {
817 xmlOutputBufferWriteString(buf, " ");
818 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
819 }
820 } else if (cur->SystemID != NULL) {
821 xmlOutputBufferWriteString(buf, " SYSTEM ");
822 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
823 }
824 xmlOutputBufferWriteString(buf, ">\n");
825}
826
827/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000828 * htmlAttrDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000829 * @buf: the HTML buffer output
830 * @doc: the document
831 * @cur: the attribute pointer
832 * @encoding: the encoding string
833 *
834 * Dump an HTML attribute
835 */
836static void
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000837htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
Daniel Veillardc86a4fa2001-03-26 16:28:29 +0000838 const char *encoding ATTRIBUTE_UNUSED) {
Owen Taylor3473f882001-02-23 17:55:21 +0000839 xmlChar *value;
840
Daniel Veillardeca60d02001-06-13 07:45:41 +0000841 /*
842 * TODO: The html output method should not escape a & character
843 * occurring in an attribute value immediately followed by
844 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
845 */
846
Owen Taylor3473f882001-02-23 17:55:21 +0000847 if (cur == NULL) {
848 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000849 "htmlAttrDumpOutput : property == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000850 return;
851 }
852 xmlOutputBufferWriteString(buf, " ");
853 xmlOutputBufferWriteString(buf, (const char *)cur->name);
854 if (cur->children != NULL) {
855 value = xmlNodeListGetString(doc, cur->children, 0);
856 if (value) {
857 xmlOutputBufferWriteString(buf, "=");
858 xmlBufferWriteQuotedString(buf->buffer, value);
859 xmlFree(value);
860 } else {
861 xmlOutputBufferWriteString(buf, "=\"\"");
862 }
863 }
864}
865
866/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000867 * htmlAttrListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000868 * @buf: the HTML buffer output
869 * @doc: the document
870 * @cur: the first attribute pointer
871 * @encoding: the encoding string
872 *
873 * Dump a list of HTML attributes
874 */
875static void
876htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
877 if (cur == NULL) {
878 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000879 "htmlAttrListDumpOutput : property == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000880 return;
881 }
882 while (cur != NULL) {
883 htmlAttrDumpOutput(buf, doc, cur, encoding);
884 cur = cur->next;
885 }
886}
887
888
889void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
890 xmlNodePtr cur, const char *encoding);
891
892/**
Daniel Veillardeca60d02001-06-13 07:45:41 +0000893 * htmlNodeListDumpOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000894 * @buf: the HTML buffer output
895 * @doc: the document
896 * @cur: the first node
897 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000898 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000899 *
900 * Dump an HTML node list, recursive behaviour,children are printed too.
901 */
902static void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000903htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
904 xmlNodePtr cur, const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +0000905 if (cur == NULL) {
906 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000907 "htmlNodeListDumpOutput : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000908 return;
909 }
910 while (cur != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +0000911 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +0000912 cur = cur->next;
913 }
914}
915
916/**
Daniel Veillard95d845f2001-06-13 13:48:46 +0000917 * htmlNodeDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +0000918 * @buf: the HTML buffer output
919 * @doc: the document
920 * @cur: the current node
921 * @encoding: the encoding string
Daniel Veillard95d845f2001-06-13 13:48:46 +0000922 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +0000923 *
924 * Dump an HTML node, recursive behaviour,children are printed too.
925 */
926void
Daniel Veillard95d845f2001-06-13 13:48:46 +0000927htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
928 xmlNodePtr cur, const char *encoding, int format) {
Daniel Veillardbb371292001-08-16 23:26:59 +0000929 const htmlElemDesc * info;
Owen Taylor3473f882001-02-23 17:55:21 +0000930
931 if (cur == NULL) {
932 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000933 "htmlNodeDumpFormatOutput : node == NULL\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000934 return;
935 }
936 /*
937 * Special cases.
938 */
939 if (cur->type == XML_DTD_NODE)
940 return;
941 if (cur->type == XML_HTML_DOCUMENT_NODE) {
942 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
943 return;
944 }
945 if (cur->type == HTML_TEXT_NODE) {
946 if (cur->content != NULL) {
Daniel Veillardb44025c2001-10-11 22:55:55 +0000947 if (((cur->name == (const xmlChar *)xmlStringText) ||
948 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
Daniel Veillard6e93c4a2001-06-05 20:57:42 +0000949 ((cur->parent == NULL) ||
950 (!xmlStrEqual(cur->parent->name, BAD_CAST "script")))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000951 xmlChar *buffer;
952
953#ifndef XML_USE_BUFFER_CONTENT
954 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
955#else
956 buffer = xmlEncodeEntitiesReentrant(doc,
957 xmlBufferContent(cur->content));
958#endif
959 if (buffer != NULL) {
960 xmlOutputBufferWriteString(buf, (const char *)buffer);
961 xmlFree(buffer);
962 }
963 } else {
964 xmlOutputBufferWriteString(buf, (const char *)cur->content);
965 }
966 }
967 return;
968 }
969 if (cur->type == HTML_COMMENT_NODE) {
970 if (cur->content != NULL) {
971 xmlOutputBufferWriteString(buf, "<!--");
972#ifndef XML_USE_BUFFER_CONTENT
973 xmlOutputBufferWriteString(buf, (const char *)cur->content);
974#else
975 xmlOutputBufferWriteString(buf, (const char *)
976 xmlBufferContent(cur->content));
977#endif
978 xmlOutputBufferWriteString(buf, "-->");
979 }
980 return;
981 }
Daniel Veillard7533cc82001-04-24 15:52:00 +0000982 if (cur->type == HTML_PI_NODE) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000983 if (cur->name == NULL)
984 return;
985 xmlOutputBufferWriteString(buf, "<?");
986 xmlOutputBufferWriteString(buf, (const char *)cur->name);
Daniel Veillard7533cc82001-04-24 15:52:00 +0000987 if (cur->content != NULL) {
Daniel Veillard5146f202001-04-25 10:29:44 +0000988 xmlOutputBufferWriteString(buf, " ");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000989#ifndef XML_USE_BUFFER_CONTENT
990 xmlOutputBufferWriteString(buf, (const char *)cur->content);
991#else
992 xmlOutputBufferWriteString(buf, (const char *)
993 xmlBufferContent(cur->content));
994#endif
Daniel Veillard7533cc82001-04-24 15:52:00 +0000995 }
Daniel Veillard5146f202001-04-25 10:29:44 +0000996 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7533cc82001-04-24 15:52:00 +0000997 return;
998 }
Owen Taylor3473f882001-02-23 17:55:21 +0000999 if (cur->type == HTML_ENTITY_REF_NODE) {
1000 xmlOutputBufferWriteString(buf, "&");
1001 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1002 xmlOutputBufferWriteString(buf, ";");
1003 return;
1004 }
1005 if (cur->type == HTML_PRESERVE_NODE) {
1006 if (cur->content != NULL) {
1007#ifndef XML_USE_BUFFER_CONTENT
1008 xmlOutputBufferWriteString(buf, (const char *)cur->content);
1009#else
1010 xmlOutputBufferWriteString(buf, (const char *)
1011 xmlBufferContent(cur->content));
1012#endif
1013 }
1014 return;
1015 }
1016
1017 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001018 * Get specific HTML info for that node.
Owen Taylor3473f882001-02-23 17:55:21 +00001019 */
1020 info = htmlTagLookup(cur->name);
1021
1022 xmlOutputBufferWriteString(buf, "<");
1023 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1024 if (cur->properties != NULL)
1025 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
1026
1027 if ((info != NULL) && (info->empty)) {
1028 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +00001029 if ((format) && (!info->isinline) && (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001030 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +00001031 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1032 (cur->parent != NULL) &&
1033 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001034 xmlOutputBufferWriteString(buf, "\n");
1035 }
1036 return;
1037 }
Daniel Veillard7db37732001-07-12 01:20:08 +00001038 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
1039 (cur->children == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001040 if ((info != NULL) && (info->saveEndTag != 0) &&
Daniel Veillardeca60d02001-06-13 07:45:41 +00001041 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
1042 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001043 xmlOutputBufferWriteString(buf, ">");
1044 } else {
1045 xmlOutputBufferWriteString(buf, "></");
1046 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1047 xmlOutputBufferWriteString(buf, ">");
1048 }
Daniel Veillard02bb1702001-06-13 21:11:59 +00001049 if ((format) && (cur->next != NULL) &&
1050 (info != NULL) && (!info->isinline)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001051 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillard8a926292001-06-07 11:20:20 +00001052 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1053 (cur->parent != NULL) &&
1054 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001055 xmlOutputBufferWriteString(buf, "\n");
1056 }
1057 return;
1058 }
1059 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard7db37732001-07-12 01:20:08 +00001060 if ((cur->type != XML_ELEMENT_NODE) &&
1061 (cur->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001062 /*
1063 * Uses the OutputBuffer property to automatically convert
1064 * invalids to charrefs
1065 */
1066
1067#ifndef XML_USE_BUFFER_CONTENT
1068 xmlOutputBufferWriteString(buf, (const char *) cur->content);
1069#else
1070 xmlOutputBufferWriteString(buf,
1071 (const char *) xmlBufferContent(cur->content));
1072#endif
1073 }
1074 if (cur->children != NULL) {
Daniel Veillard02bb1702001-06-13 21:11:59 +00001075 if ((format) && (info != NULL) && (!info->isinline) &&
1076 (cur->children->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001077 (cur->children->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001078 (cur->children != cur->last) &&
1079 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001080 xmlOutputBufferWriteString(buf, "\n");
Daniel Veillard95d845f2001-06-13 13:48:46 +00001081 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
Daniel Veillard02bb1702001-06-13 21:11:59 +00001082 if ((format) && (info != NULL) && (!info->isinline) &&
1083 (cur->last->type != HTML_TEXT_NODE) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001084 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001085 (cur->children != cur->last) &&
1086 (!xmlStrEqual(cur->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001087 xmlOutputBufferWriteString(buf, "\n");
1088 }
Owen Taylor3473f882001-02-23 17:55:21 +00001089 xmlOutputBufferWriteString(buf, "</");
1090 xmlOutputBufferWriteString(buf, (const char *)cur->name);
1091 xmlOutputBufferWriteString(buf, ">");
Daniel Veillard02bb1702001-06-13 21:11:59 +00001092 if ((format) && (info != NULL) && (!info->isinline) &&
1093 (cur->next != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001094 if ((cur->next->type != HTML_TEXT_NODE) &&
Daniel Veillardf0c53762001-06-07 16:07:07 +00001095 (cur->next->type != HTML_ENTITY_REF_NODE) &&
1096 (cur->parent != NULL) &&
1097 (!xmlStrEqual(cur->parent->name, BAD_CAST "pre")))
Owen Taylor3473f882001-02-23 17:55:21 +00001098 xmlOutputBufferWriteString(buf, "\n");
1099 }
1100}
1101
1102/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001103 * htmlNodeDumpOutput:
1104 * @buf: the HTML buffer output
1105 * @doc: the document
1106 * @cur: the current node
1107 * @encoding: the encoding string
1108 *
1109 * Dump an HTML node, recursive behaviour,children are printed too,
1110 * and formatting returns/spaces are added.
1111 */
1112void
1113htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
1114 xmlNodePtr cur, const char *encoding) {
1115 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
1116}
1117
1118/**
1119 * htmlDocContentDumpFormatOutput:
Owen Taylor3473f882001-02-23 17:55:21 +00001120 * @buf: the HTML buffer output
1121 * @cur: the document
1122 * @encoding: the encoding string
Daniel Veillard9d06d302002-01-22 18:15:52 +00001123 * @format: should formatting spaces been added
Owen Taylor3473f882001-02-23 17:55:21 +00001124 *
1125 * Dump an HTML document.
1126 */
1127void
Daniel Veillard95d845f2001-06-13 13:48:46 +00001128htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1129 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001130 int type;
1131
1132 /*
1133 * force to output the stuff as HTML, especially for entities
1134 */
1135 type = cur->type;
1136 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard4dd93462001-04-02 15:16:19 +00001137 if (cur->intSubset != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00001138 htmlDtdDumpOutput(buf, cur, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001139 }
1140 if (cur->children != NULL) {
Daniel Veillard95d845f2001-06-13 13:48:46 +00001141 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001142 }
1143 xmlOutputBufferWriteString(buf, "\n");
1144 cur->type = (xmlElementType) type;
1145}
1146
Daniel Veillard95d845f2001-06-13 13:48:46 +00001147/**
1148 * htmlDocContentDumpOutput:
1149 * @buf: the HTML buffer output
1150 * @cur: the document
1151 * @encoding: the encoding string
1152 *
1153 * Dump an HTML document. Formating return/spaces are added.
1154 */
1155void
1156htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1157 const char *encoding) {
1158 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1159}
1160
/************************************************************************
 *									*
 *		Saving functions front-ends				*
 *									*
 ************************************************************************/
1166
1167/**
1168 * htmlDocDump:
1169 * @f: the FILE*
1170 * @cur: the document
1171 *
1172 * Dump an HTML document to an open FILE.
1173 *
1174 * returns: the number of byte written or -1 in case of failure.
1175 */
1176int
1177htmlDocDump(FILE *f, xmlDocPtr cur) {
1178 xmlOutputBufferPtr buf;
1179 xmlCharEncodingHandlerPtr handler = NULL;
1180 const char *encoding;
1181 int ret;
1182
1183 if (cur == NULL) {
1184#ifdef DEBUG_TREE
1185 xmlGenericError(xmlGenericErrorContext,
1186 "htmlDocDump : document == NULL\n");
1187#endif
1188 return(-1);
1189 }
1190
1191 encoding = (const char *) htmlGetMetaEncoding(cur);
1192
1193 if (encoding != NULL) {
1194 xmlCharEncoding enc;
1195
1196 enc = xmlParseCharEncoding(encoding);
1197 if (enc != cur->charset) {
1198 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1199 /*
1200 * Not supported yet
1201 */
1202 return(-1);
1203 }
1204
1205 handler = xmlFindCharEncodingHandler(encoding);
1206 if (handler == NULL)
1207 return(-1);
1208 }
1209 }
1210
1211 /*
1212 * Fallback to HTML or ASCII when the encoding is unspecified
1213 */
1214 if (handler == NULL)
1215 handler = xmlFindCharEncodingHandler("HTML");
1216 if (handler == NULL)
1217 handler = xmlFindCharEncodingHandler("ascii");
1218
1219 buf = xmlOutputBufferCreateFile(f, handler);
1220 if (buf == NULL) return(-1);
1221 htmlDocContentDumpOutput(buf, cur, NULL);
1222
1223 ret = xmlOutputBufferClose(buf);
1224 return(ret);
1225}
1226
1227/**
1228 * htmlSaveFile:
1229 * @filename: the filename (or URL)
1230 * @cur: the document
1231 *
1232 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1233 * used.
1234 * returns: the number of byte written or -1 in case of failure.
1235 */
1236int
1237htmlSaveFile(const char *filename, xmlDocPtr cur) {
1238 xmlOutputBufferPtr buf;
1239 xmlCharEncodingHandlerPtr handler = NULL;
1240 const char *encoding;
1241 int ret;
1242
1243 encoding = (const char *) htmlGetMetaEncoding(cur);
1244
1245 if (encoding != NULL) {
1246 xmlCharEncoding enc;
1247
1248 enc = xmlParseCharEncoding(encoding);
1249 if (enc != cur->charset) {
1250 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1251 /*
1252 * Not supported yet
1253 */
1254 return(-1);
1255 }
1256
1257 handler = xmlFindCharEncodingHandler(encoding);
1258 if (handler == NULL)
1259 return(-1);
1260 }
1261 }
1262
1263 /*
1264 * Fallback to HTML or ASCII when the encoding is unspecified
1265 */
1266 if (handler == NULL)
1267 handler = xmlFindCharEncodingHandler("HTML");
1268 if (handler == NULL)
1269 handler = xmlFindCharEncodingHandler("ascii");
1270
1271 /*
1272 * save the content to a temp buffer.
1273 */
1274 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1275 if (buf == NULL) return(0);
1276
1277 htmlDocContentDumpOutput(buf, cur, NULL);
1278
1279 ret = xmlOutputBufferClose(buf);
1280 return(ret);
1281}
1282
1283/**
Daniel Veillard95d845f2001-06-13 13:48:46 +00001284 * htmlSaveFileFormat:
Owen Taylor3473f882001-02-23 17:55:21 +00001285 * @filename: the filename
1286 * @cur: the document
Daniel Veillard95d845f2001-06-13 13:48:46 +00001287 * @format: should formatting spaces been added
1288 * @encoding: the document encoding
Owen Taylor3473f882001-02-23 17:55:21 +00001289 *
1290 * Dump an HTML document to a file using a given encoding.
1291 *
1292 * returns: the number of byte written or -1 in case of failure.
1293 */
1294int
Daniel Veillard95d845f2001-06-13 13:48:46 +00001295htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1296 const char *encoding, int format) {
Owen Taylor3473f882001-02-23 17:55:21 +00001297 xmlOutputBufferPtr buf;
1298 xmlCharEncodingHandlerPtr handler = NULL;
1299 int ret;
1300
1301 if (encoding != NULL) {
1302 xmlCharEncoding enc;
1303
1304 enc = xmlParseCharEncoding(encoding);
1305 if (enc != cur->charset) {
1306 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1307 /*
1308 * Not supported yet
1309 */
1310 return(-1);
1311 }
1312
1313 handler = xmlFindCharEncodingHandler(encoding);
1314 if (handler == NULL)
1315 return(-1);
1316 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1317 }
Daniel Veillard4dd93462001-04-02 15:16:19 +00001318 } else {
1319 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
Owen Taylor3473f882001-02-23 17:55:21 +00001320 }
1321
1322 /*
1323 * Fallback to HTML or ASCII when the encoding is unspecified
1324 */
1325 if (handler == NULL)
1326 handler = xmlFindCharEncodingHandler("HTML");
1327 if (handler == NULL)
1328 handler = xmlFindCharEncodingHandler("ascii");
1329
1330 /*
1331 * save the content to a temp buffer.
1332 */
1333 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1334 if (buf == NULL) return(0);
1335
Daniel Veillard95d845f2001-06-13 13:48:46 +00001336 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
Owen Taylor3473f882001-02-23 17:55:21 +00001337
1338 ret = xmlOutputBufferClose(buf);
1339 return(ret);
1340}
Daniel Veillard95d845f2001-06-13 13:48:46 +00001341
1342/**
1343 * htmlSaveFileEnc:
1344 * @filename: the filename
1345 * @cur: the document
1346 * @encoding: the document encoding
1347 *
1348 * Dump an HTML document to a file using a given encoding
1349 * and formatting returns/spaces are added.
1350 *
1351 * returns: the number of byte written or -1 in case of failure.
1352 */
1353int
1354htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1355 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1356}
1357
Owen Taylor3473f882001-02-23 17:55:21 +00001358#endif /* LIBXML_HTML_ENABLED */