blob: c8e8a646ff329f481525edf1107ea6d619ae43de [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access function for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
Daniel Veillard7f7d1111999-09-22 09:46:25 +00009
Daniel Veillard3c558c31999-12-22 11:30:41 +000010#ifdef WIN32
11#include "win32config.h"
12#else
Daniel Veillard167b5091999-07-07 04:19:20 +000013#include "config.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000014#endif
Daniel Veillard361d8452000-04-03 19:48:13 +000015
Daniel Veillardb71379b2000-10-09 12:30:39 +000016#include <libxml/xmlversion.h>
Daniel Veillard361d8452000-04-03 19:48:13 +000017#ifdef LIBXML_HTML_ENABLED
18
Daniel Veillard167b5091999-07-07 04:19:20 +000019#include <stdio.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000020#include <string.h> /* for memset() only ! */
21
Daniel Veillard7f7d1111999-09-22 09:46:25 +000022#ifdef HAVE_CTYPE_H
23#include <ctype.h>
24#endif
25#ifdef HAVE_STDLIB_H
26#include <stdlib.h>
27#endif
28
Daniel Veillard361d8452000-04-03 19:48:13 +000029#include <libxml/xmlmemory.h>
30#include <libxml/HTMLparser.h>
31#include <libxml/HTMLtree.h>
32#include <libxml/entities.h>
33#include <libxml/valid.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000034
/************************************************************************
 *                                                                      *
 *              Getting/Setting encoding meta tags                      *
 *                                                                      *
 ************************************************************************/
40
41/**
42 * htmlGetMetaEncoding:
43 * @doc: the document
44 *
45 * Encoding definition lookup in the Meta tags
46 *
47 * Returns the current encoding as flagged in the HTML source
48 */
49const xmlChar *
50htmlGetMetaEncoding(htmlDocPtr doc) {
51 htmlNodePtr cur;
52 const xmlChar *content;
53 const xmlChar *encoding;
54
55 if (doc == NULL)
56 return(NULL);
57 cur = doc->children;
58
59 /*
60 * Search the html
61 */
62 while (cur != NULL) {
63 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +000064 if (xmlStrEqual(cur->name, BAD_CAST"html"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +000065 break;
Daniel Veillard8b5dd832000-10-01 20:28:44 +000066 if (xmlStrEqual(cur->name, BAD_CAST"head"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +000067 goto found_head;
Daniel Veillard8b5dd832000-10-01 20:28:44 +000068 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +000069 goto found_meta;
70 }
71 cur = cur->next;
72 }
73 if (cur == NULL)
74 return(NULL);
75 cur = cur->children;
76
77 /*
78 * Search the head
79 */
80 while (cur != NULL) {
81 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +000082 if (xmlStrEqual(cur->name, BAD_CAST"head"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +000083 break;
Daniel Veillard8b5dd832000-10-01 20:28:44 +000084 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +000085 goto found_meta;
86 }
87 cur = cur->next;
88 }
89 if (cur == NULL)
90 return(NULL);
91found_head:
92 cur = cur->children;
93
94 /*
95 * Search the meta elements
96 */
97found_meta:
98 while (cur != NULL) {
99 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000100 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000101 xmlAttrPtr attr = cur->properties;
102 int http;
103 const xmlChar *value;
104
105 content = NULL;
106 http = 0;
107 while (attr != NULL) {
108 if ((attr->children != NULL) &&
109 (attr->children->type == XML_TEXT_NODE) &&
110 (attr->children->next == NULL)) {
111#ifndef XML_USE_BUFFER_CONTENT
112 value = attr->children->content;
113#else
114 value = xmlBufferContent(attr->children->content);
115#endif
Daniel Veillardb656ebe2000-09-22 13:51:48 +0000116 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
117 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000118 http = 1;
Daniel Veillardb656ebe2000-09-22 13:51:48 +0000119 else if ((value != NULL)
120 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000121 content = value;
122 if ((http != 0) && (content != NULL))
123 goto found_content;
124 }
125 attr = attr->next;
126 }
127 }
128 }
129 cur = cur->next;
130 }
131 return(NULL);
132
133found_content:
134 encoding = xmlStrstr(content, BAD_CAST"charset=");
135 if (encoding == NULL)
136 encoding = xmlStrstr(content, BAD_CAST"Charset=");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
139 if (encoding != NULL) {
140 encoding += 8;
141 } else {
142 encoding = xmlStrstr(content, BAD_CAST"charset =");
143 if (encoding == NULL)
144 encoding = xmlStrstr(content, BAD_CAST"Charset =");
145 if (encoding == NULL)
146 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
147 if (encoding != NULL)
148 encoding += 9;
149 }
150 if (encoding != NULL) {
151 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
152 }
153 return(encoding);
154}
155
156/**
157 * htmlSetMetaEncoding:
158 * @doc: the document
159 * @encoding: the encoding string
160 *
161 * Sets the current encoding in the Meta tags
162 * NOTE: this will not change the document content encoding, just
163 * the META flag associated.
164 *
165 * Returns 0 in case of success and -1 in case of error
166 */
167int
168htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
169 htmlNodePtr cur, meta;
170 const xmlChar *content;
171 char newcontent[100];
172
173
174 if (doc == NULL)
175 return(-1);
176
177 if (encoding != NULL) {
Daniel Veillard39c7d712000-09-10 16:14:55 +0000178#ifdef HAVE_SNPRINTF
179 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
180 encoding);
181#else
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000182 sprintf(newcontent, "text/html; charset=%s", encoding);
Daniel Veillard39c7d712000-09-10 16:14:55 +0000183#endif
184 newcontent[sizeof(newcontent) - 1] = 0;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000185 }
186
187 cur = doc->children;
188
189 /*
190 * Search the html
191 */
192 while (cur != NULL) {
193 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000194 if (xmlStrEqual(cur->name, BAD_CAST"html"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000195 break;
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000196 if (xmlStrEqual(cur->name, BAD_CAST"body")) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000197 if (encoding == NULL)
198 return(0);
199 meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL);
200 xmlAddPrevSibling(cur, meta);
201 cur = meta;
202 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
203 xmlAddChild(cur, meta);
204 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
205 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
206 return(0);
207 }
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000208 if (xmlStrEqual(cur->name, BAD_CAST"head"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000209 goto found_head;
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000210 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000211 goto found_meta;
212 }
213 cur = cur->next;
214 }
215 if (cur == NULL)
216 return(-1);
217 cur = cur->children;
218
219 /*
220 * Search the head
221 */
222 while (cur != NULL) {
223 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000224 if (xmlStrEqual(cur->name, BAD_CAST"head"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000225 break;
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000226 if (xmlStrEqual(cur->name, BAD_CAST"body")) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000227 if (encoding == NULL)
228 return(0);
229 meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL);
230 xmlAddPrevSibling(cur, meta);
231 cur = meta;
232 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
233 xmlAddChild(cur, meta);
234 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
235 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
236 return(0);
237 }
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000238 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000239 goto found_meta;
240 }
241 cur = cur->next;
242 }
243 if (cur == NULL)
244 return(-1);
245found_head:
246 if (cur->children == NULL) {
247 if (encoding == NULL)
248 return(0);
249 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
250 xmlAddChild(cur, meta);
251 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
252 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
253 return(0);
254 }
255 cur = cur->children;
256
257found_meta:
258 if (encoding != NULL) {
259 /*
260 * Create a new Meta element with the right aatributes
261 */
262
263 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
264 xmlAddPrevSibling(cur, meta);
265 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
266 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
267 }
268
269 /*
270 * Search and destroy all the remaining the meta elements carrying
271 * encoding informations
272 */
273 while (cur != NULL) {
274 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000275 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000276 xmlAttrPtr attr = cur->properties;
277 int http;
278 const xmlChar *value;
279
280 content = NULL;
281 http = 0;
282 while (attr != NULL) {
283 if ((attr->children != NULL) &&
284 (attr->children->type == XML_TEXT_NODE) &&
285 (attr->children->next == NULL)) {
286#ifndef XML_USE_BUFFER_CONTENT
287 value = attr->children->content;
288#else
289 value = xmlBufferContent(attr->children->content);
290#endif
Daniel Veillardb656ebe2000-09-22 13:51:48 +0000291 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
292 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000293 http = 1;
Daniel Veillardb656ebe2000-09-22 13:51:48 +0000294 else if ((value != NULL)
295 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000296 content = value;
297 if ((http != 0) && (content != NULL))
298 break;
299 }
300 attr = attr->next;
301 }
302 if ((http != 0) && (content != NULL)) {
303 meta = cur;
304 cur = cur->next;
305 xmlUnlinkNode(meta);
306 xmlFreeNode(meta);
307 continue;
308 }
309
310 }
311 }
312 cur = cur->next;
313 }
314 return(0);
315}
316
/************************************************************************
 *                                                                      *
 *              Dumping HTML tree content to a simple buffer            *
 *                                                                      *
 ************************************************************************/
322
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000323static void
324htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
325
Daniel Veillard167b5091999-07-07 04:19:20 +0000326/**
327 * htmlDtdDump:
328 * @buf: the HTML buffer output
329 * @doc: the document
330 *
331 * Dump the HTML document DTD, if any.
332 */
333static void
334htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
335 xmlDtdPtr cur = doc->intSubset;
336
337 if (cur == NULL) {
338 fprintf(stderr, "htmlDtdDump : no internal subset\n");
339 return;
340 }
341 xmlBufferWriteChar(buf, "<!DOCTYPE ");
342 xmlBufferWriteCHAR(buf, cur->name);
343 if (cur->ExternalID != NULL) {
344 xmlBufferWriteChar(buf, " PUBLIC ");
345 xmlBufferWriteQuotedString(buf, cur->ExternalID);
Daniel Veillard1566d3a1999-07-15 14:24:29 +0000346 if (cur->SystemID != NULL) {
347 xmlBufferWriteChar(buf, " ");
348 xmlBufferWriteQuotedString(buf, cur->SystemID);
349 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000350 } else if (cur->SystemID != NULL) {
351 xmlBufferWriteChar(buf, " SYSTEM ");
352 xmlBufferWriteQuotedString(buf, cur->SystemID);
353 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000354 xmlBufferWriteChar(buf, ">\n");
355}
356
357/**
358 * htmlAttrDump:
359 * @buf: the HTML buffer output
360 * @doc: the document
361 * @cur: the attribute pointer
362 *
363 * Dump an HTML attribute
364 */
365static void
366htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000367 xmlChar *value;
Daniel Veillard167b5091999-07-07 04:19:20 +0000368
369 if (cur == NULL) {
370 fprintf(stderr, "htmlAttrDump : property == NULL\n");
371 return;
372 }
373 xmlBufferWriteChar(buf, " ");
374 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillardbe803962000-06-28 23:40:59 +0000375 if (cur->children != NULL) {
376 value = xmlNodeListGetString(doc, cur->children, 0);
377 if (value) {
378 xmlBufferWriteChar(buf, "=");
379 xmlBufferWriteQuotedString(buf, value);
380 xmlFree(value);
381 } else {
382 xmlBufferWriteChar(buf, "=\"\"");
383 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000384 }
385}
386
387/**
388 * htmlAttrListDump:
389 * @buf: the HTML buffer output
390 * @doc: the document
391 * @cur: the first attribute pointer
392 *
393 * Dump a list of HTML attributes
394 */
395static void
396htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
397 if (cur == NULL) {
398 fprintf(stderr, "htmlAttrListDump : property == NULL\n");
399 return;
400 }
401 while (cur != NULL) {
402 htmlAttrDump(buf, doc, cur);
403 cur = cur->next;
404 }
405}
406
407
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000408void
Daniel Veillard82150d81999-07-07 07:32:15 +0000409htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000410/**
411 * htmlNodeListDump:
412 * @buf: the HTML buffer output
413 * @doc: the document
414 * @cur: the first node
Daniel Veillard167b5091999-07-07 04:19:20 +0000415 *
416 * Dump an HTML node list, recursive behaviour,children are printed too.
417 */
418static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000419htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000420 if (cur == NULL) {
421 fprintf(stderr, "htmlNodeListDump : node == NULL\n");
422 return;
423 }
424 while (cur != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000425 htmlNodeDump(buf, doc, cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000426 cur = cur->next;
427 }
428}
429
430/**
431 * htmlNodeDump:
432 * @buf: the HTML buffer output
433 * @doc: the document
434 * @cur: the current node
Daniel Veillard167b5091999-07-07 04:19:20 +0000435 *
436 * Dump an HTML node, recursive behaviour,children are printed too.
437 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000438void
Daniel Veillard82150d81999-07-07 07:32:15 +0000439htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000440 htmlElemDescPtr info;
Daniel Veillard167b5091999-07-07 04:19:20 +0000441
442 if (cur == NULL) {
443 fprintf(stderr, "htmlNodeDump : node == NULL\n");
444 return;
445 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000446 /*
447 * Special cases.
448 */
Daniel Veillardd83eb822000-06-30 18:39:56 +0000449 if (cur->type == XML_DTD_NODE)
450 return;
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000451 if (cur->type == XML_HTML_DOCUMENT_NODE) {
452 htmlDocContentDump(buf, (xmlDocPtr) cur);
453 return;
454 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000455 if (cur->type == HTML_TEXT_NODE) {
456 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000457 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000458
Daniel Veillardd293fd11999-12-01 09:51:45 +0000459#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000460 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000461#else
462 buffer = xmlEncodeEntitiesReentrant(doc,
463 xmlBufferContent(cur->content));
464#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000465 if (buffer != NULL) {
466 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000467 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000468 }
469 }
470 return;
471 }
472 if (cur->type == HTML_COMMENT_NODE) {
473 if (cur->content != NULL) {
474 xmlBufferWriteChar(buf, "<!--");
Daniel Veillardd293fd11999-12-01 09:51:45 +0000475#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000476 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000477#else
478 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
479#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000480 xmlBufferWriteChar(buf, "-->");
481 }
482 return;
483 }
484 if (cur->type == HTML_ENTITY_REF_NODE) {
485 xmlBufferWriteChar(buf, "&");
486 xmlBufferWriteCHAR(buf, cur->name);
487 xmlBufferWriteChar(buf, ";");
488 return;
489 }
490
Daniel Veillard82150d81999-07-07 07:32:15 +0000491 /*
492 * Get specific HTmL info for taht node.
493 */
494 info = htmlTagLookup(cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000495
Daniel Veillard82150d81999-07-07 07:32:15 +0000496 xmlBufferWriteChar(buf, "<");
Daniel Veillard167b5091999-07-07 04:19:20 +0000497 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000498 if (cur->properties != NULL)
499 htmlAttrListDump(buf, doc, cur->properties);
500
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000501 if ((info != NULL) && (info->empty)) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000502 xmlBufferWriteChar(buf, ">");
503 if (cur->next != NULL) {
504 if ((cur->next->type != HTML_TEXT_NODE) &&
505 (cur->next->type != HTML_ENTITY_REF_NODE))
506 xmlBufferWriteChar(buf, "\n");
507 }
508 return;
509 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000510 if ((cur->content == NULL) && (cur->children == NULL)) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000511 if ((info != NULL) && (info->endTag != 0))
Daniel Veillard82150d81999-07-07 07:32:15 +0000512 xmlBufferWriteChar(buf, ">");
513 else {
514 xmlBufferWriteChar(buf, "></");
515 xmlBufferWriteCHAR(buf, cur->name);
516 xmlBufferWriteChar(buf, ">");
517 }
518 if (cur->next != NULL) {
519 if ((cur->next->type != HTML_TEXT_NODE) &&
520 (cur->next->type != HTML_ENTITY_REF_NODE))
521 xmlBufferWriteChar(buf, "\n");
522 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000523 return;
524 }
525 xmlBufferWriteChar(buf, ">");
526 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000527 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000528
Daniel Veillardd293fd11999-12-01 09:51:45 +0000529#ifndef XML_USE_BUFFER_CONTENT
530 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
531#else
532 buffer = xmlEncodeEntitiesReentrant(doc,
533 xmlBufferContent(cur->content));
534#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000535 if (buffer != NULL) {
536 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000537 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000538 }
539 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000540 if (cur->children != NULL) {
541 if ((cur->children->type != HTML_TEXT_NODE) &&
542 (cur->children->type != HTML_ENTITY_REF_NODE) &&
543 (cur->children != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000544 xmlBufferWriteChar(buf, "\n");
Daniel Veillardcf461992000-03-14 18:30:20 +0000545 htmlNodeListDump(buf, doc, cur->children);
Daniel Veillard82150d81999-07-07 07:32:15 +0000546 if ((cur->last->type != HTML_TEXT_NODE) &&
Chris Lahey6dff2141999-12-01 09:51:45 +0000547 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardcf461992000-03-14 18:30:20 +0000548 (cur->children != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000549 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000550 }
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000551 if (!htmlIsAutoClosed(doc, cur)) {
552 xmlBufferWriteChar(buf, "</");
553 xmlBufferWriteCHAR(buf, cur->name);
554 xmlBufferWriteChar(buf, ">");
555 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000556 if (cur->next != NULL) {
557 if ((cur->next->type != HTML_TEXT_NODE) &&
558 (cur->next->type != HTML_ENTITY_REF_NODE))
559 xmlBufferWriteChar(buf, "\n");
560 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000561}
562
563/**
Daniel Veillard5feb8492000-02-02 17:15:36 +0000564 * htmlNodeDumpFile:
565 * @out: the FILE pointer
566 * @doc: the document
567 * @cur: the current node
568 *
569 * Dump an HTML node, recursive behaviour,children are printed too.
570 */
571void
572htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
573 xmlBufferPtr buf;
574
575 buf = xmlBufferCreate();
576 if (buf == NULL) return;
577 htmlNodeDump(buf, doc, cur);
578 xmlBufferDump(out, buf);
579 xmlBufferFree(buf);
580}
581
582/**
Daniel Veillard167b5091999-07-07 04:19:20 +0000583 * htmlDocContentDump:
584 * @buf: the HTML buffer output
585 * @cur: the document
586 *
587 * Dump an HTML document.
588 */
589static void
590htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000591 int type;
592
593 /*
594 * force to output the stuff as HTML, especially for entities
595 */
596 type = cur->type;
597 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard167b5091999-07-07 04:19:20 +0000598 if (cur->intSubset != NULL)
599 htmlDtdDump(buf, cur);
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000600 else {
601 /* Default to HTML-4.0 transitionnal @@@@ */
602 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
603
604 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000605 if (cur->children != NULL) {
606 htmlNodeListDump(buf, cur, cur->children);
Daniel Veillard167b5091999-07-07 04:19:20 +0000607 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000608 xmlBufferWriteChar(buf, "\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000609 cur->type = (xmlElementType) type;
Daniel Veillard167b5091999-07-07 04:19:20 +0000610}
611
612/**
613 * htmlDocDumpMemory:
614 * @cur: the document
615 * @mem: OUT: the memory pointer
616 * @size: OUT: the memory lenght
617 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000618 * Dump an HTML document in memory and return the xmlChar * and it's size.
Daniel Veillard167b5091999-07-07 04:19:20 +0000619 * It's up to the caller to free the memory.
620 */
621void
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000622htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000623 xmlBufferPtr buf;
624
625 if (cur == NULL) {
626#ifdef DEBUG_TREE
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000627 fprintf(stderr, "htmlxmlDocDumpMemory : document == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000628#endif
629 *mem = NULL;
630 *size = 0;
631 return;
632 }
633 buf = xmlBufferCreate();
634 if (buf == NULL) {
635 *mem = NULL;
636 *size = 0;
637 return;
638 }
639 htmlDocContentDump(buf, cur);
640 *mem = buf->content;
641 *size = buf->use;
642 memset(buf, -1, sizeof(xmlBuffer));
Daniel Veillard6454aec1999-09-02 22:04:43 +0000643 xmlFree(buf);
Daniel Veillard167b5091999-07-07 04:19:20 +0000644}
645
646
/************************************************************************
 *                                                                      *
 *              Dumping HTML tree content to an I/O output buffer       *
 *                                                                      *
 ************************************************************************/
652
653static void
654htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding);
655
656/**
657 * htmlDtdDump:
658 * @buf: the HTML buffer output
659 * @doc: the document
660 *
661 * Dump the HTML document DTD, if any.
662 */
663static void
664htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, const char *encoding) {
665 xmlDtdPtr cur = doc->intSubset;
666
667 if (cur == NULL) {
668 fprintf(stderr, "htmlDtdDump : no internal subset\n");
669 return;
670 }
671 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
672 xmlOutputBufferWriteString(buf, (const char *)cur->name);
673 if (cur->ExternalID != NULL) {
674 xmlOutputBufferWriteString(buf, " PUBLIC ");
675 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
676 if (cur->SystemID != NULL) {
677 xmlOutputBufferWriteString(buf, " ");
678 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
679 }
680 } else if (cur->SystemID != NULL) {
681 xmlOutputBufferWriteString(buf, " SYSTEM ");
682 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
683 }
684 xmlOutputBufferWriteString(buf, ">\n");
685}
686
687/**
688 * htmlAttrDump:
689 * @buf: the HTML buffer output
690 * @doc: the document
691 * @cur: the attribute pointer
692 *
693 * Dump an HTML attribute
694 */
695static void
696htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
697 xmlChar *value;
698
699 if (cur == NULL) {
700 fprintf(stderr, "htmlAttrDump : property == NULL\n");
701 return;
702 }
703 xmlOutputBufferWriteString(buf, " ");
704 xmlOutputBufferWriteString(buf, (const char *)cur->name);
705 if (cur->children != NULL) {
706 value = xmlNodeListGetString(doc, cur->children, 0);
707 if (value) {
708 xmlOutputBufferWriteString(buf, "=");
709 xmlBufferWriteQuotedString(buf->buffer, value);
710 xmlFree(value);
711 } else {
712 xmlOutputBufferWriteString(buf, "=\"\"");
713 }
714 }
715}
716
717/**
718 * htmlAttrListDump:
719 * @buf: the HTML buffer output
720 * @doc: the document
721 * @cur: the first attribute pointer
722 *
723 * Dump a list of HTML attributes
724 */
725static void
726htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
727 if (cur == NULL) {
728 fprintf(stderr, "htmlAttrListDump : property == NULL\n");
729 return;
730 }
731 while (cur != NULL) {
732 htmlAttrDumpOutput(buf, doc, cur, encoding);
733 cur = cur->next;
734 }
735}
736
737
738void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
739 xmlNodePtr cur, const char *encoding);
740
741/**
742 * htmlNodeListDump:
743 * @buf: the HTML buffer output
744 * @doc: the document
745 * @cur: the first node
746 *
747 * Dump an HTML node list, recursive behaviour,children are printed too.
748 */
749static void
750htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
751 if (cur == NULL) {
752 fprintf(stderr, "htmlNodeListDump : node == NULL\n");
753 return;
754 }
755 while (cur != NULL) {
756 htmlNodeDumpOutput(buf, doc, cur, encoding);
757 cur = cur->next;
758 }
759}
760
761/**
762 * htmlNodeDump:
763 * @buf: the HTML buffer output
764 * @doc: the document
765 * @cur: the current node
766 *
767 * Dump an HTML node, recursive behaviour,children are printed too.
768 */
769void
770htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
771 htmlElemDescPtr info;
772
773 if (cur == NULL) {
774 fprintf(stderr, "htmlNodeDump : node == NULL\n");
775 return;
776 }
777 /*
778 * Special cases.
779 */
780 if (cur->type == XML_DTD_NODE)
781 return;
782 if (cur->type == XML_HTML_DOCUMENT_NODE) {
783 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
784 return;
785 }
786 if (cur->type == HTML_TEXT_NODE) {
787 if (cur->content != NULL) {
788 xmlChar *buffer;
789
790#ifndef XML_USE_BUFFER_CONTENT
791 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
792#else
793 buffer = xmlEncodeEntitiesReentrant(doc,
794 xmlBufferContent(cur->content));
795#endif
796 if (buffer != NULL) {
797 xmlOutputBufferWriteString(buf, (const char *)buffer);
798 xmlFree(buffer);
799 }
800 }
801 return;
802 }
803 if (cur->type == HTML_COMMENT_NODE) {
804 if (cur->content != NULL) {
805 xmlOutputBufferWriteString(buf, "<!--");
806#ifndef XML_USE_BUFFER_CONTENT
807 xmlOutputBufferWriteString(buf, (const char *)cur->content);
808#else
809 xmlOutputBufferWriteString(buf, xmlBufferContent(cur->content));
810#endif
811 xmlOutputBufferWriteString(buf, "-->");
812 }
813 return;
814 }
815 if (cur->type == HTML_ENTITY_REF_NODE) {
816 xmlOutputBufferWriteString(buf, "&");
817 xmlOutputBufferWriteString(buf, (const char *)cur->name);
818 xmlOutputBufferWriteString(buf, ";");
819 return;
820 }
Daniel Veillard7eda8452000-10-14 23:38:43 +0000821 if (cur->type == HTML_PRESERVE_NODE) {
822 if (cur->content != NULL) {
823#ifndef XML_USE_BUFFER_CONTENT
824 xmlOutputBufferWriteString(buf, (const char *)cur->content);
825#else
826 xmlOutputBufferWriteString(buf, xmlBufferContent(cur->content));
827#endif
828 }
829 return;
830 }
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000831
832 /*
833 * Get specific HTmL info for taht node.
834 */
835 info = htmlTagLookup(cur->name);
836
837 xmlOutputBufferWriteString(buf, "<");
838 xmlOutputBufferWriteString(buf, (const char *)cur->name);
839 if (cur->properties != NULL)
840 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
841
842 if ((info != NULL) && (info->empty)) {
843 xmlOutputBufferWriteString(buf, ">");
844 if (cur->next != NULL) {
845 if ((cur->next->type != HTML_TEXT_NODE) &&
846 (cur->next->type != HTML_ENTITY_REF_NODE))
847 xmlOutputBufferWriteString(buf, "\n");
848 }
849 return;
850 }
851 if ((cur->content == NULL) && (cur->children == NULL)) {
852 if ((info != NULL) && (info->endTag != 0))
853 xmlOutputBufferWriteString(buf, ">");
854 else {
855 xmlOutputBufferWriteString(buf, "></");
856 xmlOutputBufferWriteString(buf, (const char *)cur->name);
857 xmlOutputBufferWriteString(buf, ">");
858 }
859 if (cur->next != NULL) {
860 if ((cur->next->type != HTML_TEXT_NODE) &&
861 (cur->next->type != HTML_ENTITY_REF_NODE))
862 xmlOutputBufferWriteString(buf, "\n");
863 }
864 return;
865 }
866 xmlOutputBufferWriteString(buf, ">");
867 if (cur->content != NULL) {
868#if 0
869 xmlChar *buffer;
870
871#ifndef XML_USE_BUFFER_CONTENT
872 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
873#else
874 buffer = xmlEncodeEntitiesReentrant(doc,
875 xmlBufferContent(cur->content));
876#endif
877 if (buffer != NULL) {
878 xmlOutputBufferWriteString(buf, buffer);
879 xmlFree(buffer);
880 }
881#else
882 /*
883 * Uses the OutputBuffer property to automatically convert
884 * invalids to charrefs
885 */
886
887#ifndef XML_USE_BUFFER_CONTENT
888 xmlOutputBufferWriteString(buf, (const char *) cur->content);
889#else
890 xmlOutputBufferWriteString(buf,
891 (const char *) xmlBufferContent(cur->content));
892#endif
893#endif
894 }
895 if (cur->children != NULL) {
896 if ((cur->children->type != HTML_TEXT_NODE) &&
897 (cur->children->type != HTML_ENTITY_REF_NODE) &&
898 (cur->children != cur->last))
899 xmlOutputBufferWriteString(buf, "\n");
900 htmlNodeListDumpOutput(buf, doc, cur->children, encoding);
901 if ((cur->last->type != HTML_TEXT_NODE) &&
902 (cur->last->type != HTML_ENTITY_REF_NODE) &&
903 (cur->children != cur->last))
904 xmlOutputBufferWriteString(buf, "\n");
905 }
906 if (!htmlIsAutoClosed(doc, cur)) {
907 xmlOutputBufferWriteString(buf, "</");
908 xmlOutputBufferWriteString(buf, (const char *)cur->name);
909 xmlOutputBufferWriteString(buf, ">");
910 }
911 if (cur->next != NULL) {
912 if ((cur->next->type != HTML_TEXT_NODE) &&
913 (cur->next->type != HTML_ENTITY_REF_NODE))
914 xmlOutputBufferWriteString(buf, "\n");
915 }
916}
917
918/**
919 * htmlDocContentDump:
920 * @buf: the HTML buffer output
921 * @cur: the document
922 *
923 * Dump an HTML document.
924 */
925static void
926htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding) {
927 int type;
928
929 /*
930 * force to output the stuff as HTML, especially for entities
931 */
932 type = cur->type;
933 cur->type = XML_HTML_DOCUMENT_NODE;
934 if (cur->intSubset != NULL)
935 htmlDtdDumpOutput(buf, cur, NULL);
936 else {
937 /* Default to HTML-4.0 transitionnal @@@@ */
938 xmlOutputBufferWriteString(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
939
940 }
941 if (cur->children != NULL) {
942 htmlNodeListDumpOutput(buf, cur, cur->children, encoding);
943 }
944 xmlOutputBufferWriteString(buf, "\n");
945 cur->type = (xmlElementType) type;
946}
947
948
/************************************************************************
 *                                                                      *
 *              Saving functions front-ends                             *
 *                                                                      *
 ************************************************************************/
954
Daniel Veillard167b5091999-07-07 04:19:20 +0000955/**
956 * htmlDocDump:
957 * @f: the FILE*
958 * @cur: the document
959 *
960 * Dump an HTML document to an open FILE.
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000961 *
962 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard167b5091999-07-07 04:19:20 +0000963 */
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000964int
Daniel Veillard167b5091999-07-07 04:19:20 +0000965htmlDocDump(FILE *f, xmlDocPtr cur) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000966 xmlOutputBufferPtr buf;
967 xmlCharEncodingHandlerPtr handler = NULL;
968 const char *encoding;
969 int ret;
Daniel Veillard167b5091999-07-07 04:19:20 +0000970
971 if (cur == NULL) {
972#ifdef DEBUG_TREE
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000973 fprintf(stderr, "htmlDocDump : document == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000974#endif
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000975 return(-1);
Daniel Veillard167b5091999-07-07 04:19:20 +0000976 }
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000977
978 encoding = (const char *) htmlGetMetaEncoding(cur);
979
980 if (encoding != NULL) {
981 xmlCharEncoding enc;
982
983 enc = xmlParseCharEncoding(encoding);
984 if (enc != cur->charset) {
985 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
986 /*
987 * Not supported yet
988 */
989 return(-1);
990 }
991
992 handler = xmlFindCharEncodingHandler(encoding);
993 if (handler == NULL)
994 return(-1);
995 }
996 }
997
998 /*
999 * Fallback to HTML or ASCII when the encoding is unspecified
1000 */
1001 if (handler == NULL)
1002 handler = xmlFindCharEncodingHandler("HTML");
1003 if (handler == NULL)
1004 handler = xmlFindCharEncodingHandler("ascii");
1005
1006 buf = xmlOutputBufferCreateFile(f, handler);
1007 if (buf == NULL) return(-1);
1008 htmlDocContentDumpOutput(buf, cur, NULL);
1009
1010 ret = xmlOutputBufferClose(buf);
1011 return(ret);
Daniel Veillard167b5091999-07-07 04:19:20 +00001012}
1013
1014/**
1015 * htmlSaveFile:
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001016 * @filename: the filename (or URL)
Daniel Veillard167b5091999-07-07 04:19:20 +00001017 * @cur: the document
1018 *
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001019 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1020 * used.
Daniel Veillard167b5091999-07-07 04:19:20 +00001021 * returns: the number of byte written or -1 in case of failure.
1022 */
1023int
1024htmlSaveFile(const char *filename, xmlDocPtr cur) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001025 xmlOutputBufferPtr buf;
1026 xmlCharEncodingHandlerPtr handler = NULL;
1027 const char *encoding;
Daniel Veillard167b5091999-07-07 04:19:20 +00001028 int ret;
1029
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001030 encoding = (const char *) htmlGetMetaEncoding(cur);
1031
1032 if (encoding != NULL) {
1033 xmlCharEncoding enc;
1034
1035 enc = xmlParseCharEncoding(encoding);
1036 if (enc != cur->charset) {
1037 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1038 /*
1039 * Not supported yet
1040 */
1041 return(-1);
1042 }
1043
1044 handler = xmlFindCharEncodingHandler(encoding);
1045 if (handler == NULL)
1046 return(-1);
1047 }
1048 }
1049
1050 /*
1051 * Fallback to HTML or ASCII when the encoding is unspecified
1052 */
1053 if (handler == NULL)
1054 handler = xmlFindCharEncodingHandler("HTML");
1055 if (handler == NULL)
1056 handler = xmlFindCharEncodingHandler("ascii");
1057
Daniel Veillard167b5091999-07-07 04:19:20 +00001058 /*
1059 * save the content to a temp buffer.
1060 */
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001061 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
Daniel Veillard167b5091999-07-07 04:19:20 +00001062 if (buf == NULL) return(0);
Daniel Veillard167b5091999-07-07 04:19:20 +00001063
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001064 htmlDocContentDumpOutput(buf, cur, NULL);
Daniel Veillard167b5091999-07-07 04:19:20 +00001065
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001066 ret = xmlOutputBufferClose(buf);
1067 return(ret);
Daniel Veillard167b5091999-07-07 04:19:20 +00001068}
1069
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001070/**
1071 * htmlSaveFileEnc:
1072 * @filename: the filename
1073 * @cur: the document
1074 *
1075 * Dump an HTML document to a file using a given encoding.
1076 *
1077 * returns: the number of byte written or -1 in case of failure.
1078 */
1079int
1080htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1081 xmlOutputBufferPtr buf;
1082 xmlCharEncodingHandlerPtr handler = NULL;
1083 int ret;
1084
1085 if (encoding != NULL) {
1086 xmlCharEncoding enc;
1087
1088 enc = xmlParseCharEncoding(encoding);
1089 if (enc != cur->charset) {
1090 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1091 /*
1092 * Not supported yet
1093 */
1094 return(-1);
1095 }
1096
1097 handler = xmlFindCharEncodingHandler(encoding);
1098 if (handler == NULL)
1099 return(-1);
1100 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1101 }
1102 }
1103
1104 /*
1105 * Fallback to HTML or ASCII when the encoding is unspecified
1106 */
1107 if (handler == NULL)
1108 handler = xmlFindCharEncodingHandler("HTML");
1109 if (handler == NULL)
1110 handler = xmlFindCharEncodingHandler("ascii");
1111
1112 /*
1113 * save the content to a temp buffer.
1114 */
1115 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1116 if (buf == NULL) return(0);
1117
1118 htmlDocContentDumpOutput(buf, cur, encoding);
1119
1120 ret = xmlOutputBufferClose(buf);
1121 return(ret);
1122}
Daniel Veillard361d8452000-04-03 19:48:13 +00001123#endif /* LIBXML_HTML_ENABLED */