blob: 9ce689717cf2036e11aaea7d5f4ad390314981e5 [file] [log] [blame]
/*
 * HTMLtree.c : implementation of access functions for an HTML tree.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
Daniel Veillard7f7d1111999-09-22 09:46:25 +00009
Daniel Veillard3c558c31999-12-22 11:30:41 +000010#ifdef WIN32
11#include "win32config.h"
12#else
Daniel Veillard167b5091999-07-07 04:19:20 +000013#include "config.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000014#endif
Daniel Veillard361d8452000-04-03 19:48:13 +000015
Daniel Veillardb71379b2000-10-09 12:30:39 +000016#include <libxml/xmlversion.h>
Daniel Veillard361d8452000-04-03 19:48:13 +000017#ifdef LIBXML_HTML_ENABLED
18
Daniel Veillard167b5091999-07-07 04:19:20 +000019#include <stdio.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000020#include <string.h> /* for memset() only ! */
21
Daniel Veillard7f7d1111999-09-22 09:46:25 +000022#ifdef HAVE_CTYPE_H
23#include <ctype.h>
24#endif
25#ifdef HAVE_STDLIB_H
26#include <stdlib.h>
27#endif
28
Daniel Veillard361d8452000-04-03 19:48:13 +000029#include <libxml/xmlmemory.h>
30#include <libxml/HTMLparser.h>
31#include <libxml/HTMLtree.h>
32#include <libxml/entities.h>
33#include <libxml/valid.h>
Daniel Veillard167b5091999-07-07 04:19:20 +000034
Daniel Veillard32bc74e2000-07-14 14:49:25 +000035/************************************************************************
36 * *
37 * Getting/Setting encoding meta tags *
38 * *
39 ************************************************************************/
40
41/**
42 * htmlGetMetaEncoding:
43 * @doc: the document
44 *
45 * Encoding definition lookup in the Meta tags
46 *
47 * Returns the current encoding as flagged in the HTML source
48 */
49const xmlChar *
50htmlGetMetaEncoding(htmlDocPtr doc) {
51 htmlNodePtr cur;
52 const xmlChar *content;
53 const xmlChar *encoding;
54
55 if (doc == NULL)
56 return(NULL);
57 cur = doc->children;
58
59 /*
60 * Search the html
61 */
62 while (cur != NULL) {
63 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +000064 if (xmlStrEqual(cur->name, BAD_CAST"html"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +000065 break;
Daniel Veillard8b5dd832000-10-01 20:28:44 +000066 if (xmlStrEqual(cur->name, BAD_CAST"head"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +000067 goto found_head;
Daniel Veillard8b5dd832000-10-01 20:28:44 +000068 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +000069 goto found_meta;
70 }
71 cur = cur->next;
72 }
73 if (cur == NULL)
74 return(NULL);
75 cur = cur->children;
76
77 /*
78 * Search the head
79 */
80 while (cur != NULL) {
81 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +000082 if (xmlStrEqual(cur->name, BAD_CAST"head"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +000083 break;
Daniel Veillard8b5dd832000-10-01 20:28:44 +000084 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +000085 goto found_meta;
86 }
87 cur = cur->next;
88 }
89 if (cur == NULL)
90 return(NULL);
91found_head:
92 cur = cur->children;
93
94 /*
95 * Search the meta elements
96 */
97found_meta:
98 while (cur != NULL) {
99 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000100 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000101 xmlAttrPtr attr = cur->properties;
102 int http;
103 const xmlChar *value;
104
105 content = NULL;
106 http = 0;
107 while (attr != NULL) {
108 if ((attr->children != NULL) &&
109 (attr->children->type == XML_TEXT_NODE) &&
110 (attr->children->next == NULL)) {
111#ifndef XML_USE_BUFFER_CONTENT
112 value = attr->children->content;
113#else
114 value = xmlBufferContent(attr->children->content);
115#endif
Daniel Veillardb656ebe2000-09-22 13:51:48 +0000116 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
117 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000118 http = 1;
Daniel Veillardb656ebe2000-09-22 13:51:48 +0000119 else if ((value != NULL)
120 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000121 content = value;
122 if ((http != 0) && (content != NULL))
123 goto found_content;
124 }
125 attr = attr->next;
126 }
127 }
128 }
129 cur = cur->next;
130 }
131 return(NULL);
132
133found_content:
134 encoding = xmlStrstr(content, BAD_CAST"charset=");
135 if (encoding == NULL)
136 encoding = xmlStrstr(content, BAD_CAST"Charset=");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
139 if (encoding != NULL) {
140 encoding += 8;
141 } else {
142 encoding = xmlStrstr(content, BAD_CAST"charset =");
143 if (encoding == NULL)
144 encoding = xmlStrstr(content, BAD_CAST"Charset =");
145 if (encoding == NULL)
146 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
147 if (encoding != NULL)
148 encoding += 9;
149 }
150 if (encoding != NULL) {
151 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
152 }
153 return(encoding);
154}
155
156/**
157 * htmlSetMetaEncoding:
158 * @doc: the document
159 * @encoding: the encoding string
160 *
161 * Sets the current encoding in the Meta tags
162 * NOTE: this will not change the document content encoding, just
163 * the META flag associated.
164 *
165 * Returns 0 in case of success and -1 in case of error
166 */
167int
168htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
169 htmlNodePtr cur, meta;
170 const xmlChar *content;
171 char newcontent[100];
172
173
174 if (doc == NULL)
175 return(-1);
176
177 if (encoding != NULL) {
Daniel Veillard39c7d712000-09-10 16:14:55 +0000178#ifdef HAVE_SNPRINTF
179 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
180 encoding);
181#else
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000182 sprintf(newcontent, "text/html; charset=%s", encoding);
Daniel Veillard39c7d712000-09-10 16:14:55 +0000183#endif
184 newcontent[sizeof(newcontent) - 1] = 0;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000185 }
186
187 cur = doc->children;
188
189 /*
190 * Search the html
191 */
192 while (cur != NULL) {
193 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000194 if (xmlStrEqual(cur->name, BAD_CAST"html"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000195 break;
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000196 if (xmlStrEqual(cur->name, BAD_CAST"body")) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000197 if (encoding == NULL)
198 return(0);
199 meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL);
200 xmlAddPrevSibling(cur, meta);
201 cur = meta;
202 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
203 xmlAddChild(cur, meta);
204 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
205 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
206 return(0);
207 }
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000208 if (xmlStrEqual(cur->name, BAD_CAST"head"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000209 goto found_head;
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000210 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000211 goto found_meta;
212 }
213 cur = cur->next;
214 }
215 if (cur == NULL)
216 return(-1);
217 cur = cur->children;
218
219 /*
220 * Search the head
221 */
222 while (cur != NULL) {
223 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000224 if (xmlStrEqual(cur->name, BAD_CAST"head"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000225 break;
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000226 if (xmlStrEqual(cur->name, BAD_CAST"body")) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000227 if (encoding == NULL)
228 return(0);
229 meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL);
230 xmlAddPrevSibling(cur, meta);
231 cur = meta;
232 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
233 xmlAddChild(cur, meta);
234 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
235 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
236 return(0);
237 }
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000238 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000239 goto found_meta;
240 }
241 cur = cur->next;
242 }
243 if (cur == NULL)
244 return(-1);
245found_head:
246 if (cur->children == NULL) {
247 if (encoding == NULL)
248 return(0);
249 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
250 xmlAddChild(cur, meta);
251 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
252 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
253 return(0);
254 }
255 cur = cur->children;
256
257found_meta:
258 if (encoding != NULL) {
259 /*
260 * Create a new Meta element with the right aatributes
261 */
262
263 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
264 xmlAddPrevSibling(cur, meta);
265 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
266 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
267 }
268
269 /*
270 * Search and destroy all the remaining the meta elements carrying
271 * encoding informations
272 */
273 while (cur != NULL) {
274 if (cur->name != NULL) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +0000275 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000276 xmlAttrPtr attr = cur->properties;
277 int http;
278 const xmlChar *value;
279
280 content = NULL;
281 http = 0;
282 while (attr != NULL) {
283 if ((attr->children != NULL) &&
284 (attr->children->type == XML_TEXT_NODE) &&
285 (attr->children->next == NULL)) {
286#ifndef XML_USE_BUFFER_CONTENT
287 value = attr->children->content;
288#else
289 value = xmlBufferContent(attr->children->content);
290#endif
Daniel Veillardb656ebe2000-09-22 13:51:48 +0000291 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
292 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000293 http = 1;
Daniel Veillardb656ebe2000-09-22 13:51:48 +0000294 else if ((value != NULL)
295 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000296 content = value;
297 if ((http != 0) && (content != NULL))
298 break;
299 }
300 attr = attr->next;
301 }
302 if ((http != 0) && (content != NULL)) {
303 meta = cur;
304 cur = cur->next;
305 xmlUnlinkNode(meta);
306 xmlFreeNode(meta);
307 continue;
308 }
309
310 }
311 }
312 cur = cur->next;
313 }
314 return(0);
315}
316
317/************************************************************************
318 * *
319 * Dumping HTML tree content to a simple buffer *
320 * *
321 ************************************************************************/
322
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000323static void
324htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
325
Daniel Veillard167b5091999-07-07 04:19:20 +0000326/**
327 * htmlDtdDump:
328 * @buf: the HTML buffer output
329 * @doc: the document
330 *
331 * Dump the HTML document DTD, if any.
332 */
333static void
334htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
335 xmlDtdPtr cur = doc->intSubset;
336
337 if (cur == NULL) {
338 fprintf(stderr, "htmlDtdDump : no internal subset\n");
339 return;
340 }
341 xmlBufferWriteChar(buf, "<!DOCTYPE ");
342 xmlBufferWriteCHAR(buf, cur->name);
343 if (cur->ExternalID != NULL) {
344 xmlBufferWriteChar(buf, " PUBLIC ");
345 xmlBufferWriteQuotedString(buf, cur->ExternalID);
Daniel Veillard1566d3a1999-07-15 14:24:29 +0000346 if (cur->SystemID != NULL) {
347 xmlBufferWriteChar(buf, " ");
348 xmlBufferWriteQuotedString(buf, cur->SystemID);
349 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000350 } else if (cur->SystemID != NULL) {
351 xmlBufferWriteChar(buf, " SYSTEM ");
352 xmlBufferWriteQuotedString(buf, cur->SystemID);
353 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000354 xmlBufferWriteChar(buf, ">\n");
355}
356
357/**
358 * htmlAttrDump:
359 * @buf: the HTML buffer output
360 * @doc: the document
361 * @cur: the attribute pointer
362 *
363 * Dump an HTML attribute
364 */
365static void
366htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000367 xmlChar *value;
Daniel Veillard167b5091999-07-07 04:19:20 +0000368
369 if (cur == NULL) {
370 fprintf(stderr, "htmlAttrDump : property == NULL\n");
371 return;
372 }
373 xmlBufferWriteChar(buf, " ");
374 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillardbe803962000-06-28 23:40:59 +0000375 if (cur->children != NULL) {
376 value = xmlNodeListGetString(doc, cur->children, 0);
377 if (value) {
378 xmlBufferWriteChar(buf, "=");
379 xmlBufferWriteQuotedString(buf, value);
380 xmlFree(value);
381 } else {
382 xmlBufferWriteChar(buf, "=\"\"");
383 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000384 }
385}
386
387/**
388 * htmlAttrListDump:
389 * @buf: the HTML buffer output
390 * @doc: the document
391 * @cur: the first attribute pointer
392 *
393 * Dump a list of HTML attributes
394 */
395static void
396htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
397 if (cur == NULL) {
398 fprintf(stderr, "htmlAttrListDump : property == NULL\n");
399 return;
400 }
401 while (cur != NULL) {
402 htmlAttrDump(buf, doc, cur);
403 cur = cur->next;
404 }
405}
406
407
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000408void
Daniel Veillard82150d81999-07-07 07:32:15 +0000409htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000410/**
411 * htmlNodeListDump:
412 * @buf: the HTML buffer output
413 * @doc: the document
414 * @cur: the first node
Daniel Veillard167b5091999-07-07 04:19:20 +0000415 *
416 * Dump an HTML node list, recursive behaviour,children are printed too.
417 */
418static void
Daniel Veillard82150d81999-07-07 07:32:15 +0000419htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000420 if (cur == NULL) {
421 fprintf(stderr, "htmlNodeListDump : node == NULL\n");
422 return;
423 }
424 while (cur != NULL) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000425 htmlNodeDump(buf, doc, cur);
Daniel Veillard167b5091999-07-07 04:19:20 +0000426 cur = cur->next;
427 }
428}
429
430/**
431 * htmlNodeDump:
432 * @buf: the HTML buffer output
433 * @doc: the document
434 * @cur: the current node
Daniel Veillard167b5091999-07-07 04:19:20 +0000435 *
436 * Dump an HTML node, recursive behaviour,children are printed too.
437 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000438void
Daniel Veillard82150d81999-07-07 07:32:15 +0000439htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000440 htmlElemDescPtr info;
Daniel Veillard167b5091999-07-07 04:19:20 +0000441
442 if (cur == NULL) {
443 fprintf(stderr, "htmlNodeDump : node == NULL\n");
444 return;
445 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000446 /*
447 * Special cases.
448 */
Daniel Veillardd83eb822000-06-30 18:39:56 +0000449 if (cur->type == XML_DTD_NODE)
450 return;
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000451 if (cur->type == XML_HTML_DOCUMENT_NODE) {
452 htmlDocContentDump(buf, (xmlDocPtr) cur);
453 return;
454 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000455 if (cur->type == HTML_TEXT_NODE) {
456 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000457 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000458
Daniel Veillardd293fd11999-12-01 09:51:45 +0000459#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000460 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000461#else
462 buffer = xmlEncodeEntitiesReentrant(doc,
463 xmlBufferContent(cur->content));
464#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000465 if (buffer != NULL) {
466 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000467 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000468 }
469 }
470 return;
471 }
472 if (cur->type == HTML_COMMENT_NODE) {
473 if (cur->content != NULL) {
474 xmlBufferWriteChar(buf, "<!--");
Daniel Veillardd293fd11999-12-01 09:51:45 +0000475#ifndef XML_USE_BUFFER_CONTENT
Daniel Veillard167b5091999-07-07 04:19:20 +0000476 xmlBufferWriteCHAR(buf, cur->content);
Daniel Veillardd293fd11999-12-01 09:51:45 +0000477#else
478 xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
479#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000480 xmlBufferWriteChar(buf, "-->");
481 }
482 return;
483 }
484 if (cur->type == HTML_ENTITY_REF_NODE) {
485 xmlBufferWriteChar(buf, "&");
486 xmlBufferWriteCHAR(buf, cur->name);
487 xmlBufferWriteChar(buf, ";");
488 return;
489 }
490
Daniel Veillard82150d81999-07-07 07:32:15 +0000491 /*
492 * Get specific HTmL info for taht node.
493 */
494 info = htmlTagLookup(cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000495
Daniel Veillard82150d81999-07-07 07:32:15 +0000496 xmlBufferWriteChar(buf, "<");
Daniel Veillard167b5091999-07-07 04:19:20 +0000497 xmlBufferWriteCHAR(buf, cur->name);
Daniel Veillard167b5091999-07-07 04:19:20 +0000498 if (cur->properties != NULL)
499 htmlAttrListDump(buf, doc, cur->properties);
500
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000501 if ((info != NULL) && (info->empty)) {
Daniel Veillard82150d81999-07-07 07:32:15 +0000502 xmlBufferWriteChar(buf, ">");
503 if (cur->next != NULL) {
504 if ((cur->next->type != HTML_TEXT_NODE) &&
505 (cur->next->type != HTML_ENTITY_REF_NODE))
506 xmlBufferWriteChar(buf, "\n");
507 }
508 return;
509 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000510 if ((cur->content == NULL) && (cur->children == NULL)) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000511 if ((info != NULL) && (info->endTag != 0))
Daniel Veillard82150d81999-07-07 07:32:15 +0000512 xmlBufferWriteChar(buf, ">");
513 else {
514 xmlBufferWriteChar(buf, "></");
515 xmlBufferWriteCHAR(buf, cur->name);
516 xmlBufferWriteChar(buf, ">");
517 }
518 if (cur->next != NULL) {
519 if ((cur->next->type != HTML_TEXT_NODE) &&
520 (cur->next->type != HTML_ENTITY_REF_NODE))
521 xmlBufferWriteChar(buf, "\n");
522 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000523 return;
524 }
525 xmlBufferWriteChar(buf, ">");
526 if (cur->content != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000527 xmlChar *buffer;
Daniel Veillard167b5091999-07-07 04:19:20 +0000528
Daniel Veillardd293fd11999-12-01 09:51:45 +0000529#ifndef XML_USE_BUFFER_CONTENT
530 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
531#else
532 buffer = xmlEncodeEntitiesReentrant(doc,
533 xmlBufferContent(cur->content));
534#endif
Daniel Veillard167b5091999-07-07 04:19:20 +0000535 if (buffer != NULL) {
536 xmlBufferWriteCHAR(buf, buffer);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000537 xmlFree(buffer);
Daniel Veillard167b5091999-07-07 04:19:20 +0000538 }
539 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000540 if (cur->children != NULL) {
541 if ((cur->children->type != HTML_TEXT_NODE) &&
542 (cur->children->type != HTML_ENTITY_REF_NODE) &&
543 (cur->children != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000544 xmlBufferWriteChar(buf, "\n");
Daniel Veillardcf461992000-03-14 18:30:20 +0000545 htmlNodeListDump(buf, doc, cur->children);
Daniel Veillard82150d81999-07-07 07:32:15 +0000546 if ((cur->last->type != HTML_TEXT_NODE) &&
Chris Lahey6dff2141999-12-01 09:51:45 +0000547 (cur->last->type != HTML_ENTITY_REF_NODE) &&
Daniel Veillardcf461992000-03-14 18:30:20 +0000548 (cur->children != cur->last))
Daniel Veillard82150d81999-07-07 07:32:15 +0000549 xmlBufferWriteChar(buf, "\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000550 }
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000551 if (!htmlIsAutoClosed(doc, cur)) {
552 xmlBufferWriteChar(buf, "</");
553 xmlBufferWriteCHAR(buf, cur->name);
554 xmlBufferWriteChar(buf, ">");
555 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000556 if (cur->next != NULL) {
557 if ((cur->next->type != HTML_TEXT_NODE) &&
558 (cur->next->type != HTML_ENTITY_REF_NODE))
559 xmlBufferWriteChar(buf, "\n");
560 }
Daniel Veillard167b5091999-07-07 04:19:20 +0000561}
562
563/**
Daniel Veillard5feb8492000-02-02 17:15:36 +0000564 * htmlNodeDumpFile:
565 * @out: the FILE pointer
566 * @doc: the document
567 * @cur: the current node
568 *
569 * Dump an HTML node, recursive behaviour,children are printed too.
570 */
571void
572htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
573 xmlBufferPtr buf;
574
575 buf = xmlBufferCreate();
576 if (buf == NULL) return;
577 htmlNodeDump(buf, doc, cur);
578 xmlBufferDump(out, buf);
579 xmlBufferFree(buf);
580}
581
582/**
Daniel Veillard167b5091999-07-07 04:19:20 +0000583 * htmlDocContentDump:
584 * @buf: the HTML buffer output
585 * @cur: the document
586 *
587 * Dump an HTML document.
588 */
589static void
590htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000591 int type;
592
593 /*
594 * force to output the stuff as HTML, especially for entities
595 */
596 type = cur->type;
597 cur->type = XML_HTML_DOCUMENT_NODE;
Daniel Veillard167b5091999-07-07 04:19:20 +0000598 if (cur->intSubset != NULL)
599 htmlDtdDump(buf, cur);
Daniel Veillard5cb5ab81999-12-21 15:35:29 +0000600 else {
601 /* Default to HTML-4.0 transitionnal @@@@ */
602 xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
603
604 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000605 if (cur->children != NULL) {
606 htmlNodeListDump(buf, cur, cur->children);
Daniel Veillard167b5091999-07-07 04:19:20 +0000607 }
Daniel Veillard82150d81999-07-07 07:32:15 +0000608 xmlBufferWriteChar(buf, "\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000609 cur->type = (xmlElementType) type;
Daniel Veillard167b5091999-07-07 04:19:20 +0000610}
611
612/**
613 * htmlDocDumpMemory:
614 * @cur: the document
615 * @mem: OUT: the memory pointer
616 * @size: OUT: the memory lenght
617 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000618 * Dump an HTML document in memory and return the xmlChar * and it's size.
Daniel Veillard167b5091999-07-07 04:19:20 +0000619 * It's up to the caller to free the memory.
620 */
621void
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000622htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
Daniel Veillard167b5091999-07-07 04:19:20 +0000623 xmlBufferPtr buf;
624
625 if (cur == NULL) {
626#ifdef DEBUG_TREE
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000627 fprintf(stderr, "htmlxmlDocDumpMemory : document == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000628#endif
629 *mem = NULL;
630 *size = 0;
631 return;
632 }
633 buf = xmlBufferCreate();
634 if (buf == NULL) {
635 *mem = NULL;
636 *size = 0;
637 return;
638 }
639 htmlDocContentDump(buf, cur);
640 *mem = buf->content;
641 *size = buf->use;
642 memset(buf, -1, sizeof(xmlBuffer));
Daniel Veillard6454aec1999-09-02 22:04:43 +0000643 xmlFree(buf);
Daniel Veillard167b5091999-07-07 04:19:20 +0000644}
645
646
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000647/************************************************************************
648 * *
649 * Dumping HTML tree content to an I/O output buffer *
650 * *
651 ************************************************************************/
652
653static void
654htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding);
655
656/**
657 * htmlDtdDump:
658 * @buf: the HTML buffer output
659 * @doc: the document
660 *
661 * Dump the HTML document DTD, if any.
662 */
663static void
664htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, const char *encoding) {
665 xmlDtdPtr cur = doc->intSubset;
666
667 if (cur == NULL) {
668 fprintf(stderr, "htmlDtdDump : no internal subset\n");
669 return;
670 }
671 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
672 xmlOutputBufferWriteString(buf, (const char *)cur->name);
673 if (cur->ExternalID != NULL) {
674 xmlOutputBufferWriteString(buf, " PUBLIC ");
675 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
676 if (cur->SystemID != NULL) {
677 xmlOutputBufferWriteString(buf, " ");
678 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
679 }
680 } else if (cur->SystemID != NULL) {
681 xmlOutputBufferWriteString(buf, " SYSTEM ");
682 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
683 }
684 xmlOutputBufferWriteString(buf, ">\n");
685}
686
687/**
688 * htmlAttrDump:
689 * @buf: the HTML buffer output
690 * @doc: the document
691 * @cur: the attribute pointer
692 *
693 * Dump an HTML attribute
694 */
695static void
696htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
697 xmlChar *value;
698
699 if (cur == NULL) {
700 fprintf(stderr, "htmlAttrDump : property == NULL\n");
701 return;
702 }
703 xmlOutputBufferWriteString(buf, " ");
704 xmlOutputBufferWriteString(buf, (const char *)cur->name);
705 if (cur->children != NULL) {
706 value = xmlNodeListGetString(doc, cur->children, 0);
707 if (value) {
708 xmlOutputBufferWriteString(buf, "=");
709 xmlBufferWriteQuotedString(buf->buffer, value);
710 xmlFree(value);
711 } else {
712 xmlOutputBufferWriteString(buf, "=\"\"");
713 }
714 }
715}
716
717/**
718 * htmlAttrListDump:
719 * @buf: the HTML buffer output
720 * @doc: the document
721 * @cur: the first attribute pointer
722 *
723 * Dump a list of HTML attributes
724 */
725static void
726htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
727 if (cur == NULL) {
728 fprintf(stderr, "htmlAttrListDump : property == NULL\n");
729 return;
730 }
731 while (cur != NULL) {
732 htmlAttrDumpOutput(buf, doc, cur, encoding);
733 cur = cur->next;
734 }
735}
736
737
738void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
739 xmlNodePtr cur, const char *encoding);
740
741/**
742 * htmlNodeListDump:
743 * @buf: the HTML buffer output
744 * @doc: the document
745 * @cur: the first node
746 *
747 * Dump an HTML node list, recursive behaviour,children are printed too.
748 */
749static void
750htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
751 if (cur == NULL) {
752 fprintf(stderr, "htmlNodeListDump : node == NULL\n");
753 return;
754 }
755 while (cur != NULL) {
756 htmlNodeDumpOutput(buf, doc, cur, encoding);
757 cur = cur->next;
758 }
759}
760
761/**
762 * htmlNodeDump:
763 * @buf: the HTML buffer output
764 * @doc: the document
765 * @cur: the current node
766 *
767 * Dump an HTML node, recursive behaviour,children are printed too.
768 */
769void
770htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
771 htmlElemDescPtr info;
772
773 if (cur == NULL) {
774 fprintf(stderr, "htmlNodeDump : node == NULL\n");
775 return;
776 }
777 /*
778 * Special cases.
779 */
780 if (cur->type == XML_DTD_NODE)
781 return;
782 if (cur->type == XML_HTML_DOCUMENT_NODE) {
783 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
784 return;
785 }
786 if (cur->type == HTML_TEXT_NODE) {
787 if (cur->content != NULL) {
788 xmlChar *buffer;
789
790#ifndef XML_USE_BUFFER_CONTENT
791 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
792#else
793 buffer = xmlEncodeEntitiesReentrant(doc,
794 xmlBufferContent(cur->content));
795#endif
796 if (buffer != NULL) {
797 xmlOutputBufferWriteString(buf, (const char *)buffer);
798 xmlFree(buffer);
799 }
800 }
801 return;
802 }
803 if (cur->type == HTML_COMMENT_NODE) {
804 if (cur->content != NULL) {
805 xmlOutputBufferWriteString(buf, "<!--");
806#ifndef XML_USE_BUFFER_CONTENT
807 xmlOutputBufferWriteString(buf, (const char *)cur->content);
808#else
809 xmlOutputBufferWriteString(buf, xmlBufferContent(cur->content));
810#endif
811 xmlOutputBufferWriteString(buf, "-->");
812 }
813 return;
814 }
815 if (cur->type == HTML_ENTITY_REF_NODE) {
816 xmlOutputBufferWriteString(buf, "&");
817 xmlOutputBufferWriteString(buf, (const char *)cur->name);
818 xmlOutputBufferWriteString(buf, ";");
819 return;
820 }
821
822 /*
823 * Get specific HTmL info for taht node.
824 */
825 info = htmlTagLookup(cur->name);
826
827 xmlOutputBufferWriteString(buf, "<");
828 xmlOutputBufferWriteString(buf, (const char *)cur->name);
829 if (cur->properties != NULL)
830 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
831
832 if ((info != NULL) && (info->empty)) {
833 xmlOutputBufferWriteString(buf, ">");
834 if (cur->next != NULL) {
835 if ((cur->next->type != HTML_TEXT_NODE) &&
836 (cur->next->type != HTML_ENTITY_REF_NODE))
837 xmlOutputBufferWriteString(buf, "\n");
838 }
839 return;
840 }
841 if ((cur->content == NULL) && (cur->children == NULL)) {
842 if ((info != NULL) && (info->endTag != 0))
843 xmlOutputBufferWriteString(buf, ">");
844 else {
845 xmlOutputBufferWriteString(buf, "></");
846 xmlOutputBufferWriteString(buf, (const char *)cur->name);
847 xmlOutputBufferWriteString(buf, ">");
848 }
849 if (cur->next != NULL) {
850 if ((cur->next->type != HTML_TEXT_NODE) &&
851 (cur->next->type != HTML_ENTITY_REF_NODE))
852 xmlOutputBufferWriteString(buf, "\n");
853 }
854 return;
855 }
856 xmlOutputBufferWriteString(buf, ">");
857 if (cur->content != NULL) {
858#if 0
859 xmlChar *buffer;
860
861#ifndef XML_USE_BUFFER_CONTENT
862 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
863#else
864 buffer = xmlEncodeEntitiesReentrant(doc,
865 xmlBufferContent(cur->content));
866#endif
867 if (buffer != NULL) {
868 xmlOutputBufferWriteString(buf, buffer);
869 xmlFree(buffer);
870 }
871#else
872 /*
873 * Uses the OutputBuffer property to automatically convert
874 * invalids to charrefs
875 */
876
877#ifndef XML_USE_BUFFER_CONTENT
878 xmlOutputBufferWriteString(buf, (const char *) cur->content);
879#else
880 xmlOutputBufferWriteString(buf,
881 (const char *) xmlBufferContent(cur->content));
882#endif
883#endif
884 }
885 if (cur->children != NULL) {
886 if ((cur->children->type != HTML_TEXT_NODE) &&
887 (cur->children->type != HTML_ENTITY_REF_NODE) &&
888 (cur->children != cur->last))
889 xmlOutputBufferWriteString(buf, "\n");
890 htmlNodeListDumpOutput(buf, doc, cur->children, encoding);
891 if ((cur->last->type != HTML_TEXT_NODE) &&
892 (cur->last->type != HTML_ENTITY_REF_NODE) &&
893 (cur->children != cur->last))
894 xmlOutputBufferWriteString(buf, "\n");
895 }
896 if (!htmlIsAutoClosed(doc, cur)) {
897 xmlOutputBufferWriteString(buf, "</");
898 xmlOutputBufferWriteString(buf, (const char *)cur->name);
899 xmlOutputBufferWriteString(buf, ">");
900 }
901 if (cur->next != NULL) {
902 if ((cur->next->type != HTML_TEXT_NODE) &&
903 (cur->next->type != HTML_ENTITY_REF_NODE))
904 xmlOutputBufferWriteString(buf, "\n");
905 }
906}
907
908/**
909 * htmlDocContentDump:
910 * @buf: the HTML buffer output
911 * @cur: the document
912 *
913 * Dump an HTML document.
914 */
915static void
916htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding) {
917 int type;
918
919 /*
920 * force to output the stuff as HTML, especially for entities
921 */
922 type = cur->type;
923 cur->type = XML_HTML_DOCUMENT_NODE;
924 if (cur->intSubset != NULL)
925 htmlDtdDumpOutput(buf, cur, NULL);
926 else {
927 /* Default to HTML-4.0 transitionnal @@@@ */
928 xmlOutputBufferWriteString(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
929
930 }
931 if (cur->children != NULL) {
932 htmlNodeListDumpOutput(buf, cur, cur->children, encoding);
933 }
934 xmlOutputBufferWriteString(buf, "\n");
935 cur->type = (xmlElementType) type;
936}
937
938
939/************************************************************************
940 * *
941 * Saving functions front-ends *
942 * *
943 ************************************************************************/
944
Daniel Veillard167b5091999-07-07 04:19:20 +0000945/**
946 * htmlDocDump:
947 * @f: the FILE*
948 * @cur: the document
949 *
950 * Dump an HTML document to an open FILE.
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000951 *
952 * returns: the number of byte written or -1 in case of failure.
Daniel Veillard167b5091999-07-07 04:19:20 +0000953 */
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000954int
Daniel Veillard167b5091999-07-07 04:19:20 +0000955htmlDocDump(FILE *f, xmlDocPtr cur) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000956 xmlOutputBufferPtr buf;
957 xmlCharEncodingHandlerPtr handler = NULL;
958 const char *encoding;
959 int ret;
Daniel Veillard167b5091999-07-07 04:19:20 +0000960
961 if (cur == NULL) {
962#ifdef DEBUG_TREE
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000963 fprintf(stderr, "htmlDocDump : document == NULL\n");
Daniel Veillard167b5091999-07-07 04:19:20 +0000964#endif
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000965 return(-1);
Daniel Veillard167b5091999-07-07 04:19:20 +0000966 }
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000967
968 encoding = (const char *) htmlGetMetaEncoding(cur);
969
970 if (encoding != NULL) {
971 xmlCharEncoding enc;
972
973 enc = xmlParseCharEncoding(encoding);
974 if (enc != cur->charset) {
975 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
976 /*
977 * Not supported yet
978 */
979 return(-1);
980 }
981
982 handler = xmlFindCharEncodingHandler(encoding);
983 if (handler == NULL)
984 return(-1);
985 }
986 }
987
988 /*
989 * Fallback to HTML or ASCII when the encoding is unspecified
990 */
991 if (handler == NULL)
992 handler = xmlFindCharEncodingHandler("HTML");
993 if (handler == NULL)
994 handler = xmlFindCharEncodingHandler("ascii");
995
996 buf = xmlOutputBufferCreateFile(f, handler);
997 if (buf == NULL) return(-1);
998 htmlDocContentDumpOutput(buf, cur, NULL);
999
1000 ret = xmlOutputBufferClose(buf);
1001 return(ret);
Daniel Veillard167b5091999-07-07 04:19:20 +00001002}
1003
1004/**
1005 * htmlSaveFile:
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001006 * @filename: the filename (or URL)
Daniel Veillard167b5091999-07-07 04:19:20 +00001007 * @cur: the document
1008 *
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001009 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1010 * used.
Daniel Veillard167b5091999-07-07 04:19:20 +00001011 * returns: the number of byte written or -1 in case of failure.
1012 */
1013int
1014htmlSaveFile(const char *filename, xmlDocPtr cur) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001015 xmlOutputBufferPtr buf;
1016 xmlCharEncodingHandlerPtr handler = NULL;
1017 const char *encoding;
Daniel Veillard167b5091999-07-07 04:19:20 +00001018 int ret;
1019
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001020 encoding = (const char *) htmlGetMetaEncoding(cur);
1021
1022 if (encoding != NULL) {
1023 xmlCharEncoding enc;
1024
1025 enc = xmlParseCharEncoding(encoding);
1026 if (enc != cur->charset) {
1027 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1028 /*
1029 * Not supported yet
1030 */
1031 return(-1);
1032 }
1033
1034 handler = xmlFindCharEncodingHandler(encoding);
1035 if (handler == NULL)
1036 return(-1);
1037 }
1038 }
1039
1040 /*
1041 * Fallback to HTML or ASCII when the encoding is unspecified
1042 */
1043 if (handler == NULL)
1044 handler = xmlFindCharEncodingHandler("HTML");
1045 if (handler == NULL)
1046 handler = xmlFindCharEncodingHandler("ascii");
1047
Daniel Veillard167b5091999-07-07 04:19:20 +00001048 /*
1049 * save the content to a temp buffer.
1050 */
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001051 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
Daniel Veillard167b5091999-07-07 04:19:20 +00001052 if (buf == NULL) return(0);
Daniel Veillard167b5091999-07-07 04:19:20 +00001053
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001054 htmlDocContentDumpOutput(buf, cur, NULL);
Daniel Veillard167b5091999-07-07 04:19:20 +00001055
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001056 ret = xmlOutputBufferClose(buf);
1057 return(ret);
Daniel Veillard167b5091999-07-07 04:19:20 +00001058}
1059
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001060/**
1061 * htmlSaveFileEnc:
1062 * @filename: the filename
1063 * @cur: the document
1064 *
1065 * Dump an HTML document to a file using a given encoding.
1066 *
1067 * returns: the number of byte written or -1 in case of failure.
1068 */
1069int
1070htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1071 xmlOutputBufferPtr buf;
1072 xmlCharEncodingHandlerPtr handler = NULL;
1073 int ret;
1074
1075 if (encoding != NULL) {
1076 xmlCharEncoding enc;
1077
1078 enc = xmlParseCharEncoding(encoding);
1079 if (enc != cur->charset) {
1080 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1081 /*
1082 * Not supported yet
1083 */
1084 return(-1);
1085 }
1086
1087 handler = xmlFindCharEncodingHandler(encoding);
1088 if (handler == NULL)
1089 return(-1);
1090 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1091 }
1092 }
1093
1094 /*
1095 * Fallback to HTML or ASCII when the encoding is unspecified
1096 */
1097 if (handler == NULL)
1098 handler = xmlFindCharEncodingHandler("HTML");
1099 if (handler == NULL)
1100 handler = xmlFindCharEncodingHandler("ascii");
1101
1102 /*
1103 * save the content to a temp buffer.
1104 */
1105 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1106 if (buf == NULL) return(0);
1107
1108 htmlDocContentDumpOutput(buf, cur, encoding);
1109
1110 ret = xmlOutputBufferClose(buf);
1111 return(ret);
1112}
Daniel Veillard361d8452000-04-03 19:48:13 +00001113#endif /* LIBXML_HTML_ENABLED */