| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 1 | /* | 
 | 2 |  * HTMLtree.c : implementation of access functions for an HTML tree. | 
 | 3 |  * | 
 | 4 |  * See Copyright for the status of this software. | 
 | 5 |  * | 
 | 6 |  * Daniel.Veillard@w3.org | 
 | 7 |  */ | 
 | 8 |  | 
 | 9 | #include "config.h" | 
 | 10 | #include <stdio.h> | 
 | 11 | #include <ctype.h> | 
 | 12 | #include <stdlib.h> | 
 | 13 | #include <string.h> /* for memset() only ! */ | 
 | 14 |  | 
| Daniel Veillard | 6454aec | 1999-09-02 22:04:43 +0000 | [diff] [blame^] | 15 | #include "xmlmemory.h" | 
| Daniel Veillard | 82150d8 | 1999-07-07 07:32:15 +0000 | [diff] [blame] | 16 | #include "HTMLparser.h" | 
 | 17 | #include "HTMLtree.h" | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 18 | #include "entities.h" | 
 | 19 | #include "valid.h" | 
 | 20 |  | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 21 | /** | 
 | 22 |  * htmlDtdDump: | 
 | 23 |  * @buf:  the HTML buffer output | 
 | 24 |  * @doc:  the document | 
 | 25 |  *  | 
 | 26 |  * Dump the HTML document DTD, if any. | 
 | 27 |  */ | 
 | 28 | static void | 
 | 29 | htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) { | 
 | 30 |     xmlDtdPtr cur = doc->intSubset; | 
 | 31 |  | 
 | 32 |     if (cur == NULL) { | 
 | 33 |         fprintf(stderr, "htmlDtdDump : no internal subset\n"); | 
 | 34 | 	return; | 
 | 35 |     } | 
 | 36 |     xmlBufferWriteChar(buf, "<!DOCTYPE "); | 
 | 37 |     xmlBufferWriteCHAR(buf, cur->name); | 
 | 38 |     if (cur->ExternalID != NULL) { | 
 | 39 | 	xmlBufferWriteChar(buf, " PUBLIC "); | 
 | 40 | 	xmlBufferWriteQuotedString(buf, cur->ExternalID); | 
| Daniel Veillard | 1566d3a | 1999-07-15 14:24:29 +0000 | [diff] [blame] | 41 | 	if (cur->SystemID != NULL) { | 
 | 42 | 	    xmlBufferWriteChar(buf, " "); | 
 | 43 | 	    xmlBufferWriteQuotedString(buf, cur->SystemID); | 
 | 44 | 	}  | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 45 |     }  else if (cur->SystemID != NULL) { | 
 | 46 | 	xmlBufferWriteChar(buf, " SYSTEM "); | 
 | 47 | 	xmlBufferWriteQuotedString(buf, cur->SystemID); | 
 | 48 |     } | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 49 |     xmlBufferWriteChar(buf, ">\n"); | 
 | 50 | } | 
 | 51 |  | 
 | 52 | /** | 
 | 53 |  * htmlAttrDump: | 
 | 54 |  * @buf:  the HTML buffer output | 
 | 55 |  * @doc:  the document | 
 | 56 |  * @cur:  the attribute pointer | 
 | 57 |  * | 
 | 58 |  * Dump an HTML attribute | 
 | 59 |  */ | 
 | 60 | static void | 
 | 61 | htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) { | 
 | 62 |     CHAR *value; | 
 | 63 |  | 
 | 64 |     if (cur == NULL) { | 
 | 65 |         fprintf(stderr, "htmlAttrDump : property == NULL\n"); | 
 | 66 | 	return; | 
 | 67 |     } | 
 | 68 |     xmlBufferWriteChar(buf, " "); | 
 | 69 |     xmlBufferWriteCHAR(buf, cur->name); | 
 | 70 |     value = xmlNodeListGetString(doc, cur->val, 0); | 
 | 71 |     if (value) { | 
 | 72 | 	xmlBufferWriteChar(buf, "="); | 
 | 73 | 	xmlBufferWriteQuotedString(buf, value); | 
| Daniel Veillard | 6454aec | 1999-09-02 22:04:43 +0000 | [diff] [blame^] | 74 | 	xmlFree(value); | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 75 |     } else  { | 
 | 76 | 	xmlBufferWriteChar(buf, "=\"\""); | 
 | 77 |     } | 
 | 78 | } | 
 | 79 |  | 
 | 80 | /** | 
 | 81 |  * htmlAttrListDump: | 
 | 82 |  * @buf:  the HTML buffer output | 
 | 83 |  * @doc:  the document | 
 | 84 |  * @cur:  the first attribute pointer | 
 | 85 |  * | 
 | 86 |  * Dump a list of HTML attributes | 
 | 87 |  */ | 
 | 88 | static void | 
 | 89 | htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) { | 
 | 90 |     if (cur == NULL) { | 
 | 91 |         fprintf(stderr, "htmlAttrListDump : property == NULL\n"); | 
 | 92 | 	return; | 
 | 93 |     } | 
 | 94 |     while (cur != NULL) { | 
 | 95 |         htmlAttrDump(buf, doc, cur); | 
 | 96 | 	cur = cur->next; | 
 | 97 |     } | 
 | 98 | } | 
 | 99 |  | 
 | 100 |  | 
 | 101 | static void | 
| Daniel Veillard | 82150d8 | 1999-07-07 07:32:15 +0000 | [diff] [blame] | 102 | htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur); | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 103 | /** | 
 | 104 |  * htmlNodeListDump: | 
 | 105 |  * @buf:  the HTML buffer output | 
 | 106 |  * @doc:  the document | 
 | 107 |  * @cur:  the first node | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 108 |  * | 
 | 109 |  * Dump an HTML node list, recursive behaviour,children are printed too. | 
 | 110 |  */ | 
 | 111 | static void | 
| Daniel Veillard | 82150d8 | 1999-07-07 07:32:15 +0000 | [diff] [blame] | 112 | htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 113 |     if (cur == NULL) { | 
 | 114 |         fprintf(stderr, "htmlNodeListDump : node == NULL\n"); | 
 | 115 | 	return; | 
 | 116 |     } | 
 | 117 |     while (cur != NULL) { | 
| Daniel Veillard | 82150d8 | 1999-07-07 07:32:15 +0000 | [diff] [blame] | 118 |         htmlNodeDump(buf, doc, cur); | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 119 | 	cur = cur->next; | 
 | 120 |     } | 
 | 121 | } | 
 | 122 |  | 
 | 123 | /** | 
 | 124 |  * htmlNodeDump: | 
 | 125 |  * @buf:  the HTML buffer output | 
 | 126 |  * @doc:  the document | 
 | 127 |  * @cur:  the current node | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 128 |  * | 
 | 129 |  * Dump an HTML node, recursive behaviour,children are printed too. | 
 | 130 |  */ | 
 | 131 | static void | 
| Daniel Veillard | 82150d8 | 1999-07-07 07:32:15 +0000 | [diff] [blame] | 132 | htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { | 
| Daniel Veillard | 82150d8 | 1999-07-07 07:32:15 +0000 | [diff] [blame] | 133 |     htmlElemDescPtr info; | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 134 |  | 
 | 135 |     if (cur == NULL) { | 
 | 136 |         fprintf(stderr, "htmlNodeDump : node == NULL\n"); | 
 | 137 | 	return; | 
 | 138 |     } | 
| Daniel Veillard | 82150d8 | 1999-07-07 07:32:15 +0000 | [diff] [blame] | 139 |     /* | 
 | 140 |      * Special cases. | 
 | 141 |      */ | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 142 |     if (cur->type == HTML_TEXT_NODE) { | 
 | 143 | 	if (cur->content != NULL) { | 
 | 144 |             CHAR *buffer; | 
 | 145 |  | 
| Daniel Veillard | 82150d8 | 1999-07-07 07:32:15 +0000 | [diff] [blame] | 146 | 	    /* uses the HTML encoding routine !!!!!!!!!! */ | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 147 |             buffer = xmlEncodeEntitiesReentrant(doc, cur->content); | 
 | 148 | 	    if (buffer != NULL) { | 
 | 149 | 		xmlBufferWriteCHAR(buf, buffer); | 
| Daniel Veillard | 6454aec | 1999-09-02 22:04:43 +0000 | [diff] [blame^] | 150 | 		xmlFree(buffer); | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 151 | 	    } | 
 | 152 | 	} | 
 | 153 | 	return; | 
 | 154 |     } | 
 | 155 |     if (cur->type == HTML_COMMENT_NODE) { | 
 | 156 | 	if (cur->content != NULL) { | 
 | 157 | 	    xmlBufferWriteChar(buf, "<!--"); | 
 | 158 | 	    xmlBufferWriteCHAR(buf, cur->content); | 
 | 159 | 	    xmlBufferWriteChar(buf, "-->"); | 
 | 160 | 	} | 
 | 161 | 	return; | 
 | 162 |     } | 
 | 163 |     if (cur->type == HTML_ENTITY_REF_NODE) { | 
 | 164 |         xmlBufferWriteChar(buf, "&"); | 
 | 165 | 	xmlBufferWriteCHAR(buf, cur->name); | 
 | 166 |         xmlBufferWriteChar(buf, ";"); | 
 | 167 | 	return; | 
 | 168 |     } | 
 | 169 |  | 
| Daniel Veillard | 82150d8 | 1999-07-07 07:32:15 +0000 | [diff] [blame] | 170 |     /* | 
 | 171 |      * Get specific HTmL info for taht node. | 
 | 172 |      */ | 
 | 173 |     info = htmlTagLookup(cur->name); | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 174 |  | 
| Daniel Veillard | 82150d8 | 1999-07-07 07:32:15 +0000 | [diff] [blame] | 175 |     xmlBufferWriteChar(buf, "<"); | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 176 |     xmlBufferWriteCHAR(buf, cur->name); | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 177 |     if (cur->properties != NULL) | 
 | 178 |         htmlAttrListDump(buf, doc, cur->properties); | 
 | 179 |  | 
| Daniel Veillard | 82150d8 | 1999-07-07 07:32:15 +0000 | [diff] [blame] | 180 |     if (info->empty) { | 
 | 181 |         xmlBufferWriteChar(buf, ">"); | 
 | 182 | 	if (cur->next != NULL) { | 
 | 183 | 	    if ((cur->next->type != HTML_TEXT_NODE) && | 
 | 184 | 		(cur->next->type != HTML_ENTITY_REF_NODE)) | 
 | 185 | 		xmlBufferWriteChar(buf, "\n"); | 
 | 186 | 	} | 
 | 187 | 	return; | 
 | 188 |     } | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 189 |     if ((cur->content == NULL) && (cur->childs == NULL)) { | 
| Daniel Veillard | 82150d8 | 1999-07-07 07:32:15 +0000 | [diff] [blame] | 190 |         if (info->endTag != 0) | 
 | 191 | 	    xmlBufferWriteChar(buf, ">"); | 
 | 192 | 	else { | 
 | 193 | 	    xmlBufferWriteChar(buf, "></"); | 
 | 194 | 	    xmlBufferWriteCHAR(buf, cur->name); | 
 | 195 | 	    xmlBufferWriteChar(buf, ">"); | 
 | 196 | 	} | 
 | 197 | 	if (cur->next != NULL) { | 
 | 198 | 	    if ((cur->next->type != HTML_TEXT_NODE) && | 
 | 199 | 		(cur->next->type != HTML_ENTITY_REF_NODE)) | 
 | 200 | 		xmlBufferWriteChar(buf, "\n"); | 
 | 201 | 	} | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 202 | 	return; | 
 | 203 |     } | 
 | 204 |     xmlBufferWriteChar(buf, ">"); | 
 | 205 |     if (cur->content != NULL) { | 
 | 206 | 	CHAR *buffer; | 
 | 207 |  | 
 | 208 | 	buffer = xmlEncodeEntitiesReentrant(doc, cur->content); | 
 | 209 | 	if (buffer != NULL) { | 
 | 210 | 	    xmlBufferWriteCHAR(buf, buffer); | 
| Daniel Veillard | 6454aec | 1999-09-02 22:04:43 +0000 | [diff] [blame^] | 211 | 	    xmlFree(buffer); | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 212 | 	} | 
 | 213 |     } | 
 | 214 |     if (cur->childs != NULL) { | 
| Daniel Veillard | 82150d8 | 1999-07-07 07:32:15 +0000 | [diff] [blame] | 215 |         if ((cur->childs->type != HTML_TEXT_NODE) && | 
 | 216 | 	    (cur->childs->type != HTML_ENTITY_REF_NODE)) | 
 | 217 | 	    xmlBufferWriteChar(buf, "\n"); | 
 | 218 | 	htmlNodeListDump(buf, doc, cur->childs); | 
 | 219 |         if ((cur->last->type != HTML_TEXT_NODE) && | 
 | 220 | 	    (cur->last->type != HTML_ENTITY_REF_NODE)) | 
 | 221 | 	    xmlBufferWriteChar(buf, "\n"); | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 222 |     } | 
 | 223 |     xmlBufferWriteChar(buf, "</"); | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 224 |     xmlBufferWriteCHAR(buf, cur->name); | 
| Daniel Veillard | 82150d8 | 1999-07-07 07:32:15 +0000 | [diff] [blame] | 225 |     xmlBufferWriteChar(buf, ">"); | 
 | 226 |     if (cur->next != NULL) { | 
 | 227 |         if ((cur->next->type != HTML_TEXT_NODE) && | 
 | 228 | 	    (cur->next->type != HTML_ENTITY_REF_NODE)) | 
 | 229 | 	    xmlBufferWriteChar(buf, "\n"); | 
 | 230 |     } | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 231 | } | 
 | 232 |  | 
 | 233 | /** | 
 | 234 |  * htmlDocContentDump: | 
 | 235 |  * @buf:  the HTML buffer output | 
 | 236 |  * @cur:  the document | 
 | 237 |  * | 
 | 238 |  * Dump an HTML document. | 
 | 239 |  */ | 
 | 240 | static void | 
 | 241 | htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) { | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 242 |     if (cur->intSubset != NULL) | 
 | 243 |         htmlDtdDump(buf, cur); | 
 | 244 |     if (cur->root != NULL) { | 
| Daniel Veillard | 82150d8 | 1999-07-07 07:32:15 +0000 | [diff] [blame] | 245 |         htmlNodeDump(buf, cur, cur->root); | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 246 |     } | 
| Daniel Veillard | 82150d8 | 1999-07-07 07:32:15 +0000 | [diff] [blame] | 247 |     xmlBufferWriteChar(buf, "\n"); | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 248 | } | 
 | 249 |  | 
 | 250 | /** | 
 | 251 |  * htmlDocDumpMemory: | 
 | 252 |  * @cur:  the document | 
 | 253 |  * @mem:  OUT: the memory pointer | 
 | 254 |  * @size:  OUT: the memory lenght | 
 | 255 |  * | 
 | 256 |  * Dump an HTML document in memory and return the CHAR * and it's size. | 
 | 257 |  * It's up to the caller to free the memory. | 
 | 258 |  */ | 
 | 259 | void | 
 | 260 | htmlDocDumpMemory(xmlDocPtr cur, CHAR**mem, int *size) { | 
 | 261 |     xmlBufferPtr buf; | 
 | 262 |  | 
 | 263 |     if (cur == NULL) { | 
 | 264 | #ifdef DEBUG_TREE | 
 | 265 |         fprintf(stderr, "xmlDocDumpMemory : document == NULL\n"); | 
 | 266 | #endif | 
 | 267 | 	*mem = NULL; | 
 | 268 | 	*size = 0; | 
 | 269 | 	return; | 
 | 270 |     } | 
 | 271 |     buf = xmlBufferCreate(); | 
 | 272 |     if (buf == NULL) { | 
 | 273 | 	*mem = NULL; | 
 | 274 | 	*size = 0; | 
 | 275 | 	return; | 
 | 276 |     } | 
 | 277 |     htmlDocContentDump(buf, cur); | 
 | 278 |     *mem = buf->content; | 
 | 279 |     *size = buf->use; | 
 | 280 |     memset(buf, -1, sizeof(xmlBuffer)); | 
| Daniel Veillard | 6454aec | 1999-09-02 22:04:43 +0000 | [diff] [blame^] | 281 |     xmlFree(buf); | 
| Daniel Veillard | 167b509 | 1999-07-07 04:19:20 +0000 | [diff] [blame] | 282 | } | 
 | 283 |  | 
 | 284 |  | 
 | 285 | /** | 
 | 286 |  * htmlDocDump: | 
 | 287 |  * @f:  the FILE* | 
 | 288 |  * @cur:  the document | 
 | 289 |  * | 
 | 290 |  * Dump an HTML document to an open FILE. | 
 | 291 |  */ | 
 | 292 | void | 
 | 293 | htmlDocDump(FILE *f, xmlDocPtr cur) { | 
 | 294 |     xmlBufferPtr buf; | 
 | 295 |  | 
 | 296 |     if (cur == NULL) { | 
 | 297 | #ifdef DEBUG_TREE | 
 | 298 |         fprintf(stderr, "xmlDocDump : document == NULL\n"); | 
 | 299 | #endif | 
 | 300 | 	return; | 
 | 301 |     } | 
 | 302 |     buf = xmlBufferCreate(); | 
 | 303 |     if (buf == NULL) return; | 
 | 304 |     htmlDocContentDump(buf, cur); | 
 | 305 |     xmlBufferDump(f, buf); | 
 | 306 |     xmlBufferFree(buf); | 
 | 307 | } | 
 | 308 |  | 
 | 309 | /** | 
 | 310 |  * htmlSaveFile: | 
 | 311 |  * @filename:  the filename | 
 | 312 |  * @cur:  the document | 
 | 313 |  * | 
 | 314 |  * Dump an HTML document to a file. | 
 | 315 |  *  | 
 | 316 |  * returns: the number of byte written or -1 in case of failure. | 
 | 317 |  */ | 
 | 318 | int | 
 | 319 | htmlSaveFile(const char *filename, xmlDocPtr cur) { | 
 | 320 |     xmlBufferPtr buf; | 
 | 321 |     FILE *output = NULL; | 
 | 322 |     int ret; | 
 | 323 |  | 
 | 324 |     /*  | 
 | 325 |      * save the content to a temp buffer. | 
 | 326 |      */ | 
 | 327 |     buf = xmlBufferCreate(); | 
 | 328 |     if (buf == NULL) return(0); | 
 | 329 |     htmlDocContentDump(buf, cur); | 
 | 330 |  | 
 | 331 |     output = fopen(filename, "w"); | 
 | 332 |     if (output == NULL) return(-1); | 
 | 333 |     ret = xmlBufferDump(output, buf); | 
 | 334 |     fclose(output); | 
 | 335 |  | 
 | 336 |     xmlBufferFree(buf); | 
 | 337 |     return(ret * sizeof(CHAR)); | 
 | 338 | } | 
 | 339 |  |