blob: dbdf9ebf32129f1b3f8a0594e2dbb4ff88cf847e [file] [log] [blame]
/*
 * entities.c : implementation for the XML entities handling
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
8
Daniel Veillard34ce8be2002-03-18 19:37:11 +00009#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000011
Owen Taylor3473f882001-02-23 17:55:21 +000012#include <string.h>
13#ifdef HAVE_STDLIB_H
14#include <stdlib.h>
15#endif
16#include <libxml/xmlmemory.h>
17#include <libxml/hash.h>
18#include <libxml/entities.h>
19#include <libxml/parser.h>
20#include <libxml/xmlerror.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000021#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000022
/*
 * The XML predefined entities.
 *
 * Each row pairs an entity name with the literal text it stands for.
 * The table is walked once to populate the predefined entities hash.
 */
struct xmlPredefinedEntityValue {
    const char *name;   /* entity name, e.g. "lt" */
    const char *value;  /* replacement text, e.g. "<" */
};

static struct xmlPredefinedEntityValue xmlPredefinedEntityValues[] = {
    { "lt",   "<"  },
    { "gt",   ">"  },
    { "apos", "'"  },
    { "quot", "\"" },
    { "amp",  "&"  }
};
38
39/*
Daniel Veillardd79bcd12001-06-21 22:07:42 +000040 * TODO: This is GROSS, allocation of a 256 entry hash for
41 * a fixed number of 4 elements !
Owen Taylor3473f882001-02-23 17:55:21 +000042 */
Daniel Veillardb44025c2001-10-11 22:55:55 +000043static xmlHashTablePtr xmlPredefinedEntities = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000044
45/*
46 * xmlFreeEntity : clean-up an entity record.
47 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000048static void xmlFreeEntity(xmlEntityPtr entity) {
Owen Taylor3473f882001-02-23 17:55:21 +000049 if (entity == NULL) return;
50
Daniel Veillard22090732001-07-16 00:06:07 +000051 if ((entity->children) &&
52 (entity == (xmlEntityPtr) entity->children->parent))
Owen Taylor3473f882001-02-23 17:55:21 +000053 xmlFreeNodeList(entity->children);
54 if (entity->name != NULL)
55 xmlFree((char *) entity->name);
56 if (entity->ExternalID != NULL)
57 xmlFree((char *) entity->ExternalID);
58 if (entity->SystemID != NULL)
59 xmlFree((char *) entity->SystemID);
60 if (entity->URI != NULL)
61 xmlFree((char *) entity->URI);
62 if (entity->content != NULL)
63 xmlFree((char *) entity->content);
64 if (entity->orig != NULL)
65 xmlFree((char *) entity->orig);
Owen Taylor3473f882001-02-23 17:55:21 +000066 xmlFree(entity);
67}
68
69/*
70 * xmlAddEntity : register a new entity for an entities table.
71 */
72static xmlEntityPtr
73xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type,
74 const xmlChar *ExternalID, const xmlChar *SystemID,
75 const xmlChar *content) {
76 xmlEntitiesTablePtr table = NULL;
77 xmlEntityPtr ret;
78
79 if (name == NULL)
80 return(NULL);
81 switch (type) {
82 case XML_INTERNAL_GENERAL_ENTITY:
83 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
84 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
85 if (dtd->entities == NULL)
86 dtd->entities = xmlHashCreate(0);
87 table = dtd->entities;
88 break;
89 case XML_INTERNAL_PARAMETER_ENTITY:
90 case XML_EXTERNAL_PARAMETER_ENTITY:
91 if (dtd->pentities == NULL)
92 dtd->pentities = xmlHashCreate(0);
93 table = dtd->pentities;
94 break;
95 case XML_INTERNAL_PREDEFINED_ENTITY:
96 if (xmlPredefinedEntities == NULL)
97 xmlPredefinedEntities = xmlHashCreate(8);
98 table = xmlPredefinedEntities;
99 }
100 if (table == NULL)
101 return(NULL);
102 ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
103 if (ret == NULL) {
104 xmlGenericError(xmlGenericErrorContext,
105 "xmlAddEntity: out of memory\n");
106 return(NULL);
107 }
108 memset(ret, 0, sizeof(xmlEntity));
109 ret->type = XML_ENTITY_DECL;
110
111 /*
112 * fill the structure.
113 */
114 ret->name = xmlStrdup(name);
115 ret->etype = (xmlEntityType) type;
116 if (ExternalID != NULL)
117 ret->ExternalID = xmlStrdup(ExternalID);
118 if (SystemID != NULL)
119 ret->SystemID = xmlStrdup(SystemID);
120 if (content != NULL) {
121 ret->length = xmlStrlen(content);
122 ret->content = xmlStrndup(content, ret->length);
123 } else {
124 ret->length = 0;
125 ret->content = NULL;
126 }
127 ret->URI = NULL; /* to be computed by the layer knowing
128 the defining entity */
129 ret->orig = NULL;
130
131 if (xmlHashAddEntry(table, name, ret)) {
132 /*
133 * entity was already defined at another level.
134 */
135 xmlFreeEntity(ret);
136 return(NULL);
137 }
138 return(ret);
139}
140
141/**
142 * xmlInitializePredefinedEntities:
143 *
144 * Set up the predefined entities.
145 */
146void xmlInitializePredefinedEntities(void) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000147 unsigned int i;
Owen Taylor3473f882001-02-23 17:55:21 +0000148 xmlChar name[50];
149 xmlChar value[50];
150 const char *in;
151 xmlChar *out;
152
153 if (xmlPredefinedEntities != NULL) return;
154
155 xmlPredefinedEntities = xmlCreateEntitiesTable();
156 for (i = 0;i < sizeof(xmlPredefinedEntityValues) /
157 sizeof(xmlPredefinedEntityValues[0]);i++) {
158 in = xmlPredefinedEntityValues[i].name;
159 out = &name[0];
160 for (;(*out++ = (xmlChar) *in);)in++;
161 in = xmlPredefinedEntityValues[i].value;
162 out = &value[0];
163 for (;(*out++ = (xmlChar) *in);)in++;
164
165 xmlAddEntity(NULL, (const xmlChar *) &name[0],
166 XML_INTERNAL_PREDEFINED_ENTITY, NULL, NULL,
167 &value[0]);
168 }
169}
170
171/**
172 * xmlCleanupPredefinedEntities:
173 *
174 * Cleanup up the predefined entities table.
175 */
176void xmlCleanupPredefinedEntities(void) {
177 if (xmlPredefinedEntities == NULL) return;
178
179 xmlFreeEntitiesTable(xmlPredefinedEntities);
180 xmlPredefinedEntities = NULL;
181}
182
183/**
184 * xmlGetPredefinedEntity:
185 * @name: the entity name
186 *
187 * Check whether this name is an predefined entity.
188 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000189 * Returns NULL if not, otherwise the entity
Owen Taylor3473f882001-02-23 17:55:21 +0000190 */
191xmlEntityPtr
192xmlGetPredefinedEntity(const xmlChar *name) {
193 if (xmlPredefinedEntities == NULL)
194 xmlInitializePredefinedEntities();
195 return((xmlEntityPtr) xmlHashLookup(xmlPredefinedEntities, name));
196}
197
198/**
199 * xmlAddDtdEntity:
200 * @doc: the document
201 * @name: the entity name
202 * @type: the entity type XML_xxx_yyy_ENTITY
203 * @ExternalID: the entity external ID if available
204 * @SystemID: the entity system ID if available
205 * @content: the entity content
206 *
207 * Register a new entity for this document DTD external subset.
208 *
209 * Returns a pointer to the entity or NULL in case of error
210 */
211xmlEntityPtr
212xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
213 const xmlChar *ExternalID, const xmlChar *SystemID,
214 const xmlChar *content) {
215 xmlEntityPtr ret;
216 xmlDtdPtr dtd;
217
218 if (doc == NULL) {
219 xmlGenericError(xmlGenericErrorContext,
220 "xmlAddDtdEntity: doc == NULL !\n");
221 return(NULL);
222 }
223 if (doc->extSubset == NULL) {
224 xmlGenericError(xmlGenericErrorContext,
225 "xmlAddDtdEntity: document without external subset !\n");
226 return(NULL);
227 }
228 dtd = doc->extSubset;
229 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
230 if (ret == NULL) return(NULL);
231
232 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000233 * Link it to the DTD
Owen Taylor3473f882001-02-23 17:55:21 +0000234 */
235 ret->parent = dtd;
236 ret->doc = dtd->doc;
237 if (dtd->last == NULL) {
238 dtd->children = dtd->last = (xmlNodePtr) ret;
239 } else {
240 dtd->last->next = (xmlNodePtr) ret;
241 ret->prev = dtd->last;
242 dtd->last = (xmlNodePtr) ret;
243 }
244 return(ret);
245}
246
247/**
248 * xmlAddDocEntity:
249 * @doc: the document
250 * @name: the entity name
251 * @type: the entity type XML_xxx_yyy_ENTITY
252 * @ExternalID: the entity external ID if available
253 * @SystemID: the entity system ID if available
254 * @content: the entity content
255 *
256 * Register a new entity for this document.
257 *
258 * Returns a pointer to the entity or NULL in case of error
259 */
260xmlEntityPtr
261xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
262 const xmlChar *ExternalID, const xmlChar *SystemID,
263 const xmlChar *content) {
264 xmlEntityPtr ret;
265 xmlDtdPtr dtd;
266
267 if (doc == NULL) {
268 xmlGenericError(xmlGenericErrorContext,
269 "xmlAddDocEntity: document is NULL !\n");
270 return(NULL);
271 }
272 if (doc->intSubset == NULL) {
273 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000274 "xmlAddDocEntity: document without internal subset !\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000275 return(NULL);
276 }
277 dtd = doc->intSubset;
278 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
279 if (ret == NULL) return(NULL);
280
281 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000282 * Link it to the DTD
Owen Taylor3473f882001-02-23 17:55:21 +0000283 */
284 ret->parent = dtd;
285 ret->doc = dtd->doc;
286 if (dtd->last == NULL) {
287 dtd->children = dtd->last = (xmlNodePtr) ret;
288 } else {
289 dtd->last->next = (xmlNodePtr) ret;
290 ret->prev = dtd->last;
291 dtd->last = (xmlNodePtr) ret;
292 }
293 return(ret);
294}
295
296/**
297 * xmlGetEntityFromTable:
298 * @table: an entity table
299 * @name: the entity name
300 * @parameter: look for parameter entities
301 *
302 * Do an entity lookup in the table.
303 * returns the corresponding parameter entity, if found.
304 *
305 * Returns A pointer to the entity structure or NULL if not found.
306 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000307static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +0000308xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) {
309 return((xmlEntityPtr) xmlHashLookup(table, name));
310}
311
312/**
313 * xmlGetParameterEntity:
314 * @doc: the document referencing the entity
315 * @name: the entity name
316 *
317 * Do an entity lookup in the internal and external subsets and
318 * returns the corresponding parameter entity, if found.
319 *
320 * Returns A pointer to the entity structure or NULL if not found.
321 */
322xmlEntityPtr
323xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
324 xmlEntitiesTablePtr table;
325 xmlEntityPtr ret;
326
Daniel Veillard36065812002-01-24 15:02:46 +0000327 if (doc == NULL)
328 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000329 if ((doc->intSubset != NULL) && (doc->intSubset->pentities != NULL)) {
330 table = (xmlEntitiesTablePtr) doc->intSubset->pentities;
331 ret = xmlGetEntityFromTable(table, name);
332 if (ret != NULL)
333 return(ret);
334 }
335 if ((doc->extSubset != NULL) && (doc->extSubset->pentities != NULL)) {
336 table = (xmlEntitiesTablePtr) doc->extSubset->pentities;
337 return(xmlGetEntityFromTable(table, name));
338 }
339 return(NULL);
340}
341
342/**
343 * xmlGetDtdEntity:
344 * @doc: the document referencing the entity
345 * @name: the entity name
346 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000347 * Do an entity lookup in the DTD entity hash table and
Owen Taylor3473f882001-02-23 17:55:21 +0000348 * returns the corresponding entity, if found.
Daniel Veillard36065812002-01-24 15:02:46 +0000349 * Note: the first argument is the document node, not the DTD node.
Owen Taylor3473f882001-02-23 17:55:21 +0000350 *
351 * Returns A pointer to the entity structure or NULL if not found.
352 */
353xmlEntityPtr
354xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
355 xmlEntitiesTablePtr table;
356
Daniel Veillard36065812002-01-24 15:02:46 +0000357 if (doc == NULL)
358 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000359 if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
360 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
361 return(xmlGetEntityFromTable(table, name));
362 }
363 return(NULL);
364}
365
366/**
367 * xmlGetDocEntity:
368 * @doc: the document referencing the entity
369 * @name: the entity name
370 *
371 * Do an entity lookup in the document entity hash table and
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000372 * returns the corresponding entity, otherwise a lookup is done
Owen Taylor3473f882001-02-23 17:55:21 +0000373 * in the predefined entities too.
374 *
375 * Returns A pointer to the entity structure or NULL if not found.
376 */
377xmlEntityPtr
378xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
379 xmlEntityPtr cur;
380 xmlEntitiesTablePtr table;
381
382 if (doc != NULL) {
383 if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
384 table = (xmlEntitiesTablePtr) doc->intSubset->entities;
385 cur = xmlGetEntityFromTable(table, name);
386 if (cur != NULL)
387 return(cur);
388 }
Daniel Veillard28757702002-02-18 11:19:30 +0000389 if (doc->standalone != 1) {
390 if ((doc->extSubset != NULL) &&
391 (doc->extSubset->entities != NULL)) {
392 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
393 cur = xmlGetEntityFromTable(table, name);
394 if (cur != NULL)
395 return(cur);
396 }
Owen Taylor3473f882001-02-23 17:55:21 +0000397 }
398 }
399 if (xmlPredefinedEntities == NULL)
400 xmlInitializePredefinedEntities();
401 table = xmlPredefinedEntities;
402 return(xmlGetEntityFromTable(table, name));
403}
404
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * Evaluates to non-zero when @c is a legal XML character.  The ranges
 * follow the production exactly, so surrogates (#xD800-#xDFFF) and
 * values above #x10FFFF are rejected.
 * Note: the argument is evaluated several times; do not pass an
 * expression with side effects.
 */
#define IS_CHAR(c)                                                      \
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) ||                 \
     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||                              \
     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||                            \
     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
413
414/*
415 * A buffer used for converting entities to their equivalent and back.
416 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000417static int static_buffer_size = 0;
418static xmlChar *static_buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000419
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000420static int growBuffer(void) {
421 static_buffer_size *= 2;
Daniel Veillard3487c8d2002-09-05 11:33:25 +0000422 static_buffer = (xmlChar *) xmlRealloc(static_buffer,
423 static_buffer_size * sizeof(xmlChar));
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000424 if (static_buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +0000425 xmlGenericError(xmlGenericErrorContext, "malloc failed\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000426 return(-1);
427 }
428 return(0);
429}
430
431
432/**
433 * xmlEncodeEntities:
434 * @doc: the document containing the string
435 * @input: A string to convert to XML.
436 *
437 * Do a global encoding of a string, replacing the predefined entities
438 * and non ASCII values with their entities and CharRef counterparts.
439 *
440 * TODO: remove xmlEncodeEntities, once we are not afraid of breaking binary
441 * compatibility
442 *
443 * People must migrate their code to xmlEncodeEntitiesReentrant !
444 * This routine will issue a warning when encountered.
445 *
446 * Returns A newly allocated string with the substitution done.
447 */
448const xmlChar *
449xmlEncodeEntities(xmlDocPtr doc, const xmlChar *input) {
450 const xmlChar *cur = input;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000451 xmlChar *out = static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000452 static int warning = 1;
453 int html = 0;
454
455
456 if (warning) {
457 xmlGenericError(xmlGenericErrorContext,
458 "Deprecated API xmlEncodeEntities() used\n");
459 xmlGenericError(xmlGenericErrorContext,
460 " change code to use xmlEncodeEntitiesReentrant()\n");
461 warning = 0;
462 }
463
464 if (input == NULL) return(NULL);
465 if (doc != NULL)
466 html = (doc->type == XML_HTML_DOCUMENT_NODE);
467
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000468 if (static_buffer == NULL) {
469 static_buffer_size = 1000;
Daniel Veillard3487c8d2002-09-05 11:33:25 +0000470 static_buffer = (xmlChar *)
471 xmlMalloc(static_buffer_size * sizeof(xmlChar));
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000472 if (static_buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +0000473 xmlGenericError(xmlGenericErrorContext, "malloc failed\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000474 return(NULL);
475 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000476 out = static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000477 }
478 while (*cur != '\0') {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000479 if (out - static_buffer > static_buffer_size - 100) {
480 int indx = out - static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000481
482 growBuffer();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000483 out = &static_buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000484 }
485
486 /*
487 * By default one have to encode at least '<', '>', '"' and '&' !
488 */
489 if (*cur == '<') {
490 *out++ = '&';
491 *out++ = 'l';
492 *out++ = 't';
493 *out++ = ';';
494 } else if (*cur == '>') {
495 *out++ = '&';
496 *out++ = 'g';
497 *out++ = 't';
498 *out++ = ';';
499 } else if (*cur == '&') {
500 *out++ = '&';
501 *out++ = 'a';
502 *out++ = 'm';
503 *out++ = 'p';
504 *out++ = ';';
505 } else if (*cur == '"') {
506 *out++ = '&';
507 *out++ = 'q';
508 *out++ = 'u';
509 *out++ = 'o';
510 *out++ = 't';
511 *out++ = ';';
512 } else if ((*cur == '\'') && (!html)) {
513 *out++ = '&';
514 *out++ = 'a';
515 *out++ = 'p';
516 *out++ = 'o';
517 *out++ = 's';
518 *out++ = ';';
519 } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
520 (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
521 /*
522 * default case, just copy !
523 */
524 *out++ = *cur;
525#ifndef USE_UTF_8
526 } else if ((sizeof(xmlChar) == 1) && (*cur >= 0x80)) {
527 char buf[10], *ptr;
528
Owen Taylor3473f882001-02-23 17:55:21 +0000529 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000530 buf[sizeof(buf) - 1] = 0;
531 ptr = buf;
532 while (*ptr != 0) *out++ = *ptr++;
533#endif
534 } else if (IS_CHAR(*cur)) {
535 char buf[10], *ptr;
536
Owen Taylor3473f882001-02-23 17:55:21 +0000537 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000538 buf[sizeof(buf) - 1] = 0;
539 ptr = buf;
540 while (*ptr != 0) *out++ = *ptr++;
541 }
542#if 0
543 else {
544 /*
545 * default case, this is not a valid char !
546 * Skip it...
547 */
548 xmlGenericError(xmlGenericErrorContext,
549 "xmlEncodeEntities: invalid char %d\n", (int) *cur);
550 }
551#endif
552 cur++;
553 }
554 *out++ = 0;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000555 return(static_buffer);
Owen Taylor3473f882001-02-23 17:55:21 +0000556}
557
/*
 * Macro used to grow the current buffer.
 *
 * It operates on two locals of the enclosing function: `buffer`
 * (xmlChar *) and `buffer_size` (int), doubling the capacity.  If the
 * reallocation fails the old buffer is freed and the enclosing function
 * returns NULL, so it can only be used in functions returning a pointer.
 */
#define growBufferReentrant() do {                                      \
    xmlChar *grown_buffer;                                              \
    buffer_size *= 2;                                                   \
    grown_buffer = (xmlChar *)                                          \
        xmlRealloc(buffer, buffer_size * sizeof(xmlChar));              \
    if (grown_buffer == NULL) {                                         \
        xmlGenericError(xmlGenericErrorContext, "realloc failed\n");    \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = grown_buffer;                                              \
} while (0)
570
571
572/**
573 * xmlEncodeEntitiesReentrant:
574 * @doc: the document containing the string
575 * @input: A string to convert to XML.
576 *
577 * Do a global encoding of a string, replacing the predefined entities
578 * and non ASCII values with their entities and CharRef counterparts.
579 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
580 * must be deallocated.
581 *
582 * Returns A newly allocated string with the substitution done.
583 */
584xmlChar *
585xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
586 const xmlChar *cur = input;
587 xmlChar *buffer = NULL;
588 xmlChar *out = NULL;
589 int buffer_size = 0;
590 int html = 0;
591
592 if (input == NULL) return(NULL);
593 if (doc != NULL)
594 html = (doc->type == XML_HTML_DOCUMENT_NODE);
595
596 /*
597 * allocate an translation buffer.
598 */
599 buffer_size = 1000;
600 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
601 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +0000602 xmlGenericError(xmlGenericErrorContext, "malloc failed\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000603 return(NULL);
604 }
605 out = buffer;
606
607 while (*cur != '\0') {
608 if (out - buffer > buffer_size - 100) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000609 int indx = out - buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000610
611 growBufferReentrant();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000612 out = &buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000613 }
614
615 /*
616 * By default one have to encode at least '<', '>', '"' and '&' !
617 */
618 if (*cur == '<') {
619 *out++ = '&';
620 *out++ = 'l';
621 *out++ = 't';
622 *out++ = ';';
623 } else if (*cur == '>') {
624 *out++ = '&';
625 *out++ = 'g';
626 *out++ = 't';
627 *out++ = ';';
628 } else if (*cur == '&') {
629 *out++ = '&';
630 *out++ = 'a';
631 *out++ = 'm';
632 *out++ = 'p';
633 *out++ = ';';
634 } else if (*cur == '"') {
635 *out++ = '&';
636 *out++ = 'q';
637 *out++ = 'u';
638 *out++ = 'o';
639 *out++ = 't';
640 *out++ = ';';
641#if 0
642 } else if ((*cur == '\'') && (!html)) {
643 *out++ = '&';
644 *out++ = 'a';
645 *out++ = 'p';
646 *out++ = 'o';
647 *out++ = 's';
648 *out++ = ';';
649#endif
650 } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
651 (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
652 /*
653 * default case, just copy !
654 */
655 *out++ = *cur;
656 } else if (*cur >= 0x80) {
Daniel Veillard122376b2001-04-24 12:12:30 +0000657 if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000658 /*
659 * Bjørn Reese <br@sseusa.com> provided the patch
660 xmlChar xc;
661 xc = (*cur & 0x3F) << 6;
662 if (cur[1] != 0) {
663 xc += *(++cur) & 0x3F;
664 *out++ = xc;
665 } else
666 */
667 *out++ = *cur;
668 } else {
669 /*
670 * We assume we have UTF-8 input.
671 */
672 char buf[10], *ptr;
673 int val = 0, l = 1;
674
675 if (*cur < 0xC0) {
676 xmlGenericError(xmlGenericErrorContext,
677 "xmlEncodeEntitiesReentrant : input not UTF-8\n");
Daniel Veillard122376b2001-04-24 12:12:30 +0000678 if (doc != NULL)
679 doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
Owen Taylor3473f882001-02-23 17:55:21 +0000680 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000681 buf[sizeof(buf) - 1] = 0;
682 ptr = buf;
683 while (*ptr != 0) *out++ = *ptr++;
Daniel Veillard05c13a22001-09-09 08:38:09 +0000684 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +0000685 continue;
686 } else if (*cur < 0xE0) {
687 val = (cur[0]) & 0x1F;
688 val <<= 6;
689 val |= (cur[1]) & 0x3F;
690 l = 2;
691 } else if (*cur < 0xF0) {
692 val = (cur[0]) & 0x0F;
693 val <<= 6;
694 val |= (cur[1]) & 0x3F;
695 val <<= 6;
696 val |= (cur[2]) & 0x3F;
697 l = 3;
698 } else if (*cur < 0xF8) {
699 val = (cur[0]) & 0x07;
700 val <<= 6;
701 val |= (cur[1]) & 0x3F;
702 val <<= 6;
703 val |= (cur[2]) & 0x3F;
704 val <<= 6;
705 val |= (cur[3]) & 0x3F;
706 l = 4;
707 }
708 if ((l == 1) || (!IS_CHAR(val))) {
709 xmlGenericError(xmlGenericErrorContext,
710 "xmlEncodeEntitiesReentrant : char out of range\n");
Daniel Veillard122376b2001-04-24 12:12:30 +0000711 if (doc != NULL)
712 doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
Owen Taylor3473f882001-02-23 17:55:21 +0000713 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000714 buf[sizeof(buf) - 1] = 0;
715 ptr = buf;
716 while (*ptr != 0) *out++ = *ptr++;
717 cur++;
718 continue;
719 }
720 /*
721 * We could do multiple things here. Just save as a char ref
722 */
Daniel Veillard16698282001-09-14 10:29:27 +0000723 if (html)
724 snprintf(buf, sizeof(buf), "&#%d;", val);
725 else
726 snprintf(buf, sizeof(buf), "&#x%X;", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000727 buf[sizeof(buf) - 1] = 0;
728 ptr = buf;
729 while (*ptr != 0) *out++ = *ptr++;
730 cur += l;
731 continue;
732 }
733 } else if (IS_CHAR(*cur)) {
734 char buf[10], *ptr;
735
Owen Taylor3473f882001-02-23 17:55:21 +0000736 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000737 buf[sizeof(buf) - 1] = 0;
738 ptr = buf;
739 while (*ptr != 0) *out++ = *ptr++;
740 }
741#if 0
742 else {
743 /*
744 * default case, this is not a valid char !
745 * Skip it...
746 */
747 xmlGenericError(xmlGenericErrorContext,
748 "xmlEncodeEntities: invalid char %d\n", (int) *cur);
749 }
750#endif
751 cur++;
752 }
753 *out++ = 0;
754 return(buffer);
755}
756
757/**
758 * xmlEncodeSpecialChars:
759 * @doc: the document containing the string
760 * @input: A string to convert to XML.
761 *
762 * Do a global encoding of a string, replacing the predefined entities
763 * this routine is reentrant, and result must be deallocated.
764 *
765 * Returns A newly allocated string with the substitution done.
766 */
767xmlChar *
768xmlEncodeSpecialChars(xmlDocPtr doc, const xmlChar *input) {
769 const xmlChar *cur = input;
770 xmlChar *buffer = NULL;
771 xmlChar *out = NULL;
772 int buffer_size = 0;
773 int html = 0;
774
775 if (input == NULL) return(NULL);
776 if (doc != NULL)
777 html = (doc->type == XML_HTML_DOCUMENT_NODE);
778
779 /*
780 * allocate an translation buffer.
781 */
782 buffer_size = 1000;
783 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
784 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +0000785 xmlGenericError(xmlGenericErrorContext, "malloc failed\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000786 return(NULL);
787 }
788 out = buffer;
789
790 while (*cur != '\0') {
791 if (out - buffer > buffer_size - 10) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000792 int indx = out - buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000793
794 growBufferReentrant();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000795 out = &buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000796 }
797
798 /*
799 * By default one have to encode at least '<', '>', '"' and '&' !
800 */
801 if (*cur == '<') {
802 *out++ = '&';
803 *out++ = 'l';
804 *out++ = 't';
805 *out++ = ';';
806 } else if (*cur == '>') {
807 *out++ = '&';
808 *out++ = 'g';
809 *out++ = 't';
810 *out++ = ';';
811 } else if (*cur == '&') {
812 *out++ = '&';
813 *out++ = 'a';
814 *out++ = 'm';
815 *out++ = 'p';
816 *out++ = ';';
817 } else if (*cur == '"') {
818 *out++ = '&';
819 *out++ = 'q';
820 *out++ = 'u';
821 *out++ = 'o';
822 *out++ = 't';
823 *out++ = ';';
824 } else {
825 /*
826 * Works because on UTF-8, all extended sequences cannot
827 * result in bytes in the ASCII range.
828 */
829 *out++ = *cur;
830 }
831 cur++;
832 }
833 *out++ = 0;
834 return(buffer);
835}
836
837/**
838 * xmlCreateEntitiesTable:
839 *
840 * create and initialize an empty entities hash table.
841 *
842 * Returns the xmlEntitiesTablePtr just created or NULL in case of error.
843 */
844xmlEntitiesTablePtr
845xmlCreateEntitiesTable(void) {
846 return((xmlEntitiesTablePtr) xmlHashCreate(0));
847}
848
849/**
850 * xmlFreeEntitiesTable:
851 * @table: An entity table
852 *
853 * Deallocate the memory used by an entities hash table.
854 */
855void
856xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
857 xmlHashFree(table, (xmlHashDeallocator) xmlFreeEntity);
858}
859
860/**
861 * xmlCopyEntity:
862 * @ent: An entity
863 *
864 * Build a copy of an entity
865 *
866 * Returns the new xmlEntitiesPtr or NULL in case of error.
867 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000868static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +0000869xmlCopyEntity(xmlEntityPtr ent) {
870 xmlEntityPtr cur;
871
872 cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
873 if (cur == NULL) {
874 xmlGenericError(xmlGenericErrorContext,
875 "xmlCopyEntity: out of memory !\n");
876 return(NULL);
877 }
878 memset(cur, 0, sizeof(xmlEntity));
Daniel Veillard845cce42002-01-09 11:51:37 +0000879 cur->type = XML_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +0000880
881 cur->etype = ent->etype;
882 if (ent->name != NULL)
883 cur->name = xmlStrdup(ent->name);
884 if (ent->ExternalID != NULL)
885 cur->ExternalID = xmlStrdup(ent->ExternalID);
886 if (ent->SystemID != NULL)
887 cur->SystemID = xmlStrdup(ent->SystemID);
888 if (ent->content != NULL)
889 cur->content = xmlStrdup(ent->content);
890 if (ent->orig != NULL)
891 cur->orig = xmlStrdup(ent->orig);
Daniel Veillard8ee9c8f2002-01-26 21:42:58 +0000892 if (ent->URI != NULL)
893 cur->URI = xmlStrdup(ent->URI);
Owen Taylor3473f882001-02-23 17:55:21 +0000894 return(cur);
895}
896
897/**
898 * xmlCopyEntitiesTable:
899 * @table: An entity table
900 *
901 * Build a copy of an entity table.
902 *
903 * Returns the new xmlEntitiesTablePtr or NULL in case of error.
904 */
905xmlEntitiesTablePtr
906xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
907 return(xmlHashCopy(table, (xmlHashCopier) xmlCopyEntity));
908}
909
910/**
911 * xmlDumpEntityDecl:
912 * @buf: An XML buffer.
913 * @ent: An entity table
914 *
915 * This will dump the content of the entity table as an XML DTD definition
916 */
917void
918xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
919 switch (ent->etype) {
920 case XML_INTERNAL_GENERAL_ENTITY:
921 xmlBufferWriteChar(buf, "<!ENTITY ");
922 xmlBufferWriteCHAR(buf, ent->name);
923 xmlBufferWriteChar(buf, " ");
924 if (ent->orig != NULL)
925 xmlBufferWriteQuotedString(buf, ent->orig);
926 else
927 xmlBufferWriteQuotedString(buf, ent->content);
928 xmlBufferWriteChar(buf, ">\n");
929 break;
930 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
931 xmlBufferWriteChar(buf, "<!ENTITY ");
932 xmlBufferWriteCHAR(buf, ent->name);
933 if (ent->ExternalID != NULL) {
934 xmlBufferWriteChar(buf, " PUBLIC ");
935 xmlBufferWriteQuotedString(buf, ent->ExternalID);
936 xmlBufferWriteChar(buf, " ");
937 xmlBufferWriteQuotedString(buf, ent->SystemID);
938 } else {
939 xmlBufferWriteChar(buf, " SYSTEM ");
940 xmlBufferWriteQuotedString(buf, ent->SystemID);
941 }
942 xmlBufferWriteChar(buf, ">\n");
943 break;
944 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
945 xmlBufferWriteChar(buf, "<!ENTITY ");
946 xmlBufferWriteCHAR(buf, ent->name);
947 if (ent->ExternalID != NULL) {
948 xmlBufferWriteChar(buf, " PUBLIC ");
949 xmlBufferWriteQuotedString(buf, ent->ExternalID);
950 xmlBufferWriteChar(buf, " ");
951 xmlBufferWriteQuotedString(buf, ent->SystemID);
952 } else {
953 xmlBufferWriteChar(buf, " SYSTEM ");
954 xmlBufferWriteQuotedString(buf, ent->SystemID);
955 }
956 if (ent->content != NULL) { /* Should be true ! */
957 xmlBufferWriteChar(buf, " NDATA ");
958 if (ent->orig != NULL)
959 xmlBufferWriteCHAR(buf, ent->orig);
960 else
961 xmlBufferWriteCHAR(buf, ent->content);
962 }
963 xmlBufferWriteChar(buf, ">\n");
964 break;
965 case XML_INTERNAL_PARAMETER_ENTITY:
966 xmlBufferWriteChar(buf, "<!ENTITY % ");
967 xmlBufferWriteCHAR(buf, ent->name);
968 xmlBufferWriteChar(buf, " ");
969 if (ent->orig == NULL)
970 xmlBufferWriteQuotedString(buf, ent->content);
971 else
972 xmlBufferWriteQuotedString(buf, ent->orig);
973 xmlBufferWriteChar(buf, ">\n");
974 break;
975 case XML_EXTERNAL_PARAMETER_ENTITY:
976 xmlBufferWriteChar(buf, "<!ENTITY % ");
977 xmlBufferWriteCHAR(buf, ent->name);
978 if (ent->ExternalID != NULL) {
979 xmlBufferWriteChar(buf, " PUBLIC ");
980 xmlBufferWriteQuotedString(buf, ent->ExternalID);
981 xmlBufferWriteChar(buf, " ");
982 xmlBufferWriteQuotedString(buf, ent->SystemID);
983 } else {
984 xmlBufferWriteChar(buf, " SYSTEM ");
985 xmlBufferWriteQuotedString(buf, ent->SystemID);
986 }
987 xmlBufferWriteChar(buf, ">\n");
988 break;
989 default:
990 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000991 "xmlDumpEntitiesDecl: internal: unknown type %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000992 ent->etype);
993 }
994}
995
996/**
997 * xmlDumpEntitiesTable:
998 * @buf: An XML buffer.
999 * @table: An entity table
1000 *
1001 * This will dump the content of the entity table as an XML DTD definition
1002 */
1003void
1004xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
1005 xmlHashScan(table, (xmlHashScanner)xmlDumpEntityDecl, buf);
1006}