blob: 1ab70bd59354339cbfb5395024b91b018406d6e2 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002 * entities.c : implementation for the XML entities handling
Owen Taylor3473f882001-02-23 17:55:21 +00003 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00006 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00007 */
8
Daniel Veillard34ce8be2002-03-18 19:37:11 +00009#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000011
Owen Taylor3473f882001-02-23 17:55:21 +000012#include <string.h>
13#ifdef HAVE_STDLIB_H
14#include <stdlib.h>
15#endif
16#include <libxml/xmlmemory.h>
17#include <libxml/hash.h>
18#include <libxml/entities.h>
19#include <libxml/parser.h>
20#include <libxml/xmlerror.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000021#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000022
/*
 * The XML predefined entities.
 *
 * Each record pairs an entity name (without the surrounding '&' and ';')
 * with the literal text it stands for.  The table is only ever read, so
 * it is declared const.
 */

struct xmlPredefinedEntityValue {
    const char *name;	/* entity name, e.g. "lt" */
    const char *value;	/* replacement text, e.g. "<" */
};
static const struct xmlPredefinedEntityValue xmlPredefinedEntityValues[] = {
    { "lt", "<" },
    { "gt", ">" },
    { "apos", "'" },
    { "quot", "\"" },
    { "amp", "&" }
};
38
/*
 * Hash table holding the predefined entities.  It is NULL until the
 * table is first built and is reset to NULL again by
 * xmlCleanupPredefinedEntities().
 *
 * TODO: This is GROSS, allocation of a full hash table for
 *       a fixed number of 5 elements !
 */
static xmlHashTablePtr xmlPredefinedEntities = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000044
45/*
46 * xmlFreeEntity : clean-up an entity record.
47 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000048static void xmlFreeEntity(xmlEntityPtr entity) {
Owen Taylor3473f882001-02-23 17:55:21 +000049 if (entity == NULL) return;
50
Daniel Veillard2d84a892002-12-30 00:01:08 +000051 if ((entity->children) && (entity->owner == 1) &&
Daniel Veillard22090732001-07-16 00:06:07 +000052 (entity == (xmlEntityPtr) entity->children->parent))
Owen Taylor3473f882001-02-23 17:55:21 +000053 xmlFreeNodeList(entity->children);
54 if (entity->name != NULL)
55 xmlFree((char *) entity->name);
56 if (entity->ExternalID != NULL)
57 xmlFree((char *) entity->ExternalID);
58 if (entity->SystemID != NULL)
59 xmlFree((char *) entity->SystemID);
60 if (entity->URI != NULL)
61 xmlFree((char *) entity->URI);
62 if (entity->content != NULL)
63 xmlFree((char *) entity->content);
64 if (entity->orig != NULL)
65 xmlFree((char *) entity->orig);
Owen Taylor3473f882001-02-23 17:55:21 +000066 xmlFree(entity);
67}
68
69/*
70 * xmlAddEntity : register a new entity for an entities table.
71 */
72static xmlEntityPtr
73xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type,
74 const xmlChar *ExternalID, const xmlChar *SystemID,
75 const xmlChar *content) {
76 xmlEntitiesTablePtr table = NULL;
77 xmlEntityPtr ret;
78
79 if (name == NULL)
80 return(NULL);
81 switch (type) {
82 case XML_INTERNAL_GENERAL_ENTITY:
83 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
84 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
85 if (dtd->entities == NULL)
86 dtd->entities = xmlHashCreate(0);
87 table = dtd->entities;
88 break;
89 case XML_INTERNAL_PARAMETER_ENTITY:
90 case XML_EXTERNAL_PARAMETER_ENTITY:
91 if (dtd->pentities == NULL)
92 dtd->pentities = xmlHashCreate(0);
93 table = dtd->pentities;
94 break;
95 case XML_INTERNAL_PREDEFINED_ENTITY:
96 if (xmlPredefinedEntities == NULL)
97 xmlPredefinedEntities = xmlHashCreate(8);
98 table = xmlPredefinedEntities;
99 }
100 if (table == NULL)
101 return(NULL);
102 ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
103 if (ret == NULL) {
104 xmlGenericError(xmlGenericErrorContext,
105 "xmlAddEntity: out of memory\n");
106 return(NULL);
107 }
108 memset(ret, 0, sizeof(xmlEntity));
109 ret->type = XML_ENTITY_DECL;
110
111 /*
112 * fill the structure.
113 */
114 ret->name = xmlStrdup(name);
115 ret->etype = (xmlEntityType) type;
116 if (ExternalID != NULL)
117 ret->ExternalID = xmlStrdup(ExternalID);
118 if (SystemID != NULL)
119 ret->SystemID = xmlStrdup(SystemID);
120 if (content != NULL) {
121 ret->length = xmlStrlen(content);
122 ret->content = xmlStrndup(content, ret->length);
123 } else {
124 ret->length = 0;
125 ret->content = NULL;
126 }
127 ret->URI = NULL; /* to be computed by the layer knowing
128 the defining entity */
129 ret->orig = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +0000130 ret->owner = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000131
132 if (xmlHashAddEntry(table, name, ret)) {
133 /*
134 * entity was already defined at another level.
135 */
136 xmlFreeEntity(ret);
137 return(NULL);
138 }
139 return(ret);
140}
141
142/**
143 * xmlInitializePredefinedEntities:
144 *
145 * Set up the predefined entities.
146 */
147void xmlInitializePredefinedEntities(void) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000148 unsigned int i;
Owen Taylor3473f882001-02-23 17:55:21 +0000149 xmlChar name[50];
150 xmlChar value[50];
151 const char *in;
152 xmlChar *out;
153
154 if (xmlPredefinedEntities != NULL) return;
155
156 xmlPredefinedEntities = xmlCreateEntitiesTable();
157 for (i = 0;i < sizeof(xmlPredefinedEntityValues) /
158 sizeof(xmlPredefinedEntityValues[0]);i++) {
159 in = xmlPredefinedEntityValues[i].name;
160 out = &name[0];
161 for (;(*out++ = (xmlChar) *in);)in++;
162 in = xmlPredefinedEntityValues[i].value;
163 out = &value[0];
164 for (;(*out++ = (xmlChar) *in);)in++;
165
166 xmlAddEntity(NULL, (const xmlChar *) &name[0],
167 XML_INTERNAL_PREDEFINED_ENTITY, NULL, NULL,
168 &value[0]);
169 }
170}
171
172/**
173 * xmlCleanupPredefinedEntities:
174 *
175 * Cleanup up the predefined entities table.
176 */
177void xmlCleanupPredefinedEntities(void) {
178 if (xmlPredefinedEntities == NULL) return;
179
180 xmlFreeEntitiesTable(xmlPredefinedEntities);
181 xmlPredefinedEntities = NULL;
182}
183
184/**
185 * xmlGetPredefinedEntity:
186 * @name: the entity name
187 *
188 * Check whether this name is an predefined entity.
189 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000190 * Returns NULL if not, otherwise the entity
Owen Taylor3473f882001-02-23 17:55:21 +0000191 */
192xmlEntityPtr
193xmlGetPredefinedEntity(const xmlChar *name) {
194 if (xmlPredefinedEntities == NULL)
195 xmlInitializePredefinedEntities();
196 return((xmlEntityPtr) xmlHashLookup(xmlPredefinedEntities, name));
197}
198
199/**
200 * xmlAddDtdEntity:
201 * @doc: the document
202 * @name: the entity name
203 * @type: the entity type XML_xxx_yyy_ENTITY
204 * @ExternalID: the entity external ID if available
205 * @SystemID: the entity system ID if available
206 * @content: the entity content
207 *
208 * Register a new entity for this document DTD external subset.
209 *
210 * Returns a pointer to the entity or NULL in case of error
211 */
212xmlEntityPtr
213xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
214 const xmlChar *ExternalID, const xmlChar *SystemID,
215 const xmlChar *content) {
216 xmlEntityPtr ret;
217 xmlDtdPtr dtd;
218
219 if (doc == NULL) {
220 xmlGenericError(xmlGenericErrorContext,
221 "xmlAddDtdEntity: doc == NULL !\n");
222 return(NULL);
223 }
224 if (doc->extSubset == NULL) {
225 xmlGenericError(xmlGenericErrorContext,
226 "xmlAddDtdEntity: document without external subset !\n");
227 return(NULL);
228 }
229 dtd = doc->extSubset;
230 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
231 if (ret == NULL) return(NULL);
232
233 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000234 * Link it to the DTD
Owen Taylor3473f882001-02-23 17:55:21 +0000235 */
236 ret->parent = dtd;
237 ret->doc = dtd->doc;
238 if (dtd->last == NULL) {
239 dtd->children = dtd->last = (xmlNodePtr) ret;
240 } else {
241 dtd->last->next = (xmlNodePtr) ret;
242 ret->prev = dtd->last;
243 dtd->last = (xmlNodePtr) ret;
244 }
245 return(ret);
246}
247
248/**
249 * xmlAddDocEntity:
250 * @doc: the document
251 * @name: the entity name
252 * @type: the entity type XML_xxx_yyy_ENTITY
253 * @ExternalID: the entity external ID if available
254 * @SystemID: the entity system ID if available
255 * @content: the entity content
256 *
257 * Register a new entity for this document.
258 *
259 * Returns a pointer to the entity or NULL in case of error
260 */
261xmlEntityPtr
262xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
263 const xmlChar *ExternalID, const xmlChar *SystemID,
264 const xmlChar *content) {
265 xmlEntityPtr ret;
266 xmlDtdPtr dtd;
267
268 if (doc == NULL) {
269 xmlGenericError(xmlGenericErrorContext,
270 "xmlAddDocEntity: document is NULL !\n");
271 return(NULL);
272 }
273 if (doc->intSubset == NULL) {
274 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000275 "xmlAddDocEntity: document without internal subset !\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000276 return(NULL);
277 }
278 dtd = doc->intSubset;
279 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
280 if (ret == NULL) return(NULL);
281
282 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000283 * Link it to the DTD
Owen Taylor3473f882001-02-23 17:55:21 +0000284 */
285 ret->parent = dtd;
286 ret->doc = dtd->doc;
287 if (dtd->last == NULL) {
288 dtd->children = dtd->last = (xmlNodePtr) ret;
289 } else {
290 dtd->last->next = (xmlNodePtr) ret;
291 ret->prev = dtd->last;
292 dtd->last = (xmlNodePtr) ret;
293 }
294 return(ret);
295}
296
297/**
298 * xmlGetEntityFromTable:
299 * @table: an entity table
300 * @name: the entity name
301 * @parameter: look for parameter entities
302 *
303 * Do an entity lookup in the table.
304 * returns the corresponding parameter entity, if found.
305 *
306 * Returns A pointer to the entity structure or NULL if not found.
307 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000308static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +0000309xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) {
310 return((xmlEntityPtr) xmlHashLookup(table, name));
311}
312
313/**
314 * xmlGetParameterEntity:
315 * @doc: the document referencing the entity
316 * @name: the entity name
317 *
318 * Do an entity lookup in the internal and external subsets and
319 * returns the corresponding parameter entity, if found.
320 *
321 * Returns A pointer to the entity structure or NULL if not found.
322 */
323xmlEntityPtr
324xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
325 xmlEntitiesTablePtr table;
326 xmlEntityPtr ret;
327
Daniel Veillard36065812002-01-24 15:02:46 +0000328 if (doc == NULL)
329 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000330 if ((doc->intSubset != NULL) && (doc->intSubset->pentities != NULL)) {
331 table = (xmlEntitiesTablePtr) doc->intSubset->pentities;
332 ret = xmlGetEntityFromTable(table, name);
333 if (ret != NULL)
334 return(ret);
335 }
336 if ((doc->extSubset != NULL) && (doc->extSubset->pentities != NULL)) {
337 table = (xmlEntitiesTablePtr) doc->extSubset->pentities;
338 return(xmlGetEntityFromTable(table, name));
339 }
340 return(NULL);
341}
342
343/**
344 * xmlGetDtdEntity:
345 * @doc: the document referencing the entity
346 * @name: the entity name
347 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000348 * Do an entity lookup in the DTD entity hash table and
Owen Taylor3473f882001-02-23 17:55:21 +0000349 * returns the corresponding entity, if found.
Daniel Veillard36065812002-01-24 15:02:46 +0000350 * Note: the first argument is the document node, not the DTD node.
Owen Taylor3473f882001-02-23 17:55:21 +0000351 *
352 * Returns A pointer to the entity structure or NULL if not found.
353 */
354xmlEntityPtr
355xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
356 xmlEntitiesTablePtr table;
357
Daniel Veillard36065812002-01-24 15:02:46 +0000358 if (doc == NULL)
359 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000360 if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
361 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
362 return(xmlGetEntityFromTable(table, name));
363 }
364 return(NULL);
365}
366
367/**
368 * xmlGetDocEntity:
369 * @doc: the document referencing the entity
370 * @name: the entity name
371 *
372 * Do an entity lookup in the document entity hash table and
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000373 * returns the corresponding entity, otherwise a lookup is done
Owen Taylor3473f882001-02-23 17:55:21 +0000374 * in the predefined entities too.
375 *
376 * Returns A pointer to the entity structure or NULL if not found.
377 */
378xmlEntityPtr
379xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
380 xmlEntityPtr cur;
381 xmlEntitiesTablePtr table;
382
383 if (doc != NULL) {
384 if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
385 table = (xmlEntitiesTablePtr) doc->intSubset->entities;
386 cur = xmlGetEntityFromTable(table, name);
387 if (cur != NULL)
388 return(cur);
389 }
Daniel Veillard28757702002-02-18 11:19:30 +0000390 if (doc->standalone != 1) {
391 if ((doc->extSubset != NULL) &&
392 (doc->extSubset->entities != NULL)) {
393 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
394 cur = xmlGetEntityFromTable(table, name);
395 if (cur != NULL)
396 return(cur);
397 }
Owen Taylor3473f882001-02-23 17:55:21 +0000398 }
399 }
400 if (xmlPredefinedEntities == NULL)
401 xmlInitializePredefinedEntities();
402 table = xmlPredefinedEntities;
403 return(xmlGetEntityFromTable(table, name));
404}
405
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * The macro follows the production literally: surrogates
 * (#xD800-#xDFFF) and values above #x10FFFF are rejected.
 * Note that the argument is evaluated several times.
 */
#define IS_CHAR(c)							\
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) ||		\
     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||				\
     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||				\
     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
414
415/*
416 * A buffer used for converting entities to their equivalent and back.
417 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000418static int static_buffer_size = 0;
419static xmlChar *static_buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000420
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000421static int growBuffer(void) {
422 static_buffer_size *= 2;
Daniel Veillard3487c8d2002-09-05 11:33:25 +0000423 static_buffer = (xmlChar *) xmlRealloc(static_buffer,
424 static_buffer_size * sizeof(xmlChar));
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000425 if (static_buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +0000426 xmlGenericError(xmlGenericErrorContext, "malloc failed\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000427 return(-1);
428 }
429 return(0);
430}
431
432
433/**
434 * xmlEncodeEntities:
435 * @doc: the document containing the string
436 * @input: A string to convert to XML.
437 *
438 * Do a global encoding of a string, replacing the predefined entities
439 * and non ASCII values with their entities and CharRef counterparts.
440 *
441 * TODO: remove xmlEncodeEntities, once we are not afraid of breaking binary
442 * compatibility
443 *
444 * People must migrate their code to xmlEncodeEntitiesReentrant !
445 * This routine will issue a warning when encountered.
446 *
447 * Returns A newly allocated string with the substitution done.
448 */
449const xmlChar *
450xmlEncodeEntities(xmlDocPtr doc, const xmlChar *input) {
451 const xmlChar *cur = input;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000452 xmlChar *out = static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000453 static int warning = 1;
454 int html = 0;
455
456
457 if (warning) {
458 xmlGenericError(xmlGenericErrorContext,
459 "Deprecated API xmlEncodeEntities() used\n");
460 xmlGenericError(xmlGenericErrorContext,
461 " change code to use xmlEncodeEntitiesReentrant()\n");
462 warning = 0;
463 }
464
465 if (input == NULL) return(NULL);
466 if (doc != NULL)
467 html = (doc->type == XML_HTML_DOCUMENT_NODE);
468
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000469 if (static_buffer == NULL) {
470 static_buffer_size = 1000;
Daniel Veillard3487c8d2002-09-05 11:33:25 +0000471 static_buffer = (xmlChar *)
472 xmlMalloc(static_buffer_size * sizeof(xmlChar));
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000473 if (static_buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +0000474 xmlGenericError(xmlGenericErrorContext, "malloc failed\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000475 return(NULL);
476 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000477 out = static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000478 }
479 while (*cur != '\0') {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000480 if (out - static_buffer > static_buffer_size - 100) {
481 int indx = out - static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000482
483 growBuffer();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000484 out = &static_buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000485 }
486
487 /*
488 * By default one have to encode at least '<', '>', '"' and '&' !
489 */
490 if (*cur == '<') {
491 *out++ = '&';
492 *out++ = 'l';
493 *out++ = 't';
494 *out++ = ';';
495 } else if (*cur == '>') {
496 *out++ = '&';
497 *out++ = 'g';
498 *out++ = 't';
499 *out++ = ';';
500 } else if (*cur == '&') {
501 *out++ = '&';
502 *out++ = 'a';
503 *out++ = 'm';
504 *out++ = 'p';
505 *out++ = ';';
506 } else if (*cur == '"') {
507 *out++ = '&';
508 *out++ = 'q';
509 *out++ = 'u';
510 *out++ = 'o';
511 *out++ = 't';
512 *out++ = ';';
513 } else if ((*cur == '\'') && (!html)) {
514 *out++ = '&';
515 *out++ = 'a';
516 *out++ = 'p';
517 *out++ = 'o';
518 *out++ = 's';
519 *out++ = ';';
520 } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
521 (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
522 /*
523 * default case, just copy !
524 */
525 *out++ = *cur;
526#ifndef USE_UTF_8
527 } else if ((sizeof(xmlChar) == 1) && (*cur >= 0x80)) {
528 char buf[10], *ptr;
529
Owen Taylor3473f882001-02-23 17:55:21 +0000530 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000531 buf[sizeof(buf) - 1] = 0;
532 ptr = buf;
533 while (*ptr != 0) *out++ = *ptr++;
534#endif
Daniel Veillard34ba3872003-07-15 13:34:05 +0000535 } else if (IS_CHAR((unsigned int) *cur)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000536 char buf[10], *ptr;
537
Owen Taylor3473f882001-02-23 17:55:21 +0000538 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000539 buf[sizeof(buf) - 1] = 0;
540 ptr = buf;
541 while (*ptr != 0) *out++ = *ptr++;
542 }
543#if 0
544 else {
545 /*
546 * default case, this is not a valid char !
547 * Skip it...
548 */
549 xmlGenericError(xmlGenericErrorContext,
550 "xmlEncodeEntities: invalid char %d\n", (int) *cur);
551 }
552#endif
553 cur++;
554 }
555 *out++ = 0;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000556 return(static_buffer);
Owen Taylor3473f882001-02-23 17:55:21 +0000557}
558
/*
 * Macro used to grow the current buffer.
 *
 * It expects the locals `buffer` (xmlChar *) and `buffer_size` (int) in
 * the calling function and doubles the allocation.  If the reallocation
 * fails the old buffer is released, not leaked, and the calling function
 * returns NULL.
 */
#define growBufferReentrant() {						\
    xmlChar *grown_buffer_;						\
    buffer_size *= 2;							\
    grown_buffer_ = (xmlChar *)						\
		xmlRealloc(buffer, buffer_size * sizeof(xmlChar));	\
    if (grown_buffer_ == NULL) {					\
	xmlGenericError(xmlGenericErrorContext, "realloc failed\n");	\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = grown_buffer_;						\
}
571
572
573/**
574 * xmlEncodeEntitiesReentrant:
575 * @doc: the document containing the string
576 * @input: A string to convert to XML.
577 *
578 * Do a global encoding of a string, replacing the predefined entities
579 * and non ASCII values with their entities and CharRef counterparts.
580 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
581 * must be deallocated.
582 *
583 * Returns A newly allocated string with the substitution done.
584 */
585xmlChar *
586xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
587 const xmlChar *cur = input;
588 xmlChar *buffer = NULL;
589 xmlChar *out = NULL;
590 int buffer_size = 0;
591 int html = 0;
592
593 if (input == NULL) return(NULL);
594 if (doc != NULL)
595 html = (doc->type == XML_HTML_DOCUMENT_NODE);
596
597 /*
598 * allocate an translation buffer.
599 */
600 buffer_size = 1000;
601 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
602 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +0000603 xmlGenericError(xmlGenericErrorContext, "malloc failed\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000604 return(NULL);
605 }
606 out = buffer;
607
608 while (*cur != '\0') {
609 if (out - buffer > buffer_size - 100) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000610 int indx = out - buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000611
612 growBufferReentrant();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000613 out = &buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000614 }
615
616 /*
617 * By default one have to encode at least '<', '>', '"' and '&' !
618 */
619 if (*cur == '<') {
620 *out++ = '&';
621 *out++ = 'l';
622 *out++ = 't';
623 *out++ = ';';
624 } else if (*cur == '>') {
625 *out++ = '&';
626 *out++ = 'g';
627 *out++ = 't';
628 *out++ = ';';
629 } else if (*cur == '&') {
630 *out++ = '&';
631 *out++ = 'a';
632 *out++ = 'm';
633 *out++ = 'p';
634 *out++ = ';';
Daniel Veillard8265a182003-06-13 10:05:56 +0000635#if 0
Owen Taylor3473f882001-02-23 17:55:21 +0000636 } else if (*cur == '"') {
637 *out++ = '&';
638 *out++ = 'q';
639 *out++ = 'u';
640 *out++ = 'o';
641 *out++ = 't';
642 *out++ = ';';
Owen Taylor3473f882001-02-23 17:55:21 +0000643 } else if ((*cur == '\'') && (!html)) {
644 *out++ = '&';
645 *out++ = 'a';
646 *out++ = 'p';
647 *out++ = 'o';
648 *out++ = 's';
649 *out++ = ';';
650#endif
651 } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
Daniel Veillard0046c0f2003-02-23 13:52:30 +0000652 (*cur == '\n') || (*cur == '\t') || ((html) && (*cur == '\r'))) {
Owen Taylor3473f882001-02-23 17:55:21 +0000653 /*
654 * default case, just copy !
655 */
656 *out++ = *cur;
657 } else if (*cur >= 0x80) {
Daniel Veillard122376b2001-04-24 12:12:30 +0000658 if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000659 /*
660 * Bjørn Reese <br@sseusa.com> provided the patch
661 xmlChar xc;
662 xc = (*cur & 0x3F) << 6;
663 if (cur[1] != 0) {
664 xc += *(++cur) & 0x3F;
665 *out++ = xc;
666 } else
667 */
668 *out++ = *cur;
669 } else {
670 /*
671 * We assume we have UTF-8 input.
672 */
673 char buf[10], *ptr;
674 int val = 0, l = 1;
675
676 if (*cur < 0xC0) {
677 xmlGenericError(xmlGenericErrorContext,
678 "xmlEncodeEntitiesReentrant : input not UTF-8\n");
Daniel Veillard122376b2001-04-24 12:12:30 +0000679 if (doc != NULL)
680 doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
Owen Taylor3473f882001-02-23 17:55:21 +0000681 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000682 buf[sizeof(buf) - 1] = 0;
683 ptr = buf;
684 while (*ptr != 0) *out++ = *ptr++;
Daniel Veillard05c13a22001-09-09 08:38:09 +0000685 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +0000686 continue;
687 } else if (*cur < 0xE0) {
688 val = (cur[0]) & 0x1F;
689 val <<= 6;
690 val |= (cur[1]) & 0x3F;
691 l = 2;
692 } else if (*cur < 0xF0) {
693 val = (cur[0]) & 0x0F;
694 val <<= 6;
695 val |= (cur[1]) & 0x3F;
696 val <<= 6;
697 val |= (cur[2]) & 0x3F;
698 l = 3;
699 } else if (*cur < 0xF8) {
700 val = (cur[0]) & 0x07;
701 val <<= 6;
702 val |= (cur[1]) & 0x3F;
703 val <<= 6;
704 val |= (cur[2]) & 0x3F;
705 val <<= 6;
706 val |= (cur[3]) & 0x3F;
707 l = 4;
708 }
709 if ((l == 1) || (!IS_CHAR(val))) {
710 xmlGenericError(xmlGenericErrorContext,
711 "xmlEncodeEntitiesReentrant : char out of range\n");
Daniel Veillard122376b2001-04-24 12:12:30 +0000712 if (doc != NULL)
713 doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
Owen Taylor3473f882001-02-23 17:55:21 +0000714 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000715 buf[sizeof(buf) - 1] = 0;
716 ptr = buf;
717 while (*ptr != 0) *out++ = *ptr++;
718 cur++;
719 continue;
720 }
721 /*
722 * We could do multiple things here. Just save as a char ref
723 */
Daniel Veillard16698282001-09-14 10:29:27 +0000724 if (html)
725 snprintf(buf, sizeof(buf), "&#%d;", val);
726 else
727 snprintf(buf, sizeof(buf), "&#x%X;", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000728 buf[sizeof(buf) - 1] = 0;
729 ptr = buf;
730 while (*ptr != 0) *out++ = *ptr++;
731 cur += l;
732 continue;
733 }
Daniel Veillard34ba3872003-07-15 13:34:05 +0000734 } else if (IS_CHAR((unsigned int) *cur)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000735 char buf[10], *ptr;
736
Owen Taylor3473f882001-02-23 17:55:21 +0000737 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000738 buf[sizeof(buf) - 1] = 0;
739 ptr = buf;
740 while (*ptr != 0) *out++ = *ptr++;
741 }
742#if 0
743 else {
744 /*
745 * default case, this is not a valid char !
746 * Skip it...
747 */
748 xmlGenericError(xmlGenericErrorContext,
749 "xmlEncodeEntities: invalid char %d\n", (int) *cur);
750 }
751#endif
752 cur++;
753 }
754 *out++ = 0;
755 return(buffer);
756}
757
758/**
759 * xmlEncodeSpecialChars:
760 * @doc: the document containing the string
761 * @input: A string to convert to XML.
762 *
763 * Do a global encoding of a string, replacing the predefined entities
764 * this routine is reentrant, and result must be deallocated.
765 *
766 * Returns A newly allocated string with the substitution done.
767 */
768xmlChar *
Daniel Veillard9ee35f32003-09-28 00:19:54 +0000769xmlEncodeSpecialChars(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *input) {
Owen Taylor3473f882001-02-23 17:55:21 +0000770 const xmlChar *cur = input;
771 xmlChar *buffer = NULL;
772 xmlChar *out = NULL;
773 int buffer_size = 0;
William M. Brack899e64a2003-09-26 18:03:42 +0000774#if 0
Owen Taylor3473f882001-02-23 17:55:21 +0000775 int html = 0;
776
Owen Taylor3473f882001-02-23 17:55:21 +0000777 if (doc != NULL)
778 html = (doc->type == XML_HTML_DOCUMENT_NODE);
William M. Brack899e64a2003-09-26 18:03:42 +0000779#endif
780 if (input == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000781
782 /*
783 * allocate an translation buffer.
784 */
785 buffer_size = 1000;
786 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
787 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +0000788 xmlGenericError(xmlGenericErrorContext, "malloc failed\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000789 return(NULL);
790 }
791 out = buffer;
792
793 while (*cur != '\0') {
794 if (out - buffer > buffer_size - 10) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000795 int indx = out - buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000796
797 growBufferReentrant();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000798 out = &buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000799 }
800
801 /*
802 * By default one have to encode at least '<', '>', '"' and '&' !
803 */
804 if (*cur == '<') {
805 *out++ = '&';
806 *out++ = 'l';
807 *out++ = 't';
808 *out++ = ';';
809 } else if (*cur == '>') {
810 *out++ = '&';
811 *out++ = 'g';
812 *out++ = 't';
813 *out++ = ';';
814 } else if (*cur == '&') {
815 *out++ = '&';
816 *out++ = 'a';
817 *out++ = 'm';
818 *out++ = 'p';
819 *out++ = ';';
820 } else if (*cur == '"') {
821 *out++ = '&';
822 *out++ = 'q';
823 *out++ = 'u';
824 *out++ = 'o';
825 *out++ = 't';
826 *out++ = ';';
Daniel Veillard19ab45b2003-02-26 15:49:03 +0000827 } else if (*cur == '\r') {
828 *out++ = '&';
829 *out++ = '#';
830 *out++ = '1';
831 *out++ = '3';
832 *out++ = ';';
Owen Taylor3473f882001-02-23 17:55:21 +0000833 } else {
834 /*
835 * Works because on UTF-8, all extended sequences cannot
836 * result in bytes in the ASCII range.
837 */
838 *out++ = *cur;
839 }
840 cur++;
841 }
842 *out++ = 0;
843 return(buffer);
844}
845
846/**
847 * xmlCreateEntitiesTable:
848 *
849 * create and initialize an empty entities hash table.
850 *
851 * Returns the xmlEntitiesTablePtr just created or NULL in case of error.
852 */
853xmlEntitiesTablePtr
854xmlCreateEntitiesTable(void) {
855 return((xmlEntitiesTablePtr) xmlHashCreate(0));
856}
857
858/**
Daniel Veillard2d84a892002-12-30 00:01:08 +0000859 * xmlFreeEntityWrapper:
860 * @entity: An entity
861 * @name: its name
862 *
863 * Deallocate the memory used by an entities in the hash table.
864 */
865static void
866xmlFreeEntityWrapper(xmlEntityPtr entity,
867 const xmlChar *name ATTRIBUTE_UNUSED) {
868 if (entity != NULL)
869 xmlFreeEntity(entity);
870}
871
872/**
Owen Taylor3473f882001-02-23 17:55:21 +0000873 * xmlFreeEntitiesTable:
874 * @table: An entity table
875 *
876 * Deallocate the memory used by an entities hash table.
877 */
878void
879xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
Daniel Veillard2d84a892002-12-30 00:01:08 +0000880 xmlHashFree(table, (xmlHashDeallocator) xmlFreeEntityWrapper);
Owen Taylor3473f882001-02-23 17:55:21 +0000881}
882
883/**
884 * xmlCopyEntity:
885 * @ent: An entity
886 *
887 * Build a copy of an entity
888 *
889 * Returns the new xmlEntitiesPtr or NULL in case of error.
890 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000891static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +0000892xmlCopyEntity(xmlEntityPtr ent) {
893 xmlEntityPtr cur;
894
895 cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
896 if (cur == NULL) {
897 xmlGenericError(xmlGenericErrorContext,
898 "xmlCopyEntity: out of memory !\n");
899 return(NULL);
900 }
901 memset(cur, 0, sizeof(xmlEntity));
Daniel Veillard845cce42002-01-09 11:51:37 +0000902 cur->type = XML_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +0000903
904 cur->etype = ent->etype;
905 if (ent->name != NULL)
906 cur->name = xmlStrdup(ent->name);
907 if (ent->ExternalID != NULL)
908 cur->ExternalID = xmlStrdup(ent->ExternalID);
909 if (ent->SystemID != NULL)
910 cur->SystemID = xmlStrdup(ent->SystemID);
911 if (ent->content != NULL)
912 cur->content = xmlStrdup(ent->content);
913 if (ent->orig != NULL)
914 cur->orig = xmlStrdup(ent->orig);
Daniel Veillard8ee9c8f2002-01-26 21:42:58 +0000915 if (ent->URI != NULL)
916 cur->URI = xmlStrdup(ent->URI);
Owen Taylor3473f882001-02-23 17:55:21 +0000917 return(cur);
918}
919
920/**
921 * xmlCopyEntitiesTable:
922 * @table: An entity table
923 *
924 * Build a copy of an entity table.
925 *
926 * Returns the new xmlEntitiesTablePtr or NULL in case of error.
927 */
928xmlEntitiesTablePtr
929xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
930 return(xmlHashCopy(table, (xmlHashCopier) xmlCopyEntity));
931}
932
933/**
934 * xmlDumpEntityDecl:
935 * @buf: An XML buffer.
936 * @ent: An entity table
937 *
938 * This will dump the content of the entity table as an XML DTD definition
939 */
940void
941xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
942 switch (ent->etype) {
943 case XML_INTERNAL_GENERAL_ENTITY:
944 xmlBufferWriteChar(buf, "<!ENTITY ");
945 xmlBufferWriteCHAR(buf, ent->name);
946 xmlBufferWriteChar(buf, " ");
947 if (ent->orig != NULL)
948 xmlBufferWriteQuotedString(buf, ent->orig);
949 else
950 xmlBufferWriteQuotedString(buf, ent->content);
951 xmlBufferWriteChar(buf, ">\n");
952 break;
953 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
954 xmlBufferWriteChar(buf, "<!ENTITY ");
955 xmlBufferWriteCHAR(buf, ent->name);
956 if (ent->ExternalID != NULL) {
957 xmlBufferWriteChar(buf, " PUBLIC ");
958 xmlBufferWriteQuotedString(buf, ent->ExternalID);
959 xmlBufferWriteChar(buf, " ");
960 xmlBufferWriteQuotedString(buf, ent->SystemID);
961 } else {
962 xmlBufferWriteChar(buf, " SYSTEM ");
963 xmlBufferWriteQuotedString(buf, ent->SystemID);
964 }
965 xmlBufferWriteChar(buf, ">\n");
966 break;
967 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
968 xmlBufferWriteChar(buf, "<!ENTITY ");
969 xmlBufferWriteCHAR(buf, ent->name);
970 if (ent->ExternalID != NULL) {
971 xmlBufferWriteChar(buf, " PUBLIC ");
972 xmlBufferWriteQuotedString(buf, ent->ExternalID);
973 xmlBufferWriteChar(buf, " ");
974 xmlBufferWriteQuotedString(buf, ent->SystemID);
975 } else {
976 xmlBufferWriteChar(buf, " SYSTEM ");
977 xmlBufferWriteQuotedString(buf, ent->SystemID);
978 }
979 if (ent->content != NULL) { /* Should be true ! */
980 xmlBufferWriteChar(buf, " NDATA ");
981 if (ent->orig != NULL)
982 xmlBufferWriteCHAR(buf, ent->orig);
983 else
984 xmlBufferWriteCHAR(buf, ent->content);
985 }
986 xmlBufferWriteChar(buf, ">\n");
987 break;
988 case XML_INTERNAL_PARAMETER_ENTITY:
989 xmlBufferWriteChar(buf, "<!ENTITY % ");
990 xmlBufferWriteCHAR(buf, ent->name);
991 xmlBufferWriteChar(buf, " ");
992 if (ent->orig == NULL)
993 xmlBufferWriteQuotedString(buf, ent->content);
994 else
995 xmlBufferWriteQuotedString(buf, ent->orig);
996 xmlBufferWriteChar(buf, ">\n");
997 break;
998 case XML_EXTERNAL_PARAMETER_ENTITY:
999 xmlBufferWriteChar(buf, "<!ENTITY % ");
1000 xmlBufferWriteCHAR(buf, ent->name);
1001 if (ent->ExternalID != NULL) {
1002 xmlBufferWriteChar(buf, " PUBLIC ");
1003 xmlBufferWriteQuotedString(buf, ent->ExternalID);
1004 xmlBufferWriteChar(buf, " ");
1005 xmlBufferWriteQuotedString(buf, ent->SystemID);
1006 } else {
1007 xmlBufferWriteChar(buf, " SYSTEM ");
1008 xmlBufferWriteQuotedString(buf, ent->SystemID);
1009 }
1010 xmlBufferWriteChar(buf, ">\n");
1011 break;
1012 default:
1013 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001014 "xmlDumpEntitiesDecl: internal: unknown type %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001015 ent->etype);
1016 }
1017}
1018
1019/**
1020 * xmlDumpEntitiesTable:
1021 * @buf: An XML buffer.
1022 * @table: An entity table
1023 *
1024 * This will dump the content of the entity table as an XML DTD definition
1025 */
1026void
1027xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
1028 xmlHashScan(table, (xmlHashScanner)xmlDumpEntityDecl, buf);
1029}