blob: efc7b9fd8dfac06058482d1e0d20e225a1af6120 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002 * entities.c : implementation for the XML entities handling
Owen Taylor3473f882001-02-23 17:55:21 +00003 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00006 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00007 */
8
Daniel Veillard34ce8be2002-03-18 19:37:11 +00009#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000010#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000011
Owen Taylor3473f882001-02-23 17:55:21 +000012#include <string.h>
13#ifdef HAVE_STDLIB_H
14#include <stdlib.h>
15#endif
16#include <libxml/xmlmemory.h>
17#include <libxml/hash.h>
18#include <libxml/entities.h>
19#include <libxml/parser.h>
20#include <libxml/xmlerror.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000021#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000022
/*
 * The XML predefined entities: each entry pairs an entity name with the
 * literal text it stands for.
 */

struct xmlPredefinedEntityValue {
    const char *name;	/* entity name, without the '&' and ';' */
    const char *value;	/* replacement text of the entity */
};

/*
 * The table is only ever read, so it is declared const.
 */
static const struct xmlPredefinedEntityValue xmlPredefinedEntityValues[] = {
    { "lt", "<" },
    { "gt", ">" },
    { "apos", "'" },
    { "quot", "\"" },
    { "amp", "&" }
};
38
39/*
Daniel Veillardd79bcd12001-06-21 22:07:42 +000040 * TODO: This is GROSS, allocation of a 256 entry hash for
41 * a fixed number of 4 elements !
Owen Taylor3473f882001-02-23 17:55:21 +000042 */
Daniel Veillardb44025c2001-10-11 22:55:55 +000043static xmlHashTablePtr xmlPredefinedEntities = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000044
45/*
46 * xmlFreeEntity : clean-up an entity record.
47 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000048static void xmlFreeEntity(xmlEntityPtr entity) {
Owen Taylor3473f882001-02-23 17:55:21 +000049 if (entity == NULL) return;
50
Daniel Veillard22090732001-07-16 00:06:07 +000051 if ((entity->children) &&
52 (entity == (xmlEntityPtr) entity->children->parent))
Owen Taylor3473f882001-02-23 17:55:21 +000053 xmlFreeNodeList(entity->children);
54 if (entity->name != NULL)
55 xmlFree((char *) entity->name);
56 if (entity->ExternalID != NULL)
57 xmlFree((char *) entity->ExternalID);
58 if (entity->SystemID != NULL)
59 xmlFree((char *) entity->SystemID);
60 if (entity->URI != NULL)
61 xmlFree((char *) entity->URI);
62 if (entity->content != NULL)
63 xmlFree((char *) entity->content);
64 if (entity->orig != NULL)
65 xmlFree((char *) entity->orig);
Owen Taylor3473f882001-02-23 17:55:21 +000066 xmlFree(entity);
67}
68
69/*
70 * xmlAddEntity : register a new entity for an entities table.
71 */
72static xmlEntityPtr
73xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type,
74 const xmlChar *ExternalID, const xmlChar *SystemID,
75 const xmlChar *content) {
76 xmlEntitiesTablePtr table = NULL;
77 xmlEntityPtr ret;
78
79 if (name == NULL)
80 return(NULL);
81 switch (type) {
82 case XML_INTERNAL_GENERAL_ENTITY:
83 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
84 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
85 if (dtd->entities == NULL)
86 dtd->entities = xmlHashCreate(0);
87 table = dtd->entities;
88 break;
89 case XML_INTERNAL_PARAMETER_ENTITY:
90 case XML_EXTERNAL_PARAMETER_ENTITY:
91 if (dtd->pentities == NULL)
92 dtd->pentities = xmlHashCreate(0);
93 table = dtd->pentities;
94 break;
95 case XML_INTERNAL_PREDEFINED_ENTITY:
96 if (xmlPredefinedEntities == NULL)
97 xmlPredefinedEntities = xmlHashCreate(8);
98 table = xmlPredefinedEntities;
99 }
100 if (table == NULL)
101 return(NULL);
102 ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
103 if (ret == NULL) {
104 xmlGenericError(xmlGenericErrorContext,
105 "xmlAddEntity: out of memory\n");
106 return(NULL);
107 }
108 memset(ret, 0, sizeof(xmlEntity));
109 ret->type = XML_ENTITY_DECL;
110
111 /*
112 * fill the structure.
113 */
114 ret->name = xmlStrdup(name);
115 ret->etype = (xmlEntityType) type;
116 if (ExternalID != NULL)
117 ret->ExternalID = xmlStrdup(ExternalID);
118 if (SystemID != NULL)
119 ret->SystemID = xmlStrdup(SystemID);
120 if (content != NULL) {
121 ret->length = xmlStrlen(content);
122 ret->content = xmlStrndup(content, ret->length);
123 } else {
124 ret->length = 0;
125 ret->content = NULL;
126 }
127 ret->URI = NULL; /* to be computed by the layer knowing
128 the defining entity */
129 ret->orig = NULL;
130
131 if (xmlHashAddEntry(table, name, ret)) {
132 /*
133 * entity was already defined at another level.
134 */
135 xmlFreeEntity(ret);
136 return(NULL);
137 }
138 return(ret);
139}
140
141/**
142 * xmlInitializePredefinedEntities:
143 *
144 * Set up the predefined entities.
145 */
146void xmlInitializePredefinedEntities(void) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000147 unsigned int i;
Owen Taylor3473f882001-02-23 17:55:21 +0000148 xmlChar name[50];
149 xmlChar value[50];
150 const char *in;
151 xmlChar *out;
152
153 if (xmlPredefinedEntities != NULL) return;
154
155 xmlPredefinedEntities = xmlCreateEntitiesTable();
156 for (i = 0;i < sizeof(xmlPredefinedEntityValues) /
157 sizeof(xmlPredefinedEntityValues[0]);i++) {
158 in = xmlPredefinedEntityValues[i].name;
159 out = &name[0];
160 for (;(*out++ = (xmlChar) *in);)in++;
161 in = xmlPredefinedEntityValues[i].value;
162 out = &value[0];
163 for (;(*out++ = (xmlChar) *in);)in++;
164
165 xmlAddEntity(NULL, (const xmlChar *) &name[0],
166 XML_INTERNAL_PREDEFINED_ENTITY, NULL, NULL,
167 &value[0]);
168 }
169}
170
171/**
172 * xmlCleanupPredefinedEntities:
173 *
174 * Cleanup up the predefined entities table.
175 */
176void xmlCleanupPredefinedEntities(void) {
177 if (xmlPredefinedEntities == NULL) return;
178
179 xmlFreeEntitiesTable(xmlPredefinedEntities);
180 xmlPredefinedEntities = NULL;
181}
182
183/**
184 * xmlGetPredefinedEntity:
185 * @name: the entity name
186 *
187 * Check whether this name is an predefined entity.
188 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000189 * Returns NULL if not, otherwise the entity
Owen Taylor3473f882001-02-23 17:55:21 +0000190 */
191xmlEntityPtr
192xmlGetPredefinedEntity(const xmlChar *name) {
193 if (xmlPredefinedEntities == NULL)
194 xmlInitializePredefinedEntities();
195 return((xmlEntityPtr) xmlHashLookup(xmlPredefinedEntities, name));
196}
197
198/**
199 * xmlAddDtdEntity:
200 * @doc: the document
201 * @name: the entity name
202 * @type: the entity type XML_xxx_yyy_ENTITY
203 * @ExternalID: the entity external ID if available
204 * @SystemID: the entity system ID if available
205 * @content: the entity content
206 *
207 * Register a new entity for this document DTD external subset.
208 *
209 * Returns a pointer to the entity or NULL in case of error
210 */
211xmlEntityPtr
212xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
213 const xmlChar *ExternalID, const xmlChar *SystemID,
214 const xmlChar *content) {
215 xmlEntityPtr ret;
216 xmlDtdPtr dtd;
217
218 if (doc == NULL) {
219 xmlGenericError(xmlGenericErrorContext,
220 "xmlAddDtdEntity: doc == NULL !\n");
221 return(NULL);
222 }
223 if (doc->extSubset == NULL) {
224 xmlGenericError(xmlGenericErrorContext,
225 "xmlAddDtdEntity: document without external subset !\n");
226 return(NULL);
227 }
228 dtd = doc->extSubset;
229 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
230 if (ret == NULL) return(NULL);
231
232 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000233 * Link it to the DTD
Owen Taylor3473f882001-02-23 17:55:21 +0000234 */
235 ret->parent = dtd;
236 ret->doc = dtd->doc;
237 if (dtd->last == NULL) {
238 dtd->children = dtd->last = (xmlNodePtr) ret;
239 } else {
240 dtd->last->next = (xmlNodePtr) ret;
241 ret->prev = dtd->last;
242 dtd->last = (xmlNodePtr) ret;
243 }
244 return(ret);
245}
246
247/**
248 * xmlAddDocEntity:
249 * @doc: the document
250 * @name: the entity name
251 * @type: the entity type XML_xxx_yyy_ENTITY
252 * @ExternalID: the entity external ID if available
253 * @SystemID: the entity system ID if available
254 * @content: the entity content
255 *
256 * Register a new entity for this document.
257 *
258 * Returns a pointer to the entity or NULL in case of error
259 */
260xmlEntityPtr
261xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
262 const xmlChar *ExternalID, const xmlChar *SystemID,
263 const xmlChar *content) {
264 xmlEntityPtr ret;
265 xmlDtdPtr dtd;
266
267 if (doc == NULL) {
268 xmlGenericError(xmlGenericErrorContext,
269 "xmlAddDocEntity: document is NULL !\n");
270 return(NULL);
271 }
272 if (doc->intSubset == NULL) {
273 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000274 "xmlAddDocEntity: document without internal subset !\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000275 return(NULL);
276 }
277 dtd = doc->intSubset;
278 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
279 if (ret == NULL) return(NULL);
280
281 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000282 * Link it to the DTD
Owen Taylor3473f882001-02-23 17:55:21 +0000283 */
284 ret->parent = dtd;
285 ret->doc = dtd->doc;
286 if (dtd->last == NULL) {
287 dtd->children = dtd->last = (xmlNodePtr) ret;
288 } else {
289 dtd->last->next = (xmlNodePtr) ret;
290 ret->prev = dtd->last;
291 dtd->last = (xmlNodePtr) ret;
292 }
293 return(ret);
294}
295
296/**
297 * xmlGetEntityFromTable:
298 * @table: an entity table
299 * @name: the entity name
300 * @parameter: look for parameter entities
301 *
302 * Do an entity lookup in the table.
303 * returns the corresponding parameter entity, if found.
304 *
305 * Returns A pointer to the entity structure or NULL if not found.
306 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000307static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +0000308xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) {
309 return((xmlEntityPtr) xmlHashLookup(table, name));
310}
311
312/**
313 * xmlGetParameterEntity:
314 * @doc: the document referencing the entity
315 * @name: the entity name
316 *
317 * Do an entity lookup in the internal and external subsets and
318 * returns the corresponding parameter entity, if found.
319 *
320 * Returns A pointer to the entity structure or NULL if not found.
321 */
322xmlEntityPtr
323xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
324 xmlEntitiesTablePtr table;
325 xmlEntityPtr ret;
326
Daniel Veillard36065812002-01-24 15:02:46 +0000327 if (doc == NULL)
328 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000329 if ((doc->intSubset != NULL) && (doc->intSubset->pentities != NULL)) {
330 table = (xmlEntitiesTablePtr) doc->intSubset->pentities;
331 ret = xmlGetEntityFromTable(table, name);
332 if (ret != NULL)
333 return(ret);
334 }
335 if ((doc->extSubset != NULL) && (doc->extSubset->pentities != NULL)) {
336 table = (xmlEntitiesTablePtr) doc->extSubset->pentities;
337 return(xmlGetEntityFromTable(table, name));
338 }
339 return(NULL);
340}
341
342/**
343 * xmlGetDtdEntity:
344 * @doc: the document referencing the entity
345 * @name: the entity name
346 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000347 * Do an entity lookup in the DTD entity hash table and
Owen Taylor3473f882001-02-23 17:55:21 +0000348 * returns the corresponding entity, if found.
Daniel Veillard36065812002-01-24 15:02:46 +0000349 * Note: the first argument is the document node, not the DTD node.
Owen Taylor3473f882001-02-23 17:55:21 +0000350 *
351 * Returns A pointer to the entity structure or NULL if not found.
352 */
353xmlEntityPtr
354xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
355 xmlEntitiesTablePtr table;
356
Daniel Veillard36065812002-01-24 15:02:46 +0000357 if (doc == NULL)
358 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000359 if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
360 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
361 return(xmlGetEntityFromTable(table, name));
362 }
363 return(NULL);
364}
365
366/**
367 * xmlGetDocEntity:
368 * @doc: the document referencing the entity
369 * @name: the entity name
370 *
371 * Do an entity lookup in the document entity hash table and
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000372 * returns the corresponding entity, otherwise a lookup is done
Owen Taylor3473f882001-02-23 17:55:21 +0000373 * in the predefined entities too.
374 *
375 * Returns A pointer to the entity structure or NULL if not found.
376 */
377xmlEntityPtr
378xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
379 xmlEntityPtr cur;
380 xmlEntitiesTablePtr table;
381
382 if (doc != NULL) {
383 if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
384 table = (xmlEntitiesTablePtr) doc->intSubset->entities;
385 cur = xmlGetEntityFromTable(table, name);
386 if (cur != NULL)
387 return(cur);
388 }
Daniel Veillard28757702002-02-18 11:19:30 +0000389 if (doc->standalone != 1) {
390 if ((doc->extSubset != NULL) &&
391 (doc->extSubset->entities != NULL)) {
392 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
393 cur = xmlGetEntityFromTable(table, name);
394 if (cur != NULL)
395 return(cur);
396 }
Owen Taylor3473f882001-02-23 17:55:21 +0000397 }
398 }
399 if (xmlPredefinedEntities == NULL)
400 xmlInitializePredefinedEntities();
401 table = xmlPredefinedEntities;
402 return(xmlGetEntityFromTable(table, name));
403}
404
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * The surrogate range and values above #x10FFFF are rejected as the
 * production requires.  The argument is evaluated several times, so it
 * must not have side effects.
 */
#define IS_CHAR(c)							\
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) ||		\
     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||				\
     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||				\
     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
413
414/*
415 * A buffer used for converting entities to their equivalent and back.
416 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000417static int static_buffer_size = 0;
418static xmlChar *static_buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000419
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000420static int growBuffer(void) {
421 static_buffer_size *= 2;
422 static_buffer = (xmlChar *) xmlRealloc(static_buffer, static_buffer_size * sizeof(xmlChar));
423 if (static_buffer == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000424 perror("realloc failed");
425 return(-1);
426 }
427 return(0);
428}
429
430
431/**
432 * xmlEncodeEntities:
433 * @doc: the document containing the string
434 * @input: A string to convert to XML.
435 *
436 * Do a global encoding of a string, replacing the predefined entities
437 * and non ASCII values with their entities and CharRef counterparts.
438 *
439 * TODO: remove xmlEncodeEntities, once we are not afraid of breaking binary
440 * compatibility
441 *
442 * People must migrate their code to xmlEncodeEntitiesReentrant !
443 * This routine will issue a warning when encountered.
444 *
445 * Returns A newly allocated string with the substitution done.
446 */
447const xmlChar *
448xmlEncodeEntities(xmlDocPtr doc, const xmlChar *input) {
449 const xmlChar *cur = input;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000450 xmlChar *out = static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000451 static int warning = 1;
452 int html = 0;
453
454
455 if (warning) {
456 xmlGenericError(xmlGenericErrorContext,
457 "Deprecated API xmlEncodeEntities() used\n");
458 xmlGenericError(xmlGenericErrorContext,
459 " change code to use xmlEncodeEntitiesReentrant()\n");
460 warning = 0;
461 }
462
463 if (input == NULL) return(NULL);
464 if (doc != NULL)
465 html = (doc->type == XML_HTML_DOCUMENT_NODE);
466
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000467 if (static_buffer == NULL) {
468 static_buffer_size = 1000;
469 static_buffer = (xmlChar *) xmlMalloc(static_buffer_size * sizeof(xmlChar));
470 if (static_buffer == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000471 perror("malloc failed");
472 return(NULL);
473 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000474 out = static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000475 }
476 while (*cur != '\0') {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000477 if (out - static_buffer > static_buffer_size - 100) {
478 int indx = out - static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000479
480 growBuffer();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000481 out = &static_buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000482 }
483
484 /*
485 * By default one have to encode at least '<', '>', '"' and '&' !
486 */
487 if (*cur == '<') {
488 *out++ = '&';
489 *out++ = 'l';
490 *out++ = 't';
491 *out++ = ';';
492 } else if (*cur == '>') {
493 *out++ = '&';
494 *out++ = 'g';
495 *out++ = 't';
496 *out++ = ';';
497 } else if (*cur == '&') {
498 *out++ = '&';
499 *out++ = 'a';
500 *out++ = 'm';
501 *out++ = 'p';
502 *out++ = ';';
503 } else if (*cur == '"') {
504 *out++ = '&';
505 *out++ = 'q';
506 *out++ = 'u';
507 *out++ = 'o';
508 *out++ = 't';
509 *out++ = ';';
510 } else if ((*cur == '\'') && (!html)) {
511 *out++ = '&';
512 *out++ = 'a';
513 *out++ = 'p';
514 *out++ = 'o';
515 *out++ = 's';
516 *out++ = ';';
517 } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
518 (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
519 /*
520 * default case, just copy !
521 */
522 *out++ = *cur;
523#ifndef USE_UTF_8
524 } else if ((sizeof(xmlChar) == 1) && (*cur >= 0x80)) {
525 char buf[10], *ptr;
526
Owen Taylor3473f882001-02-23 17:55:21 +0000527 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000528 buf[sizeof(buf) - 1] = 0;
529 ptr = buf;
530 while (*ptr != 0) *out++ = *ptr++;
531#endif
532 } else if (IS_CHAR(*cur)) {
533 char buf[10], *ptr;
534
Owen Taylor3473f882001-02-23 17:55:21 +0000535 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000536 buf[sizeof(buf) - 1] = 0;
537 ptr = buf;
538 while (*ptr != 0) *out++ = *ptr++;
539 }
540#if 0
541 else {
542 /*
543 * default case, this is not a valid char !
544 * Skip it...
545 */
546 xmlGenericError(xmlGenericErrorContext,
547 "xmlEncodeEntities: invalid char %d\n", (int) *cur);
548 }
549#endif
550 cur++;
551 }
552 *out++ = 0;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000553 return(static_buffer);
Owen Taylor3473f882001-02-23 17:55:21 +0000554}
555
/*
 * Macro used to grow the current buffer.
 *
 * It expects the locals `buffer' (xmlChar *) and `buffer_size' (int) in
 * the calling function and doubles the buffer.  If the reallocation
 * fails the old buffer is freed, so it is not leaked, and the calling
 * function returns NULL.
 */
#define growBufferReentrant() do {					\
    xmlChar *grown_buffer_;						\
    buffer_size *= 2;							\
    grown_buffer_ = (xmlChar *)						\
		xmlRealloc(buffer, buffer_size * sizeof(xmlChar));	\
    if (grown_buffer_ == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = grown_buffer_;						\
} while (0)
568
569
570/**
571 * xmlEncodeEntitiesReentrant:
572 * @doc: the document containing the string
573 * @input: A string to convert to XML.
574 *
575 * Do a global encoding of a string, replacing the predefined entities
576 * and non ASCII values with their entities and CharRef counterparts.
577 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
578 * must be deallocated.
579 *
580 * Returns A newly allocated string with the substitution done.
581 */
582xmlChar *
583xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
584 const xmlChar *cur = input;
585 xmlChar *buffer = NULL;
586 xmlChar *out = NULL;
587 int buffer_size = 0;
588 int html = 0;
589
590 if (input == NULL) return(NULL);
591 if (doc != NULL)
592 html = (doc->type == XML_HTML_DOCUMENT_NODE);
593
594 /*
595 * allocate an translation buffer.
596 */
597 buffer_size = 1000;
598 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
599 if (buffer == NULL) {
600 perror("malloc failed");
601 return(NULL);
602 }
603 out = buffer;
604
605 while (*cur != '\0') {
606 if (out - buffer > buffer_size - 100) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000607 int indx = out - buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000608
609 growBufferReentrant();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000610 out = &buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000611 }
612
613 /*
614 * By default one have to encode at least '<', '>', '"' and '&' !
615 */
616 if (*cur == '<') {
617 *out++ = '&';
618 *out++ = 'l';
619 *out++ = 't';
620 *out++ = ';';
621 } else if (*cur == '>') {
622 *out++ = '&';
623 *out++ = 'g';
624 *out++ = 't';
625 *out++ = ';';
626 } else if (*cur == '&') {
627 *out++ = '&';
628 *out++ = 'a';
629 *out++ = 'm';
630 *out++ = 'p';
631 *out++ = ';';
632 } else if (*cur == '"') {
633 *out++ = '&';
634 *out++ = 'q';
635 *out++ = 'u';
636 *out++ = 'o';
637 *out++ = 't';
638 *out++ = ';';
639#if 0
640 } else if ((*cur == '\'') && (!html)) {
641 *out++ = '&';
642 *out++ = 'a';
643 *out++ = 'p';
644 *out++ = 'o';
645 *out++ = 's';
646 *out++ = ';';
647#endif
648 } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
649 (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
650 /*
651 * default case, just copy !
652 */
653 *out++ = *cur;
654 } else if (*cur >= 0x80) {
Daniel Veillard122376b2001-04-24 12:12:30 +0000655 if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000656 /*
657 * Bjørn Reese <br@sseusa.com> provided the patch
658 xmlChar xc;
659 xc = (*cur & 0x3F) << 6;
660 if (cur[1] != 0) {
661 xc += *(++cur) & 0x3F;
662 *out++ = xc;
663 } else
664 */
665 *out++ = *cur;
666 } else {
667 /*
668 * We assume we have UTF-8 input.
669 */
670 char buf[10], *ptr;
671 int val = 0, l = 1;
672
673 if (*cur < 0xC0) {
674 xmlGenericError(xmlGenericErrorContext,
675 "xmlEncodeEntitiesReentrant : input not UTF-8\n");
Daniel Veillard122376b2001-04-24 12:12:30 +0000676 if (doc != NULL)
677 doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
Owen Taylor3473f882001-02-23 17:55:21 +0000678 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000679 buf[sizeof(buf) - 1] = 0;
680 ptr = buf;
681 while (*ptr != 0) *out++ = *ptr++;
Daniel Veillard05c13a22001-09-09 08:38:09 +0000682 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +0000683 continue;
684 } else if (*cur < 0xE0) {
685 val = (cur[0]) & 0x1F;
686 val <<= 6;
687 val |= (cur[1]) & 0x3F;
688 l = 2;
689 } else if (*cur < 0xF0) {
690 val = (cur[0]) & 0x0F;
691 val <<= 6;
692 val |= (cur[1]) & 0x3F;
693 val <<= 6;
694 val |= (cur[2]) & 0x3F;
695 l = 3;
696 } else if (*cur < 0xF8) {
697 val = (cur[0]) & 0x07;
698 val <<= 6;
699 val |= (cur[1]) & 0x3F;
700 val <<= 6;
701 val |= (cur[2]) & 0x3F;
702 val <<= 6;
703 val |= (cur[3]) & 0x3F;
704 l = 4;
705 }
706 if ((l == 1) || (!IS_CHAR(val))) {
707 xmlGenericError(xmlGenericErrorContext,
708 "xmlEncodeEntitiesReentrant : char out of range\n");
Daniel Veillard122376b2001-04-24 12:12:30 +0000709 if (doc != NULL)
710 doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
Owen Taylor3473f882001-02-23 17:55:21 +0000711 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000712 buf[sizeof(buf) - 1] = 0;
713 ptr = buf;
714 while (*ptr != 0) *out++ = *ptr++;
715 cur++;
716 continue;
717 }
718 /*
719 * We could do multiple things here. Just save as a char ref
720 */
Daniel Veillard16698282001-09-14 10:29:27 +0000721 if (html)
722 snprintf(buf, sizeof(buf), "&#%d;", val);
723 else
724 snprintf(buf, sizeof(buf), "&#x%X;", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000725 buf[sizeof(buf) - 1] = 0;
726 ptr = buf;
727 while (*ptr != 0) *out++ = *ptr++;
728 cur += l;
729 continue;
730 }
731 } else if (IS_CHAR(*cur)) {
732 char buf[10], *ptr;
733
Owen Taylor3473f882001-02-23 17:55:21 +0000734 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000735 buf[sizeof(buf) - 1] = 0;
736 ptr = buf;
737 while (*ptr != 0) *out++ = *ptr++;
738 }
739#if 0
740 else {
741 /*
742 * default case, this is not a valid char !
743 * Skip it...
744 */
745 xmlGenericError(xmlGenericErrorContext,
746 "xmlEncodeEntities: invalid char %d\n", (int) *cur);
747 }
748#endif
749 cur++;
750 }
751 *out++ = 0;
752 return(buffer);
753}
754
755/**
756 * xmlEncodeSpecialChars:
757 * @doc: the document containing the string
758 * @input: A string to convert to XML.
759 *
760 * Do a global encoding of a string, replacing the predefined entities
761 * this routine is reentrant, and result must be deallocated.
762 *
763 * Returns A newly allocated string with the substitution done.
764 */
765xmlChar *
766xmlEncodeSpecialChars(xmlDocPtr doc, const xmlChar *input) {
767 const xmlChar *cur = input;
768 xmlChar *buffer = NULL;
769 xmlChar *out = NULL;
770 int buffer_size = 0;
771 int html = 0;
772
773 if (input == NULL) return(NULL);
774 if (doc != NULL)
775 html = (doc->type == XML_HTML_DOCUMENT_NODE);
776
777 /*
778 * allocate an translation buffer.
779 */
780 buffer_size = 1000;
781 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
782 if (buffer == NULL) {
783 perror("malloc failed");
784 return(NULL);
785 }
786 out = buffer;
787
788 while (*cur != '\0') {
789 if (out - buffer > buffer_size - 10) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000790 int indx = out - buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000791
792 growBufferReentrant();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000793 out = &buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000794 }
795
796 /*
797 * By default one have to encode at least '<', '>', '"' and '&' !
798 */
799 if (*cur == '<') {
800 *out++ = '&';
801 *out++ = 'l';
802 *out++ = 't';
803 *out++ = ';';
804 } else if (*cur == '>') {
805 *out++ = '&';
806 *out++ = 'g';
807 *out++ = 't';
808 *out++ = ';';
809 } else if (*cur == '&') {
810 *out++ = '&';
811 *out++ = 'a';
812 *out++ = 'm';
813 *out++ = 'p';
814 *out++ = ';';
815 } else if (*cur == '"') {
816 *out++ = '&';
817 *out++ = 'q';
818 *out++ = 'u';
819 *out++ = 'o';
820 *out++ = 't';
821 *out++ = ';';
822 } else {
823 /*
824 * Works because on UTF-8, all extended sequences cannot
825 * result in bytes in the ASCII range.
826 */
827 *out++ = *cur;
828 }
829 cur++;
830 }
831 *out++ = 0;
832 return(buffer);
833}
834
835/**
836 * xmlCreateEntitiesTable:
837 *
838 * create and initialize an empty entities hash table.
839 *
840 * Returns the xmlEntitiesTablePtr just created or NULL in case of error.
841 */
842xmlEntitiesTablePtr
843xmlCreateEntitiesTable(void) {
844 return((xmlEntitiesTablePtr) xmlHashCreate(0));
845}
846
847/**
848 * xmlFreeEntitiesTable:
849 * @table: An entity table
850 *
851 * Deallocate the memory used by an entities hash table.
852 */
853void
854xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
855 xmlHashFree(table, (xmlHashDeallocator) xmlFreeEntity);
856}
857
858/**
859 * xmlCopyEntity:
860 * @ent: An entity
861 *
862 * Build a copy of an entity
863 *
864 * Returns the new xmlEntitiesPtr or NULL in case of error.
865 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000866static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +0000867xmlCopyEntity(xmlEntityPtr ent) {
868 xmlEntityPtr cur;
869
870 cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
871 if (cur == NULL) {
872 xmlGenericError(xmlGenericErrorContext,
873 "xmlCopyEntity: out of memory !\n");
874 return(NULL);
875 }
876 memset(cur, 0, sizeof(xmlEntity));
Daniel Veillard845cce42002-01-09 11:51:37 +0000877 cur->type = XML_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +0000878
879 cur->etype = ent->etype;
880 if (ent->name != NULL)
881 cur->name = xmlStrdup(ent->name);
882 if (ent->ExternalID != NULL)
883 cur->ExternalID = xmlStrdup(ent->ExternalID);
884 if (ent->SystemID != NULL)
885 cur->SystemID = xmlStrdup(ent->SystemID);
886 if (ent->content != NULL)
887 cur->content = xmlStrdup(ent->content);
888 if (ent->orig != NULL)
889 cur->orig = xmlStrdup(ent->orig);
Daniel Veillard8ee9c8f2002-01-26 21:42:58 +0000890 if (ent->URI != NULL)
891 cur->URI = xmlStrdup(ent->URI);
Owen Taylor3473f882001-02-23 17:55:21 +0000892 return(cur);
893}
894
895/**
896 * xmlCopyEntitiesTable:
897 * @table: An entity table
898 *
899 * Build a copy of an entity table.
900 *
901 * Returns the new xmlEntitiesTablePtr or NULL in case of error.
902 */
903xmlEntitiesTablePtr
904xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
905 return(xmlHashCopy(table, (xmlHashCopier) xmlCopyEntity));
906}
907
908/**
909 * xmlDumpEntityDecl:
910 * @buf: An XML buffer.
911 * @ent: An entity table
912 *
913 * This will dump the content of the entity table as an XML DTD definition
914 */
915void
916xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
917 switch (ent->etype) {
918 case XML_INTERNAL_GENERAL_ENTITY:
919 xmlBufferWriteChar(buf, "<!ENTITY ");
920 xmlBufferWriteCHAR(buf, ent->name);
921 xmlBufferWriteChar(buf, " ");
922 if (ent->orig != NULL)
923 xmlBufferWriteQuotedString(buf, ent->orig);
924 else
925 xmlBufferWriteQuotedString(buf, ent->content);
926 xmlBufferWriteChar(buf, ">\n");
927 break;
928 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
929 xmlBufferWriteChar(buf, "<!ENTITY ");
930 xmlBufferWriteCHAR(buf, ent->name);
931 if (ent->ExternalID != NULL) {
932 xmlBufferWriteChar(buf, " PUBLIC ");
933 xmlBufferWriteQuotedString(buf, ent->ExternalID);
934 xmlBufferWriteChar(buf, " ");
935 xmlBufferWriteQuotedString(buf, ent->SystemID);
936 } else {
937 xmlBufferWriteChar(buf, " SYSTEM ");
938 xmlBufferWriteQuotedString(buf, ent->SystemID);
939 }
940 xmlBufferWriteChar(buf, ">\n");
941 break;
942 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
943 xmlBufferWriteChar(buf, "<!ENTITY ");
944 xmlBufferWriteCHAR(buf, ent->name);
945 if (ent->ExternalID != NULL) {
946 xmlBufferWriteChar(buf, " PUBLIC ");
947 xmlBufferWriteQuotedString(buf, ent->ExternalID);
948 xmlBufferWriteChar(buf, " ");
949 xmlBufferWriteQuotedString(buf, ent->SystemID);
950 } else {
951 xmlBufferWriteChar(buf, " SYSTEM ");
952 xmlBufferWriteQuotedString(buf, ent->SystemID);
953 }
954 if (ent->content != NULL) { /* Should be true ! */
955 xmlBufferWriteChar(buf, " NDATA ");
956 if (ent->orig != NULL)
957 xmlBufferWriteCHAR(buf, ent->orig);
958 else
959 xmlBufferWriteCHAR(buf, ent->content);
960 }
961 xmlBufferWriteChar(buf, ">\n");
962 break;
963 case XML_INTERNAL_PARAMETER_ENTITY:
964 xmlBufferWriteChar(buf, "<!ENTITY % ");
965 xmlBufferWriteCHAR(buf, ent->name);
966 xmlBufferWriteChar(buf, " ");
967 if (ent->orig == NULL)
968 xmlBufferWriteQuotedString(buf, ent->content);
969 else
970 xmlBufferWriteQuotedString(buf, ent->orig);
971 xmlBufferWriteChar(buf, ">\n");
972 break;
973 case XML_EXTERNAL_PARAMETER_ENTITY:
974 xmlBufferWriteChar(buf, "<!ENTITY % ");
975 xmlBufferWriteCHAR(buf, ent->name);
976 if (ent->ExternalID != NULL) {
977 xmlBufferWriteChar(buf, " PUBLIC ");
978 xmlBufferWriteQuotedString(buf, ent->ExternalID);
979 xmlBufferWriteChar(buf, " ");
980 xmlBufferWriteQuotedString(buf, ent->SystemID);
981 } else {
982 xmlBufferWriteChar(buf, " SYSTEM ");
983 xmlBufferWriteQuotedString(buf, ent->SystemID);
984 }
985 xmlBufferWriteChar(buf, ">\n");
986 break;
987 default:
988 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000989 "xmlDumpEntitiesDecl: internal: unknown type %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000990 ent->etype);
991 }
992}
993
994/**
995 * xmlDumpEntitiesTable:
996 * @buf: An XML buffer.
997 * @table: An entity table
998 *
999 * This will dump the content of the entity table as an XML DTD definition
1000 */
1001void
1002xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
1003 xmlHashScan(table, (xmlHashScanner)xmlDumpEntityDecl, buf);
1004}