blob: a2306665f74d552c1c24926512f323fb8ed57a17 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
 * entities.c : implementation for the XML entities handling
3 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00006 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00007 */
8
Bjorn Reese70a9da52001-04-21 16:57:29 +00009#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000010
Owen Taylor3473f882001-02-23 17:55:21 +000011#include <string.h>
12#ifdef HAVE_STDLIB_H
13#include <stdlib.h>
14#endif
15#include <libxml/xmlmemory.h>
16#include <libxml/hash.h>
17#include <libxml/entities.h>
18#include <libxml/parser.h>
19#include <libxml/xmlerror.h>
20
/*
 * The XML predefined entities.
 *
 * Each row associates an entity name (without the '&' and ';'
 * delimiters) with its replacement text. The table is only ever read,
 * so it is declared const.
 */

struct xmlPredefinedEntityValue {
    const char *name;	/* entity name, e.g. "lt" */
    const char *value;	/* replacement text, e.g. "<" */
};
static const struct xmlPredefinedEntityValue xmlPredefinedEntityValues[] = {
    { "lt", "<" },
    { "gt", ">" },
    { "apos", "'" },
    { "quot", "\"" },
    { "amp", "&" }
};
36
37/*
 * TODO: This is GROSS, a whole hash table is allocated for a
 * fixed set of five predefined entities !
 *
 * Global table holding the predefined entities. It stays NULL until
 * xmlInitializePredefinedEntities() or xmlAddEntity() creates it, and
 * xmlCleanupPredefinedEntities() frees it and resets it to NULL.
 */
41xmlHashTablePtr xmlPredefinedEntities = NULL;
42
43/*
44 * xmlFreeEntity : clean-up an entity record.
45 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000046static void xmlFreeEntity(xmlEntityPtr entity) {
Owen Taylor3473f882001-02-23 17:55:21 +000047 if (entity == NULL) return;
48
Daniel Veillard22090732001-07-16 00:06:07 +000049 if ((entity->children) &&
50 (entity == (xmlEntityPtr) entity->children->parent))
Owen Taylor3473f882001-02-23 17:55:21 +000051 xmlFreeNodeList(entity->children);
52 if (entity->name != NULL)
53 xmlFree((char *) entity->name);
54 if (entity->ExternalID != NULL)
55 xmlFree((char *) entity->ExternalID);
56 if (entity->SystemID != NULL)
57 xmlFree((char *) entity->SystemID);
58 if (entity->URI != NULL)
59 xmlFree((char *) entity->URI);
60 if (entity->content != NULL)
61 xmlFree((char *) entity->content);
62 if (entity->orig != NULL)
63 xmlFree((char *) entity->orig);
Owen Taylor3473f882001-02-23 17:55:21 +000064 xmlFree(entity);
65}
66
67/*
68 * xmlAddEntity : register a new entity for an entities table.
69 */
70static xmlEntityPtr
71xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type,
72 const xmlChar *ExternalID, const xmlChar *SystemID,
73 const xmlChar *content) {
74 xmlEntitiesTablePtr table = NULL;
75 xmlEntityPtr ret;
76
77 if (name == NULL)
78 return(NULL);
79 switch (type) {
80 case XML_INTERNAL_GENERAL_ENTITY:
81 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
82 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
83 if (dtd->entities == NULL)
84 dtd->entities = xmlHashCreate(0);
85 table = dtd->entities;
86 break;
87 case XML_INTERNAL_PARAMETER_ENTITY:
88 case XML_EXTERNAL_PARAMETER_ENTITY:
89 if (dtd->pentities == NULL)
90 dtd->pentities = xmlHashCreate(0);
91 table = dtd->pentities;
92 break;
93 case XML_INTERNAL_PREDEFINED_ENTITY:
94 if (xmlPredefinedEntities == NULL)
95 xmlPredefinedEntities = xmlHashCreate(8);
96 table = xmlPredefinedEntities;
97 }
98 if (table == NULL)
99 return(NULL);
100 ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
101 if (ret == NULL) {
102 xmlGenericError(xmlGenericErrorContext,
103 "xmlAddEntity: out of memory\n");
104 return(NULL);
105 }
106 memset(ret, 0, sizeof(xmlEntity));
107 ret->type = XML_ENTITY_DECL;
108
109 /*
110 * fill the structure.
111 */
112 ret->name = xmlStrdup(name);
113 ret->etype = (xmlEntityType) type;
114 if (ExternalID != NULL)
115 ret->ExternalID = xmlStrdup(ExternalID);
116 if (SystemID != NULL)
117 ret->SystemID = xmlStrdup(SystemID);
118 if (content != NULL) {
119 ret->length = xmlStrlen(content);
120 ret->content = xmlStrndup(content, ret->length);
121 } else {
122 ret->length = 0;
123 ret->content = NULL;
124 }
125 ret->URI = NULL; /* to be computed by the layer knowing
126 the defining entity */
127 ret->orig = NULL;
128
129 if (xmlHashAddEntry(table, name, ret)) {
130 /*
131 * entity was already defined at another level.
132 */
133 xmlFreeEntity(ret);
134 return(NULL);
135 }
136 return(ret);
137}
138
139/**
140 * xmlInitializePredefinedEntities:
141 *
142 * Set up the predefined entities.
143 */
144void xmlInitializePredefinedEntities(void) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000145 unsigned int i;
Owen Taylor3473f882001-02-23 17:55:21 +0000146 xmlChar name[50];
147 xmlChar value[50];
148 const char *in;
149 xmlChar *out;
150
151 if (xmlPredefinedEntities != NULL) return;
152
153 xmlPredefinedEntities = xmlCreateEntitiesTable();
154 for (i = 0;i < sizeof(xmlPredefinedEntityValues) /
155 sizeof(xmlPredefinedEntityValues[0]);i++) {
156 in = xmlPredefinedEntityValues[i].name;
157 out = &name[0];
158 for (;(*out++ = (xmlChar) *in);)in++;
159 in = xmlPredefinedEntityValues[i].value;
160 out = &value[0];
161 for (;(*out++ = (xmlChar) *in);)in++;
162
163 xmlAddEntity(NULL, (const xmlChar *) &name[0],
164 XML_INTERNAL_PREDEFINED_ENTITY, NULL, NULL,
165 &value[0]);
166 }
167}
168
169/**
170 * xmlCleanupPredefinedEntities:
171 *
172 * Cleanup up the predefined entities table.
173 */
174void xmlCleanupPredefinedEntities(void) {
175 if (xmlPredefinedEntities == NULL) return;
176
177 xmlFreeEntitiesTable(xmlPredefinedEntities);
178 xmlPredefinedEntities = NULL;
179}
180
181/**
182 * xmlGetPredefinedEntity:
183 * @name: the entity name
184 *
185 * Check whether this name is an predefined entity.
186 *
187 * Returns NULL if not, othervise the entity
188 */
189xmlEntityPtr
190xmlGetPredefinedEntity(const xmlChar *name) {
191 if (xmlPredefinedEntities == NULL)
192 xmlInitializePredefinedEntities();
193 return((xmlEntityPtr) xmlHashLookup(xmlPredefinedEntities, name));
194}
195
196/**
197 * xmlAddDtdEntity:
198 * @doc: the document
199 * @name: the entity name
200 * @type: the entity type XML_xxx_yyy_ENTITY
201 * @ExternalID: the entity external ID if available
202 * @SystemID: the entity system ID if available
203 * @content: the entity content
204 *
205 * Register a new entity for this document DTD external subset.
206 *
207 * Returns a pointer to the entity or NULL in case of error
208 */
209xmlEntityPtr
210xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
211 const xmlChar *ExternalID, const xmlChar *SystemID,
212 const xmlChar *content) {
213 xmlEntityPtr ret;
214 xmlDtdPtr dtd;
215
216 if (doc == NULL) {
217 xmlGenericError(xmlGenericErrorContext,
218 "xmlAddDtdEntity: doc == NULL !\n");
219 return(NULL);
220 }
221 if (doc->extSubset == NULL) {
222 xmlGenericError(xmlGenericErrorContext,
223 "xmlAddDtdEntity: document without external subset !\n");
224 return(NULL);
225 }
226 dtd = doc->extSubset;
227 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
228 if (ret == NULL) return(NULL);
229
230 /*
231 * Link it to the Dtd
232 */
233 ret->parent = dtd;
234 ret->doc = dtd->doc;
235 if (dtd->last == NULL) {
236 dtd->children = dtd->last = (xmlNodePtr) ret;
237 } else {
238 dtd->last->next = (xmlNodePtr) ret;
239 ret->prev = dtd->last;
240 dtd->last = (xmlNodePtr) ret;
241 }
242 return(ret);
243}
244
245/**
246 * xmlAddDocEntity:
247 * @doc: the document
248 * @name: the entity name
249 * @type: the entity type XML_xxx_yyy_ENTITY
250 * @ExternalID: the entity external ID if available
251 * @SystemID: the entity system ID if available
252 * @content: the entity content
253 *
254 * Register a new entity for this document.
255 *
256 * Returns a pointer to the entity or NULL in case of error
257 */
258xmlEntityPtr
259xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
260 const xmlChar *ExternalID, const xmlChar *SystemID,
261 const xmlChar *content) {
262 xmlEntityPtr ret;
263 xmlDtdPtr dtd;
264
265 if (doc == NULL) {
266 xmlGenericError(xmlGenericErrorContext,
267 "xmlAddDocEntity: document is NULL !\n");
268 return(NULL);
269 }
270 if (doc->intSubset == NULL) {
271 xmlGenericError(xmlGenericErrorContext,
272 "xmlAddDtdEntity: document without internal subset !\n");
273 return(NULL);
274 }
275 dtd = doc->intSubset;
276 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
277 if (ret == NULL) return(NULL);
278
279 /*
280 * Link it to the Dtd
281 */
282 ret->parent = dtd;
283 ret->doc = dtd->doc;
284 if (dtd->last == NULL) {
285 dtd->children = dtd->last = (xmlNodePtr) ret;
286 } else {
287 dtd->last->next = (xmlNodePtr) ret;
288 ret->prev = dtd->last;
289 dtd->last = (xmlNodePtr) ret;
290 }
291 return(ret);
292}
293
294/**
295 * xmlGetEntityFromTable:
296 * @table: an entity table
297 * @name: the entity name
298 * @parameter: look for parameter entities
299 *
300 * Do an entity lookup in the table.
301 * returns the corresponding parameter entity, if found.
302 *
303 * Returns A pointer to the entity structure or NULL if not found.
304 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000305static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +0000306xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) {
307 return((xmlEntityPtr) xmlHashLookup(table, name));
308}
309
310/**
311 * xmlGetParameterEntity:
312 * @doc: the document referencing the entity
313 * @name: the entity name
314 *
315 * Do an entity lookup in the internal and external subsets and
316 * returns the corresponding parameter entity, if found.
317 *
318 * Returns A pointer to the entity structure or NULL if not found.
319 */
320xmlEntityPtr
321xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
322 xmlEntitiesTablePtr table;
323 xmlEntityPtr ret;
324
325 if ((doc->intSubset != NULL) && (doc->intSubset->pentities != NULL)) {
326 table = (xmlEntitiesTablePtr) doc->intSubset->pentities;
327 ret = xmlGetEntityFromTable(table, name);
328 if (ret != NULL)
329 return(ret);
330 }
331 if ((doc->extSubset != NULL) && (doc->extSubset->pentities != NULL)) {
332 table = (xmlEntitiesTablePtr) doc->extSubset->pentities;
333 return(xmlGetEntityFromTable(table, name));
334 }
335 return(NULL);
336}
337
338/**
339 * xmlGetDtdEntity:
340 * @doc: the document referencing the entity
341 * @name: the entity name
342 *
343 * Do an entity lookup in the Dtd entity hash table and
344 * returns the corresponding entity, if found.
345 *
346 * Returns A pointer to the entity structure or NULL if not found.
347 */
348xmlEntityPtr
349xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
350 xmlEntitiesTablePtr table;
351
352 if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
353 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
354 return(xmlGetEntityFromTable(table, name));
355 }
356 return(NULL);
357}
358
359/**
360 * xmlGetDocEntity:
361 * @doc: the document referencing the entity
362 * @name: the entity name
363 *
364 * Do an entity lookup in the document entity hash table and
365 * returns the corrsponding entity, otherwise a lookup is done
366 * in the predefined entities too.
367 *
368 * Returns A pointer to the entity structure or NULL if not found.
369 */
370xmlEntityPtr
371xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
372 xmlEntityPtr cur;
373 xmlEntitiesTablePtr table;
374
375 if (doc != NULL) {
376 if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
377 table = (xmlEntitiesTablePtr) doc->intSubset->entities;
378 cur = xmlGetEntityFromTable(table, name);
379 if (cur != NULL)
380 return(cur);
381 }
382 if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
383 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
384 cur = xmlGetEntityFromTable(table, name);
385 if (cur != NULL)
386 return(cur);
387 }
388 }
389 if (xmlPredefinedEntities == NULL)
390 xmlInitializePredefinedEntities();
391 table = xmlPredefinedEntities;
392 return(xmlGetEntityFromTable(table, name));
393}
394
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * The macro below is a simplified test: it accepts tab, line feed,
 * carriage return and every value from 0x20 upward except 0xFFFE and
 * 0xFFFF. It does not reject the surrogate range nor values above
 * 0x10FFFF. The argument is evaluated several times.
 */
#define IS_CHAR(c)							\
    ((((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF)) ||		\
     ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d))
403
404/*
405 * A buffer used for converting entities to their equivalent and back.
406 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000407static int static_buffer_size = 0;
408static xmlChar *static_buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000409
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000410static int growBuffer(void) {
411 static_buffer_size *= 2;
412 static_buffer = (xmlChar *) xmlRealloc(static_buffer, static_buffer_size * sizeof(xmlChar));
413 if (static_buffer == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000414 perror("realloc failed");
415 return(-1);
416 }
417 return(0);
418}
419
420
421/**
422 * xmlEncodeEntities:
423 * @doc: the document containing the string
424 * @input: A string to convert to XML.
425 *
426 * Do a global encoding of a string, replacing the predefined entities
427 * and non ASCII values with their entities and CharRef counterparts.
428 *
429 * TODO: remove xmlEncodeEntities, once we are not afraid of breaking binary
430 * compatibility
431 *
432 * People must migrate their code to xmlEncodeEntitiesReentrant !
433 * This routine will issue a warning when encountered.
434 *
435 * Returns A newly allocated string with the substitution done.
436 */
437const xmlChar *
438xmlEncodeEntities(xmlDocPtr doc, const xmlChar *input) {
439 const xmlChar *cur = input;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000440 xmlChar *out = static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000441 static int warning = 1;
442 int html = 0;
443
444
445 if (warning) {
446 xmlGenericError(xmlGenericErrorContext,
447 "Deprecated API xmlEncodeEntities() used\n");
448 xmlGenericError(xmlGenericErrorContext,
449 " change code to use xmlEncodeEntitiesReentrant()\n");
450 warning = 0;
451 }
452
453 if (input == NULL) return(NULL);
454 if (doc != NULL)
455 html = (doc->type == XML_HTML_DOCUMENT_NODE);
456
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000457 if (static_buffer == NULL) {
458 static_buffer_size = 1000;
459 static_buffer = (xmlChar *) xmlMalloc(static_buffer_size * sizeof(xmlChar));
460 if (static_buffer == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000461 perror("malloc failed");
462 return(NULL);
463 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000464 out = static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000465 }
466 while (*cur != '\0') {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000467 if (out - static_buffer > static_buffer_size - 100) {
468 int indx = out - static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000469
470 growBuffer();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000471 out = &static_buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000472 }
473
474 /*
475 * By default one have to encode at least '<', '>', '"' and '&' !
476 */
477 if (*cur == '<') {
478 *out++ = '&';
479 *out++ = 'l';
480 *out++ = 't';
481 *out++ = ';';
482 } else if (*cur == '>') {
483 *out++ = '&';
484 *out++ = 'g';
485 *out++ = 't';
486 *out++ = ';';
487 } else if (*cur == '&') {
488 *out++ = '&';
489 *out++ = 'a';
490 *out++ = 'm';
491 *out++ = 'p';
492 *out++ = ';';
493 } else if (*cur == '"') {
494 *out++ = '&';
495 *out++ = 'q';
496 *out++ = 'u';
497 *out++ = 'o';
498 *out++ = 't';
499 *out++ = ';';
500 } else if ((*cur == '\'') && (!html)) {
501 *out++ = '&';
502 *out++ = 'a';
503 *out++ = 'p';
504 *out++ = 'o';
505 *out++ = 's';
506 *out++ = ';';
507 } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
508 (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
509 /*
510 * default case, just copy !
511 */
512 *out++ = *cur;
513#ifndef USE_UTF_8
514 } else if ((sizeof(xmlChar) == 1) && (*cur >= 0x80)) {
515 char buf[10], *ptr;
516
Owen Taylor3473f882001-02-23 17:55:21 +0000517 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000518 buf[sizeof(buf) - 1] = 0;
519 ptr = buf;
520 while (*ptr != 0) *out++ = *ptr++;
521#endif
522 } else if (IS_CHAR(*cur)) {
523 char buf[10], *ptr;
524
Owen Taylor3473f882001-02-23 17:55:21 +0000525 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000526 buf[sizeof(buf) - 1] = 0;
527 ptr = buf;
528 while (*ptr != 0) *out++ = *ptr++;
529 }
530#if 0
531 else {
532 /*
533 * default case, this is not a valid char !
534 * Skip it...
535 */
536 xmlGenericError(xmlGenericErrorContext,
537 "xmlEncodeEntities: invalid char %d\n", (int) *cur);
538 }
539#endif
540 cur++;
541 }
542 *out++ = 0;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000543 return(static_buffer);
Owen Taylor3473f882001-02-23 17:55:21 +0000544}
545
/*
 * Macro used to grow the current buffer.
 *
 * It expects the locals `buffer` (xmlChar *) and `buffer_size` (int)
 * in the calling function and doubles the allocation. If the
 * reallocation fails, the old buffer is freed and the calling function
 * returns NULL, so nothing leaks on the error path.
 */
#define growBufferReentrant() do {					\
    xmlChar *grown_buffer_;						\
    buffer_size *= 2;							\
    grown_buffer_ = (xmlChar *)						\
    		xmlRealloc(buffer, buffer_size * sizeof(xmlChar));	\
    if (grown_buffer_ == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = grown_buffer_;						\
} while (0)
558
559
560/**
561 * xmlEncodeEntitiesReentrant:
562 * @doc: the document containing the string
563 * @input: A string to convert to XML.
564 *
565 * Do a global encoding of a string, replacing the predefined entities
566 * and non ASCII values with their entities and CharRef counterparts.
567 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
568 * must be deallocated.
569 *
570 * Returns A newly allocated string with the substitution done.
571 */
572xmlChar *
573xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
574 const xmlChar *cur = input;
575 xmlChar *buffer = NULL;
576 xmlChar *out = NULL;
577 int buffer_size = 0;
578 int html = 0;
579
580 if (input == NULL) return(NULL);
581 if (doc != NULL)
582 html = (doc->type == XML_HTML_DOCUMENT_NODE);
583
584 /*
585 * allocate an translation buffer.
586 */
587 buffer_size = 1000;
588 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
589 if (buffer == NULL) {
590 perror("malloc failed");
591 return(NULL);
592 }
593 out = buffer;
594
595 while (*cur != '\0') {
596 if (out - buffer > buffer_size - 100) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000597 int indx = out - buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000598
599 growBufferReentrant();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000600 out = &buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000601 }
602
603 /*
604 * By default one have to encode at least '<', '>', '"' and '&' !
605 */
606 if (*cur == '<') {
607 *out++ = '&';
608 *out++ = 'l';
609 *out++ = 't';
610 *out++ = ';';
611 } else if (*cur == '>') {
612 *out++ = '&';
613 *out++ = 'g';
614 *out++ = 't';
615 *out++ = ';';
616 } else if (*cur == '&') {
617 *out++ = '&';
618 *out++ = 'a';
619 *out++ = 'm';
620 *out++ = 'p';
621 *out++ = ';';
622 } else if (*cur == '"') {
623 *out++ = '&';
624 *out++ = 'q';
625 *out++ = 'u';
626 *out++ = 'o';
627 *out++ = 't';
628 *out++ = ';';
629#if 0
630 } else if ((*cur == '\'') && (!html)) {
631 *out++ = '&';
632 *out++ = 'a';
633 *out++ = 'p';
634 *out++ = 'o';
635 *out++ = 's';
636 *out++ = ';';
637#endif
638 } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
639 (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
640 /*
641 * default case, just copy !
642 */
643 *out++ = *cur;
644 } else if (*cur >= 0x80) {
Daniel Veillard122376b2001-04-24 12:12:30 +0000645 if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000646 /*
647 * Bjørn Reese <br@sseusa.com> provided the patch
648 xmlChar xc;
649 xc = (*cur & 0x3F) << 6;
650 if (cur[1] != 0) {
651 xc += *(++cur) & 0x3F;
652 *out++ = xc;
653 } else
654 */
655 *out++ = *cur;
656 } else {
657 /*
658 * We assume we have UTF-8 input.
659 */
660 char buf[10], *ptr;
661 int val = 0, l = 1;
662
663 if (*cur < 0xC0) {
664 xmlGenericError(xmlGenericErrorContext,
665 "xmlEncodeEntitiesReentrant : input not UTF-8\n");
Daniel Veillard122376b2001-04-24 12:12:30 +0000666 if (doc != NULL)
667 doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
Owen Taylor3473f882001-02-23 17:55:21 +0000668 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000669 buf[sizeof(buf) - 1] = 0;
670 ptr = buf;
671 while (*ptr != 0) *out++ = *ptr++;
Daniel Veillard05c13a22001-09-09 08:38:09 +0000672 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +0000673 continue;
674 } else if (*cur < 0xE0) {
675 val = (cur[0]) & 0x1F;
676 val <<= 6;
677 val |= (cur[1]) & 0x3F;
678 l = 2;
679 } else if (*cur < 0xF0) {
680 val = (cur[0]) & 0x0F;
681 val <<= 6;
682 val |= (cur[1]) & 0x3F;
683 val <<= 6;
684 val |= (cur[2]) & 0x3F;
685 l = 3;
686 } else if (*cur < 0xF8) {
687 val = (cur[0]) & 0x07;
688 val <<= 6;
689 val |= (cur[1]) & 0x3F;
690 val <<= 6;
691 val |= (cur[2]) & 0x3F;
692 val <<= 6;
693 val |= (cur[3]) & 0x3F;
694 l = 4;
695 }
696 if ((l == 1) || (!IS_CHAR(val))) {
697 xmlGenericError(xmlGenericErrorContext,
698 "xmlEncodeEntitiesReentrant : char out of range\n");
Daniel Veillard122376b2001-04-24 12:12:30 +0000699 if (doc != NULL)
700 doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
Owen Taylor3473f882001-02-23 17:55:21 +0000701 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000702 buf[sizeof(buf) - 1] = 0;
703 ptr = buf;
704 while (*ptr != 0) *out++ = *ptr++;
705 cur++;
706 continue;
707 }
708 /*
709 * We could do multiple things here. Just save as a char ref
710 */
Daniel Veillard16698282001-09-14 10:29:27 +0000711 if (html)
712 snprintf(buf, sizeof(buf), "&#%d;", val);
713 else
714 snprintf(buf, sizeof(buf), "&#x%X;", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000715 buf[sizeof(buf) - 1] = 0;
716 ptr = buf;
717 while (*ptr != 0) *out++ = *ptr++;
718 cur += l;
719 continue;
720 }
721 } else if (IS_CHAR(*cur)) {
722 char buf[10], *ptr;
723
Owen Taylor3473f882001-02-23 17:55:21 +0000724 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000725 buf[sizeof(buf) - 1] = 0;
726 ptr = buf;
727 while (*ptr != 0) *out++ = *ptr++;
728 }
729#if 0
730 else {
731 /*
732 * default case, this is not a valid char !
733 * Skip it...
734 */
735 xmlGenericError(xmlGenericErrorContext,
736 "xmlEncodeEntities: invalid char %d\n", (int) *cur);
737 }
738#endif
739 cur++;
740 }
741 *out++ = 0;
742 return(buffer);
743}
744
745/**
746 * xmlEncodeSpecialChars:
747 * @doc: the document containing the string
748 * @input: A string to convert to XML.
749 *
750 * Do a global encoding of a string, replacing the predefined entities
751 * this routine is reentrant, and result must be deallocated.
752 *
753 * Returns A newly allocated string with the substitution done.
754 */
755xmlChar *
756xmlEncodeSpecialChars(xmlDocPtr doc, const xmlChar *input) {
757 const xmlChar *cur = input;
758 xmlChar *buffer = NULL;
759 xmlChar *out = NULL;
760 int buffer_size = 0;
761 int html = 0;
762
763 if (input == NULL) return(NULL);
764 if (doc != NULL)
765 html = (doc->type == XML_HTML_DOCUMENT_NODE);
766
767 /*
768 * allocate an translation buffer.
769 */
770 buffer_size = 1000;
771 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
772 if (buffer == NULL) {
773 perror("malloc failed");
774 return(NULL);
775 }
776 out = buffer;
777
778 while (*cur != '\0') {
779 if (out - buffer > buffer_size - 10) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000780 int indx = out - buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000781
782 growBufferReentrant();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000783 out = &buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000784 }
785
786 /*
787 * By default one have to encode at least '<', '>', '"' and '&' !
788 */
789 if (*cur == '<') {
790 *out++ = '&';
791 *out++ = 'l';
792 *out++ = 't';
793 *out++ = ';';
794 } else if (*cur == '>') {
795 *out++ = '&';
796 *out++ = 'g';
797 *out++ = 't';
798 *out++ = ';';
799 } else if (*cur == '&') {
800 *out++ = '&';
801 *out++ = 'a';
802 *out++ = 'm';
803 *out++ = 'p';
804 *out++ = ';';
805 } else if (*cur == '"') {
806 *out++ = '&';
807 *out++ = 'q';
808 *out++ = 'u';
809 *out++ = 'o';
810 *out++ = 't';
811 *out++ = ';';
812 } else {
813 /*
814 * Works because on UTF-8, all extended sequences cannot
815 * result in bytes in the ASCII range.
816 */
817 *out++ = *cur;
818 }
819 cur++;
820 }
821 *out++ = 0;
822 return(buffer);
823}
824
825/**
826 * xmlCreateEntitiesTable:
827 *
828 * create and initialize an empty entities hash table.
829 *
830 * Returns the xmlEntitiesTablePtr just created or NULL in case of error.
831 */
832xmlEntitiesTablePtr
833xmlCreateEntitiesTable(void) {
834 return((xmlEntitiesTablePtr) xmlHashCreate(0));
835}
836
837/**
838 * xmlFreeEntitiesTable:
839 * @table: An entity table
840 *
841 * Deallocate the memory used by an entities hash table.
842 */
843void
844xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
845 xmlHashFree(table, (xmlHashDeallocator) xmlFreeEntity);
846}
847
848/**
849 * xmlCopyEntity:
850 * @ent: An entity
851 *
852 * Build a copy of an entity
853 *
854 * Returns the new xmlEntitiesPtr or NULL in case of error.
855 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000856static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +0000857xmlCopyEntity(xmlEntityPtr ent) {
858 xmlEntityPtr cur;
859
860 cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
861 if (cur == NULL) {
862 xmlGenericError(xmlGenericErrorContext,
863 "xmlCopyEntity: out of memory !\n");
864 return(NULL);
865 }
866 memset(cur, 0, sizeof(xmlEntity));
867 cur->type = XML_ELEMENT_DECL;
868
869 cur->etype = ent->etype;
870 if (ent->name != NULL)
871 cur->name = xmlStrdup(ent->name);
872 if (ent->ExternalID != NULL)
873 cur->ExternalID = xmlStrdup(ent->ExternalID);
874 if (ent->SystemID != NULL)
875 cur->SystemID = xmlStrdup(ent->SystemID);
876 if (ent->content != NULL)
877 cur->content = xmlStrdup(ent->content);
878 if (ent->orig != NULL)
879 cur->orig = xmlStrdup(ent->orig);
880 return(cur);
881}
882
883/**
884 * xmlCopyEntitiesTable:
885 * @table: An entity table
886 *
887 * Build a copy of an entity table.
888 *
889 * Returns the new xmlEntitiesTablePtr or NULL in case of error.
890 */
891xmlEntitiesTablePtr
892xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
893 return(xmlHashCopy(table, (xmlHashCopier) xmlCopyEntity));
894}
895
896/**
897 * xmlDumpEntityDecl:
898 * @buf: An XML buffer.
899 * @ent: An entity table
900 *
901 * This will dump the content of the entity table as an XML DTD definition
902 */
903void
904xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
905 switch (ent->etype) {
906 case XML_INTERNAL_GENERAL_ENTITY:
907 xmlBufferWriteChar(buf, "<!ENTITY ");
908 xmlBufferWriteCHAR(buf, ent->name);
909 xmlBufferWriteChar(buf, " ");
910 if (ent->orig != NULL)
911 xmlBufferWriteQuotedString(buf, ent->orig);
912 else
913 xmlBufferWriteQuotedString(buf, ent->content);
914 xmlBufferWriteChar(buf, ">\n");
915 break;
916 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
917 xmlBufferWriteChar(buf, "<!ENTITY ");
918 xmlBufferWriteCHAR(buf, ent->name);
919 if (ent->ExternalID != NULL) {
920 xmlBufferWriteChar(buf, " PUBLIC ");
921 xmlBufferWriteQuotedString(buf, ent->ExternalID);
922 xmlBufferWriteChar(buf, " ");
923 xmlBufferWriteQuotedString(buf, ent->SystemID);
924 } else {
925 xmlBufferWriteChar(buf, " SYSTEM ");
926 xmlBufferWriteQuotedString(buf, ent->SystemID);
927 }
928 xmlBufferWriteChar(buf, ">\n");
929 break;
930 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
931 xmlBufferWriteChar(buf, "<!ENTITY ");
932 xmlBufferWriteCHAR(buf, ent->name);
933 if (ent->ExternalID != NULL) {
934 xmlBufferWriteChar(buf, " PUBLIC ");
935 xmlBufferWriteQuotedString(buf, ent->ExternalID);
936 xmlBufferWriteChar(buf, " ");
937 xmlBufferWriteQuotedString(buf, ent->SystemID);
938 } else {
939 xmlBufferWriteChar(buf, " SYSTEM ");
940 xmlBufferWriteQuotedString(buf, ent->SystemID);
941 }
942 if (ent->content != NULL) { /* Should be true ! */
943 xmlBufferWriteChar(buf, " NDATA ");
944 if (ent->orig != NULL)
945 xmlBufferWriteCHAR(buf, ent->orig);
946 else
947 xmlBufferWriteCHAR(buf, ent->content);
948 }
949 xmlBufferWriteChar(buf, ">\n");
950 break;
951 case XML_INTERNAL_PARAMETER_ENTITY:
952 xmlBufferWriteChar(buf, "<!ENTITY % ");
953 xmlBufferWriteCHAR(buf, ent->name);
954 xmlBufferWriteChar(buf, " ");
955 if (ent->orig == NULL)
956 xmlBufferWriteQuotedString(buf, ent->content);
957 else
958 xmlBufferWriteQuotedString(buf, ent->orig);
959 xmlBufferWriteChar(buf, ">\n");
960 break;
961 case XML_EXTERNAL_PARAMETER_ENTITY:
962 xmlBufferWriteChar(buf, "<!ENTITY % ");
963 xmlBufferWriteCHAR(buf, ent->name);
964 if (ent->ExternalID != NULL) {
965 xmlBufferWriteChar(buf, " PUBLIC ");
966 xmlBufferWriteQuotedString(buf, ent->ExternalID);
967 xmlBufferWriteChar(buf, " ");
968 xmlBufferWriteQuotedString(buf, ent->SystemID);
969 } else {
970 xmlBufferWriteChar(buf, " SYSTEM ");
971 xmlBufferWriteQuotedString(buf, ent->SystemID);
972 }
973 xmlBufferWriteChar(buf, ">\n");
974 break;
975 default:
976 xmlGenericError(xmlGenericErrorContext,
977 "xmlDumpEntitiesTable: internal: unknown type %d\n",
978 ent->etype);
979 }
980}
981
982/**
983 * xmlDumpEntitiesTable:
984 * @buf: An XML buffer.
985 * @table: An entity table
986 *
987 * This will dump the content of the entity table as an XML DTD definition
988 */
989void
990xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
991 xmlHashScan(table, (xmlHashScanner)xmlDumpEntityDecl, buf);
992}