blob: 3e6b581d65d987b023ac8c9c2efd9894dd4b493e [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
 * entities.c : implementation for the XML entities handling
3 *
4 * See Copyright for the status of this software.
5 *
6 * Daniel.Veillard@w3.org
7 */
8
Bjorn Reese70a9da52001-04-21 16:57:29 +00009#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000010
Owen Taylor3473f882001-02-23 17:55:21 +000011#include <string.h>
12#ifdef HAVE_STDLIB_H
13#include <stdlib.h>
14#endif
15#include <libxml/xmlmemory.h>
16#include <libxml/hash.h>
17#include <libxml/entities.h>
18#include <libxml/parser.h>
19#include <libxml/xmlerror.h>
20
21#define DEBUG_ENT_REF /* debugging of cross entities dependancies */
Owen Taylor3473f882001-02-23 17:55:21 +000022
23/*
24 * The XML predefined entities.
25 */
26
/*
 * Description of one predefined entity: its name (without the '&' and ';'
 * delimiters) and the literal text it stands for.
 */
struct xmlPredefinedEntityValue {
    const char *name;		/* entity name, e.g. "lt" */
    const char *value;		/* replacement text, e.g. "<" */
};

/*
 * Table used by xmlInitializePredefinedEntities() to populate the
 * predefined entities hash.
 */
struct xmlPredefinedEntityValue xmlPredefinedEntityValues[] = {
    { "lt",   "<"  },
    { "gt",   ">"  },
    { "apos", "'"  },
    { "quot", "\"" },
    { "amp",  "&"  }
};
38
/*
 * Hash table of the predefined entities. It stays NULL until
 * xmlInitializePredefinedEntities() fills it and is reset to NULL by
 * xmlCleanupPredefinedEntities().
 *
 * TODO: !!!!!!! This is GROSS, allocation of a 256 entry hash for
 *       a fixed number of 5 elements !
 */
xmlHashTablePtr xmlPredefinedEntities = NULL;
44
45/*
46 * xmlFreeEntity : clean-up an entity record.
47 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000048static void xmlFreeEntity(xmlEntityPtr entity) {
Owen Taylor3473f882001-02-23 17:55:21 +000049 if (entity == NULL) return;
50
51 if (entity->children)
52 xmlFreeNodeList(entity->children);
53 if (entity->name != NULL)
54 xmlFree((char *) entity->name);
55 if (entity->ExternalID != NULL)
56 xmlFree((char *) entity->ExternalID);
57 if (entity->SystemID != NULL)
58 xmlFree((char *) entity->SystemID);
59 if (entity->URI != NULL)
60 xmlFree((char *) entity->URI);
61 if (entity->content != NULL)
62 xmlFree((char *) entity->content);
63 if (entity->orig != NULL)
64 xmlFree((char *) entity->orig);
Owen Taylor3473f882001-02-23 17:55:21 +000065 xmlFree(entity);
66}
67
68/*
69 * xmlAddEntity : register a new entity for an entities table.
70 */
71static xmlEntityPtr
72xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type,
73 const xmlChar *ExternalID, const xmlChar *SystemID,
74 const xmlChar *content) {
75 xmlEntitiesTablePtr table = NULL;
76 xmlEntityPtr ret;
77
78 if (name == NULL)
79 return(NULL);
80 switch (type) {
81 case XML_INTERNAL_GENERAL_ENTITY:
82 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
83 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
84 if (dtd->entities == NULL)
85 dtd->entities = xmlHashCreate(0);
86 table = dtd->entities;
87 break;
88 case XML_INTERNAL_PARAMETER_ENTITY:
89 case XML_EXTERNAL_PARAMETER_ENTITY:
90 if (dtd->pentities == NULL)
91 dtd->pentities = xmlHashCreate(0);
92 table = dtd->pentities;
93 break;
94 case XML_INTERNAL_PREDEFINED_ENTITY:
95 if (xmlPredefinedEntities == NULL)
96 xmlPredefinedEntities = xmlHashCreate(8);
97 table = xmlPredefinedEntities;
98 }
99 if (table == NULL)
100 return(NULL);
101 ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
102 if (ret == NULL) {
103 xmlGenericError(xmlGenericErrorContext,
104 "xmlAddEntity: out of memory\n");
105 return(NULL);
106 }
107 memset(ret, 0, sizeof(xmlEntity));
108 ret->type = XML_ENTITY_DECL;
109
110 /*
111 * fill the structure.
112 */
113 ret->name = xmlStrdup(name);
114 ret->etype = (xmlEntityType) type;
115 if (ExternalID != NULL)
116 ret->ExternalID = xmlStrdup(ExternalID);
117 if (SystemID != NULL)
118 ret->SystemID = xmlStrdup(SystemID);
119 if (content != NULL) {
120 ret->length = xmlStrlen(content);
121 ret->content = xmlStrndup(content, ret->length);
122 } else {
123 ret->length = 0;
124 ret->content = NULL;
125 }
126 ret->URI = NULL; /* to be computed by the layer knowing
127 the defining entity */
128 ret->orig = NULL;
129
130 if (xmlHashAddEntry(table, name, ret)) {
131 /*
132 * entity was already defined at another level.
133 */
134 xmlFreeEntity(ret);
135 return(NULL);
136 }
137 return(ret);
138}
139
140/**
141 * xmlInitializePredefinedEntities:
142 *
143 * Set up the predefined entities.
144 */
145void xmlInitializePredefinedEntities(void) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000146 unsigned int i;
Owen Taylor3473f882001-02-23 17:55:21 +0000147 xmlChar name[50];
148 xmlChar value[50];
149 const char *in;
150 xmlChar *out;
151
152 if (xmlPredefinedEntities != NULL) return;
153
154 xmlPredefinedEntities = xmlCreateEntitiesTable();
155 for (i = 0;i < sizeof(xmlPredefinedEntityValues) /
156 sizeof(xmlPredefinedEntityValues[0]);i++) {
157 in = xmlPredefinedEntityValues[i].name;
158 out = &name[0];
159 for (;(*out++ = (xmlChar) *in);)in++;
160 in = xmlPredefinedEntityValues[i].value;
161 out = &value[0];
162 for (;(*out++ = (xmlChar) *in);)in++;
163
164 xmlAddEntity(NULL, (const xmlChar *) &name[0],
165 XML_INTERNAL_PREDEFINED_ENTITY, NULL, NULL,
166 &value[0]);
167 }
168}
169
170/**
171 * xmlCleanupPredefinedEntities:
172 *
173 * Cleanup up the predefined entities table.
174 */
175void xmlCleanupPredefinedEntities(void) {
176 if (xmlPredefinedEntities == NULL) return;
177
178 xmlFreeEntitiesTable(xmlPredefinedEntities);
179 xmlPredefinedEntities = NULL;
180}
181
182/**
183 * xmlGetPredefinedEntity:
184 * @name: the entity name
185 *
 * Check whether this name is a predefined entity.
 *
 * Returns NULL if not, otherwise the entity
189 */
190xmlEntityPtr
191xmlGetPredefinedEntity(const xmlChar *name) {
192 if (xmlPredefinedEntities == NULL)
193 xmlInitializePredefinedEntities();
194 return((xmlEntityPtr) xmlHashLookup(xmlPredefinedEntities, name));
195}
196
197/**
198 * xmlAddDtdEntity:
199 * @doc: the document
200 * @name: the entity name
201 * @type: the entity type XML_xxx_yyy_ENTITY
202 * @ExternalID: the entity external ID if available
203 * @SystemID: the entity system ID if available
204 * @content: the entity content
205 *
206 * Register a new entity for this document DTD external subset.
207 *
208 * Returns a pointer to the entity or NULL in case of error
209 */
210xmlEntityPtr
211xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
212 const xmlChar *ExternalID, const xmlChar *SystemID,
213 const xmlChar *content) {
214 xmlEntityPtr ret;
215 xmlDtdPtr dtd;
216
217 if (doc == NULL) {
218 xmlGenericError(xmlGenericErrorContext,
219 "xmlAddDtdEntity: doc == NULL !\n");
220 return(NULL);
221 }
222 if (doc->extSubset == NULL) {
223 xmlGenericError(xmlGenericErrorContext,
224 "xmlAddDtdEntity: document without external subset !\n");
225 return(NULL);
226 }
227 dtd = doc->extSubset;
228 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
229 if (ret == NULL) return(NULL);
230
231 /*
232 * Link it to the Dtd
233 */
234 ret->parent = dtd;
235 ret->doc = dtd->doc;
236 if (dtd->last == NULL) {
237 dtd->children = dtd->last = (xmlNodePtr) ret;
238 } else {
239 dtd->last->next = (xmlNodePtr) ret;
240 ret->prev = dtd->last;
241 dtd->last = (xmlNodePtr) ret;
242 }
243 return(ret);
244}
245
246/**
247 * xmlAddDocEntity:
248 * @doc: the document
249 * @name: the entity name
250 * @type: the entity type XML_xxx_yyy_ENTITY
251 * @ExternalID: the entity external ID if available
252 * @SystemID: the entity system ID if available
253 * @content: the entity content
254 *
255 * Register a new entity for this document.
256 *
257 * Returns a pointer to the entity or NULL in case of error
258 */
259xmlEntityPtr
260xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
261 const xmlChar *ExternalID, const xmlChar *SystemID,
262 const xmlChar *content) {
263 xmlEntityPtr ret;
264 xmlDtdPtr dtd;
265
266 if (doc == NULL) {
267 xmlGenericError(xmlGenericErrorContext,
268 "xmlAddDocEntity: document is NULL !\n");
269 return(NULL);
270 }
271 if (doc->intSubset == NULL) {
272 xmlGenericError(xmlGenericErrorContext,
273 "xmlAddDtdEntity: document without internal subset !\n");
274 return(NULL);
275 }
276 dtd = doc->intSubset;
277 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
278 if (ret == NULL) return(NULL);
279
280 /*
281 * Link it to the Dtd
282 */
283 ret->parent = dtd;
284 ret->doc = dtd->doc;
285 if (dtd->last == NULL) {
286 dtd->children = dtd->last = (xmlNodePtr) ret;
287 } else {
288 dtd->last->next = (xmlNodePtr) ret;
289 ret->prev = dtd->last;
290 dtd->last = (xmlNodePtr) ret;
291 }
292 return(ret);
293}
294
295/**
296 * xmlGetEntityFromTable:
297 * @table: an entity table
298 * @name: the entity name
299 * @parameter: look for parameter entities
300 *
 * Do an entity lookup in the table and
 * return the corresponding entity, if found.
303 *
304 * Returns A pointer to the entity structure or NULL if not found.
305 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000306static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +0000307xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) {
308 return((xmlEntityPtr) xmlHashLookup(table, name));
309}
310
311/**
312 * xmlGetParameterEntity:
313 * @doc: the document referencing the entity
314 * @name: the entity name
315 *
316 * Do an entity lookup in the internal and external subsets and
317 * returns the corresponding parameter entity, if found.
318 *
319 * Returns A pointer to the entity structure or NULL if not found.
320 */
321xmlEntityPtr
322xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
323 xmlEntitiesTablePtr table;
324 xmlEntityPtr ret;
325
326 if ((doc->intSubset != NULL) && (doc->intSubset->pentities != NULL)) {
327 table = (xmlEntitiesTablePtr) doc->intSubset->pentities;
328 ret = xmlGetEntityFromTable(table, name);
329 if (ret != NULL)
330 return(ret);
331 }
332 if ((doc->extSubset != NULL) && (doc->extSubset->pentities != NULL)) {
333 table = (xmlEntitiesTablePtr) doc->extSubset->pentities;
334 return(xmlGetEntityFromTable(table, name));
335 }
336 return(NULL);
337}
338
339/**
340 * xmlGetDtdEntity:
341 * @doc: the document referencing the entity
342 * @name: the entity name
343 *
344 * Do an entity lookup in the Dtd entity hash table and
345 * returns the corresponding entity, if found.
346 *
347 * Returns A pointer to the entity structure or NULL if not found.
348 */
349xmlEntityPtr
350xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
351 xmlEntitiesTablePtr table;
352
353 if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
354 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
355 return(xmlGetEntityFromTable(table, name));
356 }
357 return(NULL);
358}
359
360/**
361 * xmlGetDocEntity:
362 * @doc: the document referencing the entity
363 * @name: the entity name
364 *
 * Do an entity lookup in the document entity hash table and
 * returns the corresponding entity, otherwise a lookup is done
367 * in the predefined entities too.
368 *
369 * Returns A pointer to the entity structure or NULL if not found.
370 */
371xmlEntityPtr
372xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
373 xmlEntityPtr cur;
374 xmlEntitiesTablePtr table;
375
376 if (doc != NULL) {
377 if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
378 table = (xmlEntitiesTablePtr) doc->intSubset->entities;
379 cur = xmlGetEntityFromTable(table, name);
380 if (cur != NULL)
381 return(cur);
382 }
383 if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
384 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
385 cur = xmlGetEntityFromTable(table, name);
386 if (cur != NULL)
387 return(cur);
388 }
389 }
390 if (xmlPredefinedEntities == NULL)
391 xmlInitializePredefinedEntities();
392 table = xmlPredefinedEntities;
393 return(xmlGetEntityFromTable(table, name));
394}
395
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * The macro implements the production literally: surrogates
 * (#xD800-#xDFFF) and values above #x10FFFF are rejected.
 * Note that the argument is evaluated several times; do not pass an
 * expression with side effects.
 */
#define IS_CHAR(c)							\
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) ||			\
     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||				\
     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||				\
     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
404
405/*
406 * A buffer used for converting entities to their equivalent and back.
407 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000408static int static_buffer_size = 0;
409static xmlChar *static_buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000410
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000411static int growBuffer(void) {
412 static_buffer_size *= 2;
413 static_buffer = (xmlChar *) xmlRealloc(static_buffer, static_buffer_size * sizeof(xmlChar));
414 if (static_buffer == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000415 perror("realloc failed");
416 return(-1);
417 }
418 return(0);
419}
420
421
422/**
423 * xmlEncodeEntities:
424 * @doc: the document containing the string
425 * @input: A string to convert to XML.
426 *
427 * Do a global encoding of a string, replacing the predefined entities
428 * and non ASCII values with their entities and CharRef counterparts.
429 *
430 * TODO: remove xmlEncodeEntities, once we are not afraid of breaking binary
431 * compatibility
432 *
433 * People must migrate their code to xmlEncodeEntitiesReentrant !
434 * This routine will issue a warning when encountered.
435 *
436 * Returns A newly allocated string with the substitution done.
437 */
438const xmlChar *
439xmlEncodeEntities(xmlDocPtr doc, const xmlChar *input) {
440 const xmlChar *cur = input;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000441 xmlChar *out = static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000442 static int warning = 1;
443 int html = 0;
444
445
446 if (warning) {
447 xmlGenericError(xmlGenericErrorContext,
448 "Deprecated API xmlEncodeEntities() used\n");
449 xmlGenericError(xmlGenericErrorContext,
450 " change code to use xmlEncodeEntitiesReentrant()\n");
451 warning = 0;
452 }
453
454 if (input == NULL) return(NULL);
455 if (doc != NULL)
456 html = (doc->type == XML_HTML_DOCUMENT_NODE);
457
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000458 if (static_buffer == NULL) {
459 static_buffer_size = 1000;
460 static_buffer = (xmlChar *) xmlMalloc(static_buffer_size * sizeof(xmlChar));
461 if (static_buffer == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000462 perror("malloc failed");
463 return(NULL);
464 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000465 out = static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000466 }
467 while (*cur != '\0') {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000468 if (out - static_buffer > static_buffer_size - 100) {
469 int indx = out - static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000470
471 growBuffer();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000472 out = &static_buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000473 }
474
475 /*
476 * By default one have to encode at least '<', '>', '"' and '&' !
477 */
478 if (*cur == '<') {
479 *out++ = '&';
480 *out++ = 'l';
481 *out++ = 't';
482 *out++ = ';';
483 } else if (*cur == '>') {
484 *out++ = '&';
485 *out++ = 'g';
486 *out++ = 't';
487 *out++ = ';';
488 } else if (*cur == '&') {
489 *out++ = '&';
490 *out++ = 'a';
491 *out++ = 'm';
492 *out++ = 'p';
493 *out++ = ';';
494 } else if (*cur == '"') {
495 *out++ = '&';
496 *out++ = 'q';
497 *out++ = 'u';
498 *out++ = 'o';
499 *out++ = 't';
500 *out++ = ';';
501 } else if ((*cur == '\'') && (!html)) {
502 *out++ = '&';
503 *out++ = 'a';
504 *out++ = 'p';
505 *out++ = 'o';
506 *out++ = 's';
507 *out++ = ';';
508 } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
509 (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
510 /*
511 * default case, just copy !
512 */
513 *out++ = *cur;
514#ifndef USE_UTF_8
515 } else if ((sizeof(xmlChar) == 1) && (*cur >= 0x80)) {
516 char buf[10], *ptr;
517
Owen Taylor3473f882001-02-23 17:55:21 +0000518 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000519 buf[sizeof(buf) - 1] = 0;
520 ptr = buf;
521 while (*ptr != 0) *out++ = *ptr++;
522#endif
523 } else if (IS_CHAR(*cur)) {
524 char buf[10], *ptr;
525
Owen Taylor3473f882001-02-23 17:55:21 +0000526 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000527 buf[sizeof(buf) - 1] = 0;
528 ptr = buf;
529 while (*ptr != 0) *out++ = *ptr++;
530 }
531#if 0
532 else {
533 /*
534 * default case, this is not a valid char !
535 * Skip it...
536 */
537 xmlGenericError(xmlGenericErrorContext,
538 "xmlEncodeEntities: invalid char %d\n", (int) *cur);
539 }
540#endif
541 cur++;
542 }
543 *out++ = 0;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000544 return(static_buffer);
Owen Taylor3473f882001-02-23 17:55:21 +0000545}
546
/*
 * Macro used to grow the current buffer.
 *
 * Expects the enclosing function to declare `xmlChar *buffer` and
 * `int buffer_size`, and to return a pointer type. The capacity is doubled;
 * if the reallocation fails the old buffer is released (it would otherwise
 * leak, since the caller is about to lose its only reference) and the
 * enclosing function returns NULL.
 */
#define growBufferReentrant() {						\
    xmlChar *grown_buffer__;						\
    buffer_size *= 2;							\
    grown_buffer__ = (xmlChar *)					\
		xmlRealloc(buffer, buffer_size * sizeof(xmlChar));	\
    if (grown_buffer__ == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = grown_buffer__;						\
}
559
560
561/**
562 * xmlEncodeEntitiesReentrant:
563 * @doc: the document containing the string
564 * @input: A string to convert to XML.
565 *
566 * Do a global encoding of a string, replacing the predefined entities
567 * and non ASCII values with their entities and CharRef counterparts.
568 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
569 * must be deallocated.
570 *
571 * Returns A newly allocated string with the substitution done.
572 */
573xmlChar *
574xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
575 const xmlChar *cur = input;
576 xmlChar *buffer = NULL;
577 xmlChar *out = NULL;
578 int buffer_size = 0;
579 int html = 0;
580
581 if (input == NULL) return(NULL);
582 if (doc != NULL)
583 html = (doc->type == XML_HTML_DOCUMENT_NODE);
584
585 /*
586 * allocate an translation buffer.
587 */
588 buffer_size = 1000;
589 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
590 if (buffer == NULL) {
591 perror("malloc failed");
592 return(NULL);
593 }
594 out = buffer;
595
596 while (*cur != '\0') {
597 if (out - buffer > buffer_size - 100) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000598 int indx = out - buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000599
600 growBufferReentrant();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000601 out = &buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000602 }
603
604 /*
605 * By default one have to encode at least '<', '>', '"' and '&' !
606 */
607 if (*cur == '<') {
608 *out++ = '&';
609 *out++ = 'l';
610 *out++ = 't';
611 *out++ = ';';
612 } else if (*cur == '>') {
613 *out++ = '&';
614 *out++ = 'g';
615 *out++ = 't';
616 *out++ = ';';
617 } else if (*cur == '&') {
618 *out++ = '&';
619 *out++ = 'a';
620 *out++ = 'm';
621 *out++ = 'p';
622 *out++ = ';';
623 } else if (*cur == '"') {
624 *out++ = '&';
625 *out++ = 'q';
626 *out++ = 'u';
627 *out++ = 'o';
628 *out++ = 't';
629 *out++ = ';';
630#if 0
631 } else if ((*cur == '\'') && (!html)) {
632 *out++ = '&';
633 *out++ = 'a';
634 *out++ = 'p';
635 *out++ = 'o';
636 *out++ = 's';
637 *out++ = ';';
638#endif
639 } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
640 (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
641 /*
642 * default case, just copy !
643 */
644 *out++ = *cur;
645 } else if (*cur >= 0x80) {
Daniel Veillard122376b2001-04-24 12:12:30 +0000646 if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000647 /*
648 * Bjørn Reese <br@sseusa.com> provided the patch
649 xmlChar xc;
650 xc = (*cur & 0x3F) << 6;
651 if (cur[1] != 0) {
652 xc += *(++cur) & 0x3F;
653 *out++ = xc;
654 } else
655 */
656 *out++ = *cur;
657 } else {
658 /*
659 * We assume we have UTF-8 input.
660 */
661 char buf[10], *ptr;
662 int val = 0, l = 1;
663
664 if (*cur < 0xC0) {
665 xmlGenericError(xmlGenericErrorContext,
666 "xmlEncodeEntitiesReentrant : input not UTF-8\n");
Daniel Veillard122376b2001-04-24 12:12:30 +0000667 if (doc != NULL)
668 doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
Owen Taylor3473f882001-02-23 17:55:21 +0000669 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000670 buf[sizeof(buf) - 1] = 0;
671 ptr = buf;
672 while (*ptr != 0) *out++ = *ptr++;
673 continue;
674 } else if (*cur < 0xE0) {
675 val = (cur[0]) & 0x1F;
676 val <<= 6;
677 val |= (cur[1]) & 0x3F;
678 l = 2;
679 } else if (*cur < 0xF0) {
680 val = (cur[0]) & 0x0F;
681 val <<= 6;
682 val |= (cur[1]) & 0x3F;
683 val <<= 6;
684 val |= (cur[2]) & 0x3F;
685 l = 3;
686 } else if (*cur < 0xF8) {
687 val = (cur[0]) & 0x07;
688 val <<= 6;
689 val |= (cur[1]) & 0x3F;
690 val <<= 6;
691 val |= (cur[2]) & 0x3F;
692 val <<= 6;
693 val |= (cur[3]) & 0x3F;
694 l = 4;
695 }
696 if ((l == 1) || (!IS_CHAR(val))) {
697 xmlGenericError(xmlGenericErrorContext,
698 "xmlEncodeEntitiesReentrant : char out of range\n");
Daniel Veillard122376b2001-04-24 12:12:30 +0000699 if (doc != NULL)
700 doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
Owen Taylor3473f882001-02-23 17:55:21 +0000701 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000702 buf[sizeof(buf) - 1] = 0;
703 ptr = buf;
704 while (*ptr != 0) *out++ = *ptr++;
705 cur++;
706 continue;
707 }
708 /*
709 * We could do multiple things here. Just save as a char ref
710 */
Owen Taylor3473f882001-02-23 17:55:21 +0000711 snprintf(buf, sizeof(buf), "&#x%X;", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000712 buf[sizeof(buf) - 1] = 0;
713 ptr = buf;
714 while (*ptr != 0) *out++ = *ptr++;
715 cur += l;
716 continue;
717 }
718 } else if (IS_CHAR(*cur)) {
719 char buf[10], *ptr;
720
Owen Taylor3473f882001-02-23 17:55:21 +0000721 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000722 buf[sizeof(buf) - 1] = 0;
723 ptr = buf;
724 while (*ptr != 0) *out++ = *ptr++;
725 }
726#if 0
727 else {
728 /*
729 * default case, this is not a valid char !
730 * Skip it...
731 */
732 xmlGenericError(xmlGenericErrorContext,
733 "xmlEncodeEntities: invalid char %d\n", (int) *cur);
734 }
735#endif
736 cur++;
737 }
738 *out++ = 0;
739 return(buffer);
740}
741
742/**
743 * xmlEncodeSpecialChars:
744 * @doc: the document containing the string
745 * @input: A string to convert to XML.
746 *
747 * Do a global encoding of a string, replacing the predefined entities
748 * this routine is reentrant, and result must be deallocated.
749 *
750 * Returns A newly allocated string with the substitution done.
751 */
752xmlChar *
753xmlEncodeSpecialChars(xmlDocPtr doc, const xmlChar *input) {
754 const xmlChar *cur = input;
755 xmlChar *buffer = NULL;
756 xmlChar *out = NULL;
757 int buffer_size = 0;
758 int html = 0;
759
760 if (input == NULL) return(NULL);
761 if (doc != NULL)
762 html = (doc->type == XML_HTML_DOCUMENT_NODE);
763
764 /*
765 * allocate an translation buffer.
766 */
767 buffer_size = 1000;
768 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
769 if (buffer == NULL) {
770 perror("malloc failed");
771 return(NULL);
772 }
773 out = buffer;
774
775 while (*cur != '\0') {
776 if (out - buffer > buffer_size - 10) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000777 int indx = out - buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000778
779 growBufferReentrant();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000780 out = &buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000781 }
782
783 /*
784 * By default one have to encode at least '<', '>', '"' and '&' !
785 */
786 if (*cur == '<') {
787 *out++ = '&';
788 *out++ = 'l';
789 *out++ = 't';
790 *out++ = ';';
791 } else if (*cur == '>') {
792 *out++ = '&';
793 *out++ = 'g';
794 *out++ = 't';
795 *out++ = ';';
796 } else if (*cur == '&') {
797 *out++ = '&';
798 *out++ = 'a';
799 *out++ = 'm';
800 *out++ = 'p';
801 *out++ = ';';
802 } else if (*cur == '"') {
803 *out++ = '&';
804 *out++ = 'q';
805 *out++ = 'u';
806 *out++ = 'o';
807 *out++ = 't';
808 *out++ = ';';
809 } else {
810 /*
811 * Works because on UTF-8, all extended sequences cannot
812 * result in bytes in the ASCII range.
813 */
814 *out++ = *cur;
815 }
816 cur++;
817 }
818 *out++ = 0;
819 return(buffer);
820}
821
822/**
823 * xmlCreateEntitiesTable:
824 *
825 * create and initialize an empty entities hash table.
826 *
827 * Returns the xmlEntitiesTablePtr just created or NULL in case of error.
828 */
829xmlEntitiesTablePtr
830xmlCreateEntitiesTable(void) {
831 return((xmlEntitiesTablePtr) xmlHashCreate(0));
832}
833
834/**
835 * xmlFreeEntitiesTable:
836 * @table: An entity table
837 *
838 * Deallocate the memory used by an entities hash table.
839 */
840void
841xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
842 xmlHashFree(table, (xmlHashDeallocator) xmlFreeEntity);
843}
844
845/**
846 * xmlCopyEntity:
847 * @ent: An entity
848 *
849 * Build a copy of an entity
850 *
851 * Returns the new xmlEntitiesPtr or NULL in case of error.
852 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000853static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +0000854xmlCopyEntity(xmlEntityPtr ent) {
855 xmlEntityPtr cur;
856
857 cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
858 if (cur == NULL) {
859 xmlGenericError(xmlGenericErrorContext,
860 "xmlCopyEntity: out of memory !\n");
861 return(NULL);
862 }
863 memset(cur, 0, sizeof(xmlEntity));
864 cur->type = XML_ELEMENT_DECL;
865
866 cur->etype = ent->etype;
867 if (ent->name != NULL)
868 cur->name = xmlStrdup(ent->name);
869 if (ent->ExternalID != NULL)
870 cur->ExternalID = xmlStrdup(ent->ExternalID);
871 if (ent->SystemID != NULL)
872 cur->SystemID = xmlStrdup(ent->SystemID);
873 if (ent->content != NULL)
874 cur->content = xmlStrdup(ent->content);
875 if (ent->orig != NULL)
876 cur->orig = xmlStrdup(ent->orig);
877 return(cur);
878}
879
880/**
881 * xmlCopyEntitiesTable:
882 * @table: An entity table
883 *
884 * Build a copy of an entity table.
885 *
886 * Returns the new xmlEntitiesTablePtr or NULL in case of error.
887 */
888xmlEntitiesTablePtr
889xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
890 return(xmlHashCopy(table, (xmlHashCopier) xmlCopyEntity));
891}
892
893/**
894 * xmlDumpEntityDecl:
895 * @buf: An XML buffer.
896 * @ent: An entity table
897 *
898 * This will dump the content of the entity table as an XML DTD definition
899 */
900void
901xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
902 switch (ent->etype) {
903 case XML_INTERNAL_GENERAL_ENTITY:
904 xmlBufferWriteChar(buf, "<!ENTITY ");
905 xmlBufferWriteCHAR(buf, ent->name);
906 xmlBufferWriteChar(buf, " ");
907 if (ent->orig != NULL)
908 xmlBufferWriteQuotedString(buf, ent->orig);
909 else
910 xmlBufferWriteQuotedString(buf, ent->content);
911 xmlBufferWriteChar(buf, ">\n");
912 break;
913 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
914 xmlBufferWriteChar(buf, "<!ENTITY ");
915 xmlBufferWriteCHAR(buf, ent->name);
916 if (ent->ExternalID != NULL) {
917 xmlBufferWriteChar(buf, " PUBLIC ");
918 xmlBufferWriteQuotedString(buf, ent->ExternalID);
919 xmlBufferWriteChar(buf, " ");
920 xmlBufferWriteQuotedString(buf, ent->SystemID);
921 } else {
922 xmlBufferWriteChar(buf, " SYSTEM ");
923 xmlBufferWriteQuotedString(buf, ent->SystemID);
924 }
925 xmlBufferWriteChar(buf, ">\n");
926 break;
927 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
928 xmlBufferWriteChar(buf, "<!ENTITY ");
929 xmlBufferWriteCHAR(buf, ent->name);
930 if (ent->ExternalID != NULL) {
931 xmlBufferWriteChar(buf, " PUBLIC ");
932 xmlBufferWriteQuotedString(buf, ent->ExternalID);
933 xmlBufferWriteChar(buf, " ");
934 xmlBufferWriteQuotedString(buf, ent->SystemID);
935 } else {
936 xmlBufferWriteChar(buf, " SYSTEM ");
937 xmlBufferWriteQuotedString(buf, ent->SystemID);
938 }
939 if (ent->content != NULL) { /* Should be true ! */
940 xmlBufferWriteChar(buf, " NDATA ");
941 if (ent->orig != NULL)
942 xmlBufferWriteCHAR(buf, ent->orig);
943 else
944 xmlBufferWriteCHAR(buf, ent->content);
945 }
946 xmlBufferWriteChar(buf, ">\n");
947 break;
948 case XML_INTERNAL_PARAMETER_ENTITY:
949 xmlBufferWriteChar(buf, "<!ENTITY % ");
950 xmlBufferWriteCHAR(buf, ent->name);
951 xmlBufferWriteChar(buf, " ");
952 if (ent->orig == NULL)
953 xmlBufferWriteQuotedString(buf, ent->content);
954 else
955 xmlBufferWriteQuotedString(buf, ent->orig);
956 xmlBufferWriteChar(buf, ">\n");
957 break;
958 case XML_EXTERNAL_PARAMETER_ENTITY:
959 xmlBufferWriteChar(buf, "<!ENTITY % ");
960 xmlBufferWriteCHAR(buf, ent->name);
961 if (ent->ExternalID != NULL) {
962 xmlBufferWriteChar(buf, " PUBLIC ");
963 xmlBufferWriteQuotedString(buf, ent->ExternalID);
964 xmlBufferWriteChar(buf, " ");
965 xmlBufferWriteQuotedString(buf, ent->SystemID);
966 } else {
967 xmlBufferWriteChar(buf, " SYSTEM ");
968 xmlBufferWriteQuotedString(buf, ent->SystemID);
969 }
970 xmlBufferWriteChar(buf, ">\n");
971 break;
972 default:
973 xmlGenericError(xmlGenericErrorContext,
974 "xmlDumpEntitiesTable: internal: unknown type %d\n",
975 ent->etype);
976 }
977}
978
979/**
980 * xmlDumpEntitiesTable:
981 * @buf: An XML buffer.
982 * @table: An entity table
983 *
984 * This will dump the content of the entity table as an XML DTD definition
985 */
986void
987xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
988 xmlHashScan(table, (xmlHashScanner)xmlDumpEntityDecl, buf);
989}