blob: 05c525c1c21772dd74ecb43fb7f001e1bcd04ddd [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * entities.c : implementation for the XML entities handling
3 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00006 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00007 */
8
Bjorn Reese70a9da52001-04-21 16:57:29 +00009#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000010
Owen Taylor3473f882001-02-23 17:55:21 +000011#include <string.h>
12#ifdef HAVE_STDLIB_H
13#include <stdlib.h>
14#endif
15#include <libxml/xmlmemory.h>
16#include <libxml/hash.h>
17#include <libxml/entities.h>
18#include <libxml/parser.h>
19#include <libxml/xmlerror.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000020#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000021
/*
 * The XML predefined entities.
 *
 * Each record pairs an entity name (without the '&' and ';' delimiters)
 * with the replacement text it stands for.
 */

struct xmlPredefinedEntityValue {
    const char *name;	/* entity name, e.g. "lt" */
    const char *value;	/* replacement text, e.g. "<" */
};

/* Read-only lookup data; nothing in this file writes to it. */
static const struct xmlPredefinedEntityValue xmlPredefinedEntityValues[] = {
    { "lt", "<" },
    { "gt", ">" },
    { "apos", "'" },
    { "quot", "\"" },
    { "amp", "&" }
};
37
38/*
Daniel Veillardd79bcd12001-06-21 22:07:42 +000039 * TODO: This is GROSS, allocation of a 256 entry hash for
40 * a fixed number of 4 elements !
Owen Taylor3473f882001-02-23 17:55:21 +000041 */
Daniel Veillardb44025c2001-10-11 22:55:55 +000042static xmlHashTablePtr xmlPredefinedEntities = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000043
44/*
45 * xmlFreeEntity : clean-up an entity record.
46 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000047static void xmlFreeEntity(xmlEntityPtr entity) {
Owen Taylor3473f882001-02-23 17:55:21 +000048 if (entity == NULL) return;
49
Daniel Veillard22090732001-07-16 00:06:07 +000050 if ((entity->children) &&
51 (entity == (xmlEntityPtr) entity->children->parent))
Owen Taylor3473f882001-02-23 17:55:21 +000052 xmlFreeNodeList(entity->children);
53 if (entity->name != NULL)
54 xmlFree((char *) entity->name);
55 if (entity->ExternalID != NULL)
56 xmlFree((char *) entity->ExternalID);
57 if (entity->SystemID != NULL)
58 xmlFree((char *) entity->SystemID);
59 if (entity->URI != NULL)
60 xmlFree((char *) entity->URI);
61 if (entity->content != NULL)
62 xmlFree((char *) entity->content);
63 if (entity->orig != NULL)
64 xmlFree((char *) entity->orig);
Owen Taylor3473f882001-02-23 17:55:21 +000065 xmlFree(entity);
66}
67
68/*
69 * xmlAddEntity : register a new entity for an entities table.
70 */
71static xmlEntityPtr
72xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type,
73 const xmlChar *ExternalID, const xmlChar *SystemID,
74 const xmlChar *content) {
75 xmlEntitiesTablePtr table = NULL;
76 xmlEntityPtr ret;
77
78 if (name == NULL)
79 return(NULL);
80 switch (type) {
81 case XML_INTERNAL_GENERAL_ENTITY:
82 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
83 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
84 if (dtd->entities == NULL)
85 dtd->entities = xmlHashCreate(0);
86 table = dtd->entities;
87 break;
88 case XML_INTERNAL_PARAMETER_ENTITY:
89 case XML_EXTERNAL_PARAMETER_ENTITY:
90 if (dtd->pentities == NULL)
91 dtd->pentities = xmlHashCreate(0);
92 table = dtd->pentities;
93 break;
94 case XML_INTERNAL_PREDEFINED_ENTITY:
95 if (xmlPredefinedEntities == NULL)
96 xmlPredefinedEntities = xmlHashCreate(8);
97 table = xmlPredefinedEntities;
98 }
99 if (table == NULL)
100 return(NULL);
101 ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
102 if (ret == NULL) {
103 xmlGenericError(xmlGenericErrorContext,
104 "xmlAddEntity: out of memory\n");
105 return(NULL);
106 }
107 memset(ret, 0, sizeof(xmlEntity));
108 ret->type = XML_ENTITY_DECL;
109
110 /*
111 * fill the structure.
112 */
113 ret->name = xmlStrdup(name);
114 ret->etype = (xmlEntityType) type;
115 if (ExternalID != NULL)
116 ret->ExternalID = xmlStrdup(ExternalID);
117 if (SystemID != NULL)
118 ret->SystemID = xmlStrdup(SystemID);
119 if (content != NULL) {
120 ret->length = xmlStrlen(content);
121 ret->content = xmlStrndup(content, ret->length);
122 } else {
123 ret->length = 0;
124 ret->content = NULL;
125 }
126 ret->URI = NULL; /* to be computed by the layer knowing
127 the defining entity */
128 ret->orig = NULL;
129
130 if (xmlHashAddEntry(table, name, ret)) {
131 /*
132 * entity was already defined at another level.
133 */
134 xmlFreeEntity(ret);
135 return(NULL);
136 }
137 return(ret);
138}
139
140/**
141 * xmlInitializePredefinedEntities:
142 *
143 * Set up the predefined entities.
144 */
145void xmlInitializePredefinedEntities(void) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000146 unsigned int i;
Owen Taylor3473f882001-02-23 17:55:21 +0000147 xmlChar name[50];
148 xmlChar value[50];
149 const char *in;
150 xmlChar *out;
151
152 if (xmlPredefinedEntities != NULL) return;
153
154 xmlPredefinedEntities = xmlCreateEntitiesTable();
155 for (i = 0;i < sizeof(xmlPredefinedEntityValues) /
156 sizeof(xmlPredefinedEntityValues[0]);i++) {
157 in = xmlPredefinedEntityValues[i].name;
158 out = &name[0];
159 for (;(*out++ = (xmlChar) *in);)in++;
160 in = xmlPredefinedEntityValues[i].value;
161 out = &value[0];
162 for (;(*out++ = (xmlChar) *in);)in++;
163
164 xmlAddEntity(NULL, (const xmlChar *) &name[0],
165 XML_INTERNAL_PREDEFINED_ENTITY, NULL, NULL,
166 &value[0]);
167 }
168}
169
170/**
171 * xmlCleanupPredefinedEntities:
172 *
173 * Cleanup up the predefined entities table.
174 */
175void xmlCleanupPredefinedEntities(void) {
176 if (xmlPredefinedEntities == NULL) return;
177
178 xmlFreeEntitiesTable(xmlPredefinedEntities);
179 xmlPredefinedEntities = NULL;
180}
181
182/**
183 * xmlGetPredefinedEntity:
184 * @name: the entity name
185 *
186 * Check whether this name is an predefined entity.
187 *
188 * Returns NULL if not, othervise the entity
189 */
190xmlEntityPtr
191xmlGetPredefinedEntity(const xmlChar *name) {
192 if (xmlPredefinedEntities == NULL)
193 xmlInitializePredefinedEntities();
194 return((xmlEntityPtr) xmlHashLookup(xmlPredefinedEntities, name));
195}
196
197/**
198 * xmlAddDtdEntity:
199 * @doc: the document
200 * @name: the entity name
201 * @type: the entity type XML_xxx_yyy_ENTITY
202 * @ExternalID: the entity external ID if available
203 * @SystemID: the entity system ID if available
204 * @content: the entity content
205 *
206 * Register a new entity for this document DTD external subset.
207 *
208 * Returns a pointer to the entity or NULL in case of error
209 */
210xmlEntityPtr
211xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
212 const xmlChar *ExternalID, const xmlChar *SystemID,
213 const xmlChar *content) {
214 xmlEntityPtr ret;
215 xmlDtdPtr dtd;
216
217 if (doc == NULL) {
218 xmlGenericError(xmlGenericErrorContext,
219 "xmlAddDtdEntity: doc == NULL !\n");
220 return(NULL);
221 }
222 if (doc->extSubset == NULL) {
223 xmlGenericError(xmlGenericErrorContext,
224 "xmlAddDtdEntity: document without external subset !\n");
225 return(NULL);
226 }
227 dtd = doc->extSubset;
228 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
229 if (ret == NULL) return(NULL);
230
231 /*
232 * Link it to the Dtd
233 */
234 ret->parent = dtd;
235 ret->doc = dtd->doc;
236 if (dtd->last == NULL) {
237 dtd->children = dtd->last = (xmlNodePtr) ret;
238 } else {
239 dtd->last->next = (xmlNodePtr) ret;
240 ret->prev = dtd->last;
241 dtd->last = (xmlNodePtr) ret;
242 }
243 return(ret);
244}
245
246/**
247 * xmlAddDocEntity:
248 * @doc: the document
249 * @name: the entity name
250 * @type: the entity type XML_xxx_yyy_ENTITY
251 * @ExternalID: the entity external ID if available
252 * @SystemID: the entity system ID if available
253 * @content: the entity content
254 *
255 * Register a new entity for this document.
256 *
257 * Returns a pointer to the entity or NULL in case of error
258 */
259xmlEntityPtr
260xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
261 const xmlChar *ExternalID, const xmlChar *SystemID,
262 const xmlChar *content) {
263 xmlEntityPtr ret;
264 xmlDtdPtr dtd;
265
266 if (doc == NULL) {
267 xmlGenericError(xmlGenericErrorContext,
268 "xmlAddDocEntity: document is NULL !\n");
269 return(NULL);
270 }
271 if (doc->intSubset == NULL) {
272 xmlGenericError(xmlGenericErrorContext,
273 "xmlAddDtdEntity: document without internal subset !\n");
274 return(NULL);
275 }
276 dtd = doc->intSubset;
277 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
278 if (ret == NULL) return(NULL);
279
280 /*
281 * Link it to the Dtd
282 */
283 ret->parent = dtd;
284 ret->doc = dtd->doc;
285 if (dtd->last == NULL) {
286 dtd->children = dtd->last = (xmlNodePtr) ret;
287 } else {
288 dtd->last->next = (xmlNodePtr) ret;
289 ret->prev = dtd->last;
290 dtd->last = (xmlNodePtr) ret;
291 }
292 return(ret);
293}
294
295/**
296 * xmlGetEntityFromTable:
297 * @table: an entity table
298 * @name: the entity name
299 * @parameter: look for parameter entities
300 *
301 * Do an entity lookup in the table.
302 * returns the corresponding parameter entity, if found.
303 *
304 * Returns A pointer to the entity structure or NULL if not found.
305 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000306static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +0000307xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) {
308 return((xmlEntityPtr) xmlHashLookup(table, name));
309}
310
311/**
312 * xmlGetParameterEntity:
313 * @doc: the document referencing the entity
314 * @name: the entity name
315 *
316 * Do an entity lookup in the internal and external subsets and
317 * returns the corresponding parameter entity, if found.
318 *
319 * Returns A pointer to the entity structure or NULL if not found.
320 */
321xmlEntityPtr
322xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
323 xmlEntitiesTablePtr table;
324 xmlEntityPtr ret;
325
326 if ((doc->intSubset != NULL) && (doc->intSubset->pentities != NULL)) {
327 table = (xmlEntitiesTablePtr) doc->intSubset->pentities;
328 ret = xmlGetEntityFromTable(table, name);
329 if (ret != NULL)
330 return(ret);
331 }
332 if ((doc->extSubset != NULL) && (doc->extSubset->pentities != NULL)) {
333 table = (xmlEntitiesTablePtr) doc->extSubset->pentities;
334 return(xmlGetEntityFromTable(table, name));
335 }
336 return(NULL);
337}
338
339/**
340 * xmlGetDtdEntity:
341 * @doc: the document referencing the entity
342 * @name: the entity name
343 *
344 * Do an entity lookup in the Dtd entity hash table and
345 * returns the corresponding entity, if found.
346 *
347 * Returns A pointer to the entity structure or NULL if not found.
348 */
349xmlEntityPtr
350xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
351 xmlEntitiesTablePtr table;
352
353 if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
354 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
355 return(xmlGetEntityFromTable(table, name));
356 }
357 return(NULL);
358}
359
360/**
361 * xmlGetDocEntity:
362 * @doc: the document referencing the entity
363 * @name: the entity name
364 *
365 * Do an entity lookup in the document entity hash table and
366 * returns the corrsponding entity, otherwise a lookup is done
367 * in the predefined entities too.
368 *
369 * Returns A pointer to the entity structure or NULL if not found.
370 */
371xmlEntityPtr
372xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
373 xmlEntityPtr cur;
374 xmlEntitiesTablePtr table;
375
376 if (doc != NULL) {
377 if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
378 table = (xmlEntitiesTablePtr) doc->intSubset->entities;
379 cur = xmlGetEntityFromTable(table, name);
380 if (cur != NULL)
381 return(cur);
382 }
383 if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
384 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
385 cur = xmlGetEntityFromTable(table, name);
386 if (cur != NULL)
387 return(cur);
388 }
389 }
390 if (xmlPredefinedEntities == NULL)
391 xmlInitializePredefinedEntities();
392 table = xmlPredefinedEntities;
393 return(xmlGetEntityFromTable(table, name));
394}
395
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * The macro follows the production above range by range, so surrogate
 * code points and values above 0x10FFFF are rejected.  The argument is
 * evaluated several times: do not pass an expression with side effects.
 */
#define IS_CHAR(c)							\
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) ||		\
     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||				\
     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||				\
     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
404
405/*
406 * A buffer used for converting entities to their equivalent and back.
407 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000408static int static_buffer_size = 0;
409static xmlChar *static_buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000410
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000411static int growBuffer(void) {
412 static_buffer_size *= 2;
413 static_buffer = (xmlChar *) xmlRealloc(static_buffer, static_buffer_size * sizeof(xmlChar));
414 if (static_buffer == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000415 perror("realloc failed");
416 return(-1);
417 }
418 return(0);
419}
420
421
422/**
423 * xmlEncodeEntities:
424 * @doc: the document containing the string
425 * @input: A string to convert to XML.
426 *
427 * Do a global encoding of a string, replacing the predefined entities
428 * and non ASCII values with their entities and CharRef counterparts.
429 *
430 * TODO: remove xmlEncodeEntities, once we are not afraid of breaking binary
431 * compatibility
432 *
433 * People must migrate their code to xmlEncodeEntitiesReentrant !
434 * This routine will issue a warning when encountered.
435 *
436 * Returns A newly allocated string with the substitution done.
437 */
438const xmlChar *
439xmlEncodeEntities(xmlDocPtr doc, const xmlChar *input) {
440 const xmlChar *cur = input;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000441 xmlChar *out = static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000442 static int warning = 1;
443 int html = 0;
444
445
446 if (warning) {
447 xmlGenericError(xmlGenericErrorContext,
448 "Deprecated API xmlEncodeEntities() used\n");
449 xmlGenericError(xmlGenericErrorContext,
450 " change code to use xmlEncodeEntitiesReentrant()\n");
451 warning = 0;
452 }
453
454 if (input == NULL) return(NULL);
455 if (doc != NULL)
456 html = (doc->type == XML_HTML_DOCUMENT_NODE);
457
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000458 if (static_buffer == NULL) {
459 static_buffer_size = 1000;
460 static_buffer = (xmlChar *) xmlMalloc(static_buffer_size * sizeof(xmlChar));
461 if (static_buffer == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000462 perror("malloc failed");
463 return(NULL);
464 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000465 out = static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000466 }
467 while (*cur != '\0') {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000468 if (out - static_buffer > static_buffer_size - 100) {
469 int indx = out - static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000470
471 growBuffer();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000472 out = &static_buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000473 }
474
475 /*
476 * By default one have to encode at least '<', '>', '"' and '&' !
477 */
478 if (*cur == '<') {
479 *out++ = '&';
480 *out++ = 'l';
481 *out++ = 't';
482 *out++ = ';';
483 } else if (*cur == '>') {
484 *out++ = '&';
485 *out++ = 'g';
486 *out++ = 't';
487 *out++ = ';';
488 } else if (*cur == '&') {
489 *out++ = '&';
490 *out++ = 'a';
491 *out++ = 'm';
492 *out++ = 'p';
493 *out++ = ';';
494 } else if (*cur == '"') {
495 *out++ = '&';
496 *out++ = 'q';
497 *out++ = 'u';
498 *out++ = 'o';
499 *out++ = 't';
500 *out++ = ';';
501 } else if ((*cur == '\'') && (!html)) {
502 *out++ = '&';
503 *out++ = 'a';
504 *out++ = 'p';
505 *out++ = 'o';
506 *out++ = 's';
507 *out++ = ';';
508 } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
509 (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
510 /*
511 * default case, just copy !
512 */
513 *out++ = *cur;
514#ifndef USE_UTF_8
515 } else if ((sizeof(xmlChar) == 1) && (*cur >= 0x80)) {
516 char buf[10], *ptr;
517
Owen Taylor3473f882001-02-23 17:55:21 +0000518 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000519 buf[sizeof(buf) - 1] = 0;
520 ptr = buf;
521 while (*ptr != 0) *out++ = *ptr++;
522#endif
523 } else if (IS_CHAR(*cur)) {
524 char buf[10], *ptr;
525
Owen Taylor3473f882001-02-23 17:55:21 +0000526 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000527 buf[sizeof(buf) - 1] = 0;
528 ptr = buf;
529 while (*ptr != 0) *out++ = *ptr++;
530 }
531#if 0
532 else {
533 /*
534 * default case, this is not a valid char !
535 * Skip it...
536 */
537 xmlGenericError(xmlGenericErrorContext,
538 "xmlEncodeEntities: invalid char %d\n", (int) *cur);
539 }
540#endif
541 cur++;
542 }
543 *out++ = 0;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000544 return(static_buffer);
Owen Taylor3473f882001-02-23 17:55:21 +0000545}
546
/*
 * Macro used to grow the current buffer.
 *
 * It expects the locals "buffer" (xmlChar *) and "buffer_size" (int)
 * in the calling function and doubles the size of the buffer.  When
 * the reallocation fails the previous buffer is released, so that it
 * is not leaked, and the calling function returns NULL.
 */
#define growBufferReentrant() do {					\
    xmlChar *grown_buffer;						\
    buffer_size *= 2;							\
    grown_buffer = (xmlChar *)						\
		xmlRealloc(buffer, buffer_size * sizeof(xmlChar));	\
    if (grown_buffer == NULL) {						\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = grown_buffer;						\
} while (0)
559
560
561/**
562 * xmlEncodeEntitiesReentrant:
563 * @doc: the document containing the string
564 * @input: A string to convert to XML.
565 *
566 * Do a global encoding of a string, replacing the predefined entities
567 * and non ASCII values with their entities and CharRef counterparts.
568 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
569 * must be deallocated.
570 *
571 * Returns A newly allocated string with the substitution done.
572 */
573xmlChar *
574xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
575 const xmlChar *cur = input;
576 xmlChar *buffer = NULL;
577 xmlChar *out = NULL;
578 int buffer_size = 0;
579 int html = 0;
580
581 if (input == NULL) return(NULL);
582 if (doc != NULL)
583 html = (doc->type == XML_HTML_DOCUMENT_NODE);
584
585 /*
586 * allocate an translation buffer.
587 */
588 buffer_size = 1000;
589 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
590 if (buffer == NULL) {
591 perror("malloc failed");
592 return(NULL);
593 }
594 out = buffer;
595
596 while (*cur != '\0') {
597 if (out - buffer > buffer_size - 100) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000598 int indx = out - buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000599
600 growBufferReentrant();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000601 out = &buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000602 }
603
604 /*
605 * By default one have to encode at least '<', '>', '"' and '&' !
606 */
607 if (*cur == '<') {
608 *out++ = '&';
609 *out++ = 'l';
610 *out++ = 't';
611 *out++ = ';';
612 } else if (*cur == '>') {
613 *out++ = '&';
614 *out++ = 'g';
615 *out++ = 't';
616 *out++ = ';';
617 } else if (*cur == '&') {
618 *out++ = '&';
619 *out++ = 'a';
620 *out++ = 'm';
621 *out++ = 'p';
622 *out++ = ';';
623 } else if (*cur == '"') {
624 *out++ = '&';
625 *out++ = 'q';
626 *out++ = 'u';
627 *out++ = 'o';
628 *out++ = 't';
629 *out++ = ';';
630#if 0
631 } else if ((*cur == '\'') && (!html)) {
632 *out++ = '&';
633 *out++ = 'a';
634 *out++ = 'p';
635 *out++ = 'o';
636 *out++ = 's';
637 *out++ = ';';
638#endif
639 } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
640 (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
641 /*
642 * default case, just copy !
643 */
644 *out++ = *cur;
645 } else if (*cur >= 0x80) {
Daniel Veillard122376b2001-04-24 12:12:30 +0000646 if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000647 /*
648 * Bjørn Reese <br@sseusa.com> provided the patch
649 xmlChar xc;
650 xc = (*cur & 0x3F) << 6;
651 if (cur[1] != 0) {
652 xc += *(++cur) & 0x3F;
653 *out++ = xc;
654 } else
655 */
656 *out++ = *cur;
657 } else {
658 /*
659 * We assume we have UTF-8 input.
660 */
661 char buf[10], *ptr;
662 int val = 0, l = 1;
663
664 if (*cur < 0xC0) {
665 xmlGenericError(xmlGenericErrorContext,
666 "xmlEncodeEntitiesReentrant : input not UTF-8\n");
Daniel Veillard122376b2001-04-24 12:12:30 +0000667 if (doc != NULL)
668 doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
Owen Taylor3473f882001-02-23 17:55:21 +0000669 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000670 buf[sizeof(buf) - 1] = 0;
671 ptr = buf;
672 while (*ptr != 0) *out++ = *ptr++;
Daniel Veillard05c13a22001-09-09 08:38:09 +0000673 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +0000674 continue;
675 } else if (*cur < 0xE0) {
676 val = (cur[0]) & 0x1F;
677 val <<= 6;
678 val |= (cur[1]) & 0x3F;
679 l = 2;
680 } else if (*cur < 0xF0) {
681 val = (cur[0]) & 0x0F;
682 val <<= 6;
683 val |= (cur[1]) & 0x3F;
684 val <<= 6;
685 val |= (cur[2]) & 0x3F;
686 l = 3;
687 } else if (*cur < 0xF8) {
688 val = (cur[0]) & 0x07;
689 val <<= 6;
690 val |= (cur[1]) & 0x3F;
691 val <<= 6;
692 val |= (cur[2]) & 0x3F;
693 val <<= 6;
694 val |= (cur[3]) & 0x3F;
695 l = 4;
696 }
697 if ((l == 1) || (!IS_CHAR(val))) {
698 xmlGenericError(xmlGenericErrorContext,
699 "xmlEncodeEntitiesReentrant : char out of range\n");
Daniel Veillard122376b2001-04-24 12:12:30 +0000700 if (doc != NULL)
701 doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
Owen Taylor3473f882001-02-23 17:55:21 +0000702 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000703 buf[sizeof(buf) - 1] = 0;
704 ptr = buf;
705 while (*ptr != 0) *out++ = *ptr++;
706 cur++;
707 continue;
708 }
709 /*
710 * We could do multiple things here. Just save as a char ref
711 */
Daniel Veillard16698282001-09-14 10:29:27 +0000712 if (html)
713 snprintf(buf, sizeof(buf), "&#%d;", val);
714 else
715 snprintf(buf, sizeof(buf), "&#x%X;", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000716 buf[sizeof(buf) - 1] = 0;
717 ptr = buf;
718 while (*ptr != 0) *out++ = *ptr++;
719 cur += l;
720 continue;
721 }
722 } else if (IS_CHAR(*cur)) {
723 char buf[10], *ptr;
724
Owen Taylor3473f882001-02-23 17:55:21 +0000725 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000726 buf[sizeof(buf) - 1] = 0;
727 ptr = buf;
728 while (*ptr != 0) *out++ = *ptr++;
729 }
730#if 0
731 else {
732 /*
733 * default case, this is not a valid char !
734 * Skip it...
735 */
736 xmlGenericError(xmlGenericErrorContext,
737 "xmlEncodeEntities: invalid char %d\n", (int) *cur);
738 }
739#endif
740 cur++;
741 }
742 *out++ = 0;
743 return(buffer);
744}
745
746/**
747 * xmlEncodeSpecialChars:
748 * @doc: the document containing the string
749 * @input: A string to convert to XML.
750 *
751 * Do a global encoding of a string, replacing the predefined entities
752 * this routine is reentrant, and result must be deallocated.
753 *
754 * Returns A newly allocated string with the substitution done.
755 */
756xmlChar *
757xmlEncodeSpecialChars(xmlDocPtr doc, const xmlChar *input) {
758 const xmlChar *cur = input;
759 xmlChar *buffer = NULL;
760 xmlChar *out = NULL;
761 int buffer_size = 0;
762 int html = 0;
763
764 if (input == NULL) return(NULL);
765 if (doc != NULL)
766 html = (doc->type == XML_HTML_DOCUMENT_NODE);
767
768 /*
769 * allocate an translation buffer.
770 */
771 buffer_size = 1000;
772 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
773 if (buffer == NULL) {
774 perror("malloc failed");
775 return(NULL);
776 }
777 out = buffer;
778
779 while (*cur != '\0') {
780 if (out - buffer > buffer_size - 10) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000781 int indx = out - buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000782
783 growBufferReentrant();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000784 out = &buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000785 }
786
787 /*
788 * By default one have to encode at least '<', '>', '"' and '&' !
789 */
790 if (*cur == '<') {
791 *out++ = '&';
792 *out++ = 'l';
793 *out++ = 't';
794 *out++ = ';';
795 } else if (*cur == '>') {
796 *out++ = '&';
797 *out++ = 'g';
798 *out++ = 't';
799 *out++ = ';';
800 } else if (*cur == '&') {
801 *out++ = '&';
802 *out++ = 'a';
803 *out++ = 'm';
804 *out++ = 'p';
805 *out++ = ';';
806 } else if (*cur == '"') {
807 *out++ = '&';
808 *out++ = 'q';
809 *out++ = 'u';
810 *out++ = 'o';
811 *out++ = 't';
812 *out++ = ';';
813 } else {
814 /*
815 * Works because on UTF-8, all extended sequences cannot
816 * result in bytes in the ASCII range.
817 */
818 *out++ = *cur;
819 }
820 cur++;
821 }
822 *out++ = 0;
823 return(buffer);
824}
825
826/**
827 * xmlCreateEntitiesTable:
828 *
829 * create and initialize an empty entities hash table.
830 *
831 * Returns the xmlEntitiesTablePtr just created or NULL in case of error.
832 */
833xmlEntitiesTablePtr
834xmlCreateEntitiesTable(void) {
835 return((xmlEntitiesTablePtr) xmlHashCreate(0));
836}
837
838/**
839 * xmlFreeEntitiesTable:
840 * @table: An entity table
841 *
842 * Deallocate the memory used by an entities hash table.
843 */
844void
845xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
846 xmlHashFree(table, (xmlHashDeallocator) xmlFreeEntity);
847}
848
849/**
850 * xmlCopyEntity:
851 * @ent: An entity
852 *
853 * Build a copy of an entity
854 *
855 * Returns the new xmlEntitiesPtr or NULL in case of error.
856 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000857static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +0000858xmlCopyEntity(xmlEntityPtr ent) {
859 xmlEntityPtr cur;
860
861 cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
862 if (cur == NULL) {
863 xmlGenericError(xmlGenericErrorContext,
864 "xmlCopyEntity: out of memory !\n");
865 return(NULL);
866 }
867 memset(cur, 0, sizeof(xmlEntity));
868 cur->type = XML_ELEMENT_DECL;
869
870 cur->etype = ent->etype;
871 if (ent->name != NULL)
872 cur->name = xmlStrdup(ent->name);
873 if (ent->ExternalID != NULL)
874 cur->ExternalID = xmlStrdup(ent->ExternalID);
875 if (ent->SystemID != NULL)
876 cur->SystemID = xmlStrdup(ent->SystemID);
877 if (ent->content != NULL)
878 cur->content = xmlStrdup(ent->content);
879 if (ent->orig != NULL)
880 cur->orig = xmlStrdup(ent->orig);
881 return(cur);
882}
883
884/**
885 * xmlCopyEntitiesTable:
886 * @table: An entity table
887 *
888 * Build a copy of an entity table.
889 *
890 * Returns the new xmlEntitiesTablePtr or NULL in case of error.
891 */
892xmlEntitiesTablePtr
893xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
894 return(xmlHashCopy(table, (xmlHashCopier) xmlCopyEntity));
895}
896
897/**
898 * xmlDumpEntityDecl:
899 * @buf: An XML buffer.
900 * @ent: An entity table
901 *
902 * This will dump the content of the entity table as an XML DTD definition
903 */
904void
905xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
906 switch (ent->etype) {
907 case XML_INTERNAL_GENERAL_ENTITY:
908 xmlBufferWriteChar(buf, "<!ENTITY ");
909 xmlBufferWriteCHAR(buf, ent->name);
910 xmlBufferWriteChar(buf, " ");
911 if (ent->orig != NULL)
912 xmlBufferWriteQuotedString(buf, ent->orig);
913 else
914 xmlBufferWriteQuotedString(buf, ent->content);
915 xmlBufferWriteChar(buf, ">\n");
916 break;
917 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
918 xmlBufferWriteChar(buf, "<!ENTITY ");
919 xmlBufferWriteCHAR(buf, ent->name);
920 if (ent->ExternalID != NULL) {
921 xmlBufferWriteChar(buf, " PUBLIC ");
922 xmlBufferWriteQuotedString(buf, ent->ExternalID);
923 xmlBufferWriteChar(buf, " ");
924 xmlBufferWriteQuotedString(buf, ent->SystemID);
925 } else {
926 xmlBufferWriteChar(buf, " SYSTEM ");
927 xmlBufferWriteQuotedString(buf, ent->SystemID);
928 }
929 xmlBufferWriteChar(buf, ">\n");
930 break;
931 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
932 xmlBufferWriteChar(buf, "<!ENTITY ");
933 xmlBufferWriteCHAR(buf, ent->name);
934 if (ent->ExternalID != NULL) {
935 xmlBufferWriteChar(buf, " PUBLIC ");
936 xmlBufferWriteQuotedString(buf, ent->ExternalID);
937 xmlBufferWriteChar(buf, " ");
938 xmlBufferWriteQuotedString(buf, ent->SystemID);
939 } else {
940 xmlBufferWriteChar(buf, " SYSTEM ");
941 xmlBufferWriteQuotedString(buf, ent->SystemID);
942 }
943 if (ent->content != NULL) { /* Should be true ! */
944 xmlBufferWriteChar(buf, " NDATA ");
945 if (ent->orig != NULL)
946 xmlBufferWriteCHAR(buf, ent->orig);
947 else
948 xmlBufferWriteCHAR(buf, ent->content);
949 }
950 xmlBufferWriteChar(buf, ">\n");
951 break;
952 case XML_INTERNAL_PARAMETER_ENTITY:
953 xmlBufferWriteChar(buf, "<!ENTITY % ");
954 xmlBufferWriteCHAR(buf, ent->name);
955 xmlBufferWriteChar(buf, " ");
956 if (ent->orig == NULL)
957 xmlBufferWriteQuotedString(buf, ent->content);
958 else
959 xmlBufferWriteQuotedString(buf, ent->orig);
960 xmlBufferWriteChar(buf, ">\n");
961 break;
962 case XML_EXTERNAL_PARAMETER_ENTITY:
963 xmlBufferWriteChar(buf, "<!ENTITY % ");
964 xmlBufferWriteCHAR(buf, ent->name);
965 if (ent->ExternalID != NULL) {
966 xmlBufferWriteChar(buf, " PUBLIC ");
967 xmlBufferWriteQuotedString(buf, ent->ExternalID);
968 xmlBufferWriteChar(buf, " ");
969 xmlBufferWriteQuotedString(buf, ent->SystemID);
970 } else {
971 xmlBufferWriteChar(buf, " SYSTEM ");
972 xmlBufferWriteQuotedString(buf, ent->SystemID);
973 }
974 xmlBufferWriteChar(buf, ">\n");
975 break;
976 default:
977 xmlGenericError(xmlGenericErrorContext,
978 "xmlDumpEntitiesTable: internal: unknown type %d\n",
979 ent->etype);
980 }
981}
982
983/**
984 * xmlDumpEntitiesTable:
985 * @buf: An XML buffer.
986 * @table: An entity table
987 *
988 * This will dump the content of the entity table as an XML DTD definition
989 */
990void
991xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
992 xmlHashScan(table, (xmlHashScanner)xmlDumpEntityDecl, buf);
993}