blob: 94f1f6ffaf35040367c4453f22a1c919c8844559 [file] [log] [blame]
/*
 * entities.c : implementation for the XML entities handling
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
8
Bjorn Reese70a9da52001-04-21 16:57:29 +00009#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000010
Owen Taylor3473f882001-02-23 17:55:21 +000011#include <string.h>
12#ifdef HAVE_STDLIB_H
13#include <stdlib.h>
14#endif
15#include <libxml/xmlmemory.h>
16#include <libxml/hash.h>
17#include <libxml/entities.h>
18#include <libxml/parser.h>
19#include <libxml/xmlerror.h>
20
/*
 * The XML predefined entities: each row pairs an entity name with its
 * replacement text.
 */

struct xmlPredefinedEntityValue {
    const char *name;		/* entity name, without '&' and ';' */
    const char *value;		/* replacement text */
};

struct xmlPredefinedEntityValue xmlPredefinedEntityValues[] = {
    { "lt",   "<"  },
    { "gt",   ">"  },
    { "apos", "'"  },
    { "quot", "\"" },
    { "amp",  "&"  }
};
36
37/*
Daniel Veillardd79bcd12001-06-21 22:07:42 +000038 * TODO: This is GROSS, allocation of a 256 entry hash for
39 * a fixed number of 4 elements !
Owen Taylor3473f882001-02-23 17:55:21 +000040 */
41xmlHashTablePtr xmlPredefinedEntities = NULL;
42
43/*
44 * xmlFreeEntity : clean-up an entity record.
45 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000046static void xmlFreeEntity(xmlEntityPtr entity) {
Owen Taylor3473f882001-02-23 17:55:21 +000047 if (entity == NULL) return;
48
49 if (entity->children)
50 xmlFreeNodeList(entity->children);
51 if (entity->name != NULL)
52 xmlFree((char *) entity->name);
53 if (entity->ExternalID != NULL)
54 xmlFree((char *) entity->ExternalID);
55 if (entity->SystemID != NULL)
56 xmlFree((char *) entity->SystemID);
57 if (entity->URI != NULL)
58 xmlFree((char *) entity->URI);
59 if (entity->content != NULL)
60 xmlFree((char *) entity->content);
61 if (entity->orig != NULL)
62 xmlFree((char *) entity->orig);
Owen Taylor3473f882001-02-23 17:55:21 +000063 xmlFree(entity);
64}
65
66/*
67 * xmlAddEntity : register a new entity for an entities table.
68 */
69static xmlEntityPtr
70xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type,
71 const xmlChar *ExternalID, const xmlChar *SystemID,
72 const xmlChar *content) {
73 xmlEntitiesTablePtr table = NULL;
74 xmlEntityPtr ret;
75
76 if (name == NULL)
77 return(NULL);
78 switch (type) {
79 case XML_INTERNAL_GENERAL_ENTITY:
80 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
81 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
82 if (dtd->entities == NULL)
83 dtd->entities = xmlHashCreate(0);
84 table = dtd->entities;
85 break;
86 case XML_INTERNAL_PARAMETER_ENTITY:
87 case XML_EXTERNAL_PARAMETER_ENTITY:
88 if (dtd->pentities == NULL)
89 dtd->pentities = xmlHashCreate(0);
90 table = dtd->pentities;
91 break;
92 case XML_INTERNAL_PREDEFINED_ENTITY:
93 if (xmlPredefinedEntities == NULL)
94 xmlPredefinedEntities = xmlHashCreate(8);
95 table = xmlPredefinedEntities;
96 }
97 if (table == NULL)
98 return(NULL);
99 ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
100 if (ret == NULL) {
101 xmlGenericError(xmlGenericErrorContext,
102 "xmlAddEntity: out of memory\n");
103 return(NULL);
104 }
105 memset(ret, 0, sizeof(xmlEntity));
106 ret->type = XML_ENTITY_DECL;
107
108 /*
109 * fill the structure.
110 */
111 ret->name = xmlStrdup(name);
112 ret->etype = (xmlEntityType) type;
113 if (ExternalID != NULL)
114 ret->ExternalID = xmlStrdup(ExternalID);
115 if (SystemID != NULL)
116 ret->SystemID = xmlStrdup(SystemID);
117 if (content != NULL) {
118 ret->length = xmlStrlen(content);
119 ret->content = xmlStrndup(content, ret->length);
120 } else {
121 ret->length = 0;
122 ret->content = NULL;
123 }
124 ret->URI = NULL; /* to be computed by the layer knowing
125 the defining entity */
126 ret->orig = NULL;
127
128 if (xmlHashAddEntry(table, name, ret)) {
129 /*
130 * entity was already defined at another level.
131 */
132 xmlFreeEntity(ret);
133 return(NULL);
134 }
135 return(ret);
136}
137
138/**
139 * xmlInitializePredefinedEntities:
140 *
141 * Set up the predefined entities.
142 */
143void xmlInitializePredefinedEntities(void) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000144 unsigned int i;
Owen Taylor3473f882001-02-23 17:55:21 +0000145 xmlChar name[50];
146 xmlChar value[50];
147 const char *in;
148 xmlChar *out;
149
150 if (xmlPredefinedEntities != NULL) return;
151
152 xmlPredefinedEntities = xmlCreateEntitiesTable();
153 for (i = 0;i < sizeof(xmlPredefinedEntityValues) /
154 sizeof(xmlPredefinedEntityValues[0]);i++) {
155 in = xmlPredefinedEntityValues[i].name;
156 out = &name[0];
157 for (;(*out++ = (xmlChar) *in);)in++;
158 in = xmlPredefinedEntityValues[i].value;
159 out = &value[0];
160 for (;(*out++ = (xmlChar) *in);)in++;
161
162 xmlAddEntity(NULL, (const xmlChar *) &name[0],
163 XML_INTERNAL_PREDEFINED_ENTITY, NULL, NULL,
164 &value[0]);
165 }
166}
167
168/**
169 * xmlCleanupPredefinedEntities:
170 *
171 * Cleanup up the predefined entities table.
172 */
173void xmlCleanupPredefinedEntities(void) {
174 if (xmlPredefinedEntities == NULL) return;
175
176 xmlFreeEntitiesTable(xmlPredefinedEntities);
177 xmlPredefinedEntities = NULL;
178}
179
180/**
181 * xmlGetPredefinedEntity:
182 * @name: the entity name
183 *
184 * Check whether this name is an predefined entity.
185 *
186 * Returns NULL if not, othervise the entity
187 */
188xmlEntityPtr
189xmlGetPredefinedEntity(const xmlChar *name) {
190 if (xmlPredefinedEntities == NULL)
191 xmlInitializePredefinedEntities();
192 return((xmlEntityPtr) xmlHashLookup(xmlPredefinedEntities, name));
193}
194
195/**
196 * xmlAddDtdEntity:
197 * @doc: the document
198 * @name: the entity name
199 * @type: the entity type XML_xxx_yyy_ENTITY
200 * @ExternalID: the entity external ID if available
201 * @SystemID: the entity system ID if available
202 * @content: the entity content
203 *
204 * Register a new entity for this document DTD external subset.
205 *
206 * Returns a pointer to the entity or NULL in case of error
207 */
208xmlEntityPtr
209xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
210 const xmlChar *ExternalID, const xmlChar *SystemID,
211 const xmlChar *content) {
212 xmlEntityPtr ret;
213 xmlDtdPtr dtd;
214
215 if (doc == NULL) {
216 xmlGenericError(xmlGenericErrorContext,
217 "xmlAddDtdEntity: doc == NULL !\n");
218 return(NULL);
219 }
220 if (doc->extSubset == NULL) {
221 xmlGenericError(xmlGenericErrorContext,
222 "xmlAddDtdEntity: document without external subset !\n");
223 return(NULL);
224 }
225 dtd = doc->extSubset;
226 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
227 if (ret == NULL) return(NULL);
228
229 /*
230 * Link it to the Dtd
231 */
232 ret->parent = dtd;
233 ret->doc = dtd->doc;
234 if (dtd->last == NULL) {
235 dtd->children = dtd->last = (xmlNodePtr) ret;
236 } else {
237 dtd->last->next = (xmlNodePtr) ret;
238 ret->prev = dtd->last;
239 dtd->last = (xmlNodePtr) ret;
240 }
241 return(ret);
242}
243
244/**
245 * xmlAddDocEntity:
246 * @doc: the document
247 * @name: the entity name
248 * @type: the entity type XML_xxx_yyy_ENTITY
249 * @ExternalID: the entity external ID if available
250 * @SystemID: the entity system ID if available
251 * @content: the entity content
252 *
253 * Register a new entity for this document.
254 *
255 * Returns a pointer to the entity or NULL in case of error
256 */
257xmlEntityPtr
258xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
259 const xmlChar *ExternalID, const xmlChar *SystemID,
260 const xmlChar *content) {
261 xmlEntityPtr ret;
262 xmlDtdPtr dtd;
263
264 if (doc == NULL) {
265 xmlGenericError(xmlGenericErrorContext,
266 "xmlAddDocEntity: document is NULL !\n");
267 return(NULL);
268 }
269 if (doc->intSubset == NULL) {
270 xmlGenericError(xmlGenericErrorContext,
271 "xmlAddDtdEntity: document without internal subset !\n");
272 return(NULL);
273 }
274 dtd = doc->intSubset;
275 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
276 if (ret == NULL) return(NULL);
277
278 /*
279 * Link it to the Dtd
280 */
281 ret->parent = dtd;
282 ret->doc = dtd->doc;
283 if (dtd->last == NULL) {
284 dtd->children = dtd->last = (xmlNodePtr) ret;
285 } else {
286 dtd->last->next = (xmlNodePtr) ret;
287 ret->prev = dtd->last;
288 dtd->last = (xmlNodePtr) ret;
289 }
290 return(ret);
291}
292
293/**
294 * xmlGetEntityFromTable:
295 * @table: an entity table
296 * @name: the entity name
297 * @parameter: look for parameter entities
298 *
299 * Do an entity lookup in the table.
300 * returns the corresponding parameter entity, if found.
301 *
302 * Returns A pointer to the entity structure or NULL if not found.
303 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000304static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +0000305xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) {
306 return((xmlEntityPtr) xmlHashLookup(table, name));
307}
308
309/**
310 * xmlGetParameterEntity:
311 * @doc: the document referencing the entity
312 * @name: the entity name
313 *
314 * Do an entity lookup in the internal and external subsets and
315 * returns the corresponding parameter entity, if found.
316 *
317 * Returns A pointer to the entity structure or NULL if not found.
318 */
319xmlEntityPtr
320xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
321 xmlEntitiesTablePtr table;
322 xmlEntityPtr ret;
323
324 if ((doc->intSubset != NULL) && (doc->intSubset->pentities != NULL)) {
325 table = (xmlEntitiesTablePtr) doc->intSubset->pentities;
326 ret = xmlGetEntityFromTable(table, name);
327 if (ret != NULL)
328 return(ret);
329 }
330 if ((doc->extSubset != NULL) && (doc->extSubset->pentities != NULL)) {
331 table = (xmlEntitiesTablePtr) doc->extSubset->pentities;
332 return(xmlGetEntityFromTable(table, name));
333 }
334 return(NULL);
335}
336
337/**
338 * xmlGetDtdEntity:
339 * @doc: the document referencing the entity
340 * @name: the entity name
341 *
342 * Do an entity lookup in the Dtd entity hash table and
343 * returns the corresponding entity, if found.
344 *
345 * Returns A pointer to the entity structure or NULL if not found.
346 */
347xmlEntityPtr
348xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
349 xmlEntitiesTablePtr table;
350
351 if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
352 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
353 return(xmlGetEntityFromTable(table, name));
354 }
355 return(NULL);
356}
357
358/**
359 * xmlGetDocEntity:
360 * @doc: the document referencing the entity
361 * @name: the entity name
362 *
363 * Do an entity lookup in the document entity hash table and
364 * returns the corrsponding entity, otherwise a lookup is done
365 * in the predefined entities too.
366 *
367 * Returns A pointer to the entity structure or NULL if not found.
368 */
369xmlEntityPtr
370xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
371 xmlEntityPtr cur;
372 xmlEntitiesTablePtr table;
373
374 if (doc != NULL) {
375 if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
376 table = (xmlEntitiesTablePtr) doc->intSubset->entities;
377 cur = xmlGetEntityFromTable(table, name);
378 if (cur != NULL)
379 return(cur);
380 }
381 if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
382 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
383 cur = xmlGetEntityFromTable(table, name);
384 if (cur != NULL)
385 return(cur);
386 }
387 }
388 if (xmlPredefinedEntities == NULL)
389 xmlInitializePredefinedEntities();
390 table = xmlPredefinedEntities;
391 return(xmlGetEntityFromTable(table, name));
392}
393
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * NOTE: the macro is looser than the production above: at or above
 * 0x20 it only rejects 0xFFFE and 0xFFFF, so surrogate values and
 * values beyond 0x10FFFF are accepted. The argument is evaluated more
 * than once.
 */
#define IS_CHAR(c)							\
    (((c) >= 0x20) ?							\
     (((c) != 0xFFFE) && ((c) != 0xFFFF)) :				\
     (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d)))
402
403/*
404 * A buffer used for converting entities to their equivalent and back.
405 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000406static int static_buffer_size = 0;
407static xmlChar *static_buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000408
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000409static int growBuffer(void) {
410 static_buffer_size *= 2;
411 static_buffer = (xmlChar *) xmlRealloc(static_buffer, static_buffer_size * sizeof(xmlChar));
412 if (static_buffer == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000413 perror("realloc failed");
414 return(-1);
415 }
416 return(0);
417}
418
419
420/**
421 * xmlEncodeEntities:
422 * @doc: the document containing the string
423 * @input: A string to convert to XML.
424 *
425 * Do a global encoding of a string, replacing the predefined entities
426 * and non ASCII values with their entities and CharRef counterparts.
427 *
428 * TODO: remove xmlEncodeEntities, once we are not afraid of breaking binary
429 * compatibility
430 *
431 * People must migrate their code to xmlEncodeEntitiesReentrant !
432 * This routine will issue a warning when encountered.
433 *
434 * Returns A newly allocated string with the substitution done.
435 */
436const xmlChar *
437xmlEncodeEntities(xmlDocPtr doc, const xmlChar *input) {
438 const xmlChar *cur = input;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000439 xmlChar *out = static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000440 static int warning = 1;
441 int html = 0;
442
443
444 if (warning) {
445 xmlGenericError(xmlGenericErrorContext,
446 "Deprecated API xmlEncodeEntities() used\n");
447 xmlGenericError(xmlGenericErrorContext,
448 " change code to use xmlEncodeEntitiesReentrant()\n");
449 warning = 0;
450 }
451
452 if (input == NULL) return(NULL);
453 if (doc != NULL)
454 html = (doc->type == XML_HTML_DOCUMENT_NODE);
455
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000456 if (static_buffer == NULL) {
457 static_buffer_size = 1000;
458 static_buffer = (xmlChar *) xmlMalloc(static_buffer_size * sizeof(xmlChar));
459 if (static_buffer == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000460 perror("malloc failed");
461 return(NULL);
462 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000463 out = static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000464 }
465 while (*cur != '\0') {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000466 if (out - static_buffer > static_buffer_size - 100) {
467 int indx = out - static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000468
469 growBuffer();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000470 out = &static_buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000471 }
472
473 /*
474 * By default one have to encode at least '<', '>', '"' and '&' !
475 */
476 if (*cur == '<') {
477 *out++ = '&';
478 *out++ = 'l';
479 *out++ = 't';
480 *out++ = ';';
481 } else if (*cur == '>') {
482 *out++ = '&';
483 *out++ = 'g';
484 *out++ = 't';
485 *out++ = ';';
486 } else if (*cur == '&') {
487 *out++ = '&';
488 *out++ = 'a';
489 *out++ = 'm';
490 *out++ = 'p';
491 *out++ = ';';
492 } else if (*cur == '"') {
493 *out++ = '&';
494 *out++ = 'q';
495 *out++ = 'u';
496 *out++ = 'o';
497 *out++ = 't';
498 *out++ = ';';
499 } else if ((*cur == '\'') && (!html)) {
500 *out++ = '&';
501 *out++ = 'a';
502 *out++ = 'p';
503 *out++ = 'o';
504 *out++ = 's';
505 *out++ = ';';
506 } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
507 (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
508 /*
509 * default case, just copy !
510 */
511 *out++ = *cur;
512#ifndef USE_UTF_8
513 } else if ((sizeof(xmlChar) == 1) && (*cur >= 0x80)) {
514 char buf[10], *ptr;
515
Owen Taylor3473f882001-02-23 17:55:21 +0000516 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000517 buf[sizeof(buf) - 1] = 0;
518 ptr = buf;
519 while (*ptr != 0) *out++ = *ptr++;
520#endif
521 } else if (IS_CHAR(*cur)) {
522 char buf[10], *ptr;
523
Owen Taylor3473f882001-02-23 17:55:21 +0000524 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000525 buf[sizeof(buf) - 1] = 0;
526 ptr = buf;
527 while (*ptr != 0) *out++ = *ptr++;
528 }
529#if 0
530 else {
531 /*
532 * default case, this is not a valid char !
533 * Skip it...
534 */
535 xmlGenericError(xmlGenericErrorContext,
536 "xmlEncodeEntities: invalid char %d\n", (int) *cur);
537 }
538#endif
539 cur++;
540 }
541 *out++ = 0;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000542 return(static_buffer);
Owen Taylor3473f882001-02-23 17:55:21 +0000543}
544
/*
 * Macro used to grow the current buffer.
 *
 * It expects the locals `buffer' (xmlChar *) and `buffer_size' (int) in
 * the enclosing function, which must return a pointer: the size is
 * doubled and, if the reallocation fails, the previous buffer is freed
 * instead of being leaked and the enclosing function returns NULL.
 * Pointers into the old buffer must be recomputed after the call.
 */
#define growBufferReentrant() do {					\
    xmlChar *grown_buffer;						\
    buffer_size *= 2;							\
    grown_buffer = (xmlChar *)						\
		xmlRealloc(buffer, buffer_size * sizeof(xmlChar));	\
    if (grown_buffer == NULL) {						\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = grown_buffer;						\
} while (0)
557
558
559/**
560 * xmlEncodeEntitiesReentrant:
561 * @doc: the document containing the string
562 * @input: A string to convert to XML.
563 *
564 * Do a global encoding of a string, replacing the predefined entities
565 * and non ASCII values with their entities and CharRef counterparts.
566 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
567 * must be deallocated.
568 *
569 * Returns A newly allocated string with the substitution done.
570 */
571xmlChar *
572xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
573 const xmlChar *cur = input;
574 xmlChar *buffer = NULL;
575 xmlChar *out = NULL;
576 int buffer_size = 0;
577 int html = 0;
578
579 if (input == NULL) return(NULL);
580 if (doc != NULL)
581 html = (doc->type == XML_HTML_DOCUMENT_NODE);
582
583 /*
584 * allocate an translation buffer.
585 */
586 buffer_size = 1000;
587 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
588 if (buffer == NULL) {
589 perror("malloc failed");
590 return(NULL);
591 }
592 out = buffer;
593
594 while (*cur != '\0') {
595 if (out - buffer > buffer_size - 100) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000596 int indx = out - buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000597
598 growBufferReentrant();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000599 out = &buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000600 }
601
602 /*
603 * By default one have to encode at least '<', '>', '"' and '&' !
604 */
605 if (*cur == '<') {
606 *out++ = '&';
607 *out++ = 'l';
608 *out++ = 't';
609 *out++ = ';';
610 } else if (*cur == '>') {
611 *out++ = '&';
612 *out++ = 'g';
613 *out++ = 't';
614 *out++ = ';';
615 } else if (*cur == '&') {
616 *out++ = '&';
617 *out++ = 'a';
618 *out++ = 'm';
619 *out++ = 'p';
620 *out++ = ';';
621 } else if (*cur == '"') {
622 *out++ = '&';
623 *out++ = 'q';
624 *out++ = 'u';
625 *out++ = 'o';
626 *out++ = 't';
627 *out++ = ';';
628#if 0
629 } else if ((*cur == '\'') && (!html)) {
630 *out++ = '&';
631 *out++ = 'a';
632 *out++ = 'p';
633 *out++ = 'o';
634 *out++ = 's';
635 *out++ = ';';
636#endif
637 } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
638 (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
639 /*
640 * default case, just copy !
641 */
642 *out++ = *cur;
643 } else if (*cur >= 0x80) {
Daniel Veillard122376b2001-04-24 12:12:30 +0000644 if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000645 /*
646 * Bjørn Reese <br@sseusa.com> provided the patch
647 xmlChar xc;
648 xc = (*cur & 0x3F) << 6;
649 if (cur[1] != 0) {
650 xc += *(++cur) & 0x3F;
651 *out++ = xc;
652 } else
653 */
654 *out++ = *cur;
655 } else {
656 /*
657 * We assume we have UTF-8 input.
658 */
659 char buf[10], *ptr;
660 int val = 0, l = 1;
661
662 if (*cur < 0xC0) {
663 xmlGenericError(xmlGenericErrorContext,
664 "xmlEncodeEntitiesReentrant : input not UTF-8\n");
Daniel Veillard122376b2001-04-24 12:12:30 +0000665 if (doc != NULL)
666 doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
Owen Taylor3473f882001-02-23 17:55:21 +0000667 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000668 buf[sizeof(buf) - 1] = 0;
669 ptr = buf;
670 while (*ptr != 0) *out++ = *ptr++;
671 continue;
672 } else if (*cur < 0xE0) {
673 val = (cur[0]) & 0x1F;
674 val <<= 6;
675 val |= (cur[1]) & 0x3F;
676 l = 2;
677 } else if (*cur < 0xF0) {
678 val = (cur[0]) & 0x0F;
679 val <<= 6;
680 val |= (cur[1]) & 0x3F;
681 val <<= 6;
682 val |= (cur[2]) & 0x3F;
683 l = 3;
684 } else if (*cur < 0xF8) {
685 val = (cur[0]) & 0x07;
686 val <<= 6;
687 val |= (cur[1]) & 0x3F;
688 val <<= 6;
689 val |= (cur[2]) & 0x3F;
690 val <<= 6;
691 val |= (cur[3]) & 0x3F;
692 l = 4;
693 }
694 if ((l == 1) || (!IS_CHAR(val))) {
695 xmlGenericError(xmlGenericErrorContext,
696 "xmlEncodeEntitiesReentrant : char out of range\n");
Daniel Veillard122376b2001-04-24 12:12:30 +0000697 if (doc != NULL)
698 doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
Owen Taylor3473f882001-02-23 17:55:21 +0000699 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000700 buf[sizeof(buf) - 1] = 0;
701 ptr = buf;
702 while (*ptr != 0) *out++ = *ptr++;
703 cur++;
704 continue;
705 }
706 /*
707 * We could do multiple things here. Just save as a char ref
708 */
Owen Taylor3473f882001-02-23 17:55:21 +0000709 snprintf(buf, sizeof(buf), "&#x%X;", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000710 buf[sizeof(buf) - 1] = 0;
711 ptr = buf;
712 while (*ptr != 0) *out++ = *ptr++;
713 cur += l;
714 continue;
715 }
716 } else if (IS_CHAR(*cur)) {
717 char buf[10], *ptr;
718
Owen Taylor3473f882001-02-23 17:55:21 +0000719 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000720 buf[sizeof(buf) - 1] = 0;
721 ptr = buf;
722 while (*ptr != 0) *out++ = *ptr++;
723 }
724#if 0
725 else {
726 /*
727 * default case, this is not a valid char !
728 * Skip it...
729 */
730 xmlGenericError(xmlGenericErrorContext,
731 "xmlEncodeEntities: invalid char %d\n", (int) *cur);
732 }
733#endif
734 cur++;
735 }
736 *out++ = 0;
737 return(buffer);
738}
739
740/**
741 * xmlEncodeSpecialChars:
742 * @doc: the document containing the string
743 * @input: A string to convert to XML.
744 *
745 * Do a global encoding of a string, replacing the predefined entities
746 * this routine is reentrant, and result must be deallocated.
747 *
748 * Returns A newly allocated string with the substitution done.
749 */
750xmlChar *
751xmlEncodeSpecialChars(xmlDocPtr doc, const xmlChar *input) {
752 const xmlChar *cur = input;
753 xmlChar *buffer = NULL;
754 xmlChar *out = NULL;
755 int buffer_size = 0;
756 int html = 0;
757
758 if (input == NULL) return(NULL);
759 if (doc != NULL)
760 html = (doc->type == XML_HTML_DOCUMENT_NODE);
761
762 /*
763 * allocate an translation buffer.
764 */
765 buffer_size = 1000;
766 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
767 if (buffer == NULL) {
768 perror("malloc failed");
769 return(NULL);
770 }
771 out = buffer;
772
773 while (*cur != '\0') {
774 if (out - buffer > buffer_size - 10) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000775 int indx = out - buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000776
777 growBufferReentrant();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000778 out = &buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000779 }
780
781 /*
782 * By default one have to encode at least '<', '>', '"' and '&' !
783 */
784 if (*cur == '<') {
785 *out++ = '&';
786 *out++ = 'l';
787 *out++ = 't';
788 *out++ = ';';
789 } else if (*cur == '>') {
790 *out++ = '&';
791 *out++ = 'g';
792 *out++ = 't';
793 *out++ = ';';
794 } else if (*cur == '&') {
795 *out++ = '&';
796 *out++ = 'a';
797 *out++ = 'm';
798 *out++ = 'p';
799 *out++ = ';';
800 } else if (*cur == '"') {
801 *out++ = '&';
802 *out++ = 'q';
803 *out++ = 'u';
804 *out++ = 'o';
805 *out++ = 't';
806 *out++ = ';';
807 } else {
808 /*
809 * Works because on UTF-8, all extended sequences cannot
810 * result in bytes in the ASCII range.
811 */
812 *out++ = *cur;
813 }
814 cur++;
815 }
816 *out++ = 0;
817 return(buffer);
818}
819
820/**
821 * xmlCreateEntitiesTable:
822 *
823 * create and initialize an empty entities hash table.
824 *
825 * Returns the xmlEntitiesTablePtr just created or NULL in case of error.
826 */
827xmlEntitiesTablePtr
828xmlCreateEntitiesTable(void) {
829 return((xmlEntitiesTablePtr) xmlHashCreate(0));
830}
831
832/**
833 * xmlFreeEntitiesTable:
834 * @table: An entity table
835 *
836 * Deallocate the memory used by an entities hash table.
837 */
838void
839xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
840 xmlHashFree(table, (xmlHashDeallocator) xmlFreeEntity);
841}
842
843/**
844 * xmlCopyEntity:
845 * @ent: An entity
846 *
847 * Build a copy of an entity
848 *
849 * Returns the new xmlEntitiesPtr or NULL in case of error.
850 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000851static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +0000852xmlCopyEntity(xmlEntityPtr ent) {
853 xmlEntityPtr cur;
854
855 cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
856 if (cur == NULL) {
857 xmlGenericError(xmlGenericErrorContext,
858 "xmlCopyEntity: out of memory !\n");
859 return(NULL);
860 }
861 memset(cur, 0, sizeof(xmlEntity));
862 cur->type = XML_ELEMENT_DECL;
863
864 cur->etype = ent->etype;
865 if (ent->name != NULL)
866 cur->name = xmlStrdup(ent->name);
867 if (ent->ExternalID != NULL)
868 cur->ExternalID = xmlStrdup(ent->ExternalID);
869 if (ent->SystemID != NULL)
870 cur->SystemID = xmlStrdup(ent->SystemID);
871 if (ent->content != NULL)
872 cur->content = xmlStrdup(ent->content);
873 if (ent->orig != NULL)
874 cur->orig = xmlStrdup(ent->orig);
875 return(cur);
876}
877
878/**
879 * xmlCopyEntitiesTable:
880 * @table: An entity table
881 *
882 * Build a copy of an entity table.
883 *
884 * Returns the new xmlEntitiesTablePtr or NULL in case of error.
885 */
886xmlEntitiesTablePtr
887xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
888 return(xmlHashCopy(table, (xmlHashCopier) xmlCopyEntity));
889}
890
891/**
892 * xmlDumpEntityDecl:
893 * @buf: An XML buffer.
894 * @ent: An entity table
895 *
896 * This will dump the content of the entity table as an XML DTD definition
897 */
898void
899xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
900 switch (ent->etype) {
901 case XML_INTERNAL_GENERAL_ENTITY:
902 xmlBufferWriteChar(buf, "<!ENTITY ");
903 xmlBufferWriteCHAR(buf, ent->name);
904 xmlBufferWriteChar(buf, " ");
905 if (ent->orig != NULL)
906 xmlBufferWriteQuotedString(buf, ent->orig);
907 else
908 xmlBufferWriteQuotedString(buf, ent->content);
909 xmlBufferWriteChar(buf, ">\n");
910 break;
911 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
912 xmlBufferWriteChar(buf, "<!ENTITY ");
913 xmlBufferWriteCHAR(buf, ent->name);
914 if (ent->ExternalID != NULL) {
915 xmlBufferWriteChar(buf, " PUBLIC ");
916 xmlBufferWriteQuotedString(buf, ent->ExternalID);
917 xmlBufferWriteChar(buf, " ");
918 xmlBufferWriteQuotedString(buf, ent->SystemID);
919 } else {
920 xmlBufferWriteChar(buf, " SYSTEM ");
921 xmlBufferWriteQuotedString(buf, ent->SystemID);
922 }
923 xmlBufferWriteChar(buf, ">\n");
924 break;
925 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
926 xmlBufferWriteChar(buf, "<!ENTITY ");
927 xmlBufferWriteCHAR(buf, ent->name);
928 if (ent->ExternalID != NULL) {
929 xmlBufferWriteChar(buf, " PUBLIC ");
930 xmlBufferWriteQuotedString(buf, ent->ExternalID);
931 xmlBufferWriteChar(buf, " ");
932 xmlBufferWriteQuotedString(buf, ent->SystemID);
933 } else {
934 xmlBufferWriteChar(buf, " SYSTEM ");
935 xmlBufferWriteQuotedString(buf, ent->SystemID);
936 }
937 if (ent->content != NULL) { /* Should be true ! */
938 xmlBufferWriteChar(buf, " NDATA ");
939 if (ent->orig != NULL)
940 xmlBufferWriteCHAR(buf, ent->orig);
941 else
942 xmlBufferWriteCHAR(buf, ent->content);
943 }
944 xmlBufferWriteChar(buf, ">\n");
945 break;
946 case XML_INTERNAL_PARAMETER_ENTITY:
947 xmlBufferWriteChar(buf, "<!ENTITY % ");
948 xmlBufferWriteCHAR(buf, ent->name);
949 xmlBufferWriteChar(buf, " ");
950 if (ent->orig == NULL)
951 xmlBufferWriteQuotedString(buf, ent->content);
952 else
953 xmlBufferWriteQuotedString(buf, ent->orig);
954 xmlBufferWriteChar(buf, ">\n");
955 break;
956 case XML_EXTERNAL_PARAMETER_ENTITY:
957 xmlBufferWriteChar(buf, "<!ENTITY % ");
958 xmlBufferWriteCHAR(buf, ent->name);
959 if (ent->ExternalID != NULL) {
960 xmlBufferWriteChar(buf, " PUBLIC ");
961 xmlBufferWriteQuotedString(buf, ent->ExternalID);
962 xmlBufferWriteChar(buf, " ");
963 xmlBufferWriteQuotedString(buf, ent->SystemID);
964 } else {
965 xmlBufferWriteChar(buf, " SYSTEM ");
966 xmlBufferWriteQuotedString(buf, ent->SystemID);
967 }
968 xmlBufferWriteChar(buf, ">\n");
969 break;
970 default:
971 xmlGenericError(xmlGenericErrorContext,
972 "xmlDumpEntitiesTable: internal: unknown type %d\n",
973 ent->etype);
974 }
975}
976
977/**
978 * xmlDumpEntitiesTable:
979 * @buf: An XML buffer.
980 * @table: An entity table
981 *
982 * This will dump the content of the entity table as an XML DTD definition
983 */
984void
985xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
986 xmlHashScan(table, (xmlHashScanner)xmlDumpEntityDecl, buf);
987}