blob: af65a38c4281cef87e54c7288e9d9c4937879753 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002 * entities.c : implementation for the XML entities handling
Owen Taylor3473f882001-02-23 17:55:21 +00003 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillardc5d64342001-06-24 12:13:24 +00006 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +00007 */
8
Bjorn Reese70a9da52001-04-21 16:57:29 +00009#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000010
Owen Taylor3473f882001-02-23 17:55:21 +000011#include <string.h>
12#ifdef HAVE_STDLIB_H
13#include <stdlib.h>
14#endif
15#include <libxml/xmlmemory.h>
16#include <libxml/hash.h>
17#include <libxml/entities.h>
18#include <libxml/parser.h>
19#include <libxml/xmlerror.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000020#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000021
/*
 * The XML predefined entities.
 *
 * Each row pairs an entity name with its replacement text. The table is
 * only ever read, so it is declared const.
 */

struct xmlPredefinedEntityValue {
    const char *name;	/* entity name, without the '&' and ';' delimiters */
    const char *value;	/* replacement text */
};
static const struct xmlPredefinedEntityValue xmlPredefinedEntityValues[] = {
    { "lt", "<" },
    { "gt", ">" },
    { "apos", "'" },
    { "quot", "\"" },
    { "amp", "&" }
};
37
38/*
Daniel Veillardd79bcd12001-06-21 22:07:42 +000039 * TODO: This is GROSS, allocation of a 256 entry hash for
40 * a fixed number of 4 elements !
Owen Taylor3473f882001-02-23 17:55:21 +000041 */
Daniel Veillardb44025c2001-10-11 22:55:55 +000042static xmlHashTablePtr xmlPredefinedEntities = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000043
44/*
45 * xmlFreeEntity : clean-up an entity record.
46 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000047static void xmlFreeEntity(xmlEntityPtr entity) {
Owen Taylor3473f882001-02-23 17:55:21 +000048 if (entity == NULL) return;
49
Daniel Veillard22090732001-07-16 00:06:07 +000050 if ((entity->children) &&
51 (entity == (xmlEntityPtr) entity->children->parent))
Owen Taylor3473f882001-02-23 17:55:21 +000052 xmlFreeNodeList(entity->children);
53 if (entity->name != NULL)
54 xmlFree((char *) entity->name);
55 if (entity->ExternalID != NULL)
56 xmlFree((char *) entity->ExternalID);
57 if (entity->SystemID != NULL)
58 xmlFree((char *) entity->SystemID);
59 if (entity->URI != NULL)
60 xmlFree((char *) entity->URI);
61 if (entity->content != NULL)
62 xmlFree((char *) entity->content);
63 if (entity->orig != NULL)
64 xmlFree((char *) entity->orig);
Owen Taylor3473f882001-02-23 17:55:21 +000065 xmlFree(entity);
66}
67
68/*
69 * xmlAddEntity : register a new entity for an entities table.
70 */
71static xmlEntityPtr
72xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type,
73 const xmlChar *ExternalID, const xmlChar *SystemID,
74 const xmlChar *content) {
75 xmlEntitiesTablePtr table = NULL;
76 xmlEntityPtr ret;
77
78 if (name == NULL)
79 return(NULL);
80 switch (type) {
81 case XML_INTERNAL_GENERAL_ENTITY:
82 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
83 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
84 if (dtd->entities == NULL)
85 dtd->entities = xmlHashCreate(0);
86 table = dtd->entities;
87 break;
88 case XML_INTERNAL_PARAMETER_ENTITY:
89 case XML_EXTERNAL_PARAMETER_ENTITY:
90 if (dtd->pentities == NULL)
91 dtd->pentities = xmlHashCreate(0);
92 table = dtd->pentities;
93 break;
94 case XML_INTERNAL_PREDEFINED_ENTITY:
95 if (xmlPredefinedEntities == NULL)
96 xmlPredefinedEntities = xmlHashCreate(8);
97 table = xmlPredefinedEntities;
98 }
99 if (table == NULL)
100 return(NULL);
101 ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
102 if (ret == NULL) {
103 xmlGenericError(xmlGenericErrorContext,
104 "xmlAddEntity: out of memory\n");
105 return(NULL);
106 }
107 memset(ret, 0, sizeof(xmlEntity));
108 ret->type = XML_ENTITY_DECL;
109
110 /*
111 * fill the structure.
112 */
113 ret->name = xmlStrdup(name);
114 ret->etype = (xmlEntityType) type;
115 if (ExternalID != NULL)
116 ret->ExternalID = xmlStrdup(ExternalID);
117 if (SystemID != NULL)
118 ret->SystemID = xmlStrdup(SystemID);
119 if (content != NULL) {
120 ret->length = xmlStrlen(content);
121 ret->content = xmlStrndup(content, ret->length);
122 } else {
123 ret->length = 0;
124 ret->content = NULL;
125 }
126 ret->URI = NULL; /* to be computed by the layer knowing
127 the defining entity */
128 ret->orig = NULL;
129
130 if (xmlHashAddEntry(table, name, ret)) {
131 /*
132 * entity was already defined at another level.
133 */
134 xmlFreeEntity(ret);
135 return(NULL);
136 }
137 return(ret);
138}
139
140/**
141 * xmlInitializePredefinedEntities:
142 *
143 * Set up the predefined entities.
144 */
145void xmlInitializePredefinedEntities(void) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000146 unsigned int i;
Owen Taylor3473f882001-02-23 17:55:21 +0000147 xmlChar name[50];
148 xmlChar value[50];
149 const char *in;
150 xmlChar *out;
151
152 if (xmlPredefinedEntities != NULL) return;
153
154 xmlPredefinedEntities = xmlCreateEntitiesTable();
155 for (i = 0;i < sizeof(xmlPredefinedEntityValues) /
156 sizeof(xmlPredefinedEntityValues[0]);i++) {
157 in = xmlPredefinedEntityValues[i].name;
158 out = &name[0];
159 for (;(*out++ = (xmlChar) *in);)in++;
160 in = xmlPredefinedEntityValues[i].value;
161 out = &value[0];
162 for (;(*out++ = (xmlChar) *in);)in++;
163
164 xmlAddEntity(NULL, (const xmlChar *) &name[0],
165 XML_INTERNAL_PREDEFINED_ENTITY, NULL, NULL,
166 &value[0]);
167 }
168}
169
170/**
171 * xmlCleanupPredefinedEntities:
172 *
173 * Cleanup up the predefined entities table.
174 */
175void xmlCleanupPredefinedEntities(void) {
176 if (xmlPredefinedEntities == NULL) return;
177
178 xmlFreeEntitiesTable(xmlPredefinedEntities);
179 xmlPredefinedEntities = NULL;
180}
181
182/**
183 * xmlGetPredefinedEntity:
184 * @name: the entity name
185 *
186 * Check whether this name is an predefined entity.
187 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000188 * Returns NULL if not, otherwise the entity
Owen Taylor3473f882001-02-23 17:55:21 +0000189 */
190xmlEntityPtr
191xmlGetPredefinedEntity(const xmlChar *name) {
192 if (xmlPredefinedEntities == NULL)
193 xmlInitializePredefinedEntities();
194 return((xmlEntityPtr) xmlHashLookup(xmlPredefinedEntities, name));
195}
196
197/**
198 * xmlAddDtdEntity:
199 * @doc: the document
200 * @name: the entity name
201 * @type: the entity type XML_xxx_yyy_ENTITY
202 * @ExternalID: the entity external ID if available
203 * @SystemID: the entity system ID if available
204 * @content: the entity content
205 *
206 * Register a new entity for this document DTD external subset.
207 *
208 * Returns a pointer to the entity or NULL in case of error
209 */
210xmlEntityPtr
211xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
212 const xmlChar *ExternalID, const xmlChar *SystemID,
213 const xmlChar *content) {
214 xmlEntityPtr ret;
215 xmlDtdPtr dtd;
216
217 if (doc == NULL) {
218 xmlGenericError(xmlGenericErrorContext,
219 "xmlAddDtdEntity: doc == NULL !\n");
220 return(NULL);
221 }
222 if (doc->extSubset == NULL) {
223 xmlGenericError(xmlGenericErrorContext,
224 "xmlAddDtdEntity: document without external subset !\n");
225 return(NULL);
226 }
227 dtd = doc->extSubset;
228 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
229 if (ret == NULL) return(NULL);
230
231 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000232 * Link it to the DTD
Owen Taylor3473f882001-02-23 17:55:21 +0000233 */
234 ret->parent = dtd;
235 ret->doc = dtd->doc;
236 if (dtd->last == NULL) {
237 dtd->children = dtd->last = (xmlNodePtr) ret;
238 } else {
239 dtd->last->next = (xmlNodePtr) ret;
240 ret->prev = dtd->last;
241 dtd->last = (xmlNodePtr) ret;
242 }
243 return(ret);
244}
245
246/**
247 * xmlAddDocEntity:
248 * @doc: the document
249 * @name: the entity name
250 * @type: the entity type XML_xxx_yyy_ENTITY
251 * @ExternalID: the entity external ID if available
252 * @SystemID: the entity system ID if available
253 * @content: the entity content
254 *
255 * Register a new entity for this document.
256 *
257 * Returns a pointer to the entity or NULL in case of error
258 */
259xmlEntityPtr
260xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
261 const xmlChar *ExternalID, const xmlChar *SystemID,
262 const xmlChar *content) {
263 xmlEntityPtr ret;
264 xmlDtdPtr dtd;
265
266 if (doc == NULL) {
267 xmlGenericError(xmlGenericErrorContext,
268 "xmlAddDocEntity: document is NULL !\n");
269 return(NULL);
270 }
271 if (doc->intSubset == NULL) {
272 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000273 "xmlAddDocEntity: document without internal subset !\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000274 return(NULL);
275 }
276 dtd = doc->intSubset;
277 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
278 if (ret == NULL) return(NULL);
279
280 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000281 * Link it to the DTD
Owen Taylor3473f882001-02-23 17:55:21 +0000282 */
283 ret->parent = dtd;
284 ret->doc = dtd->doc;
285 if (dtd->last == NULL) {
286 dtd->children = dtd->last = (xmlNodePtr) ret;
287 } else {
288 dtd->last->next = (xmlNodePtr) ret;
289 ret->prev = dtd->last;
290 dtd->last = (xmlNodePtr) ret;
291 }
292 return(ret);
293}
294
295/**
296 * xmlGetEntityFromTable:
297 * @table: an entity table
298 * @name: the entity name
299 * @parameter: look for parameter entities
300 *
301 * Do an entity lookup in the table.
302 * returns the corresponding parameter entity, if found.
303 *
304 * Returns A pointer to the entity structure or NULL if not found.
305 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000306static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +0000307xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) {
308 return((xmlEntityPtr) xmlHashLookup(table, name));
309}
310
311/**
312 * xmlGetParameterEntity:
313 * @doc: the document referencing the entity
314 * @name: the entity name
315 *
316 * Do an entity lookup in the internal and external subsets and
317 * returns the corresponding parameter entity, if found.
318 *
319 * Returns A pointer to the entity structure or NULL if not found.
320 */
321xmlEntityPtr
322xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
323 xmlEntitiesTablePtr table;
324 xmlEntityPtr ret;
325
Daniel Veillard36065812002-01-24 15:02:46 +0000326 if (doc == NULL)
327 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000328 if ((doc->intSubset != NULL) && (doc->intSubset->pentities != NULL)) {
329 table = (xmlEntitiesTablePtr) doc->intSubset->pentities;
330 ret = xmlGetEntityFromTable(table, name);
331 if (ret != NULL)
332 return(ret);
333 }
334 if ((doc->extSubset != NULL) && (doc->extSubset->pentities != NULL)) {
335 table = (xmlEntitiesTablePtr) doc->extSubset->pentities;
336 return(xmlGetEntityFromTable(table, name));
337 }
338 return(NULL);
339}
340
341/**
342 * xmlGetDtdEntity:
343 * @doc: the document referencing the entity
344 * @name: the entity name
345 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000346 * Do an entity lookup in the DTD entity hash table and
Owen Taylor3473f882001-02-23 17:55:21 +0000347 * returns the corresponding entity, if found.
Daniel Veillard36065812002-01-24 15:02:46 +0000348 * Note: the first argument is the document node, not the DTD node.
Owen Taylor3473f882001-02-23 17:55:21 +0000349 *
350 * Returns A pointer to the entity structure or NULL if not found.
351 */
352xmlEntityPtr
353xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
354 xmlEntitiesTablePtr table;
355
Daniel Veillard36065812002-01-24 15:02:46 +0000356 if (doc == NULL)
357 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000358 if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
359 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
360 return(xmlGetEntityFromTable(table, name));
361 }
362 return(NULL);
363}
364
365/**
366 * xmlGetDocEntity:
367 * @doc: the document referencing the entity
368 * @name: the entity name
369 *
370 * Do an entity lookup in the document entity hash table and
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000371 * returns the corresponding entity, otherwise a lookup is done
Owen Taylor3473f882001-02-23 17:55:21 +0000372 * in the predefined entities too.
373 *
374 * Returns A pointer to the entity structure or NULL if not found.
375 */
376xmlEntityPtr
377xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
378 xmlEntityPtr cur;
379 xmlEntitiesTablePtr table;
380
381 if (doc != NULL) {
382 if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
383 table = (xmlEntitiesTablePtr) doc->intSubset->entities;
384 cur = xmlGetEntityFromTable(table, name);
385 if (cur != NULL)
386 return(cur);
387 }
388 if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
389 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
390 cur = xmlGetEntityFromTable(table, name);
391 if (cur != NULL)
392 return(cur);
393 }
394 }
395 if (xmlPredefinedEntities == NULL)
396 xmlInitializePredefinedEntities();
397 table = xmlPredefinedEntities;
398 return(xmlGetEntityFromTable(table, name));
399}
400
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * The macro follows the production exactly: surrogates and values above
 * 0x10FFFF are rejected too.
 * The argument is evaluated several times, do not pass an expression
 * with side effects.
 */
#define IS_CHAR(c)							\
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) ||		\
     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||				\
     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||				\
     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
409
410/*
411 * A buffer used for converting entities to their equivalent and back.
412 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000413static int static_buffer_size = 0;
414static xmlChar *static_buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000415
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000416static int growBuffer(void) {
417 static_buffer_size *= 2;
418 static_buffer = (xmlChar *) xmlRealloc(static_buffer, static_buffer_size * sizeof(xmlChar));
419 if (static_buffer == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000420 perror("realloc failed");
421 return(-1);
422 }
423 return(0);
424}
425
426
427/**
428 * xmlEncodeEntities:
429 * @doc: the document containing the string
430 * @input: A string to convert to XML.
431 *
432 * Do a global encoding of a string, replacing the predefined entities
433 * and non ASCII values with their entities and CharRef counterparts.
434 *
435 * TODO: remove xmlEncodeEntities, once we are not afraid of breaking binary
436 * compatibility
437 *
438 * People must migrate their code to xmlEncodeEntitiesReentrant !
439 * This routine will issue a warning when encountered.
440 *
441 * Returns A newly allocated string with the substitution done.
442 */
443const xmlChar *
444xmlEncodeEntities(xmlDocPtr doc, const xmlChar *input) {
445 const xmlChar *cur = input;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000446 xmlChar *out = static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000447 static int warning = 1;
448 int html = 0;
449
450
451 if (warning) {
452 xmlGenericError(xmlGenericErrorContext,
453 "Deprecated API xmlEncodeEntities() used\n");
454 xmlGenericError(xmlGenericErrorContext,
455 " change code to use xmlEncodeEntitiesReentrant()\n");
456 warning = 0;
457 }
458
459 if (input == NULL) return(NULL);
460 if (doc != NULL)
461 html = (doc->type == XML_HTML_DOCUMENT_NODE);
462
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000463 if (static_buffer == NULL) {
464 static_buffer_size = 1000;
465 static_buffer = (xmlChar *) xmlMalloc(static_buffer_size * sizeof(xmlChar));
466 if (static_buffer == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000467 perror("malloc failed");
468 return(NULL);
469 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000470 out = static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000471 }
472 while (*cur != '\0') {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000473 if (out - static_buffer > static_buffer_size - 100) {
474 int indx = out - static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000475
476 growBuffer();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000477 out = &static_buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000478 }
479
480 /*
481 * By default one have to encode at least '<', '>', '"' and '&' !
482 */
483 if (*cur == '<') {
484 *out++ = '&';
485 *out++ = 'l';
486 *out++ = 't';
487 *out++ = ';';
488 } else if (*cur == '>') {
489 *out++ = '&';
490 *out++ = 'g';
491 *out++ = 't';
492 *out++ = ';';
493 } else if (*cur == '&') {
494 *out++ = '&';
495 *out++ = 'a';
496 *out++ = 'm';
497 *out++ = 'p';
498 *out++ = ';';
499 } else if (*cur == '"') {
500 *out++ = '&';
501 *out++ = 'q';
502 *out++ = 'u';
503 *out++ = 'o';
504 *out++ = 't';
505 *out++ = ';';
506 } else if ((*cur == '\'') && (!html)) {
507 *out++ = '&';
508 *out++ = 'a';
509 *out++ = 'p';
510 *out++ = 'o';
511 *out++ = 's';
512 *out++ = ';';
513 } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
514 (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
515 /*
516 * default case, just copy !
517 */
518 *out++ = *cur;
519#ifndef USE_UTF_8
520 } else if ((sizeof(xmlChar) == 1) && (*cur >= 0x80)) {
521 char buf[10], *ptr;
522
Owen Taylor3473f882001-02-23 17:55:21 +0000523 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000524 buf[sizeof(buf) - 1] = 0;
525 ptr = buf;
526 while (*ptr != 0) *out++ = *ptr++;
527#endif
528 } else if (IS_CHAR(*cur)) {
529 char buf[10], *ptr;
530
Owen Taylor3473f882001-02-23 17:55:21 +0000531 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000532 buf[sizeof(buf) - 1] = 0;
533 ptr = buf;
534 while (*ptr != 0) *out++ = *ptr++;
535 }
536#if 0
537 else {
538 /*
539 * default case, this is not a valid char !
540 * Skip it...
541 */
542 xmlGenericError(xmlGenericErrorContext,
543 "xmlEncodeEntities: invalid char %d\n", (int) *cur);
544 }
545#endif
546 cur++;
547 }
548 *out++ = 0;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000549 return(static_buffer);
Owen Taylor3473f882001-02-23 17:55:21 +0000550}
551
/*
 * Macro used to grow the current buffer.
 *
 * It expects the locals `buffer' and `buffer_size' in the calling
 * function and doubles the buffer. On failure the old buffer is released
 * (assuming xmlRealloc keeps it allocated, like realloc does) and the
 * calling function returns NULL.
 */
#define growBufferReentrant() {						\
    xmlChar *grown_buffer;						\
    buffer_size *= 2;							\
    grown_buffer = (xmlChar *)						\
    		xmlRealloc(buffer, buffer_size * sizeof(xmlChar));	\
    if (grown_buffer == NULL) {						\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = grown_buffer;						\
}
564
565
566/**
567 * xmlEncodeEntitiesReentrant:
568 * @doc: the document containing the string
569 * @input: A string to convert to XML.
570 *
571 * Do a global encoding of a string, replacing the predefined entities
572 * and non ASCII values with their entities and CharRef counterparts.
573 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
574 * must be deallocated.
575 *
576 * Returns A newly allocated string with the substitution done.
577 */
578xmlChar *
579xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
580 const xmlChar *cur = input;
581 xmlChar *buffer = NULL;
582 xmlChar *out = NULL;
583 int buffer_size = 0;
584 int html = 0;
585
586 if (input == NULL) return(NULL);
587 if (doc != NULL)
588 html = (doc->type == XML_HTML_DOCUMENT_NODE);
589
590 /*
591 * allocate an translation buffer.
592 */
593 buffer_size = 1000;
594 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
595 if (buffer == NULL) {
596 perror("malloc failed");
597 return(NULL);
598 }
599 out = buffer;
600
601 while (*cur != '\0') {
602 if (out - buffer > buffer_size - 100) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000603 int indx = out - buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000604
605 growBufferReentrant();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000606 out = &buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000607 }
608
609 /*
610 * By default one have to encode at least '<', '>', '"' and '&' !
611 */
612 if (*cur == '<') {
613 *out++ = '&';
614 *out++ = 'l';
615 *out++ = 't';
616 *out++ = ';';
617 } else if (*cur == '>') {
618 *out++ = '&';
619 *out++ = 'g';
620 *out++ = 't';
621 *out++ = ';';
622 } else if (*cur == '&') {
623 *out++ = '&';
624 *out++ = 'a';
625 *out++ = 'm';
626 *out++ = 'p';
627 *out++ = ';';
628 } else if (*cur == '"') {
629 *out++ = '&';
630 *out++ = 'q';
631 *out++ = 'u';
632 *out++ = 'o';
633 *out++ = 't';
634 *out++ = ';';
635#if 0
636 } else if ((*cur == '\'') && (!html)) {
637 *out++ = '&';
638 *out++ = 'a';
639 *out++ = 'p';
640 *out++ = 'o';
641 *out++ = 's';
642 *out++ = ';';
643#endif
644 } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
645 (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
646 /*
647 * default case, just copy !
648 */
649 *out++ = *cur;
650 } else if (*cur >= 0x80) {
Daniel Veillard122376b2001-04-24 12:12:30 +0000651 if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000652 /*
653 * Bjørn Reese <br@sseusa.com> provided the patch
654 xmlChar xc;
655 xc = (*cur & 0x3F) << 6;
656 if (cur[1] != 0) {
657 xc += *(++cur) & 0x3F;
658 *out++ = xc;
659 } else
660 */
661 *out++ = *cur;
662 } else {
663 /*
664 * We assume we have UTF-8 input.
665 */
666 char buf[10], *ptr;
667 int val = 0, l = 1;
668
669 if (*cur < 0xC0) {
670 xmlGenericError(xmlGenericErrorContext,
671 "xmlEncodeEntitiesReentrant : input not UTF-8\n");
Daniel Veillard122376b2001-04-24 12:12:30 +0000672 if (doc != NULL)
673 doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
Owen Taylor3473f882001-02-23 17:55:21 +0000674 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000675 buf[sizeof(buf) - 1] = 0;
676 ptr = buf;
677 while (*ptr != 0) *out++ = *ptr++;
Daniel Veillard05c13a22001-09-09 08:38:09 +0000678 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +0000679 continue;
680 } else if (*cur < 0xE0) {
681 val = (cur[0]) & 0x1F;
682 val <<= 6;
683 val |= (cur[1]) & 0x3F;
684 l = 2;
685 } else if (*cur < 0xF0) {
686 val = (cur[0]) & 0x0F;
687 val <<= 6;
688 val |= (cur[1]) & 0x3F;
689 val <<= 6;
690 val |= (cur[2]) & 0x3F;
691 l = 3;
692 } else if (*cur < 0xF8) {
693 val = (cur[0]) & 0x07;
694 val <<= 6;
695 val |= (cur[1]) & 0x3F;
696 val <<= 6;
697 val |= (cur[2]) & 0x3F;
698 val <<= 6;
699 val |= (cur[3]) & 0x3F;
700 l = 4;
701 }
702 if ((l == 1) || (!IS_CHAR(val))) {
703 xmlGenericError(xmlGenericErrorContext,
704 "xmlEncodeEntitiesReentrant : char out of range\n");
Daniel Veillard122376b2001-04-24 12:12:30 +0000705 if (doc != NULL)
706 doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
Owen Taylor3473f882001-02-23 17:55:21 +0000707 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000708 buf[sizeof(buf) - 1] = 0;
709 ptr = buf;
710 while (*ptr != 0) *out++ = *ptr++;
711 cur++;
712 continue;
713 }
714 /*
715 * We could do multiple things here. Just save as a char ref
716 */
Daniel Veillard16698282001-09-14 10:29:27 +0000717 if (html)
718 snprintf(buf, sizeof(buf), "&#%d;", val);
719 else
720 snprintf(buf, sizeof(buf), "&#x%X;", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000721 buf[sizeof(buf) - 1] = 0;
722 ptr = buf;
723 while (*ptr != 0) *out++ = *ptr++;
724 cur += l;
725 continue;
726 }
727 } else if (IS_CHAR(*cur)) {
728 char buf[10], *ptr;
729
Owen Taylor3473f882001-02-23 17:55:21 +0000730 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000731 buf[sizeof(buf) - 1] = 0;
732 ptr = buf;
733 while (*ptr != 0) *out++ = *ptr++;
734 }
735#if 0
736 else {
737 /*
738 * default case, this is not a valid char !
739 * Skip it...
740 */
741 xmlGenericError(xmlGenericErrorContext,
742 "xmlEncodeEntities: invalid char %d\n", (int) *cur);
743 }
744#endif
745 cur++;
746 }
747 *out++ = 0;
748 return(buffer);
749}
750
751/**
752 * xmlEncodeSpecialChars:
753 * @doc: the document containing the string
754 * @input: A string to convert to XML.
755 *
756 * Do a global encoding of a string, replacing the predefined entities
757 * this routine is reentrant, and result must be deallocated.
758 *
759 * Returns A newly allocated string with the substitution done.
760 */
761xmlChar *
762xmlEncodeSpecialChars(xmlDocPtr doc, const xmlChar *input) {
763 const xmlChar *cur = input;
764 xmlChar *buffer = NULL;
765 xmlChar *out = NULL;
766 int buffer_size = 0;
767 int html = 0;
768
769 if (input == NULL) return(NULL);
770 if (doc != NULL)
771 html = (doc->type == XML_HTML_DOCUMENT_NODE);
772
773 /*
774 * allocate an translation buffer.
775 */
776 buffer_size = 1000;
777 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
778 if (buffer == NULL) {
779 perror("malloc failed");
780 return(NULL);
781 }
782 out = buffer;
783
784 while (*cur != '\0') {
785 if (out - buffer > buffer_size - 10) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000786 int indx = out - buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000787
788 growBufferReentrant();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000789 out = &buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000790 }
791
792 /*
793 * By default one have to encode at least '<', '>', '"' and '&' !
794 */
795 if (*cur == '<') {
796 *out++ = '&';
797 *out++ = 'l';
798 *out++ = 't';
799 *out++ = ';';
800 } else if (*cur == '>') {
801 *out++ = '&';
802 *out++ = 'g';
803 *out++ = 't';
804 *out++ = ';';
805 } else if (*cur == '&') {
806 *out++ = '&';
807 *out++ = 'a';
808 *out++ = 'm';
809 *out++ = 'p';
810 *out++ = ';';
811 } else if (*cur == '"') {
812 *out++ = '&';
813 *out++ = 'q';
814 *out++ = 'u';
815 *out++ = 'o';
816 *out++ = 't';
817 *out++ = ';';
818 } else {
819 /*
820 * Works because on UTF-8, all extended sequences cannot
821 * result in bytes in the ASCII range.
822 */
823 *out++ = *cur;
824 }
825 cur++;
826 }
827 *out++ = 0;
828 return(buffer);
829}
830
831/**
832 * xmlCreateEntitiesTable:
833 *
834 * create and initialize an empty entities hash table.
835 *
836 * Returns the xmlEntitiesTablePtr just created or NULL in case of error.
837 */
838xmlEntitiesTablePtr
839xmlCreateEntitiesTable(void) {
840 return((xmlEntitiesTablePtr) xmlHashCreate(0));
841}
842
843/**
844 * xmlFreeEntitiesTable:
845 * @table: An entity table
846 *
847 * Deallocate the memory used by an entities hash table.
848 */
849void
850xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
851 xmlHashFree(table, (xmlHashDeallocator) xmlFreeEntity);
852}
853
854/**
855 * xmlCopyEntity:
856 * @ent: An entity
857 *
858 * Build a copy of an entity
859 *
860 * Returns the new xmlEntitiesPtr or NULL in case of error.
861 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000862static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +0000863xmlCopyEntity(xmlEntityPtr ent) {
864 xmlEntityPtr cur;
865
866 cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
867 if (cur == NULL) {
868 xmlGenericError(xmlGenericErrorContext,
869 "xmlCopyEntity: out of memory !\n");
870 return(NULL);
871 }
872 memset(cur, 0, sizeof(xmlEntity));
Daniel Veillard845cce42002-01-09 11:51:37 +0000873 cur->type = XML_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +0000874
875 cur->etype = ent->etype;
876 if (ent->name != NULL)
877 cur->name = xmlStrdup(ent->name);
878 if (ent->ExternalID != NULL)
879 cur->ExternalID = xmlStrdup(ent->ExternalID);
880 if (ent->SystemID != NULL)
881 cur->SystemID = xmlStrdup(ent->SystemID);
882 if (ent->content != NULL)
883 cur->content = xmlStrdup(ent->content);
884 if (ent->orig != NULL)
885 cur->orig = xmlStrdup(ent->orig);
Daniel Veillard8ee9c8f2002-01-26 21:42:58 +0000886 if (ent->URI != NULL)
887 cur->URI = xmlStrdup(ent->URI);
Owen Taylor3473f882001-02-23 17:55:21 +0000888 return(cur);
889}
890
891/**
892 * xmlCopyEntitiesTable:
893 * @table: An entity table
894 *
895 * Build a copy of an entity table.
896 *
897 * Returns the new xmlEntitiesTablePtr or NULL in case of error.
898 */
899xmlEntitiesTablePtr
900xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
901 return(xmlHashCopy(table, (xmlHashCopier) xmlCopyEntity));
902}
903
904/**
905 * xmlDumpEntityDecl:
906 * @buf: An XML buffer.
907 * @ent: An entity table
908 *
909 * This will dump the content of the entity table as an XML DTD definition
910 */
911void
912xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
913 switch (ent->etype) {
914 case XML_INTERNAL_GENERAL_ENTITY:
915 xmlBufferWriteChar(buf, "<!ENTITY ");
916 xmlBufferWriteCHAR(buf, ent->name);
917 xmlBufferWriteChar(buf, " ");
918 if (ent->orig != NULL)
919 xmlBufferWriteQuotedString(buf, ent->orig);
920 else
921 xmlBufferWriteQuotedString(buf, ent->content);
922 xmlBufferWriteChar(buf, ">\n");
923 break;
924 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
925 xmlBufferWriteChar(buf, "<!ENTITY ");
926 xmlBufferWriteCHAR(buf, ent->name);
927 if (ent->ExternalID != NULL) {
928 xmlBufferWriteChar(buf, " PUBLIC ");
929 xmlBufferWriteQuotedString(buf, ent->ExternalID);
930 xmlBufferWriteChar(buf, " ");
931 xmlBufferWriteQuotedString(buf, ent->SystemID);
932 } else {
933 xmlBufferWriteChar(buf, " SYSTEM ");
934 xmlBufferWriteQuotedString(buf, ent->SystemID);
935 }
936 xmlBufferWriteChar(buf, ">\n");
937 break;
938 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
939 xmlBufferWriteChar(buf, "<!ENTITY ");
940 xmlBufferWriteCHAR(buf, ent->name);
941 if (ent->ExternalID != NULL) {
942 xmlBufferWriteChar(buf, " PUBLIC ");
943 xmlBufferWriteQuotedString(buf, ent->ExternalID);
944 xmlBufferWriteChar(buf, " ");
945 xmlBufferWriteQuotedString(buf, ent->SystemID);
946 } else {
947 xmlBufferWriteChar(buf, " SYSTEM ");
948 xmlBufferWriteQuotedString(buf, ent->SystemID);
949 }
950 if (ent->content != NULL) { /* Should be true ! */
951 xmlBufferWriteChar(buf, " NDATA ");
952 if (ent->orig != NULL)
953 xmlBufferWriteCHAR(buf, ent->orig);
954 else
955 xmlBufferWriteCHAR(buf, ent->content);
956 }
957 xmlBufferWriteChar(buf, ">\n");
958 break;
959 case XML_INTERNAL_PARAMETER_ENTITY:
960 xmlBufferWriteChar(buf, "<!ENTITY % ");
961 xmlBufferWriteCHAR(buf, ent->name);
962 xmlBufferWriteChar(buf, " ");
963 if (ent->orig == NULL)
964 xmlBufferWriteQuotedString(buf, ent->content);
965 else
966 xmlBufferWriteQuotedString(buf, ent->orig);
967 xmlBufferWriteChar(buf, ">\n");
968 break;
969 case XML_EXTERNAL_PARAMETER_ENTITY:
970 xmlBufferWriteChar(buf, "<!ENTITY % ");
971 xmlBufferWriteCHAR(buf, ent->name);
972 if (ent->ExternalID != NULL) {
973 xmlBufferWriteChar(buf, " PUBLIC ");
974 xmlBufferWriteQuotedString(buf, ent->ExternalID);
975 xmlBufferWriteChar(buf, " ");
976 xmlBufferWriteQuotedString(buf, ent->SystemID);
977 } else {
978 xmlBufferWriteChar(buf, " SYSTEM ");
979 xmlBufferWriteQuotedString(buf, ent->SystemID);
980 }
981 xmlBufferWriteChar(buf, ">\n");
982 break;
983 default:
984 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000985 "xmlDumpEntitiesDecl: internal: unknown type %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000986 ent->etype);
987 }
988}
989
990/**
991 * xmlDumpEntitiesTable:
992 * @buf: An XML buffer.
993 * @table: An entity table
994 *
995 * This will dump the content of the entity table as an XML DTD definition
996 */
997void
998xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
999 xmlHashScan(table, (xmlHashScanner)xmlDumpEntityDecl, buf);
1000}