/*
 * entities.c : implementation for the XML entities handling
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
8
Bjorn Reese70a9da52001-04-21 16:57:29 +00009#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000010
Owen Taylor3473f882001-02-23 17:55:21 +000011#include <string.h>
12#ifdef HAVE_STDLIB_H
13#include <stdlib.h>
14#endif
15#include <libxml/xmlmemory.h>
16#include <libxml/hash.h>
17#include <libxml/entities.h>
18#include <libxml/parser.h>
19#include <libxml/xmlerror.h>
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000020#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000021
/*
 * Table of the entities predefined by XML: each row pairs an entity
 * name with the literal text it stands for.
 */

struct xmlPredefinedEntityValue {
    const char *name;	/* entity name, without the '&' and ';' */
    const char *value;	/* replacement text */
};

static struct xmlPredefinedEntityValue xmlPredefinedEntityValues[] = {
    { "lt",	"<" },
    { "gt",	">" },
    { "apos",	"'" },
    { "quot",	"\"" },
    { "amp",	"&" }
};
37
38/*
Daniel Veillardd79bcd12001-06-21 22:07:42 +000039 * TODO: This is GROSS, allocation of a 256 entry hash for
40 * a fixed number of 4 elements !
Owen Taylor3473f882001-02-23 17:55:21 +000041 */
Daniel Veillardb44025c2001-10-11 22:55:55 +000042static xmlHashTablePtr xmlPredefinedEntities = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000043
44/*
45 * xmlFreeEntity : clean-up an entity record.
46 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000047static void xmlFreeEntity(xmlEntityPtr entity) {
Owen Taylor3473f882001-02-23 17:55:21 +000048 if (entity == NULL) return;
49
Daniel Veillard22090732001-07-16 00:06:07 +000050 if ((entity->children) &&
51 (entity == (xmlEntityPtr) entity->children->parent))
Owen Taylor3473f882001-02-23 17:55:21 +000052 xmlFreeNodeList(entity->children);
53 if (entity->name != NULL)
54 xmlFree((char *) entity->name);
55 if (entity->ExternalID != NULL)
56 xmlFree((char *) entity->ExternalID);
57 if (entity->SystemID != NULL)
58 xmlFree((char *) entity->SystemID);
59 if (entity->URI != NULL)
60 xmlFree((char *) entity->URI);
61 if (entity->content != NULL)
62 xmlFree((char *) entity->content);
63 if (entity->orig != NULL)
64 xmlFree((char *) entity->orig);
Owen Taylor3473f882001-02-23 17:55:21 +000065 xmlFree(entity);
66}
67
68/*
69 * xmlAddEntity : register a new entity for an entities table.
70 */
71static xmlEntityPtr
72xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type,
73 const xmlChar *ExternalID, const xmlChar *SystemID,
74 const xmlChar *content) {
75 xmlEntitiesTablePtr table = NULL;
76 xmlEntityPtr ret;
77
78 if (name == NULL)
79 return(NULL);
80 switch (type) {
81 case XML_INTERNAL_GENERAL_ENTITY:
82 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
83 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
84 if (dtd->entities == NULL)
85 dtd->entities = xmlHashCreate(0);
86 table = dtd->entities;
87 break;
88 case XML_INTERNAL_PARAMETER_ENTITY:
89 case XML_EXTERNAL_PARAMETER_ENTITY:
90 if (dtd->pentities == NULL)
91 dtd->pentities = xmlHashCreate(0);
92 table = dtd->pentities;
93 break;
94 case XML_INTERNAL_PREDEFINED_ENTITY:
95 if (xmlPredefinedEntities == NULL)
96 xmlPredefinedEntities = xmlHashCreate(8);
97 table = xmlPredefinedEntities;
98 }
99 if (table == NULL)
100 return(NULL);
101 ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
102 if (ret == NULL) {
103 xmlGenericError(xmlGenericErrorContext,
104 "xmlAddEntity: out of memory\n");
105 return(NULL);
106 }
107 memset(ret, 0, sizeof(xmlEntity));
108 ret->type = XML_ENTITY_DECL;
109
110 /*
111 * fill the structure.
112 */
113 ret->name = xmlStrdup(name);
114 ret->etype = (xmlEntityType) type;
115 if (ExternalID != NULL)
116 ret->ExternalID = xmlStrdup(ExternalID);
117 if (SystemID != NULL)
118 ret->SystemID = xmlStrdup(SystemID);
119 if (content != NULL) {
120 ret->length = xmlStrlen(content);
121 ret->content = xmlStrndup(content, ret->length);
122 } else {
123 ret->length = 0;
124 ret->content = NULL;
125 }
126 ret->URI = NULL; /* to be computed by the layer knowing
127 the defining entity */
128 ret->orig = NULL;
129
130 if (xmlHashAddEntry(table, name, ret)) {
131 /*
132 * entity was already defined at another level.
133 */
134 xmlFreeEntity(ret);
135 return(NULL);
136 }
137 return(ret);
138}
139
140/**
141 * xmlInitializePredefinedEntities:
142 *
143 * Set up the predefined entities.
144 */
145void xmlInitializePredefinedEntities(void) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000146 unsigned int i;
Owen Taylor3473f882001-02-23 17:55:21 +0000147 xmlChar name[50];
148 xmlChar value[50];
149 const char *in;
150 xmlChar *out;
151
152 if (xmlPredefinedEntities != NULL) return;
153
154 xmlPredefinedEntities = xmlCreateEntitiesTable();
155 for (i = 0;i < sizeof(xmlPredefinedEntityValues) /
156 sizeof(xmlPredefinedEntityValues[0]);i++) {
157 in = xmlPredefinedEntityValues[i].name;
158 out = &name[0];
159 for (;(*out++ = (xmlChar) *in);)in++;
160 in = xmlPredefinedEntityValues[i].value;
161 out = &value[0];
162 for (;(*out++ = (xmlChar) *in);)in++;
163
164 xmlAddEntity(NULL, (const xmlChar *) &name[0],
165 XML_INTERNAL_PREDEFINED_ENTITY, NULL, NULL,
166 &value[0]);
167 }
168}
169
170/**
171 * xmlCleanupPredefinedEntities:
172 *
173 * Cleanup up the predefined entities table.
174 */
175void xmlCleanupPredefinedEntities(void) {
176 if (xmlPredefinedEntities == NULL) return;
177
178 xmlFreeEntitiesTable(xmlPredefinedEntities);
179 xmlPredefinedEntities = NULL;
180}
181
182/**
183 * xmlGetPredefinedEntity:
184 * @name: the entity name
185 *
186 * Check whether this name is an predefined entity.
187 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000188 * Returns NULL if not, otherwise the entity
Owen Taylor3473f882001-02-23 17:55:21 +0000189 */
190xmlEntityPtr
191xmlGetPredefinedEntity(const xmlChar *name) {
192 if (xmlPredefinedEntities == NULL)
193 xmlInitializePredefinedEntities();
194 return((xmlEntityPtr) xmlHashLookup(xmlPredefinedEntities, name));
195}
196
197/**
198 * xmlAddDtdEntity:
199 * @doc: the document
200 * @name: the entity name
201 * @type: the entity type XML_xxx_yyy_ENTITY
202 * @ExternalID: the entity external ID if available
203 * @SystemID: the entity system ID if available
204 * @content: the entity content
205 *
206 * Register a new entity for this document DTD external subset.
207 *
208 * Returns a pointer to the entity or NULL in case of error
209 */
210xmlEntityPtr
211xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
212 const xmlChar *ExternalID, const xmlChar *SystemID,
213 const xmlChar *content) {
214 xmlEntityPtr ret;
215 xmlDtdPtr dtd;
216
217 if (doc == NULL) {
218 xmlGenericError(xmlGenericErrorContext,
219 "xmlAddDtdEntity: doc == NULL !\n");
220 return(NULL);
221 }
222 if (doc->extSubset == NULL) {
223 xmlGenericError(xmlGenericErrorContext,
224 "xmlAddDtdEntity: document without external subset !\n");
225 return(NULL);
226 }
227 dtd = doc->extSubset;
228 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
229 if (ret == NULL) return(NULL);
230
231 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000232 * Link it to the DTD
Owen Taylor3473f882001-02-23 17:55:21 +0000233 */
234 ret->parent = dtd;
235 ret->doc = dtd->doc;
236 if (dtd->last == NULL) {
237 dtd->children = dtd->last = (xmlNodePtr) ret;
238 } else {
239 dtd->last->next = (xmlNodePtr) ret;
240 ret->prev = dtd->last;
241 dtd->last = (xmlNodePtr) ret;
242 }
243 return(ret);
244}
245
246/**
247 * xmlAddDocEntity:
248 * @doc: the document
249 * @name: the entity name
250 * @type: the entity type XML_xxx_yyy_ENTITY
251 * @ExternalID: the entity external ID if available
252 * @SystemID: the entity system ID if available
253 * @content: the entity content
254 *
255 * Register a new entity for this document.
256 *
257 * Returns a pointer to the entity or NULL in case of error
258 */
259xmlEntityPtr
260xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
261 const xmlChar *ExternalID, const xmlChar *SystemID,
262 const xmlChar *content) {
263 xmlEntityPtr ret;
264 xmlDtdPtr dtd;
265
266 if (doc == NULL) {
267 xmlGenericError(xmlGenericErrorContext,
268 "xmlAddDocEntity: document is NULL !\n");
269 return(NULL);
270 }
271 if (doc->intSubset == NULL) {
272 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000273 "xmlAddDocEntity: document without internal subset !\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000274 return(NULL);
275 }
276 dtd = doc->intSubset;
277 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
278 if (ret == NULL) return(NULL);
279
280 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000281 * Link it to the DTD
Owen Taylor3473f882001-02-23 17:55:21 +0000282 */
283 ret->parent = dtd;
284 ret->doc = dtd->doc;
285 if (dtd->last == NULL) {
286 dtd->children = dtd->last = (xmlNodePtr) ret;
287 } else {
288 dtd->last->next = (xmlNodePtr) ret;
289 ret->prev = dtd->last;
290 dtd->last = (xmlNodePtr) ret;
291 }
292 return(ret);
293}
294
295/**
296 * xmlGetEntityFromTable:
297 * @table: an entity table
298 * @name: the entity name
299 * @parameter: look for parameter entities
300 *
301 * Do an entity lookup in the table.
302 * returns the corresponding parameter entity, if found.
303 *
304 * Returns A pointer to the entity structure or NULL if not found.
305 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000306static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +0000307xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) {
308 return((xmlEntityPtr) xmlHashLookup(table, name));
309}
310
311/**
312 * xmlGetParameterEntity:
313 * @doc: the document referencing the entity
314 * @name: the entity name
315 *
316 * Do an entity lookup in the internal and external subsets and
317 * returns the corresponding parameter entity, if found.
318 *
319 * Returns A pointer to the entity structure or NULL if not found.
320 */
321xmlEntityPtr
322xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
323 xmlEntitiesTablePtr table;
324 xmlEntityPtr ret;
325
Daniel Veillard36065812002-01-24 15:02:46 +0000326 if (doc == NULL)
327 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000328 if ((doc->intSubset != NULL) && (doc->intSubset->pentities != NULL)) {
329 table = (xmlEntitiesTablePtr) doc->intSubset->pentities;
330 ret = xmlGetEntityFromTable(table, name);
331 if (ret != NULL)
332 return(ret);
333 }
334 if ((doc->extSubset != NULL) && (doc->extSubset->pentities != NULL)) {
335 table = (xmlEntitiesTablePtr) doc->extSubset->pentities;
336 return(xmlGetEntityFromTable(table, name));
337 }
338 return(NULL);
339}
340
341/**
342 * xmlGetDtdEntity:
343 * @doc: the document referencing the entity
344 * @name: the entity name
345 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000346 * Do an entity lookup in the DTD entity hash table and
Owen Taylor3473f882001-02-23 17:55:21 +0000347 * returns the corresponding entity, if found.
Daniel Veillard36065812002-01-24 15:02:46 +0000348 * Note: the first argument is the document node, not the DTD node.
Owen Taylor3473f882001-02-23 17:55:21 +0000349 *
350 * Returns A pointer to the entity structure or NULL if not found.
351 */
352xmlEntityPtr
353xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
354 xmlEntitiesTablePtr table;
355
Daniel Veillard36065812002-01-24 15:02:46 +0000356 if (doc == NULL)
357 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +0000358 if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
359 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
360 return(xmlGetEntityFromTable(table, name));
361 }
362 return(NULL);
363}
364
365/**
366 * xmlGetDocEntity:
367 * @doc: the document referencing the entity
368 * @name: the entity name
369 *
370 * Do an entity lookup in the document entity hash table and
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000371 * returns the corresponding entity, otherwise a lookup is done
Owen Taylor3473f882001-02-23 17:55:21 +0000372 * in the predefined entities too.
373 *
374 * Returns A pointer to the entity structure or NULL if not found.
375 */
376xmlEntityPtr
377xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
378 xmlEntityPtr cur;
379 xmlEntitiesTablePtr table;
380
381 if (doc != NULL) {
382 if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
383 table = (xmlEntitiesTablePtr) doc->intSubset->entities;
384 cur = xmlGetEntityFromTable(table, name);
385 if (cur != NULL)
386 return(cur);
387 }
Daniel Veillard28757702002-02-18 11:19:30 +0000388 if (doc->standalone != 1) {
389 if ((doc->extSubset != NULL) &&
390 (doc->extSubset->entities != NULL)) {
391 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
392 cur = xmlGetEntityFromTable(table, name);
393 if (cur != NULL)
394 return(cur);
395 }
Owen Taylor3473f882001-02-23 17:55:21 +0000396 }
397 }
398 if (xmlPredefinedEntities == NULL)
399 xmlInitializePredefinedEntities();
400 table = xmlPredefinedEntities;
401 return(xmlGetEntityFromTable(table, name));
402}
403
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * The macro follows the production range by range, so surrogates
 * (#xD800-#xDFFF) and values above #x10FFFF are rejected as well.
 * The argument is evaluated several times: do not pass an expression
 * with side effects.
 */
#define IS_CHAR(c)							\
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) ||		\
     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||				\
     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||				\
     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
412
413/*
414 * A buffer used for converting entities to their equivalent and back.
415 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000416static int static_buffer_size = 0;
417static xmlChar *static_buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +0000418
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000419static int growBuffer(void) {
420 static_buffer_size *= 2;
421 static_buffer = (xmlChar *) xmlRealloc(static_buffer, static_buffer_size * sizeof(xmlChar));
422 if (static_buffer == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000423 perror("realloc failed");
424 return(-1);
425 }
426 return(0);
427}
428
429
430/**
431 * xmlEncodeEntities:
432 * @doc: the document containing the string
433 * @input: A string to convert to XML.
434 *
435 * Do a global encoding of a string, replacing the predefined entities
436 * and non ASCII values with their entities and CharRef counterparts.
437 *
438 * TODO: remove xmlEncodeEntities, once we are not afraid of breaking binary
439 * compatibility
440 *
441 * People must migrate their code to xmlEncodeEntitiesReentrant !
442 * This routine will issue a warning when encountered.
443 *
444 * Returns A newly allocated string with the substitution done.
445 */
446const xmlChar *
447xmlEncodeEntities(xmlDocPtr doc, const xmlChar *input) {
448 const xmlChar *cur = input;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000449 xmlChar *out = static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000450 static int warning = 1;
451 int html = 0;
452
453
454 if (warning) {
455 xmlGenericError(xmlGenericErrorContext,
456 "Deprecated API xmlEncodeEntities() used\n");
457 xmlGenericError(xmlGenericErrorContext,
458 " change code to use xmlEncodeEntitiesReentrant()\n");
459 warning = 0;
460 }
461
462 if (input == NULL) return(NULL);
463 if (doc != NULL)
464 html = (doc->type == XML_HTML_DOCUMENT_NODE);
465
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000466 if (static_buffer == NULL) {
467 static_buffer_size = 1000;
468 static_buffer = (xmlChar *) xmlMalloc(static_buffer_size * sizeof(xmlChar));
469 if (static_buffer == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +0000470 perror("malloc failed");
471 return(NULL);
472 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000473 out = static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000474 }
475 while (*cur != '\0') {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000476 if (out - static_buffer > static_buffer_size - 100) {
477 int indx = out - static_buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000478
479 growBuffer();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000480 out = &static_buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000481 }
482
483 /*
484 * By default one have to encode at least '<', '>', '"' and '&' !
485 */
486 if (*cur == '<') {
487 *out++ = '&';
488 *out++ = 'l';
489 *out++ = 't';
490 *out++ = ';';
491 } else if (*cur == '>') {
492 *out++ = '&';
493 *out++ = 'g';
494 *out++ = 't';
495 *out++ = ';';
496 } else if (*cur == '&') {
497 *out++ = '&';
498 *out++ = 'a';
499 *out++ = 'm';
500 *out++ = 'p';
501 *out++ = ';';
502 } else if (*cur == '"') {
503 *out++ = '&';
504 *out++ = 'q';
505 *out++ = 'u';
506 *out++ = 'o';
507 *out++ = 't';
508 *out++ = ';';
509 } else if ((*cur == '\'') && (!html)) {
510 *out++ = '&';
511 *out++ = 'a';
512 *out++ = 'p';
513 *out++ = 'o';
514 *out++ = 's';
515 *out++ = ';';
516 } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
517 (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
518 /*
519 * default case, just copy !
520 */
521 *out++ = *cur;
522#ifndef USE_UTF_8
523 } else if ((sizeof(xmlChar) == 1) && (*cur >= 0x80)) {
524 char buf[10], *ptr;
525
Owen Taylor3473f882001-02-23 17:55:21 +0000526 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000527 buf[sizeof(buf) - 1] = 0;
528 ptr = buf;
529 while (*ptr != 0) *out++ = *ptr++;
530#endif
531 } else if (IS_CHAR(*cur)) {
532 char buf[10], *ptr;
533
Owen Taylor3473f882001-02-23 17:55:21 +0000534 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000535 buf[sizeof(buf) - 1] = 0;
536 ptr = buf;
537 while (*ptr != 0) *out++ = *ptr++;
538 }
539#if 0
540 else {
541 /*
542 * default case, this is not a valid char !
543 * Skip it...
544 */
545 xmlGenericError(xmlGenericErrorContext,
546 "xmlEncodeEntities: invalid char %d\n", (int) *cur);
547 }
548#endif
549 cur++;
550 }
551 *out++ = 0;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000552 return(static_buffer);
Owen Taylor3473f882001-02-23 17:55:21 +0000553}
554
/*
 * Macro used to grow the current buffer.
 *
 * It expects the locals `buffer' (xmlChar *) and `buffer_size' (int) to
 * be in scope and doubles the capacity.  When the reallocation fails
 * the buffer built so far is released and the enclosing function
 * returns NULL, so the macro can only be used in functions returning
 * a pointer.
 */
#define growBufferReentrant() do {					\
    xmlChar *grown_;							\
    buffer_size *= 2;							\
    grown_ = (xmlChar *)						\
        xmlRealloc(buffer, buffer_size * sizeof(xmlChar));		\
    if (grown_ == NULL) {						\
        perror("realloc failed");					\
        xmlFree(buffer);						\
        return(NULL);							\
    }									\
    buffer = grown_;							\
} while (0)
567
568
569/**
570 * xmlEncodeEntitiesReentrant:
571 * @doc: the document containing the string
572 * @input: A string to convert to XML.
573 *
574 * Do a global encoding of a string, replacing the predefined entities
575 * and non ASCII values with their entities and CharRef counterparts.
576 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
577 * must be deallocated.
578 *
579 * Returns A newly allocated string with the substitution done.
580 */
581xmlChar *
582xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
583 const xmlChar *cur = input;
584 xmlChar *buffer = NULL;
585 xmlChar *out = NULL;
586 int buffer_size = 0;
587 int html = 0;
588
589 if (input == NULL) return(NULL);
590 if (doc != NULL)
591 html = (doc->type == XML_HTML_DOCUMENT_NODE);
592
593 /*
594 * allocate an translation buffer.
595 */
596 buffer_size = 1000;
597 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
598 if (buffer == NULL) {
599 perror("malloc failed");
600 return(NULL);
601 }
602 out = buffer;
603
604 while (*cur != '\0') {
605 if (out - buffer > buffer_size - 100) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000606 int indx = out - buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000607
608 growBufferReentrant();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000609 out = &buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000610 }
611
612 /*
613 * By default one have to encode at least '<', '>', '"' and '&' !
614 */
615 if (*cur == '<') {
616 *out++ = '&';
617 *out++ = 'l';
618 *out++ = 't';
619 *out++ = ';';
620 } else if (*cur == '>') {
621 *out++ = '&';
622 *out++ = 'g';
623 *out++ = 't';
624 *out++ = ';';
625 } else if (*cur == '&') {
626 *out++ = '&';
627 *out++ = 'a';
628 *out++ = 'm';
629 *out++ = 'p';
630 *out++ = ';';
631 } else if (*cur == '"') {
632 *out++ = '&';
633 *out++ = 'q';
634 *out++ = 'u';
635 *out++ = 'o';
636 *out++ = 't';
637 *out++ = ';';
638#if 0
639 } else if ((*cur == '\'') && (!html)) {
640 *out++ = '&';
641 *out++ = 'a';
642 *out++ = 'p';
643 *out++ = 'o';
644 *out++ = 's';
645 *out++ = ';';
646#endif
647 } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
648 (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
649 /*
650 * default case, just copy !
651 */
652 *out++ = *cur;
653 } else if (*cur >= 0x80) {
Daniel Veillard122376b2001-04-24 12:12:30 +0000654 if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
Owen Taylor3473f882001-02-23 17:55:21 +0000655 /*
656 * Bjørn Reese <br@sseusa.com> provided the patch
657 xmlChar xc;
658 xc = (*cur & 0x3F) << 6;
659 if (cur[1] != 0) {
660 xc += *(++cur) & 0x3F;
661 *out++ = xc;
662 } else
663 */
664 *out++ = *cur;
665 } else {
666 /*
667 * We assume we have UTF-8 input.
668 */
669 char buf[10], *ptr;
670 int val = 0, l = 1;
671
672 if (*cur < 0xC0) {
673 xmlGenericError(xmlGenericErrorContext,
674 "xmlEncodeEntitiesReentrant : input not UTF-8\n");
Daniel Veillard122376b2001-04-24 12:12:30 +0000675 if (doc != NULL)
676 doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
Owen Taylor3473f882001-02-23 17:55:21 +0000677 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000678 buf[sizeof(buf) - 1] = 0;
679 ptr = buf;
680 while (*ptr != 0) *out++ = *ptr++;
Daniel Veillard05c13a22001-09-09 08:38:09 +0000681 cur++;
Owen Taylor3473f882001-02-23 17:55:21 +0000682 continue;
683 } else if (*cur < 0xE0) {
684 val = (cur[0]) & 0x1F;
685 val <<= 6;
686 val |= (cur[1]) & 0x3F;
687 l = 2;
688 } else if (*cur < 0xF0) {
689 val = (cur[0]) & 0x0F;
690 val <<= 6;
691 val |= (cur[1]) & 0x3F;
692 val <<= 6;
693 val |= (cur[2]) & 0x3F;
694 l = 3;
695 } else if (*cur < 0xF8) {
696 val = (cur[0]) & 0x07;
697 val <<= 6;
698 val |= (cur[1]) & 0x3F;
699 val <<= 6;
700 val |= (cur[2]) & 0x3F;
701 val <<= 6;
702 val |= (cur[3]) & 0x3F;
703 l = 4;
704 }
705 if ((l == 1) || (!IS_CHAR(val))) {
706 xmlGenericError(xmlGenericErrorContext,
707 "xmlEncodeEntitiesReentrant : char out of range\n");
Daniel Veillard122376b2001-04-24 12:12:30 +0000708 if (doc != NULL)
709 doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
Owen Taylor3473f882001-02-23 17:55:21 +0000710 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000711 buf[sizeof(buf) - 1] = 0;
712 ptr = buf;
713 while (*ptr != 0) *out++ = *ptr++;
714 cur++;
715 continue;
716 }
717 /*
718 * We could do multiple things here. Just save as a char ref
719 */
Daniel Veillard16698282001-09-14 10:29:27 +0000720 if (html)
721 snprintf(buf, sizeof(buf), "&#%d;", val);
722 else
723 snprintf(buf, sizeof(buf), "&#x%X;", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000724 buf[sizeof(buf) - 1] = 0;
725 ptr = buf;
726 while (*ptr != 0) *out++ = *ptr++;
727 cur += l;
728 continue;
729 }
730 } else if (IS_CHAR(*cur)) {
731 char buf[10], *ptr;
732
Owen Taylor3473f882001-02-23 17:55:21 +0000733 snprintf(buf, sizeof(buf), "&#%d;", *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000734 buf[sizeof(buf) - 1] = 0;
735 ptr = buf;
736 while (*ptr != 0) *out++ = *ptr++;
737 }
738#if 0
739 else {
740 /*
741 * default case, this is not a valid char !
742 * Skip it...
743 */
744 xmlGenericError(xmlGenericErrorContext,
745 "xmlEncodeEntities: invalid char %d\n", (int) *cur);
746 }
747#endif
748 cur++;
749 }
750 *out++ = 0;
751 return(buffer);
752}
753
754/**
755 * xmlEncodeSpecialChars:
756 * @doc: the document containing the string
757 * @input: A string to convert to XML.
758 *
759 * Do a global encoding of a string, replacing the predefined entities
760 * this routine is reentrant, and result must be deallocated.
761 *
762 * Returns A newly allocated string with the substitution done.
763 */
764xmlChar *
765xmlEncodeSpecialChars(xmlDocPtr doc, const xmlChar *input) {
766 const xmlChar *cur = input;
767 xmlChar *buffer = NULL;
768 xmlChar *out = NULL;
769 int buffer_size = 0;
770 int html = 0;
771
772 if (input == NULL) return(NULL);
773 if (doc != NULL)
774 html = (doc->type == XML_HTML_DOCUMENT_NODE);
775
776 /*
777 * allocate an translation buffer.
778 */
779 buffer_size = 1000;
780 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
781 if (buffer == NULL) {
782 perror("malloc failed");
783 return(NULL);
784 }
785 out = buffer;
786
787 while (*cur != '\0') {
788 if (out - buffer > buffer_size - 10) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000789 int indx = out - buffer;
Owen Taylor3473f882001-02-23 17:55:21 +0000790
791 growBufferReentrant();
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000792 out = &buffer[indx];
Owen Taylor3473f882001-02-23 17:55:21 +0000793 }
794
795 /*
796 * By default one have to encode at least '<', '>', '"' and '&' !
797 */
798 if (*cur == '<') {
799 *out++ = '&';
800 *out++ = 'l';
801 *out++ = 't';
802 *out++ = ';';
803 } else if (*cur == '>') {
804 *out++ = '&';
805 *out++ = 'g';
806 *out++ = 't';
807 *out++ = ';';
808 } else if (*cur == '&') {
809 *out++ = '&';
810 *out++ = 'a';
811 *out++ = 'm';
812 *out++ = 'p';
813 *out++ = ';';
814 } else if (*cur == '"') {
815 *out++ = '&';
816 *out++ = 'q';
817 *out++ = 'u';
818 *out++ = 'o';
819 *out++ = 't';
820 *out++ = ';';
821 } else {
822 /*
823 * Works because on UTF-8, all extended sequences cannot
824 * result in bytes in the ASCII range.
825 */
826 *out++ = *cur;
827 }
828 cur++;
829 }
830 *out++ = 0;
831 return(buffer);
832}
833
834/**
835 * xmlCreateEntitiesTable:
836 *
837 * create and initialize an empty entities hash table.
838 *
839 * Returns the xmlEntitiesTablePtr just created or NULL in case of error.
840 */
841xmlEntitiesTablePtr
842xmlCreateEntitiesTable(void) {
843 return((xmlEntitiesTablePtr) xmlHashCreate(0));
844}
845
846/**
847 * xmlFreeEntitiesTable:
848 * @table: An entity table
849 *
850 * Deallocate the memory used by an entities hash table.
851 */
852void
853xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
854 xmlHashFree(table, (xmlHashDeallocator) xmlFreeEntity);
855}
856
857/**
858 * xmlCopyEntity:
859 * @ent: An entity
860 *
861 * Build a copy of an entity
862 *
863 * Returns the new xmlEntitiesPtr or NULL in case of error.
864 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000865static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +0000866xmlCopyEntity(xmlEntityPtr ent) {
867 xmlEntityPtr cur;
868
869 cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
870 if (cur == NULL) {
871 xmlGenericError(xmlGenericErrorContext,
872 "xmlCopyEntity: out of memory !\n");
873 return(NULL);
874 }
875 memset(cur, 0, sizeof(xmlEntity));
Daniel Veillard845cce42002-01-09 11:51:37 +0000876 cur->type = XML_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +0000877
878 cur->etype = ent->etype;
879 if (ent->name != NULL)
880 cur->name = xmlStrdup(ent->name);
881 if (ent->ExternalID != NULL)
882 cur->ExternalID = xmlStrdup(ent->ExternalID);
883 if (ent->SystemID != NULL)
884 cur->SystemID = xmlStrdup(ent->SystemID);
885 if (ent->content != NULL)
886 cur->content = xmlStrdup(ent->content);
887 if (ent->orig != NULL)
888 cur->orig = xmlStrdup(ent->orig);
Daniel Veillard8ee9c8f2002-01-26 21:42:58 +0000889 if (ent->URI != NULL)
890 cur->URI = xmlStrdup(ent->URI);
Owen Taylor3473f882001-02-23 17:55:21 +0000891 return(cur);
892}
893
894/**
895 * xmlCopyEntitiesTable:
896 * @table: An entity table
897 *
898 * Build a copy of an entity table.
899 *
900 * Returns the new xmlEntitiesTablePtr or NULL in case of error.
901 */
902xmlEntitiesTablePtr
903xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
904 return(xmlHashCopy(table, (xmlHashCopier) xmlCopyEntity));
905}
906
907/**
908 * xmlDumpEntityDecl:
909 * @buf: An XML buffer.
910 * @ent: An entity table
911 *
912 * This will dump the content of the entity table as an XML DTD definition
913 */
914void
915xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
916 switch (ent->etype) {
917 case XML_INTERNAL_GENERAL_ENTITY:
918 xmlBufferWriteChar(buf, "<!ENTITY ");
919 xmlBufferWriteCHAR(buf, ent->name);
920 xmlBufferWriteChar(buf, " ");
921 if (ent->orig != NULL)
922 xmlBufferWriteQuotedString(buf, ent->orig);
923 else
924 xmlBufferWriteQuotedString(buf, ent->content);
925 xmlBufferWriteChar(buf, ">\n");
926 break;
927 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
928 xmlBufferWriteChar(buf, "<!ENTITY ");
929 xmlBufferWriteCHAR(buf, ent->name);
930 if (ent->ExternalID != NULL) {
931 xmlBufferWriteChar(buf, " PUBLIC ");
932 xmlBufferWriteQuotedString(buf, ent->ExternalID);
933 xmlBufferWriteChar(buf, " ");
934 xmlBufferWriteQuotedString(buf, ent->SystemID);
935 } else {
936 xmlBufferWriteChar(buf, " SYSTEM ");
937 xmlBufferWriteQuotedString(buf, ent->SystemID);
938 }
939 xmlBufferWriteChar(buf, ">\n");
940 break;
941 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
942 xmlBufferWriteChar(buf, "<!ENTITY ");
943 xmlBufferWriteCHAR(buf, ent->name);
944 if (ent->ExternalID != NULL) {
945 xmlBufferWriteChar(buf, " PUBLIC ");
946 xmlBufferWriteQuotedString(buf, ent->ExternalID);
947 xmlBufferWriteChar(buf, " ");
948 xmlBufferWriteQuotedString(buf, ent->SystemID);
949 } else {
950 xmlBufferWriteChar(buf, " SYSTEM ");
951 xmlBufferWriteQuotedString(buf, ent->SystemID);
952 }
953 if (ent->content != NULL) { /* Should be true ! */
954 xmlBufferWriteChar(buf, " NDATA ");
955 if (ent->orig != NULL)
956 xmlBufferWriteCHAR(buf, ent->orig);
957 else
958 xmlBufferWriteCHAR(buf, ent->content);
959 }
960 xmlBufferWriteChar(buf, ">\n");
961 break;
962 case XML_INTERNAL_PARAMETER_ENTITY:
963 xmlBufferWriteChar(buf, "<!ENTITY % ");
964 xmlBufferWriteCHAR(buf, ent->name);
965 xmlBufferWriteChar(buf, " ");
966 if (ent->orig == NULL)
967 xmlBufferWriteQuotedString(buf, ent->content);
968 else
969 xmlBufferWriteQuotedString(buf, ent->orig);
970 xmlBufferWriteChar(buf, ">\n");
971 break;
972 case XML_EXTERNAL_PARAMETER_ENTITY:
973 xmlBufferWriteChar(buf, "<!ENTITY % ");
974 xmlBufferWriteCHAR(buf, ent->name);
975 if (ent->ExternalID != NULL) {
976 xmlBufferWriteChar(buf, " PUBLIC ");
977 xmlBufferWriteQuotedString(buf, ent->ExternalID);
978 xmlBufferWriteChar(buf, " ");
979 xmlBufferWriteQuotedString(buf, ent->SystemID);
980 } else {
981 xmlBufferWriteChar(buf, " SYSTEM ");
982 xmlBufferWriteQuotedString(buf, ent->SystemID);
983 }
984 xmlBufferWriteChar(buf, ">\n");
985 break;
986 default:
987 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000988 "xmlDumpEntitiesDecl: internal: unknown type %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000989 ent->etype);
990 }
991}
992
993/**
994 * xmlDumpEntitiesTable:
995 * @buf: An XML buffer.
996 * @table: An entity table
997 *
998 * This will dump the content of the entity table as an XML DTD definition
999 */
1000void
1001xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
1002 xmlHashScan(table, (xmlHashScanner)xmlDumpEntityDecl, buf);
1003}