blob: 412838794d703b18a02f8526699ec4e5cf1c81d9 [file] [log] [blame]
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001/* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
2 See the file COPYING for copying permission.
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003*/
4
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005#define XML_BUILDING_EXPAT 1
6
7#ifdef COMPILED_FROM_DSP
8#include "winconfig.h"
9#elif defined(MACOS_CLASSIC)
10#include "macconfig.h"
11#elif defined(__amigaos__)
12#include "amigaconfig.h"
13#elif defined(__WATCOMC__)
14#include "watcomconfig.h"
15#elif defined(HAVE_EXPAT_CONFIG_H)
16#include <expat_config.h>
17#endif /* ndef COMPILED_FROM_DSP */
18
Christian Heimesaa152762013-12-06 23:43:50 +010019#include <stddef.h>
20#include <string.h> /* memset(), memcpy() */
21#include <assert.h>
22#include <limits.h> /* UINT_MAX */
23#include <time.h> /* time() */
24
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070025#include "ascii.h"
Fred Drake08317ae2003-10-21 15:38:55 +000026#include "expat.h"
27
/* Map the generic Xml* helper names onto their UTF-16 or UTF-8 variants,
   depending on whether XML_UNICODE is defined, and pick the matching
   internal character type ICHAR. */
#ifdef XML_UNICODE
#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#define XmlConvert XmlUtf16Convert
#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
#define XmlEncode XmlUtf16Encode
/* Conversion is required unless the encoding is UTF-16 and the address of
   's' is even.  The address is turned into an integer with a cast; the
   earlier form subtracted a null pointer, which is undefined behavior. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((size_t)(s)) & 1))
typedef unsigned short ICHAR;
#else
#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#define XmlConvert XmlUtf8Convert
#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
#define XmlEncode XmlUtf8Encode
/* Conversion is required whenever the encoding is not flagged as UTF-8. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
typedef char ICHAR;
#endif
46
47
/* Without XML_NS the namespace-aware entry points are plain aliases of the
   ordinary ones. */
#if !defined(XML_NS)

#define XmlInitEncodingNS XmlInitEncoding
#define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
#undef XmlGetInternalEncodingNS
#define XmlGetInternalEncodingNS XmlGetInternalEncoding
#define XmlParseXmlDeclNS XmlParseXmlDecl

#endif /* !XML_NS */
57
/* XML_T(x) yields a character constant in the API character type and
   XML_L(x) a literal in the matching form.  The cast variants parenthesize
   their argument so the cast applies to the whole expression. */
#ifdef XML_UNICODE

#ifdef XML_UNICODE_WCHAR_T
#define XML_T(x) (const wchar_t)(x)
#define XML_L(x) L ## x
#else
#define XML_T(x) (const unsigned short)(x)
#define XML_L(x) x
#endif

#else

#define XML_T(x) x
#define XML_L(x) x

#endif
74
/* ROUND_UP(n, sz): smallest multiple of sz that is >= n.
   sz must be a power of 2; both arguments are evaluated more than once. */
#define ROUND_UP(n, sz) \
  (((n) + ((sz) - 1)) & ~((sz) - 1))
77
Fred Drake08317ae2003-10-21 15:38:55 +000078/* Handle the case where memmove() doesn't exist. */
79#ifndef HAVE_MEMMOVE
80#ifdef HAVE_BCOPY
81#define memmove(d,s,l) bcopy((s),(d),(l))
82#else
83#error memmove does not exist on this platform, nor is a substitute available
84#endif /* HAVE_BCOPY */
85#endif /* HAVE_MEMMOVE */
86
Martin v. Löwisfc03a942003-01-25 22:41:29 +000087#include "internal.h"
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +000088#include "xmltok.h"
89#include "xmlrole.h"
90
91typedef const XML_Char *KEY;
92
93typedef struct {
94 KEY name;
95} NAMED;
96
97typedef struct {
98 NAMED **v;
Fred Drake08317ae2003-10-21 15:38:55 +000099 unsigned char power;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000100 size_t size;
101 size_t used;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000102 const XML_Memory_Handling_Suite *mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000103} HASH_TABLE;
104
/* One step of the basic character hash, taken from Python's string hash:
     h = h * 1000003 ^ character
   where 1000003 (0xF4243) is a prime number.  The character is first
   narrowed to the unsigned type matching the build's XML_Char width.
*/
#ifdef XML_UNICODE
#define CHAR_HASH(h, c) (((h) * 0xF4243) ^ (unsigned short)(c))
#else
#define CHAR_HASH(h, c) (((h) * 0xF4243) ^ (unsigned char)(c))
#endif
116
/* Probing after a collision needs a step size that is relatively prime to
   the table size, which is a power of 2.  Double hashing is used: the
   second hash is derived cheaply from the bits of the first hash that were
   masked out when the index was computed (index = hash & mask, with
   mask = table->size - 1).  The step is limited to table->size / 4
   (mask >> 2) and forced odd, because every odd number is relatively prime
   to a power of 2.
*/
#define SECOND_HASH(hash, mask, power) \
  ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power) \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
129
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000130typedef struct {
131 NAMED **p;
132 NAMED **end;
133} HASH_TABLE_ITER;
134
/* Initial sizes for the parser's buffers and tables. */
#define INIT_TAG_BUF_SIZE 32  /* must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE 1024
#define INIT_ATTS_SIZE 16
#define INIT_ATTS_VERSION 0xFFFFFFFF
#define INIT_BLOCK_SIZE 1024
#define INIT_BUFFER_SIZE 1024

#define EXPAND_SPARE 24
143
144typedef struct binding {
145 struct prefix *prefix;
146 struct binding *nextTagBinding;
147 struct binding *prevPrefixBinding;
148 const struct attribute_id *attId;
149 XML_Char *uri;
150 int uriLen;
151 int uriAlloc;
152} BINDING;
153
154typedef struct prefix {
155 const XML_Char *name;
156 BINDING *binding;
157} PREFIX;
158
159typedef struct {
160 const XML_Char *str;
161 const XML_Char *localPart;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000162 const XML_Char *prefix;
163 int strLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000164 int uriLen;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000165 int prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000166} TAG_NAME;
167
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000168/* TAG represents an open element.
169 The name of the element is stored in both the document and API
170 encodings. The memory buffer 'buf' is a separately-allocated
171 memory area which stores the name. During the XML_Parse()/
172 XMLParseBuffer() when the element is open, the memory for the 'raw'
173 version of the name (in the document encoding) is shared with the
174 document buffer. If the element is open across calls to
175 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
176 contain the 'raw' name as well.
177
178 A parser re-uses these structures, maintaining a list of allocated
179 TAG objects in a free list.
180*/
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000181typedef struct tag {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000182 struct tag *parent; /* parent of this element */
183 const char *rawName; /* tagName in the original encoding */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000184 int rawNameLength;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000185 TAG_NAME name; /* tagName in the API encoding */
186 char *buf; /* buffer for name components */
187 char *bufEnd; /* end of the buffer */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000188 BINDING *bindings;
189} TAG;
190
191typedef struct {
192 const XML_Char *name;
193 const XML_Char *textPtr;
Fred Drake31d485c2004-08-03 07:06:22 +0000194 int textLen; /* length in XML_Chars */
195 int processed; /* # of processed bytes - when suspended */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000196 const XML_Char *systemId;
197 const XML_Char *base;
198 const XML_Char *publicId;
199 const XML_Char *notation;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000200 XML_Bool open;
201 XML_Bool is_param;
202 XML_Bool is_internal; /* true if declared in internal subset outside PE */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000203} ENTITY;
204
205typedef struct {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000206 enum XML_Content_Type type;
207 enum XML_Content_Quant quant;
208 const XML_Char * name;
209 int firstchild;
210 int lastchild;
211 int childcnt;
212 int nextsib;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000213} CONTENT_SCAFFOLD;
214
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000215#define INIT_SCAFFOLD_ELEMENTS 32
216
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000217typedef struct block {
218 struct block *next;
219 int size;
220 XML_Char s[1];
221} BLOCK;
222
223typedef struct {
224 BLOCK *blocks;
225 BLOCK *freeBlocks;
226 const XML_Char *end;
227 XML_Char *ptr;
228 XML_Char *start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000229 const XML_Memory_Handling_Suite *mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000230} STRING_POOL;
231
232/* The XML_Char before the name is used to determine whether
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000233 an attribute has been specified. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000234typedef struct attribute_id {
235 XML_Char *name;
236 PREFIX *prefix;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000237 XML_Bool maybeTokenized;
238 XML_Bool xmlns;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000239} ATTRIBUTE_ID;
240
241typedef struct {
242 const ATTRIBUTE_ID *id;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000243 XML_Bool isCdata;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000244 const XML_Char *value;
245} DEFAULT_ATTRIBUTE;
246
247typedef struct {
Fred Drake08317ae2003-10-21 15:38:55 +0000248 unsigned long version;
249 unsigned long hash;
250 const XML_Char *uriName;
251} NS_ATT;
252
253typedef struct {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000254 const XML_Char *name;
255 PREFIX *prefix;
256 const ATTRIBUTE_ID *idAtt;
257 int nDefaultAtts;
258 int allocDefaultAtts;
259 DEFAULT_ATTRIBUTE *defaultAtts;
260} ELEMENT_TYPE;
261
262typedef struct {
263 HASH_TABLE generalEntities;
264 HASH_TABLE elementTypes;
265 HASH_TABLE attributeIds;
266 HASH_TABLE prefixes;
267 STRING_POOL pool;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000268 STRING_POOL entityValuePool;
269 /* false once a parameter entity reference has been skipped */
270 XML_Bool keepProcessing;
271 /* true once an internal or external PE reference has been encountered;
272 this includes the reference to an external subset */
273 XML_Bool hasParamEntityRefs;
274 XML_Bool standalone;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000275#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000276 /* indicates if external PE has been read */
277 XML_Bool paramEntityRead;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000278 HASH_TABLE paramEntities;
279#endif /* XML_DTD */
280 PREFIX defaultPrefix;
281 /* === scaffolding for building content model === */
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000282 XML_Bool in_eldecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000283 CONTENT_SCAFFOLD *scaffold;
284 unsigned contentStringLen;
285 unsigned scaffSize;
286 unsigned scaffCount;
287 int scaffLevel;
288 int *scaffIndex;
289} DTD;
290
291typedef struct open_internal_entity {
292 const char *internalEventPtr;
293 const char *internalEventEndPtr;
294 struct open_internal_entity *next;
295 ENTITY *entity;
Fred Drake31d485c2004-08-03 07:06:22 +0000296 int startTagLevel;
297 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000298} OPEN_INTERNAL_ENTITY;
299
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000300typedef enum XML_Error PTRCALL Processor(XML_Parser parser,
301 const char *start,
302 const char *end,
303 const char **endPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000304
305static Processor prologProcessor;
306static Processor prologInitProcessor;
307static Processor contentProcessor;
308static Processor cdataSectionProcessor;
309#ifdef XML_DTD
310static Processor ignoreSectionProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000311static Processor externalParEntProcessor;
312static Processor externalParEntInitProcessor;
313static Processor entityValueProcessor;
314static Processor entityValueInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000315#endif /* XML_DTD */
316static Processor epilogProcessor;
317static Processor errorProcessor;
318static Processor externalEntityInitProcessor;
319static Processor externalEntityInitProcessor2;
320static Processor externalEntityInitProcessor3;
321static Processor externalEntityContentProcessor;
Fred Drake31d485c2004-08-03 07:06:22 +0000322static Processor internalEntityProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000323
324static enum XML_Error
325handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
326static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000327processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
Fred Drake31d485c2004-08-03 07:06:22 +0000328 const char *s, const char *next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000329static enum XML_Error
330initializeEncoding(XML_Parser parser);
331static enum XML_Error
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700332doProlog(XML_Parser parser, const ENCODING *enc, const char *s,
333 const char *end, int tok, const char *next, const char **nextPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000334 XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000335static enum XML_Error
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700336processInternalEntity(XML_Parser parser, ENTITY *entity,
Fred Drake31d485c2004-08-03 07:06:22 +0000337 XML_Bool betweenDecl);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000338static enum XML_Error
339doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700340 const char *start, const char *end, const char **endPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000341 XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000342static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000343doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000344 const char *end, const char **nextPtr, XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000345#ifdef XML_DTD
346static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000347doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000348 const char *end, const char **nextPtr, XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000349#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000350
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000351static enum XML_Error
Fred Drake4faea012003-01-28 06:42:40 +0000352storeAtts(XML_Parser parser, const ENCODING *, const char *s,
353 TAG_NAME *tagNamePtr, BINDING **bindingsPtr);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000354static enum XML_Error
355addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
356 const XML_Char *uri, BINDING **bindingsPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000357static int
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700358defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
Fred Drake31d485c2004-08-03 07:06:22 +0000359 XML_Bool isId, const XML_Char *dfltValue, XML_Parser parser);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000360static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000361storeAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
362 const char *, const char *, STRING_POOL *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000363static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000364appendAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
365 const char *, const char *, STRING_POOL *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000366static ATTRIBUTE_ID *
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000367getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
368 const char *end);
369static int
370setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000371static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000372storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start,
373 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000374static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000375reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
376 const char *start, const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000377static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000378reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
379 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000380static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000381reportDefault(XML_Parser parser, const ENCODING *enc, const char *start,
382 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000383
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000384static const XML_Char * getContext(XML_Parser parser);
385static XML_Bool
386setContext(XML_Parser parser, const XML_Char *context);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000387
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000388static void FASTCALL normalizePublicId(XML_Char *s);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000389
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000390static DTD * dtdCreate(const XML_Memory_Handling_Suite *ms);
391/* do not call if parentParser != NULL */
392static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
393static void
394dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms);
395static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700396dtdCopy(XML_Parser oldParser,
397 DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000398static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700399copyEntityTable(XML_Parser oldParser,
400 HASH_TABLE *, STRING_POOL *, const HASH_TABLE *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000401static NAMED *
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700402lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000403static void FASTCALL
404hashTableInit(HASH_TABLE *, const XML_Memory_Handling_Suite *ms);
405static void FASTCALL hashTableClear(HASH_TABLE *);
406static void FASTCALL hashTableDestroy(HASH_TABLE *);
407static void FASTCALL
408hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
409static NAMED * FASTCALL hashTableIterNext(HASH_TABLE_ITER *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000410
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000411static void FASTCALL
412poolInit(STRING_POOL *, const XML_Memory_Handling_Suite *ms);
413static void FASTCALL poolClear(STRING_POOL *);
414static void FASTCALL poolDestroy(STRING_POOL *);
415static XML_Char *
416poolAppend(STRING_POOL *pool, const ENCODING *enc,
417 const char *ptr, const char *end);
418static XML_Char *
419poolStoreString(STRING_POOL *pool, const ENCODING *enc,
420 const char *ptr, const char *end);
421static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
422static const XML_Char * FASTCALL
423poolCopyString(STRING_POOL *pool, const XML_Char *s);
424static const XML_Char *
425poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
426static const XML_Char * FASTCALL
427poolAppendString(STRING_POOL *pool, const XML_Char *s);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000428
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000429static int FASTCALL nextScaffoldPart(XML_Parser parser);
430static XML_Content * build_model(XML_Parser parser);
431static ELEMENT_TYPE *
432getElementType(XML_Parser parser, const ENCODING *enc,
433 const char *ptr, const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000434
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700435static unsigned long generate_hash_secret_salt(void);
436static XML_Bool startParsing(XML_Parser parser);
437
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000438static XML_Parser
439parserCreate(const XML_Char *encodingName,
440 const XML_Memory_Handling_Suite *memsuite,
441 const XML_Char *nameSep,
442 DTD *dtd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700443
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000444static void
445parserInit(XML_Parser parser, const XML_Char *encodingName);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000446
/* Accessors for a STRING_POOL.  'pool' is a pointer expression and is
   evaluated more than once by several of these macros.  Every use of the
   argument is parenthesized so that expressions such as &obj or arr + i
   expand correctly. */
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Drop the last character appended to the pool. */
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
/* Forget the string being built. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Commit the string being built; the next string starts at ptr. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Append c, calling poolGrow() first when the pool is full.
   Evaluates to 1 on success and 0 when poolGrow() fails. */
#define poolAppendChar(pool, c) \
  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
   ? 0 \
   : ((*((pool)->ptr)++ = (c)), 1))
458
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000459struct XML_ParserStruct {
460 /* The first member must be userData so that the XML_GetUserData
461 macro works. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000462 void *m_userData;
463 void *m_handlerArg;
464 char *m_buffer;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000465 const XML_Memory_Handling_Suite m_mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000466 /* first character to be parsed */
467 const char *m_bufferPtr;
468 /* past last character to be parsed */
469 char *m_bufferEnd;
470 /* allocated end of buffer */
471 const char *m_bufferLim;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000472 XML_Index m_parseEndByteIndex;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000473 const char *m_parseEndPtr;
474 XML_Char *m_dataBuf;
475 XML_Char *m_dataBufEnd;
476 XML_StartElementHandler m_startElementHandler;
477 XML_EndElementHandler m_endElementHandler;
478 XML_CharacterDataHandler m_characterDataHandler;
479 XML_ProcessingInstructionHandler m_processingInstructionHandler;
480 XML_CommentHandler m_commentHandler;
481 XML_StartCdataSectionHandler m_startCdataSectionHandler;
482 XML_EndCdataSectionHandler m_endCdataSectionHandler;
483 XML_DefaultHandler m_defaultHandler;
484 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
485 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
486 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
487 XML_NotationDeclHandler m_notationDeclHandler;
488 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
489 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
490 XML_NotStandaloneHandler m_notStandaloneHandler;
491 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000492 XML_Parser m_externalEntityRefHandlerArg;
493 XML_SkippedEntityHandler m_skippedEntityHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000494 XML_UnknownEncodingHandler m_unknownEncodingHandler;
495 XML_ElementDeclHandler m_elementDeclHandler;
496 XML_AttlistDeclHandler m_attlistDeclHandler;
497 XML_EntityDeclHandler m_entityDeclHandler;
498 XML_XmlDeclHandler m_xmlDeclHandler;
499 const ENCODING *m_encoding;
500 INIT_ENCODING m_initEncoding;
501 const ENCODING *m_internalEncoding;
502 const XML_Char *m_protocolEncodingName;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000503 XML_Bool m_ns;
504 XML_Bool m_ns_triplets;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000505 void *m_unknownEncodingMem;
506 void *m_unknownEncodingData;
507 void *m_unknownEncodingHandlerData;
Fred Drake31d485c2004-08-03 07:06:22 +0000508 void (XMLCALL *m_unknownEncodingRelease)(void *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000509 PROLOG_STATE m_prologState;
510 Processor *m_processor;
511 enum XML_Error m_errorCode;
512 const char *m_eventPtr;
513 const char *m_eventEndPtr;
514 const char *m_positionPtr;
515 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
Fred Drake31d485c2004-08-03 07:06:22 +0000516 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000517 XML_Bool m_defaultExpandInternalEntities;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000518 int m_tagLevel;
519 ENTITY *m_declEntity;
520 const XML_Char *m_doctypeName;
521 const XML_Char *m_doctypeSysid;
522 const XML_Char *m_doctypePubid;
523 const XML_Char *m_declAttributeType;
524 const XML_Char *m_declNotationName;
525 const XML_Char *m_declNotationPublicId;
526 ELEMENT_TYPE *m_declElementType;
527 ATTRIBUTE_ID *m_declAttributeId;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000528 XML_Bool m_declAttributeIsCdata;
529 XML_Bool m_declAttributeIsId;
530 DTD *m_dtd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000531 const XML_Char *m_curBase;
532 TAG *m_tagStack;
533 TAG *m_freeTagList;
534 BINDING *m_inheritedBindings;
535 BINDING *m_freeBindingList;
536 int m_attsSize;
537 int m_nSpecifiedAtts;
538 int m_idAttIndex;
539 ATTRIBUTE *m_atts;
Fred Drake08317ae2003-10-21 15:38:55 +0000540 NS_ATT *m_nsAtts;
541 unsigned long m_nsAttsVersion;
542 unsigned char m_nsAttsPower;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700543#ifdef XML_ATTR_INFO
544 XML_AttrInfo *m_attInfo;
545#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000546 POSITION m_position;
547 STRING_POOL m_tempPool;
548 STRING_POOL m_temp2Pool;
549 char *m_groupConnector;
Fred Drake08317ae2003-10-21 15:38:55 +0000550 unsigned int m_groupSize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000551 XML_Char m_namespaceSeparator;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000552 XML_Parser m_parentParser;
Fred Drake31d485c2004-08-03 07:06:22 +0000553 XML_ParsingStatus m_parsingStatus;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000554#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000555 XML_Bool m_isParamEntity;
556 XML_Bool m_useForeignDTD;
557 enum XML_ParamEntityParsing m_paramEntityParsing;
558#endif
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700559 unsigned long m_hash_secret_salt;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000560};
561
/* Allocation through the parser's memory suite.  These, like all macros
   below, require a variable named 'parser' to be in scope. */
#define MALLOC(s) (parser->m_mem.malloc_fcn((s)))
#define REALLOC(p,s) (parser->m_mem.realloc_fcn((p),(s)))
#define FREE(p) (parser->m_mem.free_fcn((p)))

/* Short aliases for the members of the current parser. */
#define userData (parser->m_userData)
#define handlerArg (parser->m_handlerArg)
#define startElementHandler (parser->m_startElementHandler)
#define endElementHandler (parser->m_endElementHandler)
#define characterDataHandler (parser->m_characterDataHandler)
#define processingInstructionHandler (parser->m_processingInstructionHandler)
#define commentHandler (parser->m_commentHandler)
#define startCdataSectionHandler (parser->m_startCdataSectionHandler)
#define endCdataSectionHandler (parser->m_endCdataSectionHandler)
#define defaultHandler (parser->m_defaultHandler)
#define startDoctypeDeclHandler (parser->m_startDoctypeDeclHandler)
#define endDoctypeDeclHandler (parser->m_endDoctypeDeclHandler)
#define unparsedEntityDeclHandler (parser->m_unparsedEntityDeclHandler)
#define notationDeclHandler (parser->m_notationDeclHandler)
#define startNamespaceDeclHandler (parser->m_startNamespaceDeclHandler)
#define endNamespaceDeclHandler (parser->m_endNamespaceDeclHandler)
#define notStandaloneHandler (parser->m_notStandaloneHandler)
#define externalEntityRefHandler (parser->m_externalEntityRefHandler)
#define externalEntityRefHandlerArg (parser->m_externalEntityRefHandlerArg)
/* NOTE(review): struct XML_ParserStruct declares no
   m_internalEntityRefHandler member, so this alias appears to be unused;
   confirm before relying on it. */
#define internalEntityRefHandler (parser->m_internalEntityRefHandler)
#define skippedEntityHandler (parser->m_skippedEntityHandler)
#define unknownEncodingHandler (parser->m_unknownEncodingHandler)
#define elementDeclHandler (parser->m_elementDeclHandler)
#define attlistDeclHandler (parser->m_attlistDeclHandler)
#define entityDeclHandler (parser->m_entityDeclHandler)
#define xmlDeclHandler (parser->m_xmlDeclHandler)
#define encoding (parser->m_encoding)
#define initEncoding (parser->m_initEncoding)
#define internalEncoding (parser->m_internalEncoding)
#define unknownEncodingMem (parser->m_unknownEncodingMem)
#define unknownEncodingData (parser->m_unknownEncodingData)
#define unknownEncodingHandlerData (parser->m_unknownEncodingHandlerData)
#define unknownEncodingRelease (parser->m_unknownEncodingRelease)
#define protocolEncodingName (parser->m_protocolEncodingName)
#define ns (parser->m_ns)
#define ns_triplets (parser->m_ns_triplets)
#define prologState (parser->m_prologState)
#define processor (parser->m_processor)
#define errorCode (parser->m_errorCode)
#define eventPtr (parser->m_eventPtr)
#define eventEndPtr (parser->m_eventEndPtr)
#define positionPtr (parser->m_positionPtr)
#define position (parser->m_position)
#define openInternalEntities (parser->m_openInternalEntities)
#define freeInternalEntities (parser->m_freeInternalEntities)
#define defaultExpandInternalEntities (parser->m_defaultExpandInternalEntities)
#define tagLevel (parser->m_tagLevel)
#define buffer (parser->m_buffer)
#define bufferPtr (parser->m_bufferPtr)
#define bufferEnd (parser->m_bufferEnd)
#define parseEndByteIndex (parser->m_parseEndByteIndex)
#define parseEndPtr (parser->m_parseEndPtr)
#define bufferLim (parser->m_bufferLim)
#define dataBuf (parser->m_dataBuf)
#define dataBufEnd (parser->m_dataBufEnd)
#define _dtd (parser->m_dtd)
#define curBase (parser->m_curBase)
#define declEntity (parser->m_declEntity)
#define doctypeName (parser->m_doctypeName)
#define doctypeSysid (parser->m_doctypeSysid)
#define doctypePubid (parser->m_doctypePubid)
#define declAttributeType (parser->m_declAttributeType)
#define declNotationName (parser->m_declNotationName)
#define declNotationPublicId (parser->m_declNotationPublicId)
#define declElementType (parser->m_declElementType)
#define declAttributeId (parser->m_declAttributeId)
#define declAttributeIsCdata (parser->m_declAttributeIsCdata)
#define declAttributeIsId (parser->m_declAttributeIsId)
#define freeTagList (parser->m_freeTagList)
#define freeBindingList (parser->m_freeBindingList)
#define inheritedBindings (parser->m_inheritedBindings)
#define tagStack (parser->m_tagStack)
#define atts (parser->m_atts)
#define attsSize (parser->m_attsSize)
#define nSpecifiedAtts (parser->m_nSpecifiedAtts)
#define idAttIndex (parser->m_idAttIndex)
#define nsAtts (parser->m_nsAtts)
#define nsAttsVersion (parser->m_nsAttsVersion)
#define nsAttsPower (parser->m_nsAttsPower)
/* m_attInfo only exists when XML_ATTR_INFO is defined */
#define attInfo (parser->m_attInfo)
#define tempPool (parser->m_tempPool)
#define temp2Pool (parser->m_temp2Pool)
#define groupConnector (parser->m_groupConnector)
#define groupSize (parser->m_groupSize)
#define namespaceSeparator (parser->m_namespaceSeparator)
#define parentParser (parser->m_parentParser)
#define ps_parsing (parser->m_parsingStatus.parsing)
#define ps_finalBuffer (parser->m_parsingStatus.finalBuffer)
#ifdef XML_DTD
#define isParamEntity (parser->m_isParamEntity)
#define useForeignDTD (parser->m_useForeignDTD)
#define paramEntityParsing (parser->m_paramEntityParsing)
#endif /* XML_DTD */
#define hash_secret_salt (parser->m_hash_secret_salt)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000669
Fred Drake08317ae2003-10-21 15:38:55 +0000670XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000671XML_ParserCreate(const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000672{
673 return XML_ParserCreate_MM(encodingName, NULL, NULL);
674}
675
Fred Drake08317ae2003-10-21 15:38:55 +0000676XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000677XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000678{
679 XML_Char tmp[2];
680 *tmp = nsSep;
681 return XML_ParserCreate_MM(encodingName, NULL, tmp);
682}
683
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000684static const XML_Char implicitContext[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700685 ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, ASCII_t, ASCII_t, ASCII_p,
686 ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
687 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g,
688 ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9,
689 ASCII_9, ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
690 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0'
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000691};
692
/* Produces a non-zero salt for the parser's hash functions.

   The value is derived from the current time and the address of a local
   variable, then scrambled by multiplication with the Mersenne prime
   2^31-1.  The C library's rand()/srand() state is deliberately left
   untouched: reseeding it here would silently change the random sequence
   seen by the host application.

   Zero is never returned, because callers treat a zero salt as "not yet
   generated".  This is NOT a cryptographic-quality random source.
*/
static unsigned long
generate_hash_secret_salt(void)
{
  unsigned long salt;

  salt = (unsigned long)time(NULL);
  /* mix in a few address bits; these vary between runs on systems that
     randomise the stack location */
  salt ^= (unsigned long)(size_t)&salt;
  salt *= 2147483647UL;
  if (salt == 0)
    salt = 1;
  return salt;
}
700
701static XML_Bool /* only valid for root parser */
702startParsing(XML_Parser parser)
703{
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700704 /* hash functions must be initialized before setContext() is called */
705 if (hash_secret_salt == 0)
706 hash_secret_salt = generate_hash_secret_salt();
707 if (ns) {
708 /* implicit context only set for root parser, since child
709 parsers (i.e. external entity parsers) will inherit it
710 */
711 return setContext(parser, implicitContext);
712 }
713 return XML_TRUE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700714}
715
716XML_Parser XMLCALL
717XML_ParserCreate_MM(const XML_Char *encodingName,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700718 const XML_Memory_Handling_Suite *memsuite,
719 const XML_Char *nameSep)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700720{
721 return parserCreate(encodingName, memsuite, nameSep, NULL);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000722}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000723
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000724static XML_Parser
725parserCreate(const XML_Char *encodingName,
726 const XML_Memory_Handling_Suite *memsuite,
727 const XML_Char *nameSep,
728 DTD *dtd)
729{
730 XML_Parser parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000731
732 if (memsuite) {
733 XML_Memory_Handling_Suite *mtemp;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000734 parser = (XML_Parser)
735 memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
736 if (parser != NULL) {
737 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
738 mtemp->malloc_fcn = memsuite->malloc_fcn;
739 mtemp->realloc_fcn = memsuite->realloc_fcn;
740 mtemp->free_fcn = memsuite->free_fcn;
741 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000742 }
743 else {
744 XML_Memory_Handling_Suite *mtemp;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000745 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
746 if (parser != NULL) {
747 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
748 mtemp->malloc_fcn = malloc;
749 mtemp->realloc_fcn = realloc;
750 mtemp->free_fcn = free;
751 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000752 }
753
754 if (!parser)
755 return parser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000756
757 buffer = NULL;
758 bufferLim = NULL;
759
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000760 attsSize = INIT_ATTS_SIZE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000761 atts = (ATTRIBUTE *)MALLOC(attsSize * sizeof(ATTRIBUTE));
762 if (atts == NULL) {
763 FREE(parser);
764 return NULL;
765 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700766#ifdef XML_ATTR_INFO
767 attInfo = (XML_AttrInfo*)MALLOC(attsSize * sizeof(XML_AttrInfo));
768 if (attInfo == NULL) {
769 FREE(atts);
770 FREE(parser);
771 return NULL;
772 }
773#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000774 dataBuf = (XML_Char *)MALLOC(INIT_DATA_BUF_SIZE * sizeof(XML_Char));
775 if (dataBuf == NULL) {
776 FREE(atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700777#ifdef XML_ATTR_INFO
778 FREE(attInfo);
779#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000780 FREE(parser);
781 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000782 }
783 dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE;
784
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000785 if (dtd)
786 _dtd = dtd;
787 else {
788 _dtd = dtdCreate(&parser->m_mem);
789 if (_dtd == NULL) {
790 FREE(dataBuf);
791 FREE(atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700792#ifdef XML_ATTR_INFO
793 FREE(attInfo);
794#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000795 FREE(parser);
796 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000797 }
798 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000799
800 freeBindingList = NULL;
801 freeTagList = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +0000802 freeInternalEntities = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000803
804 groupSize = 0;
805 groupConnector = NULL;
806
807 unknownEncodingHandler = NULL;
808 unknownEncodingHandlerData = NULL;
809
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700810 namespaceSeparator = ASCII_EXCL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000811 ns = XML_FALSE;
812 ns_triplets = XML_FALSE;
813
Fred Drake08317ae2003-10-21 15:38:55 +0000814 nsAtts = NULL;
815 nsAttsVersion = 0;
816 nsAttsPower = 0;
817
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000818 poolInit(&tempPool, &(parser->m_mem));
819 poolInit(&temp2Pool, &(parser->m_mem));
820 parserInit(parser, encodingName);
821
822 if (encodingName && !protocolEncodingName) {
823 XML_ParserFree(parser);
824 return NULL;
825 }
826
827 if (nameSep) {
828 ns = XML_TRUE;
829 internalEncoding = XmlGetInternalEncodingNS();
830 namespaceSeparator = *nameSep;
831 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000832 else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000833 internalEncoding = XmlGetInternalEncoding();
834 }
835
836 return parser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000837}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000838
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000839static void
840parserInit(XML_Parser parser, const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000841{
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000842 processor = prologInitProcessor;
843 XmlPrologStateInit(&prologState);
844 protocolEncodingName = (encodingName != NULL
845 ? poolCopyString(&tempPool, encodingName)
846 : NULL);
847 curBase = NULL;
848 XmlInitEncoding(&initEncoding, &encoding, 0);
849 userData = NULL;
850 handlerArg = NULL;
851 startElementHandler = NULL;
852 endElementHandler = NULL;
853 characterDataHandler = NULL;
854 processingInstructionHandler = NULL;
855 commentHandler = NULL;
856 startCdataSectionHandler = NULL;
857 endCdataSectionHandler = NULL;
858 defaultHandler = NULL;
859 startDoctypeDeclHandler = NULL;
860 endDoctypeDeclHandler = NULL;
861 unparsedEntityDeclHandler = NULL;
862 notationDeclHandler = NULL;
863 startNamespaceDeclHandler = NULL;
864 endNamespaceDeclHandler = NULL;
865 notStandaloneHandler = NULL;
866 externalEntityRefHandler = NULL;
867 externalEntityRefHandlerArg = parser;
868 skippedEntityHandler = NULL;
869 elementDeclHandler = NULL;
870 attlistDeclHandler = NULL;
871 entityDeclHandler = NULL;
872 xmlDeclHandler = NULL;
873 bufferPtr = buffer;
874 bufferEnd = buffer;
875 parseEndByteIndex = 0;
876 parseEndPtr = NULL;
877 declElementType = NULL;
878 declAttributeId = NULL;
879 declEntity = NULL;
880 doctypeName = NULL;
881 doctypeSysid = NULL;
882 doctypePubid = NULL;
883 declAttributeType = NULL;
884 declNotationName = NULL;
885 declNotationPublicId = NULL;
886 declAttributeIsCdata = XML_FALSE;
887 declAttributeIsId = XML_FALSE;
888 memset(&position, 0, sizeof(POSITION));
889 errorCode = XML_ERROR_NONE;
890 eventPtr = NULL;
891 eventEndPtr = NULL;
892 positionPtr = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +0000893 openInternalEntities = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000894 defaultExpandInternalEntities = XML_TRUE;
895 tagLevel = 0;
896 tagStack = NULL;
897 inheritedBindings = NULL;
898 nSpecifiedAtts = 0;
899 unknownEncodingMem = NULL;
900 unknownEncodingRelease = NULL;
901 unknownEncodingData = NULL;
902 parentParser = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000903 ps_parsing = XML_INITIALIZED;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000904#ifdef XML_DTD
905 isParamEntity = XML_FALSE;
906 useForeignDTD = XML_FALSE;
907 paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
908#endif
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700909 hash_secret_salt = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000910}
911
912/* moves list of bindings to freeBindingList */
913static void FASTCALL
914moveToFreeBindingList(XML_Parser parser, BINDING *bindings)
915{
916 while (bindings) {
917 BINDING *b = bindings;
918 bindings = bindings->nextTagBinding;
919 b->nextTagBinding = freeBindingList;
920 freeBindingList = b;
921 }
922}
923
Fred Drake08317ae2003-10-21 15:38:55 +0000924XML_Bool XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000925XML_ParserReset(XML_Parser parser, const XML_Char *encodingName)
926{
927 TAG *tStk;
Fred Drake31d485c2004-08-03 07:06:22 +0000928 OPEN_INTERNAL_ENTITY *openEntityList;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000929 if (parentParser)
930 return XML_FALSE;
931 /* move tagStack to freeTagList */
932 tStk = tagStack;
933 while (tStk) {
934 TAG *tag = tStk;
935 tStk = tStk->parent;
936 tag->parent = freeTagList;
937 moveToFreeBindingList(parser, tag->bindings);
938 tag->bindings = NULL;
939 freeTagList = tag;
940 }
Fred Drake31d485c2004-08-03 07:06:22 +0000941 /* move openInternalEntities to freeInternalEntities */
942 openEntityList = openInternalEntities;
943 while (openEntityList) {
944 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
945 openEntityList = openEntity->next;
946 openEntity->next = freeInternalEntities;
947 freeInternalEntities = openEntity;
948 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000949 moveToFreeBindingList(parser, inheritedBindings);
Fred Drake08317ae2003-10-21 15:38:55 +0000950 FREE(unknownEncodingMem);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000951 if (unknownEncodingRelease)
952 unknownEncodingRelease(unknownEncodingData);
953 poolClear(&tempPool);
954 poolClear(&temp2Pool);
955 parserInit(parser, encodingName);
956 dtdReset(_dtd, &parser->m_mem);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700957 return XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000958}
959
Fred Drake08317ae2003-10-21 15:38:55 +0000960enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000961XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
962{
963 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
964 XXX There's no way for the caller to determine which of the
965 XXX possible error cases caused the XML_STATUS_ERROR return.
966 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000967 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000968 return XML_STATUS_ERROR;
969 if (encodingName == NULL)
970 protocolEncodingName = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000971 else {
972 protocolEncodingName = poolCopyString(&tempPool, encodingName);
973 if (!protocolEncodingName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000974 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000975 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000976 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000977}
978
Fred Drake08317ae2003-10-21 15:38:55 +0000979XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000980XML_ExternalEntityParserCreate(XML_Parser oldParser,
981 const XML_Char *context,
982 const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000983{
984 XML_Parser parser = oldParser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000985 DTD *newDtd = NULL;
986 DTD *oldDtd = _dtd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000987 XML_StartElementHandler oldStartElementHandler = startElementHandler;
988 XML_EndElementHandler oldEndElementHandler = endElementHandler;
989 XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000990 XML_ProcessingInstructionHandler oldProcessingInstructionHandler
991 = processingInstructionHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000992 XML_CommentHandler oldCommentHandler = commentHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000993 XML_StartCdataSectionHandler oldStartCdataSectionHandler
994 = startCdataSectionHandler;
995 XML_EndCdataSectionHandler oldEndCdataSectionHandler
996 = endCdataSectionHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000997 XML_DefaultHandler oldDefaultHandler = defaultHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000998 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler
999 = unparsedEntityDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001000 XML_NotationDeclHandler oldNotationDeclHandler = notationDeclHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001001 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler
1002 = startNamespaceDeclHandler;
1003 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler
1004 = endNamespaceDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001005 XML_NotStandaloneHandler oldNotStandaloneHandler = notStandaloneHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001006 XML_ExternalEntityRefHandler oldExternalEntityRefHandler
1007 = externalEntityRefHandler;
1008 XML_SkippedEntityHandler oldSkippedEntityHandler = skippedEntityHandler;
1009 XML_UnknownEncodingHandler oldUnknownEncodingHandler
1010 = unknownEncodingHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001011 XML_ElementDeclHandler oldElementDeclHandler = elementDeclHandler;
1012 XML_AttlistDeclHandler oldAttlistDeclHandler = attlistDeclHandler;
1013 XML_EntityDeclHandler oldEntityDeclHandler = entityDeclHandler;
1014 XML_XmlDeclHandler oldXmlDeclHandler = xmlDeclHandler;
1015 ELEMENT_TYPE * oldDeclElementType = declElementType;
1016
1017 void *oldUserData = userData;
1018 void *oldHandlerArg = handlerArg;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001019 XML_Bool oldDefaultExpandInternalEntities = defaultExpandInternalEntities;
1020 XML_Parser oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001021#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001022 enum XML_ParamEntityParsing oldParamEntityParsing = paramEntityParsing;
1023 int oldInEntityValue = prologState.inEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001024#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001025 XML_Bool oldns_triplets = ns_triplets;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001026 /* Note that the new parser shares the same hash secret as the old
1027 parser, so that dtdCopy and copyEntityTable can lookup values
1028 from hash tables associated with either parser without us having
1029 to worry which hash secrets each table has.
1030 */
1031 unsigned long oldhash_secret_salt = hash_secret_salt;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001032
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001033#ifdef XML_DTD
1034 if (!context)
1035 newDtd = oldDtd;
1036#endif /* XML_DTD */
1037
1038 /* Note that the magical uses of the pre-processor to make field
1039 access look more like C++ require that `parser' be overwritten
1040 here. This makes this function more painful to follow than it
1041 would be otherwise.
1042 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001043 if (ns) {
1044 XML_Char tmp[2];
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001045 *tmp = namespaceSeparator;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001046 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001047 }
1048 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001049 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001050 }
1051
1052 if (!parser)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001053 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001054
1055 startElementHandler = oldStartElementHandler;
1056 endElementHandler = oldEndElementHandler;
1057 characterDataHandler = oldCharacterDataHandler;
1058 processingInstructionHandler = oldProcessingInstructionHandler;
1059 commentHandler = oldCommentHandler;
1060 startCdataSectionHandler = oldStartCdataSectionHandler;
1061 endCdataSectionHandler = oldEndCdataSectionHandler;
1062 defaultHandler = oldDefaultHandler;
1063 unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1064 notationDeclHandler = oldNotationDeclHandler;
1065 startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1066 endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1067 notStandaloneHandler = oldNotStandaloneHandler;
1068 externalEntityRefHandler = oldExternalEntityRefHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001069 skippedEntityHandler = oldSkippedEntityHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001070 unknownEncodingHandler = oldUnknownEncodingHandler;
1071 elementDeclHandler = oldElementDeclHandler;
1072 attlistDeclHandler = oldAttlistDeclHandler;
1073 entityDeclHandler = oldEntityDeclHandler;
1074 xmlDeclHandler = oldXmlDeclHandler;
1075 declElementType = oldDeclElementType;
1076 userData = oldUserData;
1077 if (oldUserData == oldHandlerArg)
1078 handlerArg = userData;
1079 else
1080 handlerArg = parser;
1081 if (oldExternalEntityRefHandlerArg != oldParser)
1082 externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1083 defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1084 ns_triplets = oldns_triplets;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001085 hash_secret_salt = oldhash_secret_salt;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001086 parentParser = oldParser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001087#ifdef XML_DTD
1088 paramEntityParsing = oldParamEntityParsing;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001089 prologState.inEntityValue = oldInEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001090 if (context) {
1091#endif /* XML_DTD */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001092 if (!dtdCopy(oldParser, _dtd, oldDtd, &parser->m_mem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001093 || !setContext(parser, context)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001094 XML_ParserFree(parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001095 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001096 }
1097 processor = externalEntityInitProcessor;
1098#ifdef XML_DTD
1099 }
1100 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001101 /* The DTD instance referenced by _dtd is shared between the document's
1102 root parser and external PE parsers, therefore one does not need to
1103 call setContext. In addition, one also *must* not call setContext,
1104 because this would overwrite existing prefix->binding pointers in
1105 _dtd with ones that get destroyed with the external PE parser.
1106 This would leave those prefixes with dangling pointers.
1107 */
1108 isParamEntity = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001109 XmlPrologStateInitExternalEntity(&prologState);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001110 processor = externalParEntInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001111 }
1112#endif /* XML_DTD */
1113 return parser;
1114}
1115
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001116static void FASTCALL
1117destroyBindings(BINDING *bindings, XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001118{
1119 for (;;) {
1120 BINDING *b = bindings;
1121 if (!b)
1122 break;
1123 bindings = b->nextTagBinding;
1124 FREE(b->uri);
1125 FREE(b);
1126 }
1127}
1128
Fred Drake08317ae2003-10-21 15:38:55 +00001129void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001130XML_ParserFree(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001131{
Fred Drake31d485c2004-08-03 07:06:22 +00001132 TAG *tagList;
1133 OPEN_INTERNAL_ENTITY *entityList;
1134 if (parser == NULL)
1135 return;
1136 /* free tagStack and freeTagList */
1137 tagList = tagStack;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001138 for (;;) {
1139 TAG *p;
Fred Drake31d485c2004-08-03 07:06:22 +00001140 if (tagList == NULL) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001141 if (freeTagList == NULL)
1142 break;
Fred Drake31d485c2004-08-03 07:06:22 +00001143 tagList = freeTagList;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001144 freeTagList = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001145 }
Fred Drake31d485c2004-08-03 07:06:22 +00001146 p = tagList;
1147 tagList = tagList->parent;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001148 FREE(p->buf);
1149 destroyBindings(p->bindings, parser);
1150 FREE(p);
1151 }
Fred Drake31d485c2004-08-03 07:06:22 +00001152 /* free openInternalEntities and freeInternalEntities */
1153 entityList = openInternalEntities;
1154 for (;;) {
1155 OPEN_INTERNAL_ENTITY *openEntity;
1156 if (entityList == NULL) {
1157 if (freeInternalEntities == NULL)
1158 break;
1159 entityList = freeInternalEntities;
1160 freeInternalEntities = NULL;
1161 }
1162 openEntity = entityList;
1163 entityList = entityList->next;
1164 FREE(openEntity);
1165 }
1166
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001167 destroyBindings(freeBindingList, parser);
1168 destroyBindings(inheritedBindings, parser);
1169 poolDestroy(&tempPool);
1170 poolDestroy(&temp2Pool);
1171#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001172 /* external parameter entity parsers share the DTD structure
1173 parser->m_dtd with the root parser, so we must not destroy it
1174 */
1175 if (!isParamEntity && _dtd)
1176#else
1177 if (_dtd)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001178#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001179 dtdDestroy(_dtd, (XML_Bool)!parentParser, &parser->m_mem);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001180 FREE((void *)atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001181#ifdef XML_ATTR_INFO
1182 FREE((void *)attInfo);
1183#endif
Fred Drake08317ae2003-10-21 15:38:55 +00001184 FREE(groupConnector);
1185 FREE(buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001186 FREE(dataBuf);
Fred Drake08317ae2003-10-21 15:38:55 +00001187 FREE(nsAtts);
1188 FREE(unknownEncodingMem);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001189 if (unknownEncodingRelease)
1190 unknownEncodingRelease(unknownEncodingData);
1191 FREE(parser);
1192}
1193
Fred Drake08317ae2003-10-21 15:38:55 +00001194void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001195XML_UseParserAsHandlerArg(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001196{
1197 handlerArg = parser;
1198}
1199
Fred Drake08317ae2003-10-21 15:38:55 +00001200enum XML_Error XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001201XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD)
1202{
1203#ifdef XML_DTD
1204 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001205 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001206 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1207 useForeignDTD = useDTD;
1208 return XML_ERROR_NONE;
1209#else
1210 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1211#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001212}
1213
Fred Drake08317ae2003-10-21 15:38:55 +00001214void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001215XML_SetReturnNSTriplet(XML_Parser parser, int do_nst)
1216{
1217 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001218 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001219 return;
1220 ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1221}
1222
Fred Drake08317ae2003-10-21 15:38:55 +00001223void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001224XML_SetUserData(XML_Parser parser, void *p)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001225{
1226 if (handlerArg == userData)
1227 handlerArg = userData = p;
1228 else
1229 userData = p;
1230}
1231
Fred Drake08317ae2003-10-21 15:38:55 +00001232enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001233XML_SetBase(XML_Parser parser, const XML_Char *p)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001234{
1235 if (p) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001236 p = poolCopyString(&_dtd->pool, p);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001237 if (!p)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001238 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001239 curBase = p;
1240 }
1241 else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001242 curBase = NULL;
1243 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001244}
1245
Fred Drake08317ae2003-10-21 15:38:55 +00001246const XML_Char * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001247XML_GetBase(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001248{
1249 return curBase;
1250}
1251
Fred Drake08317ae2003-10-21 15:38:55 +00001252int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001253XML_GetSpecifiedAttributeCount(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001254{
1255 return nSpecifiedAtts;
1256}
1257
Fred Drake08317ae2003-10-21 15:38:55 +00001258int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001259XML_GetIdAttributeIndex(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001260{
1261 return idAttIndex;
1262}
1263
#ifdef XML_ATTR_INFO
/* Returns the parser's attribute info array, or NULL when parser is
   NULL.  Only available when built with XML_ATTR_INFO.
*/
const XML_AttrInfo * XMLCALL
XML_GetAttributeInfo(XML_Parser parser)
{
  if (parser == NULL)
    return NULL;
  return attInfo;
}
#endif
1271
Fred Drake08317ae2003-10-21 15:38:55 +00001272void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001273XML_SetElementHandler(XML_Parser parser,
1274 XML_StartElementHandler start,
1275 XML_EndElementHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001276{
1277 startElementHandler = start;
1278 endElementHandler = end;
1279}
1280
Fred Drake08317ae2003-10-21 15:38:55 +00001281void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001282XML_SetStartElementHandler(XML_Parser parser,
1283 XML_StartElementHandler start) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001284 startElementHandler = start;
1285}
1286
Fred Drake08317ae2003-10-21 15:38:55 +00001287void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001288XML_SetEndElementHandler(XML_Parser parser,
1289 XML_EndElementHandler end) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001290 endElementHandler = end;
1291}
1292
Fred Drake08317ae2003-10-21 15:38:55 +00001293void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001294XML_SetCharacterDataHandler(XML_Parser parser,
1295 XML_CharacterDataHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001296{
1297 characterDataHandler = handler;
1298}
1299
Fred Drake08317ae2003-10-21 15:38:55 +00001300void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001301XML_SetProcessingInstructionHandler(XML_Parser parser,
1302 XML_ProcessingInstructionHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001303{
1304 processingInstructionHandler = handler;
1305}
1306
Fred Drake08317ae2003-10-21 15:38:55 +00001307void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001308XML_SetCommentHandler(XML_Parser parser,
1309 XML_CommentHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001310{
1311 commentHandler = handler;
1312}
1313
Fred Drake08317ae2003-10-21 15:38:55 +00001314void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001315XML_SetCdataSectionHandler(XML_Parser parser,
1316 XML_StartCdataSectionHandler start,
1317 XML_EndCdataSectionHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001318{
1319 startCdataSectionHandler = start;
1320 endCdataSectionHandler = end;
1321}
1322
Fred Drake08317ae2003-10-21 15:38:55 +00001323void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001324XML_SetStartCdataSectionHandler(XML_Parser parser,
1325 XML_StartCdataSectionHandler start) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001326 startCdataSectionHandler = start;
1327}
1328
Fred Drake08317ae2003-10-21 15:38:55 +00001329void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001330XML_SetEndCdataSectionHandler(XML_Parser parser,
1331 XML_EndCdataSectionHandler end) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001332 endCdataSectionHandler = end;
1333}
1334
Fred Drake08317ae2003-10-21 15:38:55 +00001335void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001336XML_SetDefaultHandler(XML_Parser parser,
1337 XML_DefaultHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001338{
1339 defaultHandler = handler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001340 defaultExpandInternalEntities = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001341}
1342
Fred Drake08317ae2003-10-21 15:38:55 +00001343void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001344XML_SetDefaultHandlerExpand(XML_Parser parser,
1345 XML_DefaultHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001346{
1347 defaultHandler = handler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001348 defaultExpandInternalEntities = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001349}
1350
Fred Drake08317ae2003-10-21 15:38:55 +00001351void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001352XML_SetDoctypeDeclHandler(XML_Parser parser,
1353 XML_StartDoctypeDeclHandler start,
1354 XML_EndDoctypeDeclHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001355{
1356 startDoctypeDeclHandler = start;
1357 endDoctypeDeclHandler = end;
1358}
1359
Fred Drake08317ae2003-10-21 15:38:55 +00001360void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001361XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1362 XML_StartDoctypeDeclHandler start) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001363 startDoctypeDeclHandler = start;
1364}
1365
Fred Drake08317ae2003-10-21 15:38:55 +00001366void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001367XML_SetEndDoctypeDeclHandler(XML_Parser parser,
1368 XML_EndDoctypeDeclHandler end) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001369 endDoctypeDeclHandler = end;
1370}
1371
Fred Drake08317ae2003-10-21 15:38:55 +00001372void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001373XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1374 XML_UnparsedEntityDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001375{
1376 unparsedEntityDeclHandler = handler;
1377}
1378
Fred Drake08317ae2003-10-21 15:38:55 +00001379void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001380XML_SetNotationDeclHandler(XML_Parser parser,
1381 XML_NotationDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001382{
1383 notationDeclHandler = handler;
1384}
1385
Fred Drake08317ae2003-10-21 15:38:55 +00001386void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001387XML_SetNamespaceDeclHandler(XML_Parser parser,
1388 XML_StartNamespaceDeclHandler start,
1389 XML_EndNamespaceDeclHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001390{
1391 startNamespaceDeclHandler = start;
1392 endNamespaceDeclHandler = end;
1393}
1394
Fred Drake08317ae2003-10-21 15:38:55 +00001395void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001396XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1397 XML_StartNamespaceDeclHandler start) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001398 startNamespaceDeclHandler = start;
1399}
1400
Fred Drake08317ae2003-10-21 15:38:55 +00001401void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001402XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1403 XML_EndNamespaceDeclHandler end) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001404 endNamespaceDeclHandler = end;
1405}
1406
Fred Drake08317ae2003-10-21 15:38:55 +00001407void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001408XML_SetNotStandaloneHandler(XML_Parser parser,
1409 XML_NotStandaloneHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001410{
1411 notStandaloneHandler = handler;
1412}
1413
Fred Drake08317ae2003-10-21 15:38:55 +00001414void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001415XML_SetExternalEntityRefHandler(XML_Parser parser,
1416 XML_ExternalEntityRefHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001417{
1418 externalEntityRefHandler = handler;
1419}
1420
Fred Drake08317ae2003-10-21 15:38:55 +00001421void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001422XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001423{
1424 if (arg)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001425 externalEntityRefHandlerArg = (XML_Parser)arg;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001426 else
1427 externalEntityRefHandlerArg = parser;
1428}
1429
Fred Drake08317ae2003-10-21 15:38:55 +00001430void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001431XML_SetSkippedEntityHandler(XML_Parser parser,
1432 XML_SkippedEntityHandler handler)
1433{
1434 skippedEntityHandler = handler;
1435}
1436
Fred Drake08317ae2003-10-21 15:38:55 +00001437void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001438XML_SetUnknownEncodingHandler(XML_Parser parser,
1439 XML_UnknownEncodingHandler handler,
1440 void *data)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001441{
1442 unknownEncodingHandler = handler;
1443 unknownEncodingHandlerData = data;
1444}
1445
Fred Drake08317ae2003-10-21 15:38:55 +00001446void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001447XML_SetElementDeclHandler(XML_Parser parser,
1448 XML_ElementDeclHandler eldecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001449{
1450 elementDeclHandler = eldecl;
1451}
1452
Fred Drake08317ae2003-10-21 15:38:55 +00001453void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001454XML_SetAttlistDeclHandler(XML_Parser parser,
1455 XML_AttlistDeclHandler attdecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001456{
1457 attlistDeclHandler = attdecl;
1458}
1459
Fred Drake08317ae2003-10-21 15:38:55 +00001460void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001461XML_SetEntityDeclHandler(XML_Parser parser,
1462 XML_EntityDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001463{
1464 entityDeclHandler = handler;
1465}
1466
Fred Drake08317ae2003-10-21 15:38:55 +00001467void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001468XML_SetXmlDeclHandler(XML_Parser parser,
1469 XML_XmlDeclHandler handler) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001470 xmlDeclHandler = handler;
1471}
1472
Fred Drake08317ae2003-10-21 15:38:55 +00001473int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001474XML_SetParamEntityParsing(XML_Parser parser,
1475 enum XML_ParamEntityParsing peParsing)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001476{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001477 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001478 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001479 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001480#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001481 paramEntityParsing = peParsing;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001482 return 1;
1483#else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001484 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001485#endif
1486}
1487
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001488int XMLCALL
1489XML_SetHashSalt(XML_Parser parser,
1490 unsigned long hash_salt)
1491{
1492 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1493 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
1494 return 0;
1495 hash_secret_salt = hash_salt;
1496 return 1;
1497}
1498
Fred Drake08317ae2003-10-21 15:38:55 +00001499enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001500XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001501{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001502 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001503 case XML_SUSPENDED:
1504 errorCode = XML_ERROR_SUSPENDED;
1505 return XML_STATUS_ERROR;
1506 case XML_FINISHED:
1507 errorCode = XML_ERROR_FINISHED;
1508 return XML_STATUS_ERROR;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001509 case XML_INITIALIZED:
1510 if (parentParser == NULL && !startParsing(parser)) {
1511 errorCode = XML_ERROR_NO_MEMORY;
1512 return XML_STATUS_ERROR;
1513 }
Fred Drake31d485c2004-08-03 07:06:22 +00001514 default:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001515 ps_parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00001516 }
1517
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001518 if (len == 0) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001519 ps_finalBuffer = (XML_Bool)isFinal;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001520 if (!isFinal)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001521 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001522 positionPtr = bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00001523 parseEndPtr = bufferEnd;
1524
1525 /* If data are left over from last buffer, and we now know that these
1526 data are the final chunk of input, then we have to check them again
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001527 to detect errors based on that fact.
Fred Drake31d485c2004-08-03 07:06:22 +00001528 */
1529 errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr);
1530
1531 if (errorCode == XML_ERROR_NONE) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001532 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001533 case XML_SUSPENDED:
1534 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
1535 positionPtr = bufferPtr;
1536 return XML_STATUS_SUSPENDED;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001537 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00001538 case XML_PARSING:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001539 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001540 /* fall through */
1541 default:
1542 return XML_STATUS_OK;
1543 }
1544 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001545 eventEndPtr = eventPtr;
1546 processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001547 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001548 }
1549#ifndef XML_CONTEXT_BYTES
1550 else if (bufferPtr == bufferEnd) {
1551 const char *end;
1552 int nLeftOver;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001553 enum XML_Status result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001554 parseEndByteIndex += len;
1555 positionPtr = s;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001556 ps_finalBuffer = (XML_Bool)isFinal;
Fred Drake31d485c2004-08-03 07:06:22 +00001557
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001558 errorCode = processor(parser, s, parseEndPtr = s + len, &end);
Fred Drake31d485c2004-08-03 07:06:22 +00001559
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001560 if (errorCode != XML_ERROR_NONE) {
1561 eventEndPtr = eventPtr;
1562 processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001563 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001564 }
Fred Drake31d485c2004-08-03 07:06:22 +00001565 else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001566 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001567 case XML_SUSPENDED:
1568 result = XML_STATUS_SUSPENDED;
1569 break;
1570 case XML_INITIALIZED:
1571 case XML_PARSING:
Fred Drake31d485c2004-08-03 07:06:22 +00001572 if (isFinal) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001573 ps_parsing = XML_FINISHED;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001574 return XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001575 }
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001576 /* fall through */
1577 default:
1578 result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001579 }
1580 }
1581
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001582 XmlUpdatePosition(encoding, positionPtr, end, &position);
1583 nLeftOver = s + len - end;
1584 if (nLeftOver) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001585 if (buffer == NULL || nLeftOver > bufferLim - buffer) {
1586 /* FIXME avoid integer overflow */
1587 char *temp;
1588 temp = (buffer == NULL
1589 ? (char *)MALLOC(len * 2)
1590 : (char *)REALLOC(buffer, len * 2));
1591 if (temp == NULL) {
1592 errorCode = XML_ERROR_NO_MEMORY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001593 eventPtr = eventEndPtr = NULL;
1594 processor = errorProcessor;
1595 return XML_STATUS_ERROR;
1596 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001597 buffer = temp;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001598 bufferLim = buffer + len * 2;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001599 }
1600 memcpy(buffer, end, nLeftOver);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001601 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001602 bufferPtr = buffer;
1603 bufferEnd = buffer + nLeftOver;
1604 positionPtr = bufferPtr;
1605 parseEndPtr = bufferEnd;
1606 eventPtr = bufferPtr;
1607 eventEndPtr = bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00001608 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001609 }
1610#endif /* not defined XML_CONTEXT_BYTES */
1611 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001612 void *buff = XML_GetBuffer(parser, len);
1613 if (buff == NULL)
1614 return XML_STATUS_ERROR;
1615 else {
1616 memcpy(buff, s, len);
1617 return XML_ParseBuffer(parser, len, isFinal);
1618 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001619 }
1620}
1621
Fred Drake08317ae2003-10-21 15:38:55 +00001622enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001623XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001624{
Fred Drake31d485c2004-08-03 07:06:22 +00001625 const char *start;
Neal Norwitz52ca0dd2006-01-07 21:21:16 +00001626 enum XML_Status result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001627
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001628 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001629 case XML_SUSPENDED:
1630 errorCode = XML_ERROR_SUSPENDED;
1631 return XML_STATUS_ERROR;
1632 case XML_FINISHED:
1633 errorCode = XML_ERROR_FINISHED;
1634 return XML_STATUS_ERROR;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001635 case XML_INITIALIZED:
1636 if (parentParser == NULL && !startParsing(parser)) {
1637 errorCode = XML_ERROR_NO_MEMORY;
1638 return XML_STATUS_ERROR;
1639 }
Fred Drake31d485c2004-08-03 07:06:22 +00001640 default:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001641 ps_parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00001642 }
1643
1644 start = bufferPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001645 positionPtr = start;
1646 bufferEnd += len;
Fred Drake31d485c2004-08-03 07:06:22 +00001647 parseEndPtr = bufferEnd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001648 parseEndByteIndex += len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001649 ps_finalBuffer = (XML_Bool)isFinal;
Fred Drake31d485c2004-08-03 07:06:22 +00001650
1651 errorCode = processor(parser, start, parseEndPtr, &bufferPtr);
1652
1653 if (errorCode != XML_ERROR_NONE) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001654 eventEndPtr = eventPtr;
1655 processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001656 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001657 }
Fred Drake31d485c2004-08-03 07:06:22 +00001658 else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001659 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001660 case XML_SUSPENDED:
1661 result = XML_STATUS_SUSPENDED;
1662 break;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001663 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00001664 case XML_PARSING:
1665 if (isFinal) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001666 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001667 return result;
1668 }
1669 default: ; /* should not happen */
1670 }
1671 }
1672
1673 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
1674 positionPtr = bufferPtr;
1675 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001676}
1677
Fred Drake08317ae2003-10-21 15:38:55 +00001678void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001679XML_GetBuffer(XML_Parser parser, int len)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001680{
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001681 if (len < 0) {
1682 errorCode = XML_ERROR_NO_MEMORY;
1683 return NULL;
1684 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001685 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001686 case XML_SUSPENDED:
1687 errorCode = XML_ERROR_SUSPENDED;
1688 return NULL;
1689 case XML_FINISHED:
1690 errorCode = XML_ERROR_FINISHED;
1691 return NULL;
1692 default: ;
1693 }
1694
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001695 if (len > bufferLim - bufferEnd) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001696#ifdef XML_CONTEXT_BYTES
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001697 int keep;
1698#endif
1699 int neededSize = len + (int)(bufferEnd - bufferPtr);
1700 if (neededSize < 0) {
1701 errorCode = XML_ERROR_NO_MEMORY;
1702 return NULL;
1703 }
1704#ifdef XML_CONTEXT_BYTES
1705 keep = (int)(bufferPtr - buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001706
1707 if (keep > XML_CONTEXT_BYTES)
1708 keep = XML_CONTEXT_BYTES;
1709 neededSize += keep;
1710#endif /* defined XML_CONTEXT_BYTES */
1711 if (neededSize <= bufferLim - buffer) {
1712#ifdef XML_CONTEXT_BYTES
1713 if (keep < bufferPtr - buffer) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001714 int offset = (int)(bufferPtr - buffer) - keep;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001715 memmove(buffer, &buffer[offset], bufferEnd - bufferPtr + keep);
1716 bufferEnd -= offset;
1717 bufferPtr -= offset;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001718 }
1719#else
1720 memmove(buffer, bufferPtr, bufferEnd - bufferPtr);
1721 bufferEnd = buffer + (bufferEnd - bufferPtr);
1722 bufferPtr = buffer;
1723#endif /* not defined XML_CONTEXT_BYTES */
1724 }
1725 else {
1726 char *newBuf;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001727 int bufferSize = (int)(bufferLim - bufferPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001728 if (bufferSize == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001729 bufferSize = INIT_BUFFER_SIZE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001730 do {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001731 bufferSize *= 2;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001732 } while (bufferSize < neededSize && bufferSize > 0);
1733 if (bufferSize <= 0) {
1734 errorCode = XML_ERROR_NO_MEMORY;
1735 return NULL;
1736 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001737 newBuf = (char *)MALLOC(bufferSize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001738 if (newBuf == 0) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001739 errorCode = XML_ERROR_NO_MEMORY;
1740 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001741 }
1742 bufferLim = newBuf + bufferSize;
1743#ifdef XML_CONTEXT_BYTES
1744 if (bufferPtr) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001745 int keep = (int)(bufferPtr - buffer);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001746 if (keep > XML_CONTEXT_BYTES)
1747 keep = XML_CONTEXT_BYTES;
1748 memcpy(newBuf, &bufferPtr[-keep], bufferEnd - bufferPtr + keep);
1749 FREE(buffer);
1750 buffer = newBuf;
1751 bufferEnd = buffer + (bufferEnd - bufferPtr) + keep;
1752 bufferPtr = buffer + keep;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001753 }
1754 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001755 bufferEnd = newBuf + (bufferEnd - bufferPtr);
1756 bufferPtr = buffer = newBuf;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001757 }
1758#else
1759 if (bufferPtr) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001760 memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr);
1761 FREE(buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001762 }
1763 bufferEnd = newBuf + (bufferEnd - bufferPtr);
1764 bufferPtr = buffer = newBuf;
1765#endif /* not defined XML_CONTEXT_BYTES */
1766 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001767 eventPtr = eventEndPtr = NULL;
1768 positionPtr = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001769 }
1770 return bufferEnd;
1771}
1772
Fred Drake31d485c2004-08-03 07:06:22 +00001773enum XML_Status XMLCALL
1774XML_StopParser(XML_Parser parser, XML_Bool resumable)
1775{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001776 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001777 case XML_SUSPENDED:
1778 if (resumable) {
1779 errorCode = XML_ERROR_SUSPENDED;
1780 return XML_STATUS_ERROR;
1781 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001782 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001783 break;
1784 case XML_FINISHED:
1785 errorCode = XML_ERROR_FINISHED;
1786 return XML_STATUS_ERROR;
1787 default:
1788 if (resumable) {
1789#ifdef XML_DTD
1790 if (isParamEntity) {
1791 errorCode = XML_ERROR_SUSPEND_PE;
1792 return XML_STATUS_ERROR;
1793 }
1794#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001795 ps_parsing = XML_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00001796 }
1797 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001798 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001799 }
1800 return XML_STATUS_OK;
1801}
1802
1803enum XML_Status XMLCALL
1804XML_ResumeParser(XML_Parser parser)
1805{
Neal Norwitz52ca0dd2006-01-07 21:21:16 +00001806 enum XML_Status result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001807
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001808 if (ps_parsing != XML_SUSPENDED) {
Fred Drake31d485c2004-08-03 07:06:22 +00001809 errorCode = XML_ERROR_NOT_SUSPENDED;
1810 return XML_STATUS_ERROR;
1811 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001812 ps_parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00001813
1814 errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr);
1815
1816 if (errorCode != XML_ERROR_NONE) {
1817 eventEndPtr = eventPtr;
1818 processor = errorProcessor;
1819 return XML_STATUS_ERROR;
1820 }
1821 else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001822 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001823 case XML_SUSPENDED:
1824 result = XML_STATUS_SUSPENDED;
1825 break;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001826 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00001827 case XML_PARSING:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001828 if (ps_finalBuffer) {
1829 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001830 return result;
1831 }
1832 default: ;
1833 }
1834 }
1835
1836 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
1837 positionPtr = bufferPtr;
1838 return result;
1839}
1840
1841void XMLCALL
1842XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status)
1843{
1844 assert(status != NULL);
1845 *status = parser->m_parsingStatus;
1846}
1847
Fred Drake08317ae2003-10-21 15:38:55 +00001848enum XML_Error XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001849XML_GetErrorCode(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001850{
1851 return errorCode;
1852}
1853
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001854XML_Index XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001855XML_GetCurrentByteIndex(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001856{
1857 if (eventPtr)
1858 return parseEndByteIndex - (parseEndPtr - eventPtr);
1859 return -1;
1860}
1861
Fred Drake08317ae2003-10-21 15:38:55 +00001862int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001863XML_GetCurrentByteCount(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001864{
1865 if (eventEndPtr && eventPtr)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001866 return (int)(eventEndPtr - eventPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001867 return 0;
1868}
1869
Fred Drake08317ae2003-10-21 15:38:55 +00001870const char * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001871XML_GetInputContext(XML_Parser parser, int *offset, int *size)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001872{
1873#ifdef XML_CONTEXT_BYTES
1874 if (eventPtr && buffer) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001875 *offset = (int)(eventPtr - buffer);
1876 *size = (int)(bufferEnd - buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001877 return buffer;
1878 }
1879#endif /* defined XML_CONTEXT_BYTES */
1880 return (char *) 0;
1881}
1882
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001883XML_Size XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001884XML_GetCurrentLineNumber(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001885{
Fred Drake31d485c2004-08-03 07:06:22 +00001886 if (eventPtr && eventPtr >= positionPtr) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001887 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
1888 positionPtr = eventPtr;
1889 }
1890 return position.lineNumber + 1;
1891}
1892
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001893XML_Size XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001894XML_GetCurrentColumnNumber(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001895{
Fred Drake31d485c2004-08-03 07:06:22 +00001896 if (eventPtr && eventPtr >= positionPtr) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001897 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
1898 positionPtr = eventPtr;
1899 }
1900 return position.columnNumber;
1901}
1902
Fred Drake08317ae2003-10-21 15:38:55 +00001903void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001904XML_FreeContentModel(XML_Parser parser, XML_Content *model)
1905{
1906 FREE(model);
1907}
1908
Fred Drake08317ae2003-10-21 15:38:55 +00001909void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001910XML_MemMalloc(XML_Parser parser, size_t size)
1911{
1912 return MALLOC(size);
1913}
1914
Fred Drake08317ae2003-10-21 15:38:55 +00001915void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001916XML_MemRealloc(XML_Parser parser, void *ptr, size_t size)
1917{
1918 return REALLOC(ptr, size);
1919}
1920
Fred Drake08317ae2003-10-21 15:38:55 +00001921void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001922XML_MemFree(XML_Parser parser, void *ptr)
1923{
1924 FREE(ptr);
1925}
1926
Fred Drake08317ae2003-10-21 15:38:55 +00001927void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001928XML_DefaultCurrent(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001929{
1930 if (defaultHandler) {
1931 if (openInternalEntities)
1932 reportDefault(parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001933 internalEncoding,
1934 openInternalEntities->internalEventPtr,
1935 openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001936 else
1937 reportDefault(parser, encoding, eventPtr, eventEndPtr);
1938 }
1939}
1940
Fred Drake08317ae2003-10-21 15:38:55 +00001941const XML_LChar * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001942XML_ErrorString(enum XML_Error code)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001943{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001944 static const XML_LChar* const message[] = {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001945 0,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001946 XML_L("out of memory"),
1947 XML_L("syntax error"),
1948 XML_L("no element found"),
1949 XML_L("not well-formed (invalid token)"),
1950 XML_L("unclosed token"),
1951 XML_L("partial character"),
1952 XML_L("mismatched tag"),
1953 XML_L("duplicate attribute"),
1954 XML_L("junk after document element"),
1955 XML_L("illegal parameter entity reference"),
1956 XML_L("undefined entity"),
1957 XML_L("recursive entity reference"),
1958 XML_L("asynchronous entity"),
1959 XML_L("reference to invalid character number"),
1960 XML_L("reference to binary entity"),
1961 XML_L("reference to external entity in attribute"),
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001962 XML_L("XML or text declaration not at start of entity"),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001963 XML_L("unknown encoding"),
1964 XML_L("encoding specified in XML declaration is incorrect"),
1965 XML_L("unclosed CDATA section"),
1966 XML_L("error in processing external entity reference"),
1967 XML_L("document is not standalone"),
1968 XML_L("unexpected parser state - please send a bug report"),
1969 XML_L("entity declared in parameter entity"),
1970 XML_L("requested feature requires XML_DTD support in Expat"),
Fred Drake08317ae2003-10-21 15:38:55 +00001971 XML_L("cannot change setting once parsing has begun"),
Fred Drake31d485c2004-08-03 07:06:22 +00001972 XML_L("unbound prefix"),
1973 XML_L("must not undeclare prefix"),
1974 XML_L("incomplete markup in parameter entity"),
1975 XML_L("XML declaration not well-formed"),
1976 XML_L("text declaration not well-formed"),
1977 XML_L("illegal character(s) in public id"),
1978 XML_L("parser suspended"),
1979 XML_L("parser not suspended"),
1980 XML_L("parsing aborted"),
1981 XML_L("parsing finished"),
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001982 XML_L("cannot suspend in external parameter entity"),
1983 XML_L("reserved prefix (xml) must not be undeclared or bound to another namespace name"),
1984 XML_L("reserved prefix (xmlns) must not be declared or undeclared"),
1985 XML_L("prefix must not be bound to one of the reserved namespace names")
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001986 };
1987 if (code > 0 && code < sizeof(message)/sizeof(message[0]))
1988 return message[code];
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001989 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001990}
1991
Fred Drake08317ae2003-10-21 15:38:55 +00001992const XML_LChar * XMLCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001993XML_ExpatVersion(void) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001994
1995 /* V1 is used to string-ize the version number. However, it would
1996 string-ize the actual version macro *names* unless we get them
1997 substituted before being passed to V1. CPP is defined to expand
1998 a macro, then rescan for more expansions. Thus, we use V2 to expand
1999 the version macros, then CPP will expand the resulting V1() macro
2000 with the correct numerals. */
2001 /* ### I'm assuming cpp is portable in this respect... */
2002
2003#define V1(a,b,c) XML_L(#a)XML_L(".")XML_L(#b)XML_L(".")XML_L(#c)
2004#define V2(a,b,c) XML_L("expat_")V1(a,b,c)
2005
2006 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2007
2008#undef V1
2009#undef V2
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002010}
2011
Fred Drake08317ae2003-10-21 15:38:55 +00002012XML_Expat_Version XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002013XML_ExpatVersionInfo(void)
2014{
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002015 XML_Expat_Version version;
2016
2017 version.major = XML_MAJOR_VERSION;
2018 version.minor = XML_MINOR_VERSION;
2019 version.micro = XML_MICRO_VERSION;
2020
2021 return version;
2022}
2023
Fred Drake08317ae2003-10-21 15:38:55 +00002024const XML_Feature * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002025XML_GetFeatureList(void)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002026{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002027 static const XML_Feature features[] = {
2028 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2029 sizeof(XML_Char)},
2030 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2031 sizeof(XML_LChar)},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002032#ifdef XML_UNICODE
Fred Drake08317ae2003-10-21 15:38:55 +00002033 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002034#endif
2035#ifdef XML_UNICODE_WCHAR_T
Fred Drake08317ae2003-10-21 15:38:55 +00002036 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002037#endif
2038#ifdef XML_DTD
Fred Drake08317ae2003-10-21 15:38:55 +00002039 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002040#endif
2041#ifdef XML_CONTEXT_BYTES
2042 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2043 XML_CONTEXT_BYTES},
2044#endif
2045#ifdef XML_MIN_SIZE
Fred Drake08317ae2003-10-21 15:38:55 +00002046 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002047#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002048#ifdef XML_NS
2049 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2050#endif
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002051#ifdef XML_LARGE_SIZE
2052 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2053#endif
2054#ifdef XML_ATTR_INFO
2055 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2056#endif
Fred Drake08317ae2003-10-21 15:38:55 +00002057 {XML_FEATURE_END, NULL, 0}
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002058 };
2059
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002060 return features;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002061}
2062
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002063/* Initially tag->rawName always points into the parse buffer;
2064 for those TAG instances opened while the current parse buffer was
2065 processed, and not yet closed, we need to store tag->rawName in a more
2066 permanent location, since the parse buffer is about to be discarded.
2067*/
2068static XML_Bool
2069storeRawNames(XML_Parser parser)
2070{
2071 TAG *tag = tagStack;
2072 while (tag) {
2073 int bufSize;
2074 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2075 char *rawNameBuf = tag->buf + nameLen;
2076 /* Stop if already stored. Since tagStack is a stack, we can stop
2077 at the first entry that has already been copied; everything
2078 below it in the stack is already been accounted for in a
2079 previous call to this function.
2080 */
2081 if (tag->rawName == rawNameBuf)
2082 break;
2083 /* For re-use purposes we need to ensure that the
2084 size of tag->buf is a multiple of sizeof(XML_Char).
2085 */
2086 bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2087 if (bufSize > tag->bufEnd - tag->buf) {
2088 char *temp = (char *)REALLOC(tag->buf, bufSize);
2089 if (temp == NULL)
2090 return XML_FALSE;
2091 /* if tag->name.str points to tag->buf (only when namespace
2092 processing is off) then we have to update it
2093 */
2094 if (tag->name.str == (XML_Char *)tag->buf)
2095 tag->name.str = (XML_Char *)temp;
2096 /* if tag->name.localPart is set (when namespace processing is on)
2097 then update it as well, since it will always point into tag->buf
2098 */
2099 if (tag->name.localPart)
2100 tag->name.localPart = (XML_Char *)temp + (tag->name.localPart -
2101 (XML_Char *)tag->buf);
2102 tag->buf = temp;
2103 tag->bufEnd = temp + bufSize;
2104 rawNameBuf = temp + nameLen;
2105 }
2106 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2107 tag->rawName = rawNameBuf;
2108 tag = tag->parent;
2109 }
2110 return XML_TRUE;
2111}
2112
2113static enum XML_Error PTRCALL
2114contentProcessor(XML_Parser parser,
2115 const char *start,
2116 const char *end,
2117 const char **endPtr)
2118{
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002119 enum XML_Error result = doContent(parser, 0, encoding, start, end,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002120 endPtr, (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00002121 if (result == XML_ERROR_NONE) {
2122 if (!storeRawNames(parser))
2123 return XML_ERROR_NO_MEMORY;
2124 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002125 return result;
2126}
2127
2128static enum XML_Error PTRCALL
2129externalEntityInitProcessor(XML_Parser parser,
2130 const char *start,
2131 const char *end,
2132 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002133{
2134 enum XML_Error result = initializeEncoding(parser);
2135 if (result != XML_ERROR_NONE)
2136 return result;
2137 processor = externalEntityInitProcessor2;
2138 return externalEntityInitProcessor2(parser, start, end, endPtr);
2139}
2140
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002141static enum XML_Error PTRCALL
2142externalEntityInitProcessor2(XML_Parser parser,
2143 const char *start,
2144 const char *end,
2145 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002146{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002147 const char *next = start; /* XmlContentTok doesn't always set the last arg */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002148 int tok = XmlContentTok(encoding, start, end, &next);
2149 switch (tok) {
2150 case XML_TOK_BOM:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002151 /* If we are at the end of the buffer, this would cause the next stage,
2152 i.e. externalEntityInitProcessor3, to pass control directly to
2153 doContent (by detecting XML_TOK_NONE) without processing any xml text
2154 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2155 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002156 if (next == end && !ps_finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002157 *endPtr = next;
2158 return XML_ERROR_NONE;
2159 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002160 start = next;
2161 break;
2162 case XML_TOK_PARTIAL:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002163 if (!ps_finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002164 *endPtr = start;
2165 return XML_ERROR_NONE;
2166 }
2167 eventPtr = start;
2168 return XML_ERROR_UNCLOSED_TOKEN;
2169 case XML_TOK_PARTIAL_CHAR:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002170 if (!ps_finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002171 *endPtr = start;
2172 return XML_ERROR_NONE;
2173 }
2174 eventPtr = start;
2175 return XML_ERROR_PARTIAL_CHAR;
2176 }
2177 processor = externalEntityInitProcessor3;
2178 return externalEntityInitProcessor3(parser, start, end, endPtr);
2179}
2180
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002181static enum XML_Error PTRCALL
2182externalEntityInitProcessor3(XML_Parser parser,
2183 const char *start,
2184 const char *end,
2185 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002186{
Fred Drake31d485c2004-08-03 07:06:22 +00002187 int tok;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002188 const char *next = start; /* XmlContentTok doesn't always set the last arg */
Fred Drake31d485c2004-08-03 07:06:22 +00002189 eventPtr = start;
2190 tok = XmlContentTok(encoding, start, end, &next);
2191 eventEndPtr = next;
2192
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002193 switch (tok) {
2194 case XML_TOK_XML_DECL:
2195 {
Fred Drake31d485c2004-08-03 07:06:22 +00002196 enum XML_Error result;
2197 result = processXmlDecl(parser, 1, start, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002198 if (result != XML_ERROR_NONE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002199 return result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002200 switch (ps_parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002201 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00002202 *endPtr = next;
2203 return XML_ERROR_NONE;
2204 case XML_FINISHED:
2205 return XML_ERROR_ABORTED;
2206 default:
2207 start = next;
2208 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002209 }
2210 break;
2211 case XML_TOK_PARTIAL:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002212 if (!ps_finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002213 *endPtr = start;
2214 return XML_ERROR_NONE;
2215 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002216 return XML_ERROR_UNCLOSED_TOKEN;
2217 case XML_TOK_PARTIAL_CHAR:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002218 if (!ps_finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002219 *endPtr = start;
2220 return XML_ERROR_NONE;
2221 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002222 return XML_ERROR_PARTIAL_CHAR;
2223 }
2224 processor = externalEntityContentProcessor;
2225 tagLevel = 1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002226 return externalEntityContentProcessor(parser, start, end, endPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002227}
2228
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002229static enum XML_Error PTRCALL
2230externalEntityContentProcessor(XML_Parser parser,
2231 const char *start,
2232 const char *end,
2233 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002234{
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002235 enum XML_Error result = doContent(parser, 1, encoding, start, end,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002236 endPtr, (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00002237 if (result == XML_ERROR_NONE) {
2238 if (!storeRawNames(parser))
2239 return XML_ERROR_NO_MEMORY;
2240 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002241 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002242}
2243
2244static enum XML_Error
2245doContent(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002246 int startTagLevel,
2247 const ENCODING *enc,
2248 const char *s,
2249 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00002250 const char **nextPtr,
2251 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002252{
Fred Drake31d485c2004-08-03 07:06:22 +00002253 /* save one level of indirection */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002254 DTD * const dtd = _dtd;
Fred Drake31d485c2004-08-03 07:06:22 +00002255
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002256 const char **eventPP;
2257 const char **eventEndPP;
2258 if (enc == encoding) {
2259 eventPP = &eventPtr;
2260 eventEndPP = &eventEndPtr;
2261 }
2262 else {
2263 eventPP = &(openInternalEntities->internalEventPtr);
2264 eventEndPP = &(openInternalEntities->internalEventEndPtr);
2265 }
2266 *eventPP = s;
Fred Drake31d485c2004-08-03 07:06:22 +00002267
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002268 for (;;) {
2269 const char *next = s; /* XmlContentTok doesn't always set the last arg */
2270 int tok = XmlContentTok(enc, s, end, &next);
2271 *eventEndPP = next;
2272 switch (tok) {
2273 case XML_TOK_TRAILING_CR:
Fred Drake31d485c2004-08-03 07:06:22 +00002274 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002275 *nextPtr = s;
2276 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002277 }
2278 *eventEndPP = end;
2279 if (characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002280 XML_Char c = 0xA;
2281 characterDataHandler(handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002282 }
2283 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002284 reportDefault(parser, enc, s, end);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002285 /* We are at the end of the final buffer, should we check for
2286 XML_SUSPENDED, XML_FINISHED?
Fred Drake31d485c2004-08-03 07:06:22 +00002287 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002288 if (startTagLevel == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002289 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002290 if (tagLevel != startTagLevel)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002291 return XML_ERROR_ASYNC_ENTITY;
Fred Drake31d485c2004-08-03 07:06:22 +00002292 *nextPtr = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002293 return XML_ERROR_NONE;
2294 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00002295 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002296 *nextPtr = s;
2297 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002298 }
2299 if (startTagLevel > 0) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002300 if (tagLevel != startTagLevel)
2301 return XML_ERROR_ASYNC_ENTITY;
Fred Drake31d485c2004-08-03 07:06:22 +00002302 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002303 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002304 }
2305 return XML_ERROR_NO_ELEMENTS;
2306 case XML_TOK_INVALID:
2307 *eventPP = next;
2308 return XML_ERROR_INVALID_TOKEN;
2309 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00002310 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002311 *nextPtr = s;
2312 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002313 }
2314 return XML_ERROR_UNCLOSED_TOKEN;
2315 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00002316 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002317 *nextPtr = s;
2318 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002319 }
2320 return XML_ERROR_PARTIAL_CHAR;
2321 case XML_TOK_ENTITY_REF:
2322 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002323 const XML_Char *name;
2324 ENTITY *entity;
2325 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
2326 s + enc->minBytesPerChar,
2327 next - enc->minBytesPerChar);
2328 if (ch) {
2329 if (characterDataHandler)
2330 characterDataHandler(handlerArg, &ch, 1);
2331 else if (defaultHandler)
2332 reportDefault(parser, enc, s, next);
2333 break;
2334 }
2335 name = poolStoreString(&dtd->pool, enc,
2336 s + enc->minBytesPerChar,
2337 next - enc->minBytesPerChar);
2338 if (!name)
2339 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07002340 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002341 poolDiscard(&dtd->pool);
2342 /* First, determine if a check for an existing declaration is needed;
2343 if yes, check that the entity exists, and that it is internal,
2344 otherwise call the skipped entity or default handler.
2345 */
2346 if (!dtd->hasParamEntityRefs || dtd->standalone) {
2347 if (!entity)
2348 return XML_ERROR_UNDEFINED_ENTITY;
2349 else if (!entity->is_internal)
2350 return XML_ERROR_ENTITY_DECLARED_IN_PE;
2351 }
2352 else if (!entity) {
2353 if (skippedEntityHandler)
2354 skippedEntityHandler(handlerArg, name, 0);
2355 else if (defaultHandler)
2356 reportDefault(parser, enc, s, next);
2357 break;
2358 }
2359 if (entity->open)
2360 return XML_ERROR_RECURSIVE_ENTITY_REF;
2361 if (entity->notation)
2362 return XML_ERROR_BINARY_ENTITY_REF;
2363 if (entity->textPtr) {
2364 enum XML_Error result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002365 if (!defaultExpandInternalEntities) {
2366 if (skippedEntityHandler)
2367 skippedEntityHandler(handlerArg, entity->name, 0);
2368 else if (defaultHandler)
2369 reportDefault(parser, enc, s, next);
2370 break;
2371 }
Fred Drake31d485c2004-08-03 07:06:22 +00002372 result = processInternalEntity(parser, entity, XML_FALSE);
2373 if (result != XML_ERROR_NONE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002374 return result;
2375 }
2376 else if (externalEntityRefHandler) {
2377 const XML_Char *context;
2378 entity->open = XML_TRUE;
2379 context = getContext(parser);
2380 entity->open = XML_FALSE;
2381 if (!context)
2382 return XML_ERROR_NO_MEMORY;
Fred Drake31d485c2004-08-03 07:06:22 +00002383 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002384 context,
2385 entity->base,
2386 entity->systemId,
2387 entity->publicId))
2388 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2389 poolDiscard(&tempPool);
2390 }
2391 else if (defaultHandler)
2392 reportDefault(parser, enc, s, next);
2393 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002394 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002395 case XML_TOK_START_TAG_NO_ATTS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002396 /* fall through */
2397 case XML_TOK_START_TAG_WITH_ATTS:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002398 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002399 TAG *tag;
2400 enum XML_Error result;
2401 XML_Char *toPtr;
2402 if (freeTagList) {
2403 tag = freeTagList;
2404 freeTagList = freeTagList->parent;
2405 }
2406 else {
2407 tag = (TAG *)MALLOC(sizeof(TAG));
2408 if (!tag)
2409 return XML_ERROR_NO_MEMORY;
2410 tag->buf = (char *)MALLOC(INIT_TAG_BUF_SIZE);
2411 if (!tag->buf) {
2412 FREE(tag);
2413 return XML_ERROR_NO_MEMORY;
2414 }
2415 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
2416 }
2417 tag->bindings = NULL;
2418 tag->parent = tagStack;
2419 tagStack = tag;
2420 tag->name.localPart = NULL;
2421 tag->name.prefix = NULL;
2422 tag->rawName = s + enc->minBytesPerChar;
2423 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
2424 ++tagLevel;
2425 {
2426 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
2427 const char *fromPtr = tag->rawName;
2428 toPtr = (XML_Char *)tag->buf;
2429 for (;;) {
2430 int bufSize;
2431 int convLen;
2432 XmlConvert(enc,
2433 &fromPtr, rawNameEnd,
2434 (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002435 convLen = (int)(toPtr - (XML_Char *)tag->buf);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002436 if (fromPtr == rawNameEnd) {
2437 tag->name.strLen = convLen;
2438 break;
2439 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002440 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002441 {
2442 char *temp = (char *)REALLOC(tag->buf, bufSize);
2443 if (temp == NULL)
2444 return XML_ERROR_NO_MEMORY;
2445 tag->buf = temp;
2446 tag->bufEnd = temp + bufSize;
2447 toPtr = (XML_Char *)temp + convLen;
2448 }
2449 }
2450 }
2451 tag->name.str = (XML_Char *)tag->buf;
2452 *toPtr = XML_T('\0');
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002453 result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
2454 if (result)
2455 return result;
2456 if (startElementHandler)
2457 startElementHandler(handlerArg, tag->name.str,
2458 (const XML_Char **)atts);
2459 else if (defaultHandler)
2460 reportDefault(parser, enc, s, next);
2461 poolClear(&tempPool);
2462 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002463 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002464 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002465 /* fall through */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002466 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
2467 {
2468 const char *rawName = s + enc->minBytesPerChar;
2469 enum XML_Error result;
2470 BINDING *bindings = NULL;
2471 XML_Bool noElmHandlers = XML_TRUE;
2472 TAG_NAME name;
2473 name.str = poolStoreString(&tempPool, enc, rawName,
2474 rawName + XmlNameLength(enc, rawName));
2475 if (!name.str)
2476 return XML_ERROR_NO_MEMORY;
2477 poolFinish(&tempPool);
Fred Drake4faea012003-01-28 06:42:40 +00002478 result = storeAtts(parser, enc, s, &name, &bindings);
2479 if (result)
2480 return result;
2481 poolFinish(&tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002482 if (startElementHandler) {
2483 startElementHandler(handlerArg, name.str, (const XML_Char **)atts);
2484 noElmHandlers = XML_FALSE;
2485 }
2486 if (endElementHandler) {
2487 if (startElementHandler)
2488 *eventPP = *eventEndPP;
2489 endElementHandler(handlerArg, name.str);
2490 noElmHandlers = XML_FALSE;
2491 }
2492 if (noElmHandlers && defaultHandler)
2493 reportDefault(parser, enc, s, next);
2494 poolClear(&tempPool);
2495 while (bindings) {
2496 BINDING *b = bindings;
2497 if (endNamespaceDeclHandler)
2498 endNamespaceDeclHandler(handlerArg, b->prefix->name);
2499 bindings = bindings->nextTagBinding;
2500 b->nextTagBinding = freeBindingList;
2501 freeBindingList = b;
2502 b->prefix->binding = b->prevPrefixBinding;
2503 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002504 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002505 if (tagLevel == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002506 return epilogProcessor(parser, next, end, nextPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002507 break;
2508 case XML_TOK_END_TAG:
2509 if (tagLevel == startTagLevel)
2510 return XML_ERROR_ASYNC_ENTITY;
2511 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002512 int len;
2513 const char *rawName;
2514 TAG *tag = tagStack;
2515 tagStack = tag->parent;
2516 tag->parent = freeTagList;
2517 freeTagList = tag;
2518 rawName = s + enc->minBytesPerChar*2;
2519 len = XmlNameLength(enc, rawName);
2520 if (len != tag->rawNameLength
2521 || memcmp(tag->rawName, rawName, len) != 0) {
2522 *eventPP = rawName;
2523 return XML_ERROR_TAG_MISMATCH;
2524 }
2525 --tagLevel;
2526 if (endElementHandler) {
2527 const XML_Char *localPart;
2528 const XML_Char *prefix;
2529 XML_Char *uri;
2530 localPart = tag->name.localPart;
2531 if (ns && localPart) {
2532 /* localPart and prefix may have been overwritten in
2533 tag->name.str, since this points to the binding->uri
2534 buffer which gets re-used; so we have to add them again
2535 */
2536 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
2537 /* don't need to check for space - already done in storeAtts() */
2538 while (*localPart) *uri++ = *localPart++;
2539 prefix = (XML_Char *)tag->name.prefix;
2540 if (ns_triplets && prefix) {
2541 *uri++ = namespaceSeparator;
2542 while (*prefix) *uri++ = *prefix++;
2543 }
2544 *uri = XML_T('\0');
2545 }
2546 endElementHandler(handlerArg, tag->name.str);
2547 }
2548 else if (defaultHandler)
2549 reportDefault(parser, enc, s, next);
2550 while (tag->bindings) {
2551 BINDING *b = tag->bindings;
2552 if (endNamespaceDeclHandler)
2553 endNamespaceDeclHandler(handlerArg, b->prefix->name);
2554 tag->bindings = tag->bindings->nextTagBinding;
2555 b->nextTagBinding = freeBindingList;
2556 freeBindingList = b;
2557 b->prefix->binding = b->prevPrefixBinding;
2558 }
2559 if (tagLevel == 0)
2560 return epilogProcessor(parser, next, end, nextPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002561 }
2562 break;
2563 case XML_TOK_CHAR_REF:
2564 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002565 int n = XmlCharRefNumber(enc, s);
2566 if (n < 0)
2567 return XML_ERROR_BAD_CHAR_REF;
2568 if (characterDataHandler) {
2569 XML_Char buf[XML_ENCODE_MAX];
2570 characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
2571 }
2572 else if (defaultHandler)
2573 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002574 }
2575 break;
2576 case XML_TOK_XML_DECL:
2577 return XML_ERROR_MISPLACED_XML_PI;
2578 case XML_TOK_DATA_NEWLINE:
2579 if (characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002580 XML_Char c = 0xA;
2581 characterDataHandler(handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002582 }
2583 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002584 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002585 break;
2586 case XML_TOK_CDATA_SECT_OPEN:
2587 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002588 enum XML_Error result;
2589 if (startCdataSectionHandler)
2590 startCdataSectionHandler(handlerArg);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002591#if 0
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002592 /* Suppose you doing a transformation on a document that involves
2593 changing only the character data. You set up a defaultHandler
2594 and a characterDataHandler. The defaultHandler simply copies
2595 characters through. The characterDataHandler does the
2596 transformation and writes the characters out escaping them as
2597 necessary. This case will fail to work if we leave out the
2598 following two lines (because & and < inside CDATA sections will
2599 be incorrectly escaped).
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002600
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002601 However, now we have a start/endCdataSectionHandler, so it seems
2602 easier to let the user deal with this.
2603 */
2604 else if (characterDataHandler)
2605 characterDataHandler(handlerArg, dataBuf, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002606#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002607 else if (defaultHandler)
2608 reportDefault(parser, enc, s, next);
Fred Drake31d485c2004-08-03 07:06:22 +00002609 result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore);
2610 if (result != XML_ERROR_NONE)
2611 return result;
2612 else if (!next) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002613 processor = cdataSectionProcessor;
2614 return result;
2615 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002616 }
2617 break;
2618 case XML_TOK_TRAILING_RSQB:
Fred Drake31d485c2004-08-03 07:06:22 +00002619 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002620 *nextPtr = s;
2621 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002622 }
2623 if (characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002624 if (MUST_CONVERT(enc, s)) {
2625 ICHAR *dataPtr = (ICHAR *)dataBuf;
2626 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
2627 characterDataHandler(handlerArg, dataBuf,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002628 (int)(dataPtr - (ICHAR *)dataBuf));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002629 }
2630 else
2631 characterDataHandler(handlerArg,
2632 (XML_Char *)s,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002633 (int)((XML_Char *)end - (XML_Char *)s));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002634 }
2635 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002636 reportDefault(parser, enc, s, end);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002637 /* We are at the end of the final buffer, should we check for
2638 XML_SUSPENDED, XML_FINISHED?
Fred Drake31d485c2004-08-03 07:06:22 +00002639 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002640 if (startTagLevel == 0) {
2641 *eventPP = end;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002642 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002643 }
2644 if (tagLevel != startTagLevel) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002645 *eventPP = end;
2646 return XML_ERROR_ASYNC_ENTITY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002647 }
Fred Drake31d485c2004-08-03 07:06:22 +00002648 *nextPtr = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002649 return XML_ERROR_NONE;
2650 case XML_TOK_DATA_CHARS:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002651 {
2652 XML_CharacterDataHandler charDataHandler = characterDataHandler;
2653 if (charDataHandler) {
2654 if (MUST_CONVERT(enc, s)) {
2655 for (;;) {
2656 ICHAR *dataPtr = (ICHAR *)dataBuf;
2657 XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
2658 *eventEndPP = s;
2659 charDataHandler(handlerArg, dataBuf,
2660 (int)(dataPtr - (ICHAR *)dataBuf));
2661 if (s == next)
2662 break;
2663 *eventPP = s;
2664 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002665 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002666 else
2667 charDataHandler(handlerArg,
2668 (XML_Char *)s,
2669 (int)((XML_Char *)next - (XML_Char *)s));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002670 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002671 else if (defaultHandler)
2672 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002673 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002674 break;
2675 case XML_TOK_PI:
2676 if (!reportProcessingInstruction(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002677 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002678 break;
2679 case XML_TOK_COMMENT:
2680 if (!reportComment(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002681 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002682 break;
2683 default:
2684 if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002685 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002686 break;
2687 }
2688 *eventPP = s = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002689 switch (ps_parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002690 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00002691 *nextPtr = next;
2692 return XML_ERROR_NONE;
2693 case XML_FINISHED:
2694 return XML_ERROR_ABORTED;
2695 default: ;
2696 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002697 }
2698 /* not reached */
2699}
2700
Fred Drake4faea012003-01-28 06:42:40 +00002701/* Precondition: all arguments must be non-NULL;
2702 Purpose:
2703 - normalize attributes
2704 - check attributes for well-formedness
2705 - generate namespace aware attribute names (URI, prefix)
2706 - build list of attributes for startElementHandler
2707 - default attributes
2708 - process namespace declarations (check and report them)
2709 - generate namespace aware element name (URI, prefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002710*/
2711static enum XML_Error
2712storeAtts(XML_Parser parser, const ENCODING *enc,
2713 const char *attStr, TAG_NAME *tagNamePtr,
2714 BINDING **bindingsPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002715{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002716 DTD * const dtd = _dtd; /* save one level of indirection */
Fred Drake08317ae2003-10-21 15:38:55 +00002717 ELEMENT_TYPE *elementType;
2718 int nDefaultAtts;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002719 const XML_Char **appAtts; /* the attribute list for the application */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002720 int attIndex = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002721 int prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002722 int i;
2723 int n;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002724 XML_Char *uri;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002725 int nPrefixes = 0;
2726 BINDING *binding;
2727 const XML_Char *localPart;
2728
2729 /* lookup the element type name */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07002730 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str,0);
Fred Drake4faea012003-01-28 06:42:40 +00002731 if (!elementType) {
2732 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
2733 if (!name)
2734 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07002735 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
Fred Drake4faea012003-01-28 06:42:40 +00002736 sizeof(ELEMENT_TYPE));
2737 if (!elementType)
2738 return XML_ERROR_NO_MEMORY;
2739 if (ns && !setElementTypePrefix(parser, elementType))
2740 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002741 }
Fred Drake4faea012003-01-28 06:42:40 +00002742 nDefaultAtts = elementType->nDefaultAtts;
2743
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002744 /* get the attributes from the tokenizer */
2745 n = XmlGetAttributes(enc, attStr, attsSize, atts);
2746 if (n + nDefaultAtts > attsSize) {
2747 int oldAttsSize = attsSize;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002748 ATTRIBUTE *temp;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002749#ifdef XML_ATTR_INFO
2750 XML_AttrInfo *temp2;
2751#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002752 attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002753 temp = (ATTRIBUTE *)REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE));
2754 if (temp == NULL)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002755 return XML_ERROR_NO_MEMORY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002756 atts = temp;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002757#ifdef XML_ATTR_INFO
2758 temp2 = (XML_AttrInfo *)REALLOC((void *)attInfo, attsSize * sizeof(XML_AttrInfo));
2759 if (temp2 == NULL)
2760 return XML_ERROR_NO_MEMORY;
2761 attInfo = temp2;
2762#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002763 if (n > oldAttsSize)
2764 XmlGetAttributes(enc, attStr, n, atts);
2765 }
Fred Drake4faea012003-01-28 06:42:40 +00002766
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002767 appAtts = (const XML_Char **)atts;
2768 for (i = 0; i < n; i++) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002769 ATTRIBUTE *currAtt = &atts[i];
2770#ifdef XML_ATTR_INFO
2771 XML_AttrInfo *currAttInfo = &attInfo[i];
2772#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002773 /* add the name and value to the attribute list */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002774 ATTRIBUTE_ID *attId = getAttributeId(parser, enc, currAtt->name,
2775 currAtt->name
2776 + XmlNameLength(enc, currAtt->name));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002777 if (!attId)
2778 return XML_ERROR_NO_MEMORY;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002779#ifdef XML_ATTR_INFO
2780 currAttInfo->nameStart = parseEndByteIndex - (parseEndPtr - currAtt->name);
2781 currAttInfo->nameEnd = currAttInfo->nameStart +
2782 XmlNameLength(enc, currAtt->name);
2783 currAttInfo->valueStart = parseEndByteIndex -
2784 (parseEndPtr - currAtt->valuePtr);
2785 currAttInfo->valueEnd = parseEndByteIndex - (parseEndPtr - currAtt->valueEnd);
2786#endif
Fred Drake08317ae2003-10-21 15:38:55 +00002787 /* Detect duplicate attributes by their QNames. This does not work when
2788 namespace processing is turned on and different prefixes for the same
2789 namespace are used. For this case we have a check further down.
2790 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002791 if ((attId->name)[-1]) {
2792 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002793 eventPtr = atts[i].name;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002794 return XML_ERROR_DUPLICATE_ATTRIBUTE;
2795 }
2796 (attId->name)[-1] = 1;
2797 appAtts[attIndex++] = attId->name;
2798 if (!atts[i].normalized) {
2799 enum XML_Error result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002800 XML_Bool isCdata = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002801
2802 /* figure out whether declared as other than CDATA */
2803 if (attId->maybeTokenized) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002804 int j;
2805 for (j = 0; j < nDefaultAtts; j++) {
2806 if (attId == elementType->defaultAtts[j].id) {
2807 isCdata = elementType->defaultAtts[j].isCdata;
2808 break;
2809 }
2810 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002811 }
2812
2813 /* normalize the attribute value */
2814 result = storeAttributeValue(parser, enc, isCdata,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002815 atts[i].valuePtr, atts[i].valueEnd,
2816 &tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002817 if (result)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002818 return result;
Fred Drake4faea012003-01-28 06:42:40 +00002819 appAtts[attIndex] = poolStart(&tempPool);
2820 poolFinish(&tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002821 }
Fred Drake4faea012003-01-28 06:42:40 +00002822 else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002823 /* the value did not need normalizing */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002824 appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr,
2825 atts[i].valueEnd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002826 if (appAtts[attIndex] == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002827 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002828 poolFinish(&tempPool);
2829 }
2830 /* handle prefixed attribute names */
Fred Drake4faea012003-01-28 06:42:40 +00002831 if (attId->prefix) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002832 if (attId->xmlns) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002833 /* deal with namespace declarations here */
2834 enum XML_Error result = addBinding(parser, attId->prefix, attId,
2835 appAtts[attIndex], bindingsPtr);
2836 if (result)
2837 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002838 --attIndex;
2839 }
2840 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002841 /* deal with other prefixed names later */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002842 attIndex++;
2843 nPrefixes++;
2844 (attId->name)[-1] = 2;
2845 }
2846 }
2847 else
2848 attIndex++;
2849 }
Fred Drake4faea012003-01-28 06:42:40 +00002850
2851 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
2852 nSpecifiedAtts = attIndex;
2853 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
2854 for (i = 0; i < attIndex; i += 2)
2855 if (appAtts[i] == elementType->idAtt->name) {
2856 idAttIndex = i;
2857 break;
2858 }
2859 }
2860 else
2861 idAttIndex = -1;
2862
2863 /* do attribute defaulting */
2864 for (i = 0; i < nDefaultAtts; i++) {
2865 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
2866 if (!(da->id->name)[-1] && da->value) {
2867 if (da->id->prefix) {
2868 if (da->id->xmlns) {
2869 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
2870 da->value, bindingsPtr);
2871 if (result)
2872 return result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002873 }
2874 else {
Fred Drake4faea012003-01-28 06:42:40 +00002875 (da->id->name)[-1] = 2;
2876 nPrefixes++;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002877 appAtts[attIndex++] = da->id->name;
2878 appAtts[attIndex++] = da->value;
2879 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002880 }
Fred Drake4faea012003-01-28 06:42:40 +00002881 else {
2882 (da->id->name)[-1] = 1;
2883 appAtts[attIndex++] = da->id->name;
2884 appAtts[attIndex++] = da->value;
2885 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002886 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002887 }
Fred Drake4faea012003-01-28 06:42:40 +00002888 appAtts[attIndex] = 0;
2889
Fred Drake08317ae2003-10-21 15:38:55 +00002890 /* expand prefixed attribute names, check for duplicates,
2891 and clear flags that say whether attributes were specified */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002892 i = 0;
2893 if (nPrefixes) {
Fred Drake08317ae2003-10-21 15:38:55 +00002894 int j; /* hash table index */
2895 unsigned long version = nsAttsVersion;
2896 int nsAttsSize = (int)1 << nsAttsPower;
2897 /* size of hash table must be at least 2 * (# of prefixed attributes) */
2898 if ((nPrefixes << 1) >> nsAttsPower) { /* true for nsAttsPower = 0 */
2899 NS_ATT *temp;
2900 /* hash table size must also be a power of 2 and >= 8 */
2901 while (nPrefixes >> nsAttsPower++);
2902 if (nsAttsPower < 3)
2903 nsAttsPower = 3;
2904 nsAttsSize = (int)1 << nsAttsPower;
2905 temp = (NS_ATT *)REALLOC(nsAtts, nsAttsSize * sizeof(NS_ATT));
2906 if (!temp)
2907 return XML_ERROR_NO_MEMORY;
2908 nsAtts = temp;
2909 version = 0; /* force re-initialization of nsAtts hash table */
2910 }
2911 /* using a version flag saves us from initializing nsAtts every time */
2912 if (!version) { /* initialize version flags when version wraps around */
2913 version = INIT_ATTS_VERSION;
2914 for (j = nsAttsSize; j != 0; )
2915 nsAtts[--j].version = version;
2916 }
2917 nsAttsVersion = --version;
2918
2919 /* expand prefixed names and check for duplicates */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002920 for (; i < attIndex; i += 2) {
Fred Drake08317ae2003-10-21 15:38:55 +00002921 const XML_Char *s = appAtts[i];
2922 if (s[-1] == 2) { /* prefixed */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002923 ATTRIBUTE_ID *id;
Fred Drake08317ae2003-10-21 15:38:55 +00002924 const BINDING *b;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07002925 unsigned long uriHash = hash_secret_salt;
Fred Drake08317ae2003-10-21 15:38:55 +00002926 ((XML_Char *)s)[-1] = 0; /* clear flag */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07002927 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002928 if (!id || !id->prefix)
2929 return XML_ERROR_NO_MEMORY;
Fred Drake08317ae2003-10-21 15:38:55 +00002930 b = id->prefix->binding;
2931 if (!b)
2932 return XML_ERROR_UNBOUND_PREFIX;
2933
2934 /* as we expand the name we also calculate its hash value */
2935 for (j = 0; j < b->uriLen; j++) {
2936 const XML_Char c = b->uri[j];
2937 if (!poolAppendChar(&tempPool, c))
2938 return XML_ERROR_NO_MEMORY;
2939 uriHash = CHAR_HASH(uriHash, c);
2940 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002941 while (*s++ != XML_T(ASCII_COLON))
Fred Drake08317ae2003-10-21 15:38:55 +00002942 ;
2943 do { /* copies null terminator */
2944 const XML_Char c = *s;
2945 if (!poolAppendChar(&tempPool, *s))
2946 return XML_ERROR_NO_MEMORY;
2947 uriHash = CHAR_HASH(uriHash, c);
2948 } while (*s++);
2949
2950 { /* Check hash table for duplicate of expanded name (uriName).
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07002951 Derived from code in lookup(parser, HASH_TABLE *table, ...).
Fred Drake08317ae2003-10-21 15:38:55 +00002952 */
2953 unsigned char step = 0;
2954 unsigned long mask = nsAttsSize - 1;
2955 j = uriHash & mask; /* index into hash table */
2956 while (nsAtts[j].version == version) {
2957 /* for speed we compare stored hash values first */
2958 if (uriHash == nsAtts[j].hash) {
2959 const XML_Char *s1 = poolStart(&tempPool);
2960 const XML_Char *s2 = nsAtts[j].uriName;
2961 /* s1 is null terminated, but not s2 */
2962 for (; *s1 == *s2 && *s1 != 0; s1++, s2++);
2963 if (*s1 == 0)
2964 return XML_ERROR_DUPLICATE_ATTRIBUTE;
2965 }
2966 if (!step)
2967 step = PROBE_STEP(uriHash, mask, nsAttsPower);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002968 j < step ? (j += nsAttsSize - step) : (j -= step);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002969 }
Fred Drake08317ae2003-10-21 15:38:55 +00002970 }
2971
2972 if (ns_triplets) { /* append namespace separator and prefix */
2973 tempPool.ptr[-1] = namespaceSeparator;
2974 s = b->prefix->name;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002975 do {
2976 if (!poolAppendChar(&tempPool, *s))
2977 return XML_ERROR_NO_MEMORY;
2978 } while (*s++);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002979 }
Fred Drake08317ae2003-10-21 15:38:55 +00002980
2981 /* store expanded name in attribute list */
2982 s = poolStart(&tempPool);
2983 poolFinish(&tempPool);
2984 appAtts[i] = s;
2985
2986 /* fill empty slot with new version, uriName and hash value */
2987 nsAtts[j].version = version;
2988 nsAtts[j].hash = uriHash;
2989 nsAtts[j].uriName = s;
2990
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002991 if (!--nPrefixes) {
2992 i += 2;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002993 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002994 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002995 }
Fred Drake08317ae2003-10-21 15:38:55 +00002996 else /* not prefixed */
2997 ((XML_Char *)s)[-1] = 0; /* clear flag */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002998 }
2999 }
Fred Drake08317ae2003-10-21 15:38:55 +00003000 /* clear flags for the remaining attributes */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003001 for (; i < attIndex; i += 2)
3002 ((XML_Char *)(appAtts[i]))[-1] = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003003 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3004 binding->attId->name[-1] = 0;
Fred Drake4faea012003-01-28 06:42:40 +00003005
Fred Drake08317ae2003-10-21 15:38:55 +00003006 if (!ns)
3007 return XML_ERROR_NONE;
3008
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003009 /* expand the element type name */
3010 if (elementType->prefix) {
3011 binding = elementType->prefix->binding;
3012 if (!binding)
Fred Drake08317ae2003-10-21 15:38:55 +00003013 return XML_ERROR_UNBOUND_PREFIX;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003014 localPart = tagNamePtr->str;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003015 while (*localPart++ != XML_T(ASCII_COLON))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003016 ;
3017 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003018 else if (dtd->defaultPrefix.binding) {
3019 binding = dtd->defaultPrefix.binding;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003020 localPart = tagNamePtr->str;
3021 }
3022 else
3023 return XML_ERROR_NONE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003024 prefixLen = 0;
Fred Drake08317ae2003-10-21 15:38:55 +00003025 if (ns_triplets && binding->prefix->name) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003026 for (; binding->prefix->name[prefixLen++];)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003027 ; /* prefixLen includes null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003028 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003029 tagNamePtr->localPart = localPart;
3030 tagNamePtr->uriLen = binding->uriLen;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003031 tagNamePtr->prefix = binding->prefix->name;
3032 tagNamePtr->prefixLen = prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003033 for (i = 0; localPart[i++];)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003034 ; /* i includes null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003035 n = i + binding->uriLen + prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003036 if (n > binding->uriAlloc) {
3037 TAG *p;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003038 uri = (XML_Char *)MALLOC((n + EXPAND_SPARE) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003039 if (!uri)
3040 return XML_ERROR_NO_MEMORY;
3041 binding->uriAlloc = n + EXPAND_SPARE;
3042 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3043 for (p = tagStack; p; p = p->parent)
3044 if (p->name.str == binding->uri)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003045 p->name.str = uri;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003046 FREE(binding->uri);
3047 binding->uri = uri;
3048 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003049 /* if namespaceSeparator != '\0' then uri includes it already */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003050 uri = binding->uri + binding->uriLen;
3051 memcpy(uri, localPart, i * sizeof(XML_Char));
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003052 /* we always have a namespace separator between localPart and prefix */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003053 if (prefixLen) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003054 uri += i - 1;
3055 *uri = namespaceSeparator; /* replace null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003056 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3057 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003058 tagNamePtr->str = binding->uri;
3059 return XML_ERROR_NONE;
3060}
3061
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003062/* addBinding() overwrites the value of prefix->binding without checking.
3063 Therefore one must keep track of the old value outside of addBinding().
3064*/
3065static enum XML_Error
3066addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3067 const XML_Char *uri, BINDING **bindingsPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003068{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003069 static const XML_Char xmlNamespace[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003070 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3071 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3072 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
3073 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, ASCII_SLASH,
3074 ASCII_n, ASCII_a, ASCII_m, ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3075 ASCII_e, '\0'
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003076 };
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003077 static const int xmlLen =
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003078 (int)sizeof(xmlNamespace)/sizeof(XML_Char) - 1;
3079 static const XML_Char xmlnsNamespace[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003080 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3081 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3082 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_2, ASCII_0, ASCII_0,
3083 ASCII_0, ASCII_SLASH, ASCII_x, ASCII_m, ASCII_l, ASCII_n, ASCII_s,
3084 ASCII_SLASH, '\0'
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003085 };
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003086 static const int xmlnsLen =
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003087 (int)sizeof(xmlnsNamespace)/sizeof(XML_Char) - 1;
3088
3089 XML_Bool mustBeXML = XML_FALSE;
3090 XML_Bool isXML = XML_TRUE;
3091 XML_Bool isXMLNS = XML_TRUE;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003092
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003093 BINDING *b;
3094 int len;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003095
Fred Drake31d485c2004-08-03 07:06:22 +00003096 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003097 if (*uri == XML_T('\0') && prefix->name)
Fred Drake31d485c2004-08-03 07:06:22 +00003098 return XML_ERROR_UNDECLARING_PREFIX;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003099
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003100 if (prefix->name
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003101 && prefix->name[0] == XML_T(ASCII_x)
3102 && prefix->name[1] == XML_T(ASCII_m)
3103 && prefix->name[2] == XML_T(ASCII_l)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003104
3105 /* Not allowed to bind xmlns */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003106 if (prefix->name[3] == XML_T(ASCII_n)
3107 && prefix->name[4] == XML_T(ASCII_s)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003108 && prefix->name[5] == XML_T('\0'))
3109 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3110
3111 if (prefix->name[3] == XML_T('\0'))
3112 mustBeXML = XML_TRUE;
3113 }
3114
3115 for (len = 0; uri[len]; len++) {
3116 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3117 isXML = XML_FALSE;
3118
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003119 if (!mustBeXML && isXMLNS
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003120 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3121 isXMLNS = XML_FALSE;
3122 }
3123 isXML = isXML && len == xmlLen;
3124 isXMLNS = isXMLNS && len == xmlnsLen;
3125
3126 if (mustBeXML != isXML)
3127 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3128 : XML_ERROR_RESERVED_NAMESPACE_URI;
3129
3130 if (isXMLNS)
3131 return XML_ERROR_RESERVED_NAMESPACE_URI;
3132
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003133 if (namespaceSeparator)
3134 len++;
3135 if (freeBindingList) {
3136 b = freeBindingList;
3137 if (len > b->uriAlloc) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003138 XML_Char *temp = (XML_Char *)REALLOC(b->uri,
3139 sizeof(XML_Char) * (len + EXPAND_SPARE));
3140 if (temp == NULL)
3141 return XML_ERROR_NO_MEMORY;
3142 b->uri = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003143 b->uriAlloc = len + EXPAND_SPARE;
3144 }
3145 freeBindingList = b->nextTagBinding;
3146 }
3147 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003148 b = (BINDING *)MALLOC(sizeof(BINDING));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003149 if (!b)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003150 return XML_ERROR_NO_MEMORY;
3151 b->uri = (XML_Char *)MALLOC(sizeof(XML_Char) * (len + EXPAND_SPARE));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003152 if (!b->uri) {
3153 FREE(b);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003154 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003155 }
3156 b->uriAlloc = len + EXPAND_SPARE;
3157 }
3158 b->uriLen = len;
3159 memcpy(b->uri, uri, len * sizeof(XML_Char));
3160 if (namespaceSeparator)
3161 b->uri[len - 1] = namespaceSeparator;
3162 b->prefix = prefix;
3163 b->attId = attId;
3164 b->prevPrefixBinding = prefix->binding;
Fred Drake08317ae2003-10-21 15:38:55 +00003165 /* NULL binding when default namespace undeclared */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003166 if (*uri == XML_T('\0') && prefix == &_dtd->defaultPrefix)
3167 prefix->binding = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003168 else
3169 prefix->binding = b;
3170 b->nextTagBinding = *bindingsPtr;
3171 *bindingsPtr = b;
Fred Drake31d485c2004-08-03 07:06:22 +00003172 /* if attId == NULL then we are not starting a namespace scope */
3173 if (attId && startNamespaceDeclHandler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003174 startNamespaceDeclHandler(handlerArg, prefix->name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003175 prefix->binding ? uri : 0);
3176 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003177}
3178
3179/* The idea here is to avoid using stack for each CDATA section when
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003180 the whole file is parsed with one call.
3181*/
3182static enum XML_Error PTRCALL
3183cdataSectionProcessor(XML_Parser parser,
3184 const char *start,
3185 const char *end,
3186 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003187{
Fred Drake31d485c2004-08-03 07:06:22 +00003188 enum XML_Error result = doCdataSection(parser, encoding, &start, end,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003189 endPtr, (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00003190 if (result != XML_ERROR_NONE)
3191 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003192 if (start) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003193 if (parentParser) { /* we are parsing an external entity */
3194 processor = externalEntityContentProcessor;
3195 return externalEntityContentProcessor(parser, start, end, endPtr);
3196 }
3197 else {
3198 processor = contentProcessor;
3199 return contentProcessor(parser, start, end, endPtr);
3200 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003201 }
3202 return result;
3203}
3204
Fred Drake31d485c2004-08-03 07:06:22 +00003205/* startPtr gets set to non-null if the section is closed, and to null if
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003206 the section is not yet closed.
3207*/
3208static enum XML_Error
3209doCdataSection(XML_Parser parser,
3210 const ENCODING *enc,
3211 const char **startPtr,
3212 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00003213 const char **nextPtr,
3214 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003215{
3216 const char *s = *startPtr;
3217 const char **eventPP;
3218 const char **eventEndPP;
3219 if (enc == encoding) {
3220 eventPP = &eventPtr;
3221 *eventPP = s;
3222 eventEndPP = &eventEndPtr;
3223 }
3224 else {
3225 eventPP = &(openInternalEntities->internalEventPtr);
3226 eventEndPP = &(openInternalEntities->internalEventEndPtr);
3227 }
3228 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003229 *startPtr = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00003230
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003231 for (;;) {
3232 const char *next;
3233 int tok = XmlCdataSectionTok(enc, s, end, &next);
3234 *eventEndPP = next;
3235 switch (tok) {
3236 case XML_TOK_CDATA_SECT_CLOSE:
3237 if (endCdataSectionHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003238 endCdataSectionHandler(handlerArg);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003239#if 0
3240 /* see comment under XML_TOK_CDATA_SECT_OPEN */
3241 else if (characterDataHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003242 characterDataHandler(handlerArg, dataBuf, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003243#endif
3244 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003245 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003246 *startPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00003247 *nextPtr = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003248 if (ps_parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00003249 return XML_ERROR_ABORTED;
3250 else
3251 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003252 case XML_TOK_DATA_NEWLINE:
3253 if (characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003254 XML_Char c = 0xA;
3255 characterDataHandler(handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003256 }
3257 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003258 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003259 break;
3260 case XML_TOK_DATA_CHARS:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003261 {
3262 XML_CharacterDataHandler charDataHandler = characterDataHandler;
3263 if (charDataHandler) {
3264 if (MUST_CONVERT(enc, s)) {
3265 for (;;) {
3266 ICHAR *dataPtr = (ICHAR *)dataBuf;
3267 XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
3268 *eventEndPP = next;
3269 charDataHandler(handlerArg, dataBuf,
3270 (int)(dataPtr - (ICHAR *)dataBuf));
3271 if (s == next)
3272 break;
3273 *eventPP = s;
3274 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003275 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003276 else
3277 charDataHandler(handlerArg,
3278 (XML_Char *)s,
3279 (int)((XML_Char *)next - (XML_Char *)s));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003280 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003281 else if (defaultHandler)
3282 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003283 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003284 break;
3285 case XML_TOK_INVALID:
3286 *eventPP = next;
3287 return XML_ERROR_INVALID_TOKEN;
3288 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003289 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003290 *nextPtr = s;
3291 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003292 }
3293 return XML_ERROR_PARTIAL_CHAR;
3294 case XML_TOK_PARTIAL:
3295 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00003296 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003297 *nextPtr = s;
3298 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003299 }
3300 return XML_ERROR_UNCLOSED_CDATA_SECTION;
3301 default:
3302 *eventPP = next;
3303 return XML_ERROR_UNEXPECTED_STATE;
3304 }
Fred Drake31d485c2004-08-03 07:06:22 +00003305
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003306 *eventPP = s = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003307 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00003308 case XML_SUSPENDED:
3309 *nextPtr = next;
3310 return XML_ERROR_NONE;
3311 case XML_FINISHED:
3312 return XML_ERROR_ABORTED;
3313 default: ;
3314 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003315 }
3316 /* not reached */
3317}
3318
3319#ifdef XML_DTD
3320
3321/* The idea here is to avoid using stack for each IGNORE section when
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003322 the whole file is parsed with one call.
3323*/
3324static enum XML_Error PTRCALL
3325ignoreSectionProcessor(XML_Parser parser,
3326 const char *start,
3327 const char *end,
3328 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003329{
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003330 enum XML_Error result = doIgnoreSection(parser, encoding, &start, end,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003331 endPtr, (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00003332 if (result != XML_ERROR_NONE)
3333 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003334 if (start) {
3335 processor = prologProcessor;
3336 return prologProcessor(parser, start, end, endPtr);
3337 }
3338 return result;
3339}
3340
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003341/* startPtr gets set to non-null is the section is closed, and to null
3342 if the section is not yet closed.
3343*/
3344static enum XML_Error
3345doIgnoreSection(XML_Parser parser,
3346 const ENCODING *enc,
3347 const char **startPtr,
3348 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00003349 const char **nextPtr,
3350 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003351{
3352 const char *next;
3353 int tok;
3354 const char *s = *startPtr;
3355 const char **eventPP;
3356 const char **eventEndPP;
3357 if (enc == encoding) {
3358 eventPP = &eventPtr;
3359 *eventPP = s;
3360 eventEndPP = &eventEndPtr;
3361 }
3362 else {
3363 eventPP = &(openInternalEntities->internalEventPtr);
3364 eventEndPP = &(openInternalEntities->internalEventEndPtr);
3365 }
3366 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003367 *startPtr = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003368 tok = XmlIgnoreSectionTok(enc, s, end, &next);
3369 *eventEndPP = next;
3370 switch (tok) {
3371 case XML_TOK_IGNORE_SECT:
3372 if (defaultHandler)
3373 reportDefault(parser, enc, s, next);
3374 *startPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00003375 *nextPtr = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003376 if (ps_parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00003377 return XML_ERROR_ABORTED;
3378 else
3379 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003380 case XML_TOK_INVALID:
3381 *eventPP = next;
3382 return XML_ERROR_INVALID_TOKEN;
3383 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003384 if (haveMore) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003385 *nextPtr = s;
3386 return XML_ERROR_NONE;
3387 }
3388 return XML_ERROR_PARTIAL_CHAR;
3389 case XML_TOK_PARTIAL:
3390 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00003391 if (haveMore) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003392 *nextPtr = s;
3393 return XML_ERROR_NONE;
3394 }
3395 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
3396 default:
3397 *eventPP = next;
3398 return XML_ERROR_UNEXPECTED_STATE;
3399 }
3400 /* not reached */
3401}
3402
3403#endif /* XML_DTD */
3404
3405static enum XML_Error
3406initializeEncoding(XML_Parser parser)
3407{
3408 const char *s;
3409#ifdef XML_UNICODE
3410 char encodingBuf[128];
3411 if (!protocolEncodingName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003412 s = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003413 else {
3414 int i;
3415 for (i = 0; protocolEncodingName[i]; i++) {
3416 if (i == sizeof(encodingBuf) - 1
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003417 || (protocolEncodingName[i] & ~0x7f) != 0) {
3418 encodingBuf[0] = '\0';
3419 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003420 }
3421 encodingBuf[i] = (char)protocolEncodingName[i];
3422 }
3423 encodingBuf[i] = '\0';
3424 s = encodingBuf;
3425 }
3426#else
3427 s = protocolEncodingName;
3428#endif
3429 if ((ns ? XmlInitEncodingNS : XmlInitEncoding)(&initEncoding, &encoding, s))
3430 return XML_ERROR_NONE;
3431 return handleUnknownEncoding(parser, protocolEncodingName);
3432}
3433
3434static enum XML_Error
3435processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003436 const char *s, const char *next)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003437{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003438 const char *encodingName = NULL;
3439 const XML_Char *storedEncName = NULL;
3440 const ENCODING *newEncoding = NULL;
3441 const char *version = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003442 const char *versionend;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003443 const XML_Char *storedversion = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003444 int standalone = -1;
3445 if (!(ns
3446 ? XmlParseXmlDeclNS
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003447 : XmlParseXmlDecl)(isGeneralTextEntity,
3448 encoding,
3449 s,
3450 next,
3451 &eventPtr,
3452 &version,
3453 &versionend,
3454 &encodingName,
3455 &newEncoding,
Fred Drake31d485c2004-08-03 07:06:22 +00003456 &standalone)) {
3457 if (isGeneralTextEntity)
3458 return XML_ERROR_TEXT_DECL;
3459 else
3460 return XML_ERROR_XML_DECL;
3461 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003462 if (!isGeneralTextEntity && standalone == 1) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003463 _dtd->standalone = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003464#ifdef XML_DTD
3465 if (paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
3466 paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
3467#endif /* XML_DTD */
3468 }
3469 if (xmlDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003470 if (encodingName != NULL) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003471 storedEncName = poolStoreString(&temp2Pool,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003472 encoding,
3473 encodingName,
3474 encodingName
3475 + XmlNameLength(encoding, encodingName));
3476 if (!storedEncName)
3477 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003478 poolFinish(&temp2Pool);
3479 }
3480 if (version) {
3481 storedversion = poolStoreString(&temp2Pool,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003482 encoding,
3483 version,
3484 versionend - encoding->minBytesPerChar);
3485 if (!storedversion)
3486 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003487 }
3488 xmlDeclHandler(handlerArg, storedversion, storedEncName, standalone);
3489 }
3490 else if (defaultHandler)
3491 reportDefault(parser, encoding, s, next);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003492 if (protocolEncodingName == NULL) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003493 if (newEncoding) {
3494 if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003495 eventPtr = encodingName;
3496 return XML_ERROR_INCORRECT_ENCODING;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003497 }
3498 encoding = newEncoding;
3499 }
3500 else if (encodingName) {
3501 enum XML_Error result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003502 if (!storedEncName) {
3503 storedEncName = poolStoreString(
3504 &temp2Pool, encoding, encodingName,
3505 encodingName + XmlNameLength(encoding, encodingName));
3506 if (!storedEncName)
3507 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003508 }
3509 result = handleUnknownEncoding(parser, storedEncName);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003510 poolClear(&temp2Pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003511 if (result == XML_ERROR_UNKNOWN_ENCODING)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003512 eventPtr = encodingName;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003513 return result;
3514 }
3515 }
3516
3517 if (storedEncName || storedversion)
3518 poolClear(&temp2Pool);
3519
3520 return XML_ERROR_NONE;
3521}
3522
3523static enum XML_Error
3524handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
3525{
3526 if (unknownEncodingHandler) {
3527 XML_Encoding info;
3528 int i;
3529 for (i = 0; i < 256; i++)
3530 info.map[i] = -1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003531 info.convert = NULL;
3532 info.data = NULL;
3533 info.release = NULL;
3534 if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName,
3535 &info)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003536 ENCODING *enc;
3537 unknownEncodingMem = MALLOC(XmlSizeOfUnknownEncoding());
3538 if (!unknownEncodingMem) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003539 if (info.release)
3540 info.release(info.data);
3541 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003542 }
3543 enc = (ns
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003544 ? XmlInitUnknownEncodingNS
3545 : XmlInitUnknownEncoding)(unknownEncodingMem,
3546 info.map,
3547 info.convert,
3548 info.data);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003549 if (enc) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003550 unknownEncodingData = info.data;
3551 unknownEncodingRelease = info.release;
3552 encoding = enc;
3553 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003554 }
3555 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003556 if (info.release != NULL)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003557 info.release(info.data);
3558 }
3559 return XML_ERROR_UNKNOWN_ENCODING;
3560}
3561
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003562static enum XML_Error PTRCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003563prologInitProcessor(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003564 const char *s,
3565 const char *end,
3566 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003567{
3568 enum XML_Error result = initializeEncoding(parser);
3569 if (result != XML_ERROR_NONE)
3570 return result;
3571 processor = prologProcessor;
3572 return prologProcessor(parser, s, end, nextPtr);
3573}
3574
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003575#ifdef XML_DTD
3576
3577static enum XML_Error PTRCALL
3578externalParEntInitProcessor(XML_Parser parser,
3579 const char *s,
3580 const char *end,
3581 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003582{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003583 enum XML_Error result = initializeEncoding(parser);
3584 if (result != XML_ERROR_NONE)
3585 return result;
3586
3587 /* we know now that XML_Parse(Buffer) has been called,
3588 so we consider the external parameter entity read */
3589 _dtd->paramEntityRead = XML_TRUE;
3590
3591 if (prologState.inEntityValue) {
3592 processor = entityValueInitProcessor;
3593 return entityValueInitProcessor(parser, s, end, nextPtr);
3594 }
3595 else {
3596 processor = externalParEntProcessor;
3597 return externalParEntProcessor(parser, s, end, nextPtr);
3598 }
3599}
3600
3601static enum XML_Error PTRCALL
3602entityValueInitProcessor(XML_Parser parser,
3603 const char *s,
3604 const char *end,
3605 const char **nextPtr)
3606{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003607 int tok;
Fred Drake31d485c2004-08-03 07:06:22 +00003608 const char *start = s;
3609 const char *next = start;
3610 eventPtr = start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003611
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003612 for (;;) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003613 tok = XmlPrologTok(encoding, start, end, &next);
Fred Drake31d485c2004-08-03 07:06:22 +00003614 eventEndPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003615 if (tok <= 0) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003616 if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
Fred Drake31d485c2004-08-03 07:06:22 +00003617 *nextPtr = s;
3618 return XML_ERROR_NONE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003619 }
3620 switch (tok) {
3621 case XML_TOK_INVALID:
Fred Drake31d485c2004-08-03 07:06:22 +00003622 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003623 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00003624 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003625 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003626 return XML_ERROR_PARTIAL_CHAR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003627 case XML_TOK_NONE: /* start == end */
3628 default:
3629 break;
3630 }
Fred Drake31d485c2004-08-03 07:06:22 +00003631 /* found end of entity value - can store it now */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003632 return storeEntityValue(parser, encoding, s, end);
3633 }
3634 else if (tok == XML_TOK_XML_DECL) {
Fred Drake31d485c2004-08-03 07:06:22 +00003635 enum XML_Error result;
3636 result = processXmlDecl(parser, 0, start, next);
3637 if (result != XML_ERROR_NONE)
3638 return result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003639 switch (ps_parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003640 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00003641 *nextPtr = next;
3642 return XML_ERROR_NONE;
3643 case XML_FINISHED:
3644 return XML_ERROR_ABORTED;
3645 default:
3646 *nextPtr = next;
3647 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003648 /* stop scanning for text declaration - we found one */
3649 processor = entityValueProcessor;
3650 return entityValueProcessor(parser, next, end, nextPtr);
3651 }
3652 /* If we are at the end of the buffer, this would cause XmlPrologTok to
3653 return XML_TOK_NONE on the next call, which would then cause the
3654 function to exit with *nextPtr set to s - that is what we want for other
3655 tokens, but not for the BOM - we would rather like to skip it;
3656 then, when this routine is entered the next time, XmlPrologTok will
3657 return XML_TOK_INVALID, since the BOM is still in the buffer
3658 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003659 else if (tok == XML_TOK_BOM && next == end && !ps_finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003660 *nextPtr = next;
3661 return XML_ERROR_NONE;
3662 }
3663 start = next;
Fred Drake31d485c2004-08-03 07:06:22 +00003664 eventPtr = start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003665 }
3666}
3667
3668static enum XML_Error PTRCALL
3669externalParEntProcessor(XML_Parser parser,
3670 const char *s,
3671 const char *end,
3672 const char **nextPtr)
3673{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003674 const char *next = s;
3675 int tok;
3676
Fred Drake31d485c2004-08-03 07:06:22 +00003677 tok = XmlPrologTok(encoding, s, end, &next);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003678 if (tok <= 0) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003679 if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003680 *nextPtr = s;
3681 return XML_ERROR_NONE;
3682 }
3683 switch (tok) {
3684 case XML_TOK_INVALID:
3685 return XML_ERROR_INVALID_TOKEN;
3686 case XML_TOK_PARTIAL:
3687 return XML_ERROR_UNCLOSED_TOKEN;
3688 case XML_TOK_PARTIAL_CHAR:
3689 return XML_ERROR_PARTIAL_CHAR;
3690 case XML_TOK_NONE: /* start == end */
3691 default:
3692 break;
3693 }
3694 }
3695 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
3696 However, when parsing an external subset, doProlog will not accept a BOM
3697 as valid, and report a syntax error, so we have to skip the BOM
3698 */
3699 else if (tok == XML_TOK_BOM) {
3700 s = next;
3701 tok = XmlPrologTok(encoding, s, end, &next);
3702 }
3703
3704 processor = prologProcessor;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003705 return doProlog(parser, encoding, s, end, tok, next,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003706 nextPtr, (XML_Bool)!ps_finalBuffer);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003707}
3708
3709static enum XML_Error PTRCALL
3710entityValueProcessor(XML_Parser parser,
3711 const char *s,
3712 const char *end,
3713 const char **nextPtr)
3714{
3715 const char *start = s;
3716 const char *next = s;
3717 const ENCODING *enc = encoding;
3718 int tok;
3719
3720 for (;;) {
3721 tok = XmlPrologTok(enc, start, end, &next);
3722 if (tok <= 0) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003723 if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003724 *nextPtr = s;
3725 return XML_ERROR_NONE;
3726 }
3727 switch (tok) {
3728 case XML_TOK_INVALID:
Fred Drake31d485c2004-08-03 07:06:22 +00003729 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003730 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00003731 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003732 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003733 return XML_ERROR_PARTIAL_CHAR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003734 case XML_TOK_NONE: /* start == end */
3735 default:
3736 break;
3737 }
Fred Drake31d485c2004-08-03 07:06:22 +00003738 /* found end of entity value - can store it now */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003739 return storeEntityValue(parser, enc, s, end);
3740 }
3741 start = next;
3742 }
3743}
3744
3745#endif /* XML_DTD */
3746
3747static enum XML_Error PTRCALL
3748prologProcessor(XML_Parser parser,
3749 const char *s,
3750 const char *end,
3751 const char **nextPtr)
3752{
3753 const char *next = s;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003754 int tok = XmlPrologTok(encoding, s, end, &next);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003755 return doProlog(parser, encoding, s, end, tok, next,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003756 nextPtr, (XML_Bool)!ps_finalBuffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003757}
3758
3759static enum XML_Error
3760doProlog(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003761 const ENCODING *enc,
3762 const char *s,
3763 const char *end,
3764 int tok,
3765 const char *next,
Fred Drake31d485c2004-08-03 07:06:22 +00003766 const char **nextPtr,
3767 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003768{
3769#ifdef XML_DTD
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003770 static const XML_Char externalSubsetName[] = { ASCII_HASH , '\0' };
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003771#endif /* XML_DTD */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003772 static const XML_Char atypeCDATA[] =
3773 { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
3774 static const XML_Char atypeID[] = { ASCII_I, ASCII_D, '\0' };
3775 static const XML_Char atypeIDREF[] =
3776 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
3777 static const XML_Char atypeIDREFS[] =
3778 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
3779 static const XML_Char atypeENTITY[] =
3780 { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
3781 static const XML_Char atypeENTITIES[] = { ASCII_E, ASCII_N,
3782 ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' };
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003783 static const XML_Char atypeNMTOKEN[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003784 ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
3785 static const XML_Char atypeNMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T,
3786 ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' };
3787 static const XML_Char notationPrefix[] = { ASCII_N, ASCII_O, ASCII_T,
3788 ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0' };
3789 static const XML_Char enumValueSep[] = { ASCII_PIPE, '\0' };
3790 static const XML_Char enumValueStart[] = { ASCII_LPAREN, '\0' };
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003791
Fred Drake31d485c2004-08-03 07:06:22 +00003792 /* save one level of indirection */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003793 DTD * const dtd = _dtd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003794
3795 const char **eventPP;
3796 const char **eventEndPP;
3797 enum XML_Content_Quant quant;
3798
3799 if (enc == encoding) {
3800 eventPP = &eventPtr;
3801 eventEndPP = &eventEndPtr;
3802 }
3803 else {
3804 eventPP = &(openInternalEntities->internalEventPtr);
3805 eventEndPP = &(openInternalEntities->internalEventEndPtr);
3806 }
Fred Drake31d485c2004-08-03 07:06:22 +00003807
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003808 for (;;) {
3809 int role;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003810 XML_Bool handleDefault = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003811 *eventPP = s;
3812 *eventEndPP = next;
3813 if (tok <= 0) {
Fred Drake31d485c2004-08-03 07:06:22 +00003814 if (haveMore && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003815 *nextPtr = s;
3816 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003817 }
3818 switch (tok) {
3819 case XML_TOK_INVALID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003820 *eventPP = next;
3821 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003822 case XML_TOK_PARTIAL:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003823 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003824 case XML_TOK_PARTIAL_CHAR:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003825 return XML_ERROR_PARTIAL_CHAR;
Matthias Klose865e33b2010-01-22 01:13:15 +00003826 case -XML_TOK_PROLOG_S:
3827 tok = -tok;
3828 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003829 case XML_TOK_NONE:
3830#ifdef XML_DTD
Fred Drake31d485c2004-08-03 07:06:22 +00003831 /* for internal PE NOT referenced between declarations */
3832 if (enc != encoding && !openInternalEntities->betweenDecl) {
3833 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003834 return XML_ERROR_NONE;
Fred Drake31d485c2004-08-03 07:06:22 +00003835 }
3836 /* WFC: PE Between Declarations - must check that PE contains
3837 complete markup, not only for external PEs, but also for
3838 internal PEs if the reference occurs between declarations.
3839 */
3840 if (isParamEntity || enc != encoding) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003841 if (XmlTokenRole(&prologState, XML_TOK_NONE, end, end, enc)
3842 == XML_ROLE_ERROR)
Fred Drake31d485c2004-08-03 07:06:22 +00003843 return XML_ERROR_INCOMPLETE_PE;
3844 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003845 return XML_ERROR_NONE;
3846 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003847#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003848 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003849 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003850 tok = -tok;
3851 next = end;
3852 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003853 }
3854 }
3855 role = XmlTokenRole(&prologState, tok, s, next, enc);
3856 switch (role) {
3857 case XML_ROLE_XML_DECL:
3858 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003859 enum XML_Error result = processXmlDecl(parser, 0, s, next);
3860 if (result != XML_ERROR_NONE)
3861 return result;
3862 enc = encoding;
3863 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003864 }
3865 break;
3866 case XML_ROLE_DOCTYPE_NAME:
3867 if (startDoctypeDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003868 doctypeName = poolStoreString(&tempPool, enc, s, next);
3869 if (!doctypeName)
3870 return XML_ERROR_NO_MEMORY;
3871 poolFinish(&tempPool);
3872 doctypePubid = NULL;
3873 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003874 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003875 doctypeSysid = NULL; /* always initialize to NULL */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003876 break;
3877 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
3878 if (startDoctypeDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003879 startDoctypeDeclHandler(handlerArg, doctypeName, doctypeSysid,
3880 doctypePubid, 1);
3881 doctypeName = NULL;
3882 poolClear(&tempPool);
3883 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003884 }
3885 break;
3886#ifdef XML_DTD
3887 case XML_ROLE_TEXT_DECL:
3888 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003889 enum XML_Error result = processXmlDecl(parser, 1, s, next);
3890 if (result != XML_ERROR_NONE)
3891 return result;
3892 enc = encoding;
3893 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003894 }
3895 break;
3896#endif /* XML_DTD */
3897 case XML_ROLE_DOCTYPE_PUBLIC_ID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003898#ifdef XML_DTD
3899 useForeignDTD = XML_FALSE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003900 declEntity = (ENTITY *)lookup(parser,
3901 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003902 externalSubsetName,
3903 sizeof(ENTITY));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003904 if (!declEntity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003905 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003906#endif /* XML_DTD */
Fred Drake31d485c2004-08-03 07:06:22 +00003907 dtd->hasParamEntityRefs = XML_TRUE;
3908 if (startDoctypeDeclHandler) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003909 XML_Char *pubId;
Fred Drake31d485c2004-08-03 07:06:22 +00003910 if (!XmlIsPublicId(enc, s, next, eventPP))
3911 return XML_ERROR_PUBLICID;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003912 pubId = poolStoreString(&tempPool, enc,
3913 s + enc->minBytesPerChar,
3914 next - enc->minBytesPerChar);
3915 if (!pubId)
Fred Drake31d485c2004-08-03 07:06:22 +00003916 return XML_ERROR_NO_MEMORY;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003917 normalizePublicId(pubId);
Fred Drake31d485c2004-08-03 07:06:22 +00003918 poolFinish(&tempPool);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003919 doctypePubid = pubId;
Fred Drake31d485c2004-08-03 07:06:22 +00003920 handleDefault = XML_FALSE;
3921 goto alreadyChecked;
3922 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003923 /* fall through */
3924 case XML_ROLE_ENTITY_PUBLIC_ID:
3925 if (!XmlIsPublicId(enc, s, next, eventPP))
Fred Drake31d485c2004-08-03 07:06:22 +00003926 return XML_ERROR_PUBLICID;
3927 alreadyChecked:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003928 if (dtd->keepProcessing && declEntity) {
3929 XML_Char *tem = poolStoreString(&dtd->pool,
3930 enc,
3931 s + enc->minBytesPerChar,
3932 next - enc->minBytesPerChar);
3933 if (!tem)
3934 return XML_ERROR_NO_MEMORY;
3935 normalizePublicId(tem);
3936 declEntity->publicId = tem;
3937 poolFinish(&dtd->pool);
3938 if (entityDeclHandler)
3939 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003940 }
3941 break;
3942 case XML_ROLE_DOCTYPE_CLOSE:
3943 if (doctypeName) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003944 startDoctypeDeclHandler(handlerArg, doctypeName,
3945 doctypeSysid, doctypePubid, 0);
3946 poolClear(&tempPool);
3947 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003948 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003949 /* doctypeSysid will be non-NULL in the case of a previous
3950 XML_ROLE_DOCTYPE_SYSTEM_ID, even if startDoctypeDeclHandler
3951 was not set, indicating an external subset
3952 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003953#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003954 if (doctypeSysid || useForeignDTD) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003955 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
3956 dtd->hasParamEntityRefs = XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003957 if (paramEntityParsing && externalEntityRefHandler) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003958 ENTITY *entity = (ENTITY *)lookup(parser,
3959 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003960 externalSubsetName,
3961 sizeof(ENTITY));
3962 if (!entity)
3963 return XML_ERROR_NO_MEMORY;
3964 if (useForeignDTD)
3965 entity->base = curBase;
3966 dtd->paramEntityRead = XML_FALSE;
3967 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
3968 0,
3969 entity->base,
3970 entity->systemId,
3971 entity->publicId))
3972 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003973 if (dtd->paramEntityRead) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003974 if (!dtd->standalone &&
3975 notStandaloneHandler &&
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003976 !notStandaloneHandler(handlerArg))
3977 return XML_ERROR_NOT_STANDALONE;
3978 }
3979 /* if we didn't read the foreign DTD then this means that there
3980 is no external subset and we must reset dtd->hasParamEntityRefs
3981 */
3982 else if (!doctypeSysid)
3983 dtd->hasParamEntityRefs = hadParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003984 /* end of DTD - no need to update dtd->keepProcessing */
3985 }
3986 useForeignDTD = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003987 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003988#endif /* XML_DTD */
3989 if (endDoctypeDeclHandler) {
3990 endDoctypeDeclHandler(handlerArg);
3991 handleDefault = XML_FALSE;
3992 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003993 break;
3994 case XML_ROLE_INSTANCE_START:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003995#ifdef XML_DTD
3996 /* if there is no DOCTYPE declaration then now is the
3997 last chance to read the foreign DTD
3998 */
3999 if (useForeignDTD) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004000 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004001 dtd->hasParamEntityRefs = XML_TRUE;
4002 if (paramEntityParsing && externalEntityRefHandler) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004003 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004004 externalSubsetName,
4005 sizeof(ENTITY));
4006 if (!entity)
4007 return XML_ERROR_NO_MEMORY;
4008 entity->base = curBase;
4009 dtd->paramEntityRead = XML_FALSE;
4010 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
4011 0,
4012 entity->base,
4013 entity->systemId,
4014 entity->publicId))
4015 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004016 if (dtd->paramEntityRead) {
4017 if (!dtd->standalone &&
4018 notStandaloneHandler &&
4019 !notStandaloneHandler(handlerArg))
4020 return XML_ERROR_NOT_STANDALONE;
4021 }
4022 /* if we didn't read the foreign DTD then this means that there
4023 is no external subset and we must reset dtd->hasParamEntityRefs
4024 */
4025 else
4026 dtd->hasParamEntityRefs = hadParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004027 /* end of DTD - no need to update dtd->keepProcessing */
4028 }
4029 }
4030#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004031 processor = contentProcessor;
4032 return contentProcessor(parser, s, end, nextPtr);
4033 case XML_ROLE_ATTLIST_ELEMENT_NAME:
4034 declElementType = getElementType(parser, enc, s, next);
4035 if (!declElementType)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004036 return XML_ERROR_NO_MEMORY;
4037 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004038 case XML_ROLE_ATTRIBUTE_NAME:
4039 declAttributeId = getAttributeId(parser, enc, s, next);
4040 if (!declAttributeId)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004041 return XML_ERROR_NO_MEMORY;
4042 declAttributeIsCdata = XML_FALSE;
4043 declAttributeType = NULL;
4044 declAttributeIsId = XML_FALSE;
4045 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004046 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004047 declAttributeIsCdata = XML_TRUE;
4048 declAttributeType = atypeCDATA;
4049 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004050 case XML_ROLE_ATTRIBUTE_TYPE_ID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004051 declAttributeIsId = XML_TRUE;
4052 declAttributeType = atypeID;
4053 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004054 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004055 declAttributeType = atypeIDREF;
4056 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004057 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004058 declAttributeType = atypeIDREFS;
4059 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004060 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004061 declAttributeType = atypeENTITY;
4062 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004063 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004064 declAttributeType = atypeENTITIES;
4065 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004066 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004067 declAttributeType = atypeNMTOKEN;
4068 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004069 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004070 declAttributeType = atypeNMTOKENS;
4071 checkAttListDeclHandler:
4072 if (dtd->keepProcessing && attlistDeclHandler)
4073 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004074 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004075 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
4076 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004077 if (dtd->keepProcessing && attlistDeclHandler) {
4078 const XML_Char *prefix;
4079 if (declAttributeType) {
4080 prefix = enumValueSep;
4081 }
4082 else {
4083 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE
4084 ? notationPrefix
4085 : enumValueStart);
4086 }
4087 if (!poolAppendString(&tempPool, prefix))
4088 return XML_ERROR_NO_MEMORY;
4089 if (!poolAppend(&tempPool, enc, s, next))
4090 return XML_ERROR_NO_MEMORY;
4091 declAttributeType = tempPool.start;
4092 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004093 }
4094 break;
4095 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
4096 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004097 if (dtd->keepProcessing) {
4098 if (!defineAttribute(declElementType, declAttributeId,
Fred Drake08317ae2003-10-21 15:38:55 +00004099 declAttributeIsCdata, declAttributeIsId,
4100 0, parser))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004101 return XML_ERROR_NO_MEMORY;
4102 if (attlistDeclHandler && declAttributeType) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004103 if (*declAttributeType == XML_T(ASCII_LPAREN)
4104 || (*declAttributeType == XML_T(ASCII_N)
4105 && declAttributeType[1] == XML_T(ASCII_O))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004106 /* Enumerated or Notation type */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004107 if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004108 || !poolAppendChar(&tempPool, XML_T('\0')))
4109 return XML_ERROR_NO_MEMORY;
4110 declAttributeType = tempPool.start;
4111 poolFinish(&tempPool);
4112 }
4113 *eventEndPP = s;
4114 attlistDeclHandler(handlerArg, declElementType->name,
4115 declAttributeId->name, declAttributeType,
4116 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
4117 poolClear(&tempPool);
4118 handleDefault = XML_FALSE;
4119 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004120 }
4121 break;
4122 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
4123 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004124 if (dtd->keepProcessing) {
4125 const XML_Char *attVal;
Fred Drake08317ae2003-10-21 15:38:55 +00004126 enum XML_Error result =
4127 storeAttributeValue(parser, enc, declAttributeIsCdata,
4128 s + enc->minBytesPerChar,
4129 next - enc->minBytesPerChar,
4130 &dtd->pool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004131 if (result)
4132 return result;
4133 attVal = poolStart(&dtd->pool);
4134 poolFinish(&dtd->pool);
4135 /* ID attributes aren't allowed to have a default */
4136 if (!defineAttribute(declElementType, declAttributeId,
4137 declAttributeIsCdata, XML_FALSE, attVal, parser))
4138 return XML_ERROR_NO_MEMORY;
4139 if (attlistDeclHandler && declAttributeType) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004140 if (*declAttributeType == XML_T(ASCII_LPAREN)
4141 || (*declAttributeType == XML_T(ASCII_N)
4142 && declAttributeType[1] == XML_T(ASCII_O))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004143 /* Enumerated or Notation type */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004144 if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004145 || !poolAppendChar(&tempPool, XML_T('\0')))
4146 return XML_ERROR_NO_MEMORY;
4147 declAttributeType = tempPool.start;
4148 poolFinish(&tempPool);
4149 }
4150 *eventEndPP = s;
4151 attlistDeclHandler(handlerArg, declElementType->name,
4152 declAttributeId->name, declAttributeType,
4153 attVal,
4154 role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
4155 poolClear(&tempPool);
4156 handleDefault = XML_FALSE;
4157 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004158 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004159 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004160 case XML_ROLE_ENTITY_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004161 if (dtd->keepProcessing) {
4162 enum XML_Error result = storeEntityValue(parser, enc,
4163 s + enc->minBytesPerChar,
4164 next - enc->minBytesPerChar);
4165 if (declEntity) {
4166 declEntity->textPtr = poolStart(&dtd->entityValuePool);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004167 declEntity->textLen = (int)(poolLength(&dtd->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004168 poolFinish(&dtd->entityValuePool);
4169 if (entityDeclHandler) {
4170 *eventEndPP = s;
4171 entityDeclHandler(handlerArg,
4172 declEntity->name,
4173 declEntity->is_param,
4174 declEntity->textPtr,
4175 declEntity->textLen,
4176 curBase, 0, 0, 0);
4177 handleDefault = XML_FALSE;
4178 }
4179 }
4180 else
4181 poolDiscard(&dtd->entityValuePool);
4182 if (result != XML_ERROR_NONE)
4183 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004184 }
4185 break;
4186 case XML_ROLE_DOCTYPE_SYSTEM_ID:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004187#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004188 useForeignDTD = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004189#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004190 dtd->hasParamEntityRefs = XML_TRUE;
4191 if (startDoctypeDeclHandler) {
4192 doctypeSysid = poolStoreString(&tempPool, enc,
4193 s + enc->minBytesPerChar,
4194 next - enc->minBytesPerChar);
4195 if (doctypeSysid == NULL)
4196 return XML_ERROR_NO_MEMORY;
4197 poolFinish(&tempPool);
4198 handleDefault = XML_FALSE;
4199 }
4200#ifdef XML_DTD
4201 else
4202 /* use externalSubsetName to make doctypeSysid non-NULL
4203 for the case where no startDoctypeDeclHandler is set */
4204 doctypeSysid = externalSubsetName;
4205#endif /* XML_DTD */
4206 if (!dtd->standalone
4207#ifdef XML_DTD
4208 && !paramEntityParsing
4209#endif /* XML_DTD */
4210 && notStandaloneHandler
4211 && !notStandaloneHandler(handlerArg))
4212 return XML_ERROR_NOT_STANDALONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004213#ifndef XML_DTD
4214 break;
4215#else /* XML_DTD */
4216 if (!declEntity) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004217 declEntity = (ENTITY *)lookup(parser,
4218 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004219 externalSubsetName,
4220 sizeof(ENTITY));
4221 if (!declEntity)
4222 return XML_ERROR_NO_MEMORY;
4223 declEntity->publicId = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004224 }
4225 /* fall through */
4226#endif /* XML_DTD */
4227 case XML_ROLE_ENTITY_SYSTEM_ID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004228 if (dtd->keepProcessing && declEntity) {
4229 declEntity->systemId = poolStoreString(&dtd->pool, enc,
4230 s + enc->minBytesPerChar,
4231 next - enc->minBytesPerChar);
4232 if (!declEntity->systemId)
4233 return XML_ERROR_NO_MEMORY;
4234 declEntity->base = curBase;
4235 poolFinish(&dtd->pool);
4236 if (entityDeclHandler)
4237 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004238 }
4239 break;
4240 case XML_ROLE_ENTITY_COMPLETE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004241 if (dtd->keepProcessing && declEntity && entityDeclHandler) {
4242 *eventEndPP = s;
4243 entityDeclHandler(handlerArg,
4244 declEntity->name,
4245 declEntity->is_param,
4246 0,0,
4247 declEntity->base,
4248 declEntity->systemId,
4249 declEntity->publicId,
4250 0);
4251 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004252 }
4253 break;
4254 case XML_ROLE_ENTITY_NOTATION_NAME:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004255 if (dtd->keepProcessing && declEntity) {
4256 declEntity->notation = poolStoreString(&dtd->pool, enc, s, next);
4257 if (!declEntity->notation)
4258 return XML_ERROR_NO_MEMORY;
4259 poolFinish(&dtd->pool);
4260 if (unparsedEntityDeclHandler) {
4261 *eventEndPP = s;
4262 unparsedEntityDeclHandler(handlerArg,
4263 declEntity->name,
4264 declEntity->base,
4265 declEntity->systemId,
4266 declEntity->publicId,
4267 declEntity->notation);
4268 handleDefault = XML_FALSE;
4269 }
4270 else if (entityDeclHandler) {
4271 *eventEndPP = s;
4272 entityDeclHandler(handlerArg,
4273 declEntity->name,
4274 0,0,0,
4275 declEntity->base,
4276 declEntity->systemId,
4277 declEntity->publicId,
4278 declEntity->notation);
4279 handleDefault = XML_FALSE;
4280 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004281 }
4282 break;
4283 case XML_ROLE_GENERAL_ENTITY_NAME:
4284 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004285 if (XmlPredefinedEntityName(enc, s, next)) {
4286 declEntity = NULL;
4287 break;
4288 }
4289 if (dtd->keepProcessing) {
4290 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4291 if (!name)
4292 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004293 declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004294 sizeof(ENTITY));
4295 if (!declEntity)
4296 return XML_ERROR_NO_MEMORY;
4297 if (declEntity->name != name) {
4298 poolDiscard(&dtd->pool);
4299 declEntity = NULL;
4300 }
4301 else {
4302 poolFinish(&dtd->pool);
4303 declEntity->publicId = NULL;
4304 declEntity->is_param = XML_FALSE;
4305 /* if we have a parent parser or are reading an internal parameter
4306 entity, then the entity declaration is not considered "internal"
4307 */
4308 declEntity->is_internal = !(parentParser || openInternalEntities);
4309 if (entityDeclHandler)
4310 handleDefault = XML_FALSE;
4311 }
4312 }
4313 else {
4314 poolDiscard(&dtd->pool);
4315 declEntity = NULL;
4316 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004317 }
4318 break;
4319 case XML_ROLE_PARAM_ENTITY_NAME:
4320#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004321 if (dtd->keepProcessing) {
4322 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4323 if (!name)
4324 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004325 declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004326 name, sizeof(ENTITY));
4327 if (!declEntity)
4328 return XML_ERROR_NO_MEMORY;
4329 if (declEntity->name != name) {
4330 poolDiscard(&dtd->pool);
4331 declEntity = NULL;
4332 }
4333 else {
4334 poolFinish(&dtd->pool);
4335 declEntity->publicId = NULL;
4336 declEntity->is_param = XML_TRUE;
4337 /* if we have a parent parser or are reading an internal parameter
4338 entity, then the entity declaration is not considered "internal"
4339 */
4340 declEntity->is_internal = !(parentParser || openInternalEntities);
4341 if (entityDeclHandler)
4342 handleDefault = XML_FALSE;
4343 }
4344 }
4345 else {
4346 poolDiscard(&dtd->pool);
4347 declEntity = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004348 }
4349#else /* not XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004350 declEntity = NULL;
4351#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004352 break;
4353 case XML_ROLE_NOTATION_NAME:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004354 declNotationPublicId = NULL;
4355 declNotationName = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004356 if (notationDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004357 declNotationName = poolStoreString(&tempPool, enc, s, next);
4358 if (!declNotationName)
4359 return XML_ERROR_NO_MEMORY;
4360 poolFinish(&tempPool);
4361 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004362 }
4363 break;
4364 case XML_ROLE_NOTATION_PUBLIC_ID:
4365 if (!XmlIsPublicId(enc, s, next, eventPP))
Fred Drake31d485c2004-08-03 07:06:22 +00004366 return XML_ERROR_PUBLICID;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004367 if (declNotationName) { /* means notationDeclHandler != NULL */
4368 XML_Char *tem = poolStoreString(&tempPool,
4369 enc,
4370 s + enc->minBytesPerChar,
4371 next - enc->minBytesPerChar);
4372 if (!tem)
4373 return XML_ERROR_NO_MEMORY;
4374 normalizePublicId(tem);
4375 declNotationPublicId = tem;
4376 poolFinish(&tempPool);
4377 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004378 }
4379 break;
4380 case XML_ROLE_NOTATION_SYSTEM_ID:
4381 if (declNotationName && notationDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004382 const XML_Char *systemId
4383 = poolStoreString(&tempPool, enc,
4384 s + enc->minBytesPerChar,
4385 next - enc->minBytesPerChar);
4386 if (!systemId)
4387 return XML_ERROR_NO_MEMORY;
4388 *eventEndPP = s;
4389 notationDeclHandler(handlerArg,
4390 declNotationName,
4391 curBase,
4392 systemId,
4393 declNotationPublicId);
4394 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004395 }
4396 poolClear(&tempPool);
4397 break;
4398 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
4399 if (declNotationPublicId && notationDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004400 *eventEndPP = s;
4401 notationDeclHandler(handlerArg,
4402 declNotationName,
4403 curBase,
4404 0,
4405 declNotationPublicId);
4406 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004407 }
4408 poolClear(&tempPool);
4409 break;
4410 case XML_ROLE_ERROR:
4411 switch (tok) {
4412 case XML_TOK_PARAM_ENTITY_REF:
Fred Drake31d485c2004-08-03 07:06:22 +00004413 /* PE references in internal subset are
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004414 not allowed within declarations. */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004415 return XML_ERROR_PARAM_ENTITY_REF;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004416 case XML_TOK_XML_DECL:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004417 return XML_ERROR_MISPLACED_XML_PI;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004418 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004419 return XML_ERROR_SYNTAX;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004420 }
4421#ifdef XML_DTD
4422 case XML_ROLE_IGNORE_SECT:
4423 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004424 enum XML_Error result;
4425 if (defaultHandler)
4426 reportDefault(parser, enc, s, next);
4427 handleDefault = XML_FALSE;
Fred Drake31d485c2004-08-03 07:06:22 +00004428 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
4429 if (result != XML_ERROR_NONE)
4430 return result;
4431 else if (!next) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004432 processor = ignoreSectionProcessor;
4433 return result;
4434 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004435 }
4436 break;
4437#endif /* XML_DTD */
4438 case XML_ROLE_GROUP_OPEN:
4439 if (prologState.level >= groupSize) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004440 if (groupSize) {
4441 char *temp = (char *)REALLOC(groupConnector, groupSize *= 2);
4442 if (temp == NULL)
4443 return XML_ERROR_NO_MEMORY;
4444 groupConnector = temp;
4445 if (dtd->scaffIndex) {
4446 int *temp = (int *)REALLOC(dtd->scaffIndex,
4447 groupSize * sizeof(int));
4448 if (temp == NULL)
4449 return XML_ERROR_NO_MEMORY;
4450 dtd->scaffIndex = temp;
4451 }
4452 }
4453 else {
4454 groupConnector = (char *)MALLOC(groupSize = 32);
4455 if (!groupConnector)
4456 return XML_ERROR_NO_MEMORY;
4457 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004458 }
4459 groupConnector[prologState.level] = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004460 if (dtd->in_eldecl) {
4461 int myindex = nextScaffoldPart(parser);
4462 if (myindex < 0)
4463 return XML_ERROR_NO_MEMORY;
4464 dtd->scaffIndex[dtd->scaffLevel] = myindex;
4465 dtd->scaffLevel++;
4466 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
4467 if (elementDeclHandler)
4468 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004469 }
4470 break;
4471 case XML_ROLE_GROUP_SEQUENCE:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004472 if (groupConnector[prologState.level] == ASCII_PIPE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004473 return XML_ERROR_SYNTAX;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004474 groupConnector[prologState.level] = ASCII_COMMA;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004475 if (dtd->in_eldecl && elementDeclHandler)
4476 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004477 break;
4478 case XML_ROLE_GROUP_CHOICE:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004479 if (groupConnector[prologState.level] == ASCII_COMMA)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004480 return XML_ERROR_SYNTAX;
4481 if (dtd->in_eldecl
4482 && !groupConnector[prologState.level]
4483 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
4484 != XML_CTYPE_MIXED)
4485 ) {
4486 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
4487 = XML_CTYPE_CHOICE;
4488 if (elementDeclHandler)
4489 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004490 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004491 groupConnector[prologState.level] = ASCII_PIPE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004492 break;
4493 case XML_ROLE_PARAM_ENTITY_REF:
4494#ifdef XML_DTD
4495 case XML_ROLE_INNER_PARAM_ENTITY_REF:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004496 dtd->hasParamEntityRefs = XML_TRUE;
4497 if (!paramEntityParsing)
4498 dtd->keepProcessing = dtd->standalone;
4499 else {
4500 const XML_Char *name;
4501 ENTITY *entity;
4502 name = poolStoreString(&dtd->pool, enc,
4503 s + enc->minBytesPerChar,
4504 next - enc->minBytesPerChar);
4505 if (!name)
4506 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004507 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004508 poolDiscard(&dtd->pool);
4509 /* first, determine if a check for an existing declaration is needed;
4510 if yes, check that the entity exists, and that it is internal,
4511 otherwise call the skipped entity handler
4512 */
4513 if (prologState.documentEntity &&
4514 (dtd->standalone
4515 ? !openInternalEntities
4516 : !dtd->hasParamEntityRefs)) {
4517 if (!entity)
4518 return XML_ERROR_UNDEFINED_ENTITY;
4519 else if (!entity->is_internal)
4520 return XML_ERROR_ENTITY_DECLARED_IN_PE;
4521 }
4522 else if (!entity) {
4523 dtd->keepProcessing = dtd->standalone;
4524 /* cannot report skipped entities in declarations */
4525 if ((role == XML_ROLE_PARAM_ENTITY_REF) && skippedEntityHandler) {
4526 skippedEntityHandler(handlerArg, name, 1);
4527 handleDefault = XML_FALSE;
4528 }
4529 break;
4530 }
4531 if (entity->open)
4532 return XML_ERROR_RECURSIVE_ENTITY_REF;
4533 if (entity->textPtr) {
4534 enum XML_Error result;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004535 XML_Bool betweenDecl =
Fred Drake31d485c2004-08-03 07:06:22 +00004536 (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
4537 result = processInternalEntity(parser, entity, betweenDecl);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004538 if (result != XML_ERROR_NONE)
4539 return result;
4540 handleDefault = XML_FALSE;
4541 break;
4542 }
4543 if (externalEntityRefHandler) {
4544 dtd->paramEntityRead = XML_FALSE;
4545 entity->open = XML_TRUE;
4546 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
4547 0,
4548 entity->base,
4549 entity->systemId,
4550 entity->publicId)) {
4551 entity->open = XML_FALSE;
4552 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4553 }
4554 entity->open = XML_FALSE;
4555 handleDefault = XML_FALSE;
4556 if (!dtd->paramEntityRead) {
4557 dtd->keepProcessing = dtd->standalone;
4558 break;
4559 }
4560 }
4561 else {
4562 dtd->keepProcessing = dtd->standalone;
4563 break;
4564 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004565 }
4566#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004567 if (!dtd->standalone &&
4568 notStandaloneHandler &&
4569 !notStandaloneHandler(handlerArg))
4570 return XML_ERROR_NOT_STANDALONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004571 break;
4572
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004573 /* Element declaration stuff */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004574
4575 case XML_ROLE_ELEMENT_NAME:
4576 if (elementDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004577 declElementType = getElementType(parser, enc, s, next);
4578 if (!declElementType)
4579 return XML_ERROR_NO_MEMORY;
4580 dtd->scaffLevel = 0;
4581 dtd->scaffCount = 0;
4582 dtd->in_eldecl = XML_TRUE;
4583 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004584 }
4585 break;
4586
4587 case XML_ROLE_CONTENT_ANY:
4588 case XML_ROLE_CONTENT_EMPTY:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004589 if (dtd->in_eldecl) {
4590 if (elementDeclHandler) {
4591 XML_Content * content = (XML_Content *) MALLOC(sizeof(XML_Content));
4592 if (!content)
4593 return XML_ERROR_NO_MEMORY;
4594 content->quant = XML_CQUANT_NONE;
4595 content->name = NULL;
4596 content->numchildren = 0;
4597 content->children = NULL;
4598 content->type = ((role == XML_ROLE_CONTENT_ANY) ?
4599 XML_CTYPE_ANY :
4600 XML_CTYPE_EMPTY);
4601 *eventEndPP = s;
4602 elementDeclHandler(handlerArg, declElementType->name, content);
4603 handleDefault = XML_FALSE;
4604 }
4605 dtd->in_eldecl = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004606 }
4607 break;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004608
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004609 case XML_ROLE_CONTENT_PCDATA:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004610 if (dtd->in_eldecl) {
4611 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
4612 = XML_CTYPE_MIXED;
4613 if (elementDeclHandler)
4614 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004615 }
4616 break;
4617
4618 case XML_ROLE_CONTENT_ELEMENT:
4619 quant = XML_CQUANT_NONE;
4620 goto elementContent;
4621 case XML_ROLE_CONTENT_ELEMENT_OPT:
4622 quant = XML_CQUANT_OPT;
4623 goto elementContent;
4624 case XML_ROLE_CONTENT_ELEMENT_REP:
4625 quant = XML_CQUANT_REP;
4626 goto elementContent;
4627 case XML_ROLE_CONTENT_ELEMENT_PLUS:
4628 quant = XML_CQUANT_PLUS;
4629 elementContent:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004630 if (dtd->in_eldecl) {
4631 ELEMENT_TYPE *el;
4632 const XML_Char *name;
4633 int nameLen;
4634 const char *nxt = (quant == XML_CQUANT_NONE
4635 ? next
4636 : next - enc->minBytesPerChar);
4637 int myindex = nextScaffoldPart(parser);
4638 if (myindex < 0)
4639 return XML_ERROR_NO_MEMORY;
4640 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
4641 dtd->scaffold[myindex].quant = quant;
4642 el = getElementType(parser, enc, s, nxt);
4643 if (!el)
4644 return XML_ERROR_NO_MEMORY;
4645 name = el->name;
4646 dtd->scaffold[myindex].name = name;
4647 nameLen = 0;
4648 for (; name[nameLen++]; );
4649 dtd->contentStringLen += nameLen;
4650 if (elementDeclHandler)
4651 handleDefault = XML_FALSE;
4652 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004653 break;
4654
4655 case XML_ROLE_GROUP_CLOSE:
4656 quant = XML_CQUANT_NONE;
4657 goto closeGroup;
4658 case XML_ROLE_GROUP_CLOSE_OPT:
4659 quant = XML_CQUANT_OPT;
4660 goto closeGroup;
4661 case XML_ROLE_GROUP_CLOSE_REP:
4662 quant = XML_CQUANT_REP;
4663 goto closeGroup;
4664 case XML_ROLE_GROUP_CLOSE_PLUS:
4665 quant = XML_CQUANT_PLUS;
4666 closeGroup:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004667 if (dtd->in_eldecl) {
4668 if (elementDeclHandler)
4669 handleDefault = XML_FALSE;
4670 dtd->scaffLevel--;
4671 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
4672 if (dtd->scaffLevel == 0) {
4673 if (!handleDefault) {
4674 XML_Content *model = build_model(parser);
4675 if (!model)
4676 return XML_ERROR_NO_MEMORY;
4677 *eventEndPP = s;
4678 elementDeclHandler(handlerArg, declElementType->name, model);
4679 }
4680 dtd->in_eldecl = XML_FALSE;
4681 dtd->contentStringLen = 0;
4682 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004683 }
4684 break;
4685 /* End element declaration stuff */
4686
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004687 case XML_ROLE_PI:
4688 if (!reportProcessingInstruction(parser, enc, s, next))
4689 return XML_ERROR_NO_MEMORY;
4690 handleDefault = XML_FALSE;
4691 break;
4692 case XML_ROLE_COMMENT:
4693 if (!reportComment(parser, enc, s, next))
4694 return XML_ERROR_NO_MEMORY;
4695 handleDefault = XML_FALSE;
4696 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004697 case XML_ROLE_NONE:
4698 switch (tok) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004699 case XML_TOK_BOM:
4700 handleDefault = XML_FALSE;
4701 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004702 }
4703 break;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004704 case XML_ROLE_DOCTYPE_NONE:
4705 if (startDoctypeDeclHandler)
4706 handleDefault = XML_FALSE;
4707 break;
4708 case XML_ROLE_ENTITY_NONE:
4709 if (dtd->keepProcessing && entityDeclHandler)
4710 handleDefault = XML_FALSE;
4711 break;
4712 case XML_ROLE_NOTATION_NONE:
4713 if (notationDeclHandler)
4714 handleDefault = XML_FALSE;
4715 break;
4716 case XML_ROLE_ATTLIST_NONE:
4717 if (dtd->keepProcessing && attlistDeclHandler)
4718 handleDefault = XML_FALSE;
4719 break;
4720 case XML_ROLE_ELEMENT_NONE:
4721 if (elementDeclHandler)
4722 handleDefault = XML_FALSE;
4723 break;
4724 } /* end of big switch */
4725
4726 if (handleDefault && defaultHandler)
4727 reportDefault(parser, enc, s, next);
4728
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004729 switch (ps_parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004730 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00004731 *nextPtr = next;
4732 return XML_ERROR_NONE;
4733 case XML_FINISHED:
4734 return XML_ERROR_ABORTED;
4735 default:
4736 s = next;
4737 tok = XmlPrologTok(enc, s, end, &next);
4738 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004739 }
4740 /* not reached */
4741}
4742
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004743static enum XML_Error PTRCALL
4744epilogProcessor(XML_Parser parser,
4745 const char *s,
4746 const char *end,
4747 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004748{
4749 processor = epilogProcessor;
4750 eventPtr = s;
4751 for (;;) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004752 const char *next = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004753 int tok = XmlPrologTok(encoding, s, end, &next);
4754 eventEndPtr = next;
4755 switch (tok) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004756 /* report partial linebreak - it might be the last token */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004757 case -XML_TOK_PROLOG_S:
4758 if (defaultHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004759 reportDefault(parser, encoding, s, next);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004760 if (ps_parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00004761 return XML_ERROR_ABORTED;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004762 }
Fred Drake31d485c2004-08-03 07:06:22 +00004763 *nextPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004764 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004765 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00004766 *nextPtr = s;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004767 return XML_ERROR_NONE;
4768 case XML_TOK_PROLOG_S:
4769 if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004770 reportDefault(parser, encoding, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004771 break;
4772 case XML_TOK_PI:
4773 if (!reportProcessingInstruction(parser, encoding, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004774 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004775 break;
4776 case XML_TOK_COMMENT:
4777 if (!reportComment(parser, encoding, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004778 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004779 break;
4780 case XML_TOK_INVALID:
4781 eventPtr = next;
4782 return XML_ERROR_INVALID_TOKEN;
4783 case XML_TOK_PARTIAL:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004784 if (!ps_finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004785 *nextPtr = s;
4786 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004787 }
4788 return XML_ERROR_UNCLOSED_TOKEN;
4789 case XML_TOK_PARTIAL_CHAR:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004790 if (!ps_finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004791 *nextPtr = s;
4792 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004793 }
4794 return XML_ERROR_PARTIAL_CHAR;
4795 default:
4796 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
4797 }
4798 eventPtr = s = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004799 switch (ps_parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004800 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00004801 *nextPtr = next;
4802 return XML_ERROR_NONE;
4803 case XML_FINISHED:
4804 return XML_ERROR_ABORTED;
4805 default: ;
4806 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004807 }
4808}
4809
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004810static enum XML_Error
Fred Drake31d485c2004-08-03 07:06:22 +00004811processInternalEntity(XML_Parser parser, ENTITY *entity,
4812 XML_Bool betweenDecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004813{
Fred Drake31d485c2004-08-03 07:06:22 +00004814 const char *textStart, *textEnd;
4815 const char *next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004816 enum XML_Error result;
Fred Drake31d485c2004-08-03 07:06:22 +00004817 OPEN_INTERNAL_ENTITY *openEntity;
4818
4819 if (freeInternalEntities) {
4820 openEntity = freeInternalEntities;
4821 freeInternalEntities = openEntity->next;
4822 }
4823 else {
4824 openEntity = (OPEN_INTERNAL_ENTITY *)MALLOC(sizeof(OPEN_INTERNAL_ENTITY));
4825 if (!openEntity)
4826 return XML_ERROR_NO_MEMORY;
4827 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004828 entity->open = XML_TRUE;
Fred Drake31d485c2004-08-03 07:06:22 +00004829 entity->processed = 0;
4830 openEntity->next = openInternalEntities;
4831 openInternalEntities = openEntity;
4832 openEntity->entity = entity;
4833 openEntity->startTagLevel = tagLevel;
4834 openEntity->betweenDecl = betweenDecl;
4835 openEntity->internalEventPtr = NULL;
4836 openEntity->internalEventEndPtr = NULL;
4837 textStart = (char *)entity->textPtr;
4838 textEnd = (char *)(entity->textPtr + entity->textLen);
4839
4840#ifdef XML_DTD
4841 if (entity->is_param) {
4842 int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004843 result = doProlog(parser, internalEncoding, textStart, textEnd, tok,
Fred Drake31d485c2004-08-03 07:06:22 +00004844 next, &next, XML_FALSE);
4845 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004846 else
Fred Drake31d485c2004-08-03 07:06:22 +00004847#endif /* XML_DTD */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004848 result = doContent(parser, tagLevel, internalEncoding, textStart,
Fred Drake31d485c2004-08-03 07:06:22 +00004849 textEnd, &next, XML_FALSE);
4850
4851 if (result == XML_ERROR_NONE) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004852 if (textEnd != next && ps_parsing == XML_SUSPENDED) {
4853 entity->processed = (int)(next - textStart);
Fred Drake31d485c2004-08-03 07:06:22 +00004854 processor = internalEntityProcessor;
4855 }
4856 else {
4857 entity->open = XML_FALSE;
4858 openInternalEntities = openEntity->next;
4859 /* put openEntity back in list of free instances */
4860 openEntity->next = freeInternalEntities;
4861 freeInternalEntities = openEntity;
4862 }
4863 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004864 return result;
4865}
4866
Fred Drake31d485c2004-08-03 07:06:22 +00004867static enum XML_Error PTRCALL
4868internalEntityProcessor(XML_Parser parser,
4869 const char *s,
4870 const char *end,
4871 const char **nextPtr)
4872{
4873 ENTITY *entity;
4874 const char *textStart, *textEnd;
4875 const char *next;
4876 enum XML_Error result;
4877 OPEN_INTERNAL_ENTITY *openEntity = openInternalEntities;
4878 if (!openEntity)
4879 return XML_ERROR_UNEXPECTED_STATE;
4880
4881 entity = openEntity->entity;
4882 textStart = ((char *)entity->textPtr) + entity->processed;
4883 textEnd = (char *)(entity->textPtr + entity->textLen);
4884
4885#ifdef XML_DTD
4886 if (entity->is_param) {
4887 int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004888 result = doProlog(parser, internalEncoding, textStart, textEnd, tok,
Fred Drake31d485c2004-08-03 07:06:22 +00004889 next, &next, XML_FALSE);
4890 }
4891 else
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004892#endif /* XML_DTD */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004893 result = doContent(parser, openEntity->startTagLevel, internalEncoding,
4894 textStart, textEnd, &next, XML_FALSE);
Fred Drake31d485c2004-08-03 07:06:22 +00004895
4896 if (result != XML_ERROR_NONE)
4897 return result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004898 else if (textEnd != next && ps_parsing == XML_SUSPENDED) {
4899 entity->processed = (int)(next - (char *)entity->textPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00004900 return result;
4901 }
4902 else {
4903 entity->open = XML_FALSE;
4904 openInternalEntities = openEntity->next;
4905 /* put openEntity back in list of free instances */
4906 openEntity->next = freeInternalEntities;
4907 freeInternalEntities = openEntity;
4908 }
4909
4910#ifdef XML_DTD
4911 if (entity->is_param) {
4912 int tok;
4913 processor = prologProcessor;
4914 tok = XmlPrologTok(encoding, s, end, &next);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004915 return doProlog(parser, encoding, s, end, tok, next, nextPtr,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004916 (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00004917 }
4918 else
4919#endif /* XML_DTD */
4920 {
4921 processor = contentProcessor;
4922 /* see externalEntityContentProcessor vs contentProcessor */
4923 return doContent(parser, parentParser ? 1 : 0, encoding, s, end,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004924 nextPtr, (XML_Bool)!ps_finalBuffer);
4925 }
Fred Drake31d485c2004-08-03 07:06:22 +00004926}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004927
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004928static enum XML_Error PTRCALL
4929errorProcessor(XML_Parser parser,
4930 const char *s,
4931 const char *end,
4932 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004933{
4934 return errorCode;
4935}
4936
4937static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004938storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
4939 const char *ptr, const char *end,
4940 STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004941{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004942 enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr,
4943 end, pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004944 if (result)
4945 return result;
4946 if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
4947 poolChop(pool);
4948 if (!poolAppendChar(pool, XML_T('\0')))
4949 return XML_ERROR_NO_MEMORY;
4950 return XML_ERROR_NONE;
4951}
4952
4953static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004954appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
4955 const char *ptr, const char *end,
4956 STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004957{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004958 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004959 for (;;) {
4960 const char *next;
4961 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
4962 switch (tok) {
4963 case XML_TOK_NONE:
4964 return XML_ERROR_NONE;
4965 case XML_TOK_INVALID:
4966 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004967 eventPtr = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004968 return XML_ERROR_INVALID_TOKEN;
4969 case XML_TOK_PARTIAL:
4970 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004971 eventPtr = ptr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004972 return XML_ERROR_INVALID_TOKEN;
4973 case XML_TOK_CHAR_REF:
4974 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004975 XML_Char buf[XML_ENCODE_MAX];
4976 int i;
4977 int n = XmlCharRefNumber(enc, ptr);
4978 if (n < 0) {
4979 if (enc == encoding)
4980 eventPtr = ptr;
4981 return XML_ERROR_BAD_CHAR_REF;
4982 }
4983 if (!isCdata
4984 && n == 0x20 /* space */
4985 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
4986 break;
4987 n = XmlEncode(n, (ICHAR *)buf);
4988 if (!n) {
4989 if (enc == encoding)
4990 eventPtr = ptr;
4991 return XML_ERROR_BAD_CHAR_REF;
4992 }
4993 for (i = 0; i < n; i++) {
4994 if (!poolAppendChar(pool, buf[i]))
4995 return XML_ERROR_NO_MEMORY;
4996 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004997 }
4998 break;
4999 case XML_TOK_DATA_CHARS:
5000 if (!poolAppend(pool, enc, ptr, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005001 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005002 break;
5003 case XML_TOK_TRAILING_CR:
5004 next = ptr + enc->minBytesPerChar;
5005 /* fall through */
5006 case XML_TOK_ATTRIBUTE_VALUE_S:
5007 case XML_TOK_DATA_NEWLINE:
5008 if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005009 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005010 if (!poolAppendChar(pool, 0x20))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005011 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005012 break;
5013 case XML_TOK_ENTITY_REF:
5014 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005015 const XML_Char *name;
5016 ENTITY *entity;
5017 char checkEntityDecl;
5018 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
5019 ptr + enc->minBytesPerChar,
5020 next - enc->minBytesPerChar);
5021 if (ch) {
5022 if (!poolAppendChar(pool, ch))
5023 return XML_ERROR_NO_MEMORY;
5024 break;
5025 }
5026 name = poolStoreString(&temp2Pool, enc,
5027 ptr + enc->minBytesPerChar,
5028 next - enc->minBytesPerChar);
5029 if (!name)
5030 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005031 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005032 poolDiscard(&temp2Pool);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005033 /* First, determine if a check for an existing declaration is needed;
5034 if yes, check that the entity exists, and that it is internal.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005035 */
5036 if (pool == &dtd->pool) /* are we called from prolog? */
5037 checkEntityDecl =
5038#ifdef XML_DTD
5039 prologState.documentEntity &&
5040#endif /* XML_DTD */
5041 (dtd->standalone
5042 ? !openInternalEntities
5043 : !dtd->hasParamEntityRefs);
5044 else /* if (pool == &tempPool): we are called from content */
5045 checkEntityDecl = !dtd->hasParamEntityRefs || dtd->standalone;
5046 if (checkEntityDecl) {
5047 if (!entity)
5048 return XML_ERROR_UNDEFINED_ENTITY;
5049 else if (!entity->is_internal)
5050 return XML_ERROR_ENTITY_DECLARED_IN_PE;
5051 }
5052 else if (!entity) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005053 /* Cannot report skipped entity here - see comments on
5054 skippedEntityHandler.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005055 if (skippedEntityHandler)
5056 skippedEntityHandler(handlerArg, name, 0);
5057 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005058 /* Cannot call the default handler because this would be
5059 out of sync with the call to the startElementHandler.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005060 if ((pool == &tempPool) && defaultHandler)
5061 reportDefault(parser, enc, ptr, next);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005062 */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005063 break;
5064 }
5065 if (entity->open) {
5066 if (enc == encoding)
5067 eventPtr = ptr;
5068 return XML_ERROR_RECURSIVE_ENTITY_REF;
5069 }
5070 if (entity->notation) {
5071 if (enc == encoding)
5072 eventPtr = ptr;
5073 return XML_ERROR_BINARY_ENTITY_REF;
5074 }
5075 if (!entity->textPtr) {
5076 if (enc == encoding)
5077 eventPtr = ptr;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005078 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005079 }
5080 else {
5081 enum XML_Error result;
5082 const XML_Char *textEnd = entity->textPtr + entity->textLen;
5083 entity->open = XML_TRUE;
5084 result = appendAttributeValue(parser, internalEncoding, isCdata,
5085 (char *)entity->textPtr,
5086 (char *)textEnd, pool);
5087 entity->open = XML_FALSE;
5088 if (result)
5089 return result;
5090 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005091 }
5092 break;
5093 default:
5094 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005095 eventPtr = ptr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005096 return XML_ERROR_UNEXPECTED_STATE;
5097 }
5098 ptr = next;
5099 }
5100 /* not reached */
5101}
5102
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005103static enum XML_Error
5104storeEntityValue(XML_Parser parser,
5105 const ENCODING *enc,
5106 const char *entityTextPtr,
5107 const char *entityTextEnd)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005108{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005109 DTD * const dtd = _dtd; /* save one level of indirection */
5110 STRING_POOL *pool = &(dtd->entityValuePool);
5111 enum XML_Error result = XML_ERROR_NONE;
5112#ifdef XML_DTD
5113 int oldInEntityValue = prologState.inEntityValue;
5114 prologState.inEntityValue = 1;
5115#endif /* XML_DTD */
5116 /* never return Null for the value argument in EntityDeclHandler,
5117 since this would indicate an external entity; therefore we
5118 have to make sure that entityValuePool.start is not null */
5119 if (!pool->blocks) {
5120 if (!poolGrow(pool))
5121 return XML_ERROR_NO_MEMORY;
5122 }
5123
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005124 for (;;) {
5125 const char *next;
5126 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
5127 switch (tok) {
5128 case XML_TOK_PARAM_ENTITY_REF:
5129#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005130 if (isParamEntity || enc != encoding) {
5131 const XML_Char *name;
5132 ENTITY *entity;
5133 name = poolStoreString(&tempPool, enc,
5134 entityTextPtr + enc->minBytesPerChar,
5135 next - enc->minBytesPerChar);
5136 if (!name) {
5137 result = XML_ERROR_NO_MEMORY;
5138 goto endEntityValue;
5139 }
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005140 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005141 poolDiscard(&tempPool);
5142 if (!entity) {
5143 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
5144 /* cannot report skipped entity here - see comments on
5145 skippedEntityHandler
5146 if (skippedEntityHandler)
5147 skippedEntityHandler(handlerArg, name, 0);
5148 */
5149 dtd->keepProcessing = dtd->standalone;
5150 goto endEntityValue;
5151 }
5152 if (entity->open) {
5153 if (enc == encoding)
5154 eventPtr = entityTextPtr;
5155 result = XML_ERROR_RECURSIVE_ENTITY_REF;
5156 goto endEntityValue;
5157 }
5158 if (entity->systemId) {
5159 if (externalEntityRefHandler) {
5160 dtd->paramEntityRead = XML_FALSE;
5161 entity->open = XML_TRUE;
5162 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
5163 0,
5164 entity->base,
5165 entity->systemId,
5166 entity->publicId)) {
5167 entity->open = XML_FALSE;
5168 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5169 goto endEntityValue;
5170 }
5171 entity->open = XML_FALSE;
5172 if (!dtd->paramEntityRead)
5173 dtd->keepProcessing = dtd->standalone;
5174 }
5175 else
5176 dtd->keepProcessing = dtd->standalone;
5177 }
5178 else {
5179 entity->open = XML_TRUE;
5180 result = storeEntityValue(parser,
5181 internalEncoding,
5182 (char *)entity->textPtr,
5183 (char *)(entity->textPtr
5184 + entity->textLen));
5185 entity->open = XML_FALSE;
5186 if (result)
5187 goto endEntityValue;
5188 }
5189 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005190 }
5191#endif /* XML_DTD */
Fred Drake31d485c2004-08-03 07:06:22 +00005192 /* In the internal subset, PE references are not legal
5193 within markup declarations, e.g entity values in this case. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005194 eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005195 result = XML_ERROR_PARAM_ENTITY_REF;
5196 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005197 case XML_TOK_NONE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005198 result = XML_ERROR_NONE;
5199 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005200 case XML_TOK_ENTITY_REF:
5201 case XML_TOK_DATA_CHARS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005202 if (!poolAppend(pool, enc, entityTextPtr, next)) {
5203 result = XML_ERROR_NO_MEMORY;
5204 goto endEntityValue;
5205 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005206 break;
5207 case XML_TOK_TRAILING_CR:
5208 next = entityTextPtr + enc->minBytesPerChar;
5209 /* fall through */
5210 case XML_TOK_DATA_NEWLINE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005211 if (pool->end == pool->ptr && !poolGrow(pool)) {
5212 result = XML_ERROR_NO_MEMORY;
5213 goto endEntityValue;
5214 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005215 *(pool->ptr)++ = 0xA;
5216 break;
5217 case XML_TOK_CHAR_REF:
5218 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005219 XML_Char buf[XML_ENCODE_MAX];
5220 int i;
5221 int n = XmlCharRefNumber(enc, entityTextPtr);
5222 if (n < 0) {
5223 if (enc == encoding)
5224 eventPtr = entityTextPtr;
5225 result = XML_ERROR_BAD_CHAR_REF;
5226 goto endEntityValue;
5227 }
5228 n = XmlEncode(n, (ICHAR *)buf);
5229 if (!n) {
5230 if (enc == encoding)
5231 eventPtr = entityTextPtr;
5232 result = XML_ERROR_BAD_CHAR_REF;
5233 goto endEntityValue;
5234 }
5235 for (i = 0; i < n; i++) {
5236 if (pool->end == pool->ptr && !poolGrow(pool)) {
5237 result = XML_ERROR_NO_MEMORY;
5238 goto endEntityValue;
5239 }
5240 *(pool->ptr)++ = buf[i];
5241 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005242 }
5243 break;
5244 case XML_TOK_PARTIAL:
5245 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005246 eventPtr = entityTextPtr;
5247 result = XML_ERROR_INVALID_TOKEN;
5248 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005249 case XML_TOK_INVALID:
5250 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005251 eventPtr = next;
5252 result = XML_ERROR_INVALID_TOKEN;
5253 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005254 default:
5255 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005256 eventPtr = entityTextPtr;
5257 result = XML_ERROR_UNEXPECTED_STATE;
5258 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005259 }
5260 entityTextPtr = next;
5261 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005262endEntityValue:
5263#ifdef XML_DTD
5264 prologState.inEntityValue = oldInEntityValue;
5265#endif /* XML_DTD */
5266 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005267}
5268
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005269static void FASTCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005270normalizeLines(XML_Char *s)
5271{
5272 XML_Char *p;
5273 for (;; s++) {
5274 if (*s == XML_T('\0'))
5275 return;
5276 if (*s == 0xD)
5277 break;
5278 }
5279 p = s;
5280 do {
5281 if (*s == 0xD) {
5282 *p++ = 0xA;
5283 if (*++s == 0xA)
5284 s++;
5285 }
5286 else
5287 *p++ = *s++;
5288 } while (*s);
5289 *p = XML_T('\0');
5290}
5291
5292static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005293reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
5294 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005295{
5296 const XML_Char *target;
5297 XML_Char *data;
5298 const char *tem;
5299 if (!processingInstructionHandler) {
5300 if (defaultHandler)
5301 reportDefault(parser, enc, start, end);
5302 return 1;
5303 }
5304 start += enc->minBytesPerChar * 2;
5305 tem = start + XmlNameLength(enc, start);
5306 target = poolStoreString(&tempPool, enc, start, tem);
5307 if (!target)
5308 return 0;
5309 poolFinish(&tempPool);
5310 data = poolStoreString(&tempPool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005311 XmlSkipS(enc, tem),
5312 end - enc->minBytesPerChar*2);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005313 if (!data)
5314 return 0;
5315 normalizeLines(data);
5316 processingInstructionHandler(handlerArg, target, data);
5317 poolClear(&tempPool);
5318 return 1;
5319}
5320
5321static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005322reportComment(XML_Parser parser, const ENCODING *enc,
5323 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005324{
5325 XML_Char *data;
5326 if (!commentHandler) {
5327 if (defaultHandler)
5328 reportDefault(parser, enc, start, end);
5329 return 1;
5330 }
5331 data = poolStoreString(&tempPool,
5332 enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005333 start + enc->minBytesPerChar * 4,
5334 end - enc->minBytesPerChar * 3);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005335 if (!data)
5336 return 0;
5337 normalizeLines(data);
5338 commentHandler(handlerArg, data);
5339 poolClear(&tempPool);
5340 return 1;
5341}
5342
5343static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005344reportDefault(XML_Parser parser, const ENCODING *enc,
5345 const char *s, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005346{
5347 if (MUST_CONVERT(enc, s)) {
5348 const char **eventPP;
5349 const char **eventEndPP;
5350 if (enc == encoding) {
5351 eventPP = &eventPtr;
5352 eventEndPP = &eventEndPtr;
5353 }
5354 else {
5355 eventPP = &(openInternalEntities->internalEventPtr);
5356 eventEndPP = &(openInternalEntities->internalEventEndPtr);
5357 }
5358 do {
5359 ICHAR *dataPtr = (ICHAR *)dataBuf;
5360 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
5361 *eventEndPP = s;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005362 defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005363 *eventPP = s;
5364 } while (s != end);
5365 }
5366 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005367 defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005368}
5369
5370
5371static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005372defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
5373 XML_Bool isId, const XML_Char *value, XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005374{
5375 DEFAULT_ATTRIBUTE *att;
5376 if (value || isId) {
5377 /* The handling of default attributes gets messed up if we have
5378 a default which duplicates a non-default. */
5379 int i;
5380 for (i = 0; i < type->nDefaultAtts; i++)
5381 if (attId == type->defaultAtts[i].id)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005382 return 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005383 if (isId && !type->idAtt && !attId->xmlns)
5384 type->idAtt = attId;
5385 }
5386 if (type->nDefaultAtts == type->allocDefaultAtts) {
5387 if (type->allocDefaultAtts == 0) {
5388 type->allocDefaultAtts = 8;
Fred Drake08317ae2003-10-21 15:38:55 +00005389 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(type->allocDefaultAtts
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005390 * sizeof(DEFAULT_ATTRIBUTE));
5391 if (!type->defaultAtts)
5392 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005393 }
5394 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005395 DEFAULT_ATTRIBUTE *temp;
5396 int count = type->allocDefaultAtts * 2;
5397 temp = (DEFAULT_ATTRIBUTE *)
5398 REALLOC(type->defaultAtts, (count * sizeof(DEFAULT_ATTRIBUTE)));
5399 if (temp == NULL)
5400 return 0;
5401 type->allocDefaultAtts = count;
5402 type->defaultAtts = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005403 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005404 }
5405 att = type->defaultAtts + type->nDefaultAtts;
5406 att->id = attId;
5407 att->value = value;
5408 att->isCdata = isCdata;
5409 if (!isCdata)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005410 attId->maybeTokenized = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005411 type->nDefaultAtts += 1;
5412 return 1;
5413}
5414
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005415static int
5416setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005417{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005418 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005419 const XML_Char *name;
5420 for (name = elementType->name; *name; name++) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005421 if (*name == XML_T(ASCII_COLON)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005422 PREFIX *prefix;
5423 const XML_Char *s;
5424 for (s = elementType->name; s != name; s++) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005425 if (!poolAppendChar(&dtd->pool, *s))
5426 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005427 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005428 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
5429 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005430 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005431 sizeof(PREFIX));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005432 if (!prefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005433 return 0;
5434 if (prefix->name == poolStart(&dtd->pool))
5435 poolFinish(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005436 else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005437 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005438 elementType->prefix = prefix;
5439
5440 }
5441 }
5442 return 1;
5443}
5444
5445static ATTRIBUTE_ID *
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005446getAttributeId(XML_Parser parser, const ENCODING *enc,
5447 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005448{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005449 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005450 ATTRIBUTE_ID *id;
5451 const XML_Char *name;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005452 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
5453 return NULL;
5454 name = poolStoreString(&dtd->pool, enc, start, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005455 if (!name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005456 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00005457 /* skip quotation mark - its storage will be re-used (like in name[-1]) */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005458 ++name;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005459 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005460 if (!id)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005461 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005462 if (id->name != name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005463 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005464 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005465 poolFinish(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005466 if (!ns)
5467 ;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005468 else if (name[0] == XML_T(ASCII_x)
5469 && name[1] == XML_T(ASCII_m)
5470 && name[2] == XML_T(ASCII_l)
5471 && name[3] == XML_T(ASCII_n)
5472 && name[4] == XML_T(ASCII_s)
5473 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005474 if (name[5] == XML_T('\0'))
5475 id->prefix = &dtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005476 else
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005477 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, sizeof(PREFIX));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005478 id->xmlns = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005479 }
5480 else {
5481 int i;
5482 for (i = 0; name[i]; i++) {
Fred Drake08317ae2003-10-21 15:38:55 +00005483 /* attributes without prefix are *not* in the default namespace */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005484 if (name[i] == XML_T(ASCII_COLON)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005485 int j;
5486 for (j = 0; j < i; j++) {
5487 if (!poolAppendChar(&dtd->pool, name[j]))
5488 return NULL;
5489 }
5490 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
5491 return NULL;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005492 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005493 sizeof(PREFIX));
Benjamin Peterson196d7db2016-06-11 13:28:56 -07005494 if (!id->prefix)
5495 return NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005496 if (id->prefix->name == poolStart(&dtd->pool))
5497 poolFinish(&dtd->pool);
5498 else
5499 poolDiscard(&dtd->pool);
5500 break;
5501 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005502 }
5503 }
5504 }
5505 return id;
5506}
5507
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005508#define CONTEXT_SEP XML_T(ASCII_FF)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005509
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005510static const XML_Char *
5511getContext(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005512{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005513 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005514 HASH_TABLE_ITER iter;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005515 XML_Bool needSep = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005516
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005517 if (dtd->defaultPrefix.binding) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005518 int i;
5519 int len;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005520 if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS)))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005521 return NULL;
5522 len = dtd->defaultPrefix.binding->uriLen;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005523 if (namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005524 len--;
5525 for (i = 0; i < len; i++)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005526 if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i]))
5527 return NULL;
5528 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005529 }
5530
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005531 hashTableIterInit(&iter, &(dtd->prefixes));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005532 for (;;) {
5533 int i;
5534 int len;
5535 const XML_Char *s;
5536 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
5537 if (!prefix)
5538 break;
5539 if (!prefix->binding)
5540 continue;
5541 if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005542 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005543 for (s = prefix->name; *s; s++)
5544 if (!poolAppendChar(&tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005545 return NULL;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005546 if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS)))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005547 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005548 len = prefix->binding->uriLen;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005549 if (namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005550 len--;
5551 for (i = 0; i < len; i++)
5552 if (!poolAppendChar(&tempPool, prefix->binding->uri[i]))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005553 return NULL;
5554 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005555 }
5556
5557
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005558 hashTableIterInit(&iter, &(dtd->generalEntities));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005559 for (;;) {
5560 const XML_Char *s;
5561 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
5562 if (!e)
5563 break;
5564 if (!e->open)
5565 continue;
5566 if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005567 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005568 for (s = e->name; *s; s++)
5569 if (!poolAppendChar(&tempPool, *s))
5570 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005571 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005572 }
5573
5574 if (!poolAppendChar(&tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005575 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005576 return tempPool.start;
5577}
5578
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005579static XML_Bool
5580setContext(XML_Parser parser, const XML_Char *context)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005581{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005582 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005583 const XML_Char *s = context;
5584
5585 while (*context != XML_T('\0')) {
5586 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
5587 ENTITY *e;
5588 if (!poolAppendChar(&tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005589 return XML_FALSE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005590 e = (ENTITY *)lookup(parser, &dtd->generalEntities, poolStart(&tempPool), 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005591 if (e)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005592 e->open = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005593 if (*s != XML_T('\0'))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005594 s++;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005595 context = s;
5596 poolDiscard(&tempPool);
5597 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005598 else if (*s == XML_T(ASCII_EQUALS)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005599 PREFIX *prefix;
5600 if (poolLength(&tempPool) == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005601 prefix = &dtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005602 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005603 if (!poolAppendChar(&tempPool, XML_T('\0')))
5604 return XML_FALSE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005605 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&tempPool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005606 sizeof(PREFIX));
5607 if (!prefix)
5608 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005609 if (prefix->name == poolStart(&tempPool)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005610 prefix->name = poolCopyString(&dtd->pool, prefix->name);
5611 if (!prefix->name)
5612 return XML_FALSE;
5613 }
5614 poolDiscard(&tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005615 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005616 for (context = s + 1;
5617 *context != CONTEXT_SEP && *context != XML_T('\0');
5618 context++)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005619 if (!poolAppendChar(&tempPool, *context))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005620 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005621 if (!poolAppendChar(&tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005622 return XML_FALSE;
Fred Drake31d485c2004-08-03 07:06:22 +00005623 if (addBinding(parser, prefix, NULL, poolStart(&tempPool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005624 &inheritedBindings) != XML_ERROR_NONE)
5625 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005626 poolDiscard(&tempPool);
5627 if (*context != XML_T('\0'))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005628 ++context;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005629 s = context;
5630 }
5631 else {
5632 if (!poolAppendChar(&tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005633 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005634 s++;
5635 }
5636 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005637 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005638}
5639
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005640static void FASTCALL
5641normalizePublicId(XML_Char *publicId)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005642{
5643 XML_Char *p = publicId;
5644 XML_Char *s;
5645 for (s = publicId; *s; s++) {
5646 switch (*s) {
5647 case 0x20:
5648 case 0xD:
5649 case 0xA:
5650 if (p != publicId && p[-1] != 0x20)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005651 *p++ = 0x20;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005652 break;
5653 default:
5654 *p++ = *s;
5655 }
5656 }
5657 if (p != publicId && p[-1] == 0x20)
5658 --p;
5659 *p = XML_T('\0');
5660}
5661
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005662static DTD *
5663dtdCreate(const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005664{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005665 DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD));
5666 if (p == NULL)
5667 return p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005668 poolInit(&(p->pool), ms);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005669 poolInit(&(p->entityValuePool), ms);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005670 hashTableInit(&(p->generalEntities), ms);
5671 hashTableInit(&(p->elementTypes), ms);
5672 hashTableInit(&(p->attributeIds), ms);
5673 hashTableInit(&(p->prefixes), ms);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005674#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005675 p->paramEntityRead = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005676 hashTableInit(&(p->paramEntities), ms);
5677#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005678 p->defaultPrefix.name = NULL;
5679 p->defaultPrefix.binding = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005680
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005681 p->in_eldecl = XML_FALSE;
5682 p->scaffIndex = NULL;
5683 p->scaffold = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005684 p->scaffLevel = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005685 p->scaffSize = 0;
5686 p->scaffCount = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005687 p->contentStringLen = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005688
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005689 p->keepProcessing = XML_TRUE;
5690 p->hasParamEntityRefs = XML_FALSE;
5691 p->standalone = XML_FALSE;
5692 return p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005693}
5694
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005695static void
5696dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005697{
5698 HASH_TABLE_ITER iter;
5699 hashTableIterInit(&iter, &(p->elementTypes));
5700 for (;;) {
5701 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
5702 if (!e)
5703 break;
5704 if (e->allocDefaultAtts != 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005705 ms->free_fcn(e->defaultAtts);
5706 }
5707 hashTableClear(&(p->generalEntities));
5708#ifdef XML_DTD
5709 p->paramEntityRead = XML_FALSE;
5710 hashTableClear(&(p->paramEntities));
5711#endif /* XML_DTD */
5712 hashTableClear(&(p->elementTypes));
5713 hashTableClear(&(p->attributeIds));
5714 hashTableClear(&(p->prefixes));
5715 poolClear(&(p->pool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005716 poolClear(&(p->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005717 p->defaultPrefix.name = NULL;
5718 p->defaultPrefix.binding = NULL;
5719
5720 p->in_eldecl = XML_FALSE;
Fred Drake08317ae2003-10-21 15:38:55 +00005721
5722 ms->free_fcn(p->scaffIndex);
5723 p->scaffIndex = NULL;
5724 ms->free_fcn(p->scaffold);
5725 p->scaffold = NULL;
5726
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005727 p->scaffLevel = 0;
5728 p->scaffSize = 0;
5729 p->scaffCount = 0;
5730 p->contentStringLen = 0;
5731
5732 p->keepProcessing = XML_TRUE;
5733 p->hasParamEntityRefs = XML_FALSE;
5734 p->standalone = XML_FALSE;
5735}
5736
5737static void
5738dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms)
5739{
5740 HASH_TABLE_ITER iter;
5741 hashTableIterInit(&iter, &(p->elementTypes));
5742 for (;;) {
5743 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
5744 if (!e)
5745 break;
5746 if (e->allocDefaultAtts != 0)
5747 ms->free_fcn(e->defaultAtts);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005748 }
5749 hashTableDestroy(&(p->generalEntities));
5750#ifdef XML_DTD
5751 hashTableDestroy(&(p->paramEntities));
5752#endif /* XML_DTD */
5753 hashTableDestroy(&(p->elementTypes));
5754 hashTableDestroy(&(p->attributeIds));
5755 hashTableDestroy(&(p->prefixes));
5756 poolDestroy(&(p->pool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005757 poolDestroy(&(p->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005758 if (isDocEntity) {
Fred Drake08317ae2003-10-21 15:38:55 +00005759 ms->free_fcn(p->scaffIndex);
5760 ms->free_fcn(p->scaffold);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005761 }
5762 ms->free_fcn(p);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005763}
5764
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005765/* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
5766 The new DTD has already been initialized.
5767*/
5768static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005769dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005770{
5771 HASH_TABLE_ITER iter;
5772
5773 /* Copy the prefix table. */
5774
5775 hashTableIterInit(&iter, &(oldDtd->prefixes));
5776 for (;;) {
5777 const XML_Char *name;
5778 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
5779 if (!oldP)
5780 break;
5781 name = poolCopyString(&(newDtd->pool), oldP->name);
5782 if (!name)
5783 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005784 if (!lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005785 return 0;
5786 }
5787
5788 hashTableIterInit(&iter, &(oldDtd->attributeIds));
5789
5790 /* Copy the attribute id table. */
5791
5792 for (;;) {
5793 ATTRIBUTE_ID *newA;
5794 const XML_Char *name;
5795 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
5796
5797 if (!oldA)
5798 break;
5799 /* Remember to allocate the scratch byte before the name. */
5800 if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
5801 return 0;
5802 name = poolCopyString(&(newDtd->pool), oldA->name);
5803 if (!name)
5804 return 0;
5805 ++name;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005806 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005807 sizeof(ATTRIBUTE_ID));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005808 if (!newA)
5809 return 0;
5810 newA->maybeTokenized = oldA->maybeTokenized;
5811 if (oldA->prefix) {
5812 newA->xmlns = oldA->xmlns;
5813 if (oldA->prefix == &oldDtd->defaultPrefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005814 newA->prefix = &newDtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005815 else
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005816 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005817 oldA->prefix->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005818 }
5819 }
5820
5821 /* Copy the element type table. */
5822
5823 hashTableIterInit(&iter, &(oldDtd->elementTypes));
5824
5825 for (;;) {
5826 int i;
5827 ELEMENT_TYPE *newE;
5828 const XML_Char *name;
5829 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
5830 if (!oldE)
5831 break;
5832 name = poolCopyString(&(newDtd->pool), oldE->name);
5833 if (!name)
5834 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005835 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005836 sizeof(ELEMENT_TYPE));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005837 if (!newE)
5838 return 0;
5839 if (oldE->nDefaultAtts) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005840 newE->defaultAtts = (DEFAULT_ATTRIBUTE *)
5841 ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
5842 if (!newE->defaultAtts) {
5843 ms->free_fcn(newE);
5844 return 0;
5845 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005846 }
5847 if (oldE->idAtt)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005848 newE->idAtt = (ATTRIBUTE_ID *)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005849 lookup(oldParser, &(newDtd->attributeIds), oldE->idAtt->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005850 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
5851 if (oldE->prefix)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005852 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005853 oldE->prefix->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005854 for (i = 0; i < newE->nDefaultAtts; i++) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005855 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005856 lookup(oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005857 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
5858 if (oldE->defaultAtts[i].value) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005859 newE->defaultAtts[i].value
5860 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
5861 if (!newE->defaultAtts[i].value)
5862 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005863 }
5864 else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005865 newE->defaultAtts[i].value = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005866 }
5867 }
5868
5869 /* Copy the entity tables. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005870 if (!copyEntityTable(oldParser,
5871 &(newDtd->generalEntities),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005872 &(newDtd->pool),
5873 &(oldDtd->generalEntities)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005874 return 0;
5875
5876#ifdef XML_DTD
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005877 if (!copyEntityTable(oldParser,
5878 &(newDtd->paramEntities),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005879 &(newDtd->pool),
5880 &(oldDtd->paramEntities)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005881 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005882 newDtd->paramEntityRead = oldDtd->paramEntityRead;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005883#endif /* XML_DTD */
5884
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005885 newDtd->keepProcessing = oldDtd->keepProcessing;
5886 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005887 newDtd->standalone = oldDtd->standalone;
5888
5889 /* Don't want deep copying for scaffolding */
5890 newDtd->in_eldecl = oldDtd->in_eldecl;
5891 newDtd->scaffold = oldDtd->scaffold;
5892 newDtd->contentStringLen = oldDtd->contentStringLen;
5893 newDtd->scaffSize = oldDtd->scaffSize;
5894 newDtd->scaffLevel = oldDtd->scaffLevel;
5895 newDtd->scaffIndex = oldDtd->scaffIndex;
5896
5897 return 1;
5898} /* End dtdCopy */
5899
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005900static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005901copyEntityTable(XML_Parser oldParser,
5902 HASH_TABLE *newTable,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005903 STRING_POOL *newPool,
5904 const HASH_TABLE *oldTable)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005905{
5906 HASH_TABLE_ITER iter;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005907 const XML_Char *cachedOldBase = NULL;
5908 const XML_Char *cachedNewBase = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005909
5910 hashTableIterInit(&iter, oldTable);
5911
5912 for (;;) {
5913 ENTITY *newE;
5914 const XML_Char *name;
5915 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
5916 if (!oldE)
5917 break;
5918 name = poolCopyString(newPool, oldE->name);
5919 if (!name)
5920 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005921 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005922 if (!newE)
5923 return 0;
5924 if (oldE->systemId) {
5925 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
5926 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005927 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005928 newE->systemId = tem;
5929 if (oldE->base) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005930 if (oldE->base == cachedOldBase)
5931 newE->base = cachedNewBase;
5932 else {
5933 cachedOldBase = oldE->base;
5934 tem = poolCopyString(newPool, cachedOldBase);
5935 if (!tem)
5936 return 0;
5937 cachedNewBase = newE->base = tem;
5938 }
5939 }
5940 if (oldE->publicId) {
5941 tem = poolCopyString(newPool, oldE->publicId);
5942 if (!tem)
5943 return 0;
5944 newE->publicId = tem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005945 }
5946 }
5947 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005948 const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr,
5949 oldE->textLen);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005950 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005951 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005952 newE->textPtr = tem;
5953 newE->textLen = oldE->textLen;
5954 }
5955 if (oldE->notation) {
5956 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
5957 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005958 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005959 newE->notation = tem;
5960 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005961 newE->is_param = oldE->is_param;
5962 newE->is_internal = oldE->is_internal;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005963 }
5964 return 1;
5965}
5966
Fred Drake08317ae2003-10-21 15:38:55 +00005967#define INIT_POWER 6
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005968
Fred Drake08317ae2003-10-21 15:38:55 +00005969static XML_Bool FASTCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005970keyeq(KEY s1, KEY s2)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005971{
5972 for (; *s1 == *s2; s1++, s2++)
5973 if (*s1 == 0)
Fred Drake08317ae2003-10-21 15:38:55 +00005974 return XML_TRUE;
5975 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005976}
5977
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005978static unsigned long FASTCALL
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005979hash(XML_Parser parser, KEY s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005980{
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005981 unsigned long h = hash_secret_salt;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005982 while (*s)
Fred Drake08317ae2003-10-21 15:38:55 +00005983 h = CHAR_HASH(h, *s++);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005984 return h;
5985}
5986
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005987static NAMED *
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005988lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005989{
5990 size_t i;
5991 if (table->size == 0) {
5992 size_t tsize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005993 if (!createSize)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005994 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00005995 table->power = INIT_POWER;
5996 /* table->size is a power of 2 */
5997 table->size = (size_t)1 << INIT_POWER;
5998 tsize = table->size * sizeof(NAMED *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005999 table->v = (NAMED **)table->mem->malloc_fcn(tsize);
Fred Drake31d485c2004-08-03 07:06:22 +00006000 if (!table->v) {
6001 table->size = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006002 return NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00006003 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006004 memset(table->v, 0, tsize);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006005 i = hash(parser, name) & ((unsigned long)table->size - 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006006 }
6007 else {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006008 unsigned long h = hash(parser, name);
Fred Drake08317ae2003-10-21 15:38:55 +00006009 unsigned long mask = (unsigned long)table->size - 1;
6010 unsigned char step = 0;
6011 i = h & mask;
6012 while (table->v[i]) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006013 if (keyeq(name, table->v[i]->name))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006014 return table->v[i];
Fred Drake08317ae2003-10-21 15:38:55 +00006015 if (!step)
6016 step = PROBE_STEP(h, mask, table->power);
6017 i < step ? (i += table->size - step) : (i -= step);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006018 }
6019 if (!createSize)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006020 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006021
6022 /* check for overflow (table is half full) */
6023 if (table->used >> (table->power - 1)) {
6024 unsigned char newPower = table->power + 1;
6025 size_t newSize = (size_t)1 << newPower;
6026 unsigned long newMask = (unsigned long)newSize - 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006027 size_t tsize = newSize * sizeof(NAMED *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006028 NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006029 if (!newV)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006030 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006031 memset(newV, 0, tsize);
6032 for (i = 0; i < table->size; i++)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006033 if (table->v[i]) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006034 unsigned long newHash = hash(parser, table->v[i]->name);
Fred Drake08317ae2003-10-21 15:38:55 +00006035 size_t j = newHash & newMask;
6036 step = 0;
6037 while (newV[j]) {
6038 if (!step)
6039 step = PROBE_STEP(newHash, newMask, newPower);
6040 j < step ? (j += newSize - step) : (j -= step);
6041 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006042 newV[j] = table->v[i];
6043 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006044 table->mem->free_fcn(table->v);
6045 table->v = newV;
Fred Drake08317ae2003-10-21 15:38:55 +00006046 table->power = newPower;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006047 table->size = newSize;
Fred Drake08317ae2003-10-21 15:38:55 +00006048 i = h & newMask;
6049 step = 0;
6050 while (table->v[i]) {
6051 if (!step)
6052 step = PROBE_STEP(h, newMask, newPower);
6053 i < step ? (i += newSize - step) : (i -= step);
6054 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006055 }
6056 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006057 table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006058 if (!table->v[i])
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006059 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006060 memset(table->v[i], 0, createSize);
6061 table->v[i]->name = name;
6062 (table->used)++;
6063 return table->v[i];
6064}
6065
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006066static void FASTCALL
6067hashTableClear(HASH_TABLE *table)
6068{
6069 size_t i;
6070 for (i = 0; i < table->size; i++) {
Fred Drake08317ae2003-10-21 15:38:55 +00006071 table->mem->free_fcn(table->v[i]);
6072 table->v[i] = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006073 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006074 table->used = 0;
6075}
6076
6077static void FASTCALL
6078hashTableDestroy(HASH_TABLE *table)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006079{
6080 size_t i;
Fred Drake08317ae2003-10-21 15:38:55 +00006081 for (i = 0; i < table->size; i++)
6082 table->mem->free_fcn(table->v[i]);
6083 table->mem->free_fcn(table->v);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006084}
6085
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006086static void FASTCALL
6087hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006088{
Fred Drake08317ae2003-10-21 15:38:55 +00006089 p->power = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006090 p->size = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006091 p->used = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006092 p->v = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006093 p->mem = ms;
6094}
6095
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006096static void FASTCALL
6097hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006098{
6099 iter->p = table->v;
6100 iter->end = iter->p + table->size;
6101}
6102
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006103static NAMED * FASTCALL
6104hashTableIterNext(HASH_TABLE_ITER *iter)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006105{
6106 while (iter->p != iter->end) {
6107 NAMED *tem = *(iter->p)++;
6108 if (tem)
6109 return tem;
6110 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006111 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006112}
6113
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006114static void FASTCALL
6115poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006116{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006117 pool->blocks = NULL;
6118 pool->freeBlocks = NULL;
6119 pool->start = NULL;
6120 pool->ptr = NULL;
6121 pool->end = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006122 pool->mem = ms;
6123}
6124
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006125static void FASTCALL
6126poolClear(STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006127{
6128 if (!pool->freeBlocks)
6129 pool->freeBlocks = pool->blocks;
6130 else {
6131 BLOCK *p = pool->blocks;
6132 while (p) {
6133 BLOCK *tem = p->next;
6134 p->next = pool->freeBlocks;
6135 pool->freeBlocks = p;
6136 p = tem;
6137 }
6138 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006139 pool->blocks = NULL;
6140 pool->start = NULL;
6141 pool->ptr = NULL;
6142 pool->end = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006143}
6144
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006145static void FASTCALL
6146poolDestroy(STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006147{
6148 BLOCK *p = pool->blocks;
6149 while (p) {
6150 BLOCK *tem = p->next;
6151 pool->mem->free_fcn(p);
6152 p = tem;
6153 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006154 p = pool->freeBlocks;
6155 while (p) {
6156 BLOCK *tem = p->next;
6157 pool->mem->free_fcn(p);
6158 p = tem;
6159 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006160}
6161
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006162static XML_Char *
6163poolAppend(STRING_POOL *pool, const ENCODING *enc,
6164 const char *ptr, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006165{
6166 if (!pool->ptr && !poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006167 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006168 for (;;) {
6169 XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
6170 if (ptr == end)
6171 break;
6172 if (!poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006173 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006174 }
6175 return pool->start;
6176}
6177
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006178static const XML_Char * FASTCALL
6179poolCopyString(STRING_POOL *pool, const XML_Char *s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006180{
6181 do {
6182 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006183 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006184 } while (*s++);
6185 s = pool->start;
6186 poolFinish(pool);
6187 return s;
6188}
6189
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006190static const XML_Char *
6191poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006192{
6193 if (!pool->ptr && !poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006194 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006195 for (; n > 0; --n, s++) {
6196 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006197 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006198 }
6199 s = pool->start;
6200 poolFinish(pool);
6201 return s;
6202}
6203
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006204static const XML_Char * FASTCALL
6205poolAppendString(STRING_POOL *pool, const XML_Char *s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006206{
6207 while (*s) {
6208 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006209 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006210 s++;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006211 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006212 return pool->start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006213}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006214
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006215static XML_Char *
6216poolStoreString(STRING_POOL *pool, const ENCODING *enc,
6217 const char *ptr, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006218{
6219 if (!poolAppend(pool, enc, ptr, end))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006220 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006221 if (pool->ptr == pool->end && !poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006222 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006223 *(pool->ptr)++ = 0;
6224 return pool->start;
6225}
6226
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006227static XML_Bool FASTCALL
6228poolGrow(STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006229{
6230 if (pool->freeBlocks) {
6231 if (pool->start == 0) {
6232 pool->blocks = pool->freeBlocks;
6233 pool->freeBlocks = pool->freeBlocks->next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006234 pool->blocks->next = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006235 pool->start = pool->blocks->s;
6236 pool->end = pool->start + pool->blocks->size;
6237 pool->ptr = pool->start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006238 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006239 }
6240 if (pool->end - pool->start < pool->freeBlocks->size) {
6241 BLOCK *tem = pool->freeBlocks->next;
6242 pool->freeBlocks->next = pool->blocks;
6243 pool->blocks = pool->freeBlocks;
6244 pool->freeBlocks = tem;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006245 memcpy(pool->blocks->s, pool->start,
6246 (pool->end - pool->start) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006247 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
6248 pool->start = pool->blocks->s;
6249 pool->end = pool->start + pool->blocks->size;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006250 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006251 }
6252 }
6253 if (pool->blocks && pool->start == pool->blocks->s) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00006254 int blockSize = (int)(pool->end - pool->start)*2;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006255 BLOCK *temp = (BLOCK *)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006256 pool->mem->realloc_fcn(pool->blocks,
Fred Drake08317ae2003-10-21 15:38:55 +00006257 (offsetof(BLOCK, s)
6258 + blockSize * sizeof(XML_Char)));
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006259 if (temp == NULL)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006260 return XML_FALSE;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006261 pool->blocks = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006262 pool->blocks->size = blockSize;
6263 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
6264 pool->start = pool->blocks->s;
6265 pool->end = pool->start + blockSize;
6266 }
6267 else {
6268 BLOCK *tem;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00006269 int blockSize = (int)(pool->end - pool->start);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006270 if (blockSize < INIT_BLOCK_SIZE)
6271 blockSize = INIT_BLOCK_SIZE;
6272 else
6273 blockSize *= 2;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006274 tem = (BLOCK *)pool->mem->malloc_fcn(offsetof(BLOCK, s)
Fred Drake08317ae2003-10-21 15:38:55 +00006275 + blockSize * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006276 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006277 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006278 tem->size = blockSize;
6279 tem->next = pool->blocks;
6280 pool->blocks = tem;
6281 if (pool->ptr != pool->start)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006282 memcpy(tem->s, pool->start,
6283 (pool->ptr - pool->start) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006284 pool->ptr = tem->s + (pool->ptr - pool->start);
6285 pool->start = tem->s;
6286 pool->end = tem->s + blockSize;
6287 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006288 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006289}
6290
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006291static int FASTCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006292nextScaffoldPart(XML_Parser parser)
6293{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006294 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006295 CONTENT_SCAFFOLD * me;
6296 int next;
6297
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006298 if (!dtd->scaffIndex) {
6299 dtd->scaffIndex = (int *)MALLOC(groupSize * sizeof(int));
6300 if (!dtd->scaffIndex)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006301 return -1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006302 dtd->scaffIndex[0] = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006303 }
6304
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006305 if (dtd->scaffCount >= dtd->scaffSize) {
6306 CONTENT_SCAFFOLD *temp;
6307 if (dtd->scaffold) {
6308 temp = (CONTENT_SCAFFOLD *)
6309 REALLOC(dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
6310 if (temp == NULL)
6311 return -1;
6312 dtd->scaffSize *= 2;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006313 }
6314 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006315 temp = (CONTENT_SCAFFOLD *)MALLOC(INIT_SCAFFOLD_ELEMENTS
6316 * sizeof(CONTENT_SCAFFOLD));
6317 if (temp == NULL)
6318 return -1;
6319 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006320 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006321 dtd->scaffold = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006322 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006323 next = dtd->scaffCount++;
6324 me = &dtd->scaffold[next];
6325 if (dtd->scaffLevel) {
6326 CONTENT_SCAFFOLD *parent = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel-1]];
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006327 if (parent->lastchild) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006328 dtd->scaffold[parent->lastchild].nextsib = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006329 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006330 if (!parent->childcnt)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006331 parent->firstchild = next;
6332 parent->lastchild = next;
6333 parent->childcnt++;
6334 }
6335 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
6336 return next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006337}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006338
6339static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006340build_node(XML_Parser parser,
6341 int src_node,
6342 XML_Content *dest,
6343 XML_Content **contpos,
6344 XML_Char **strpos)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006345{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006346 DTD * const dtd = _dtd; /* save one level of indirection */
6347 dest->type = dtd->scaffold[src_node].type;
6348 dest->quant = dtd->scaffold[src_node].quant;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006349 if (dest->type == XML_CTYPE_NAME) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006350 const XML_Char *src;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006351 dest->name = *strpos;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006352 src = dtd->scaffold[src_node].name;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006353 for (;;) {
6354 *(*strpos)++ = *src;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006355 if (!*src)
6356 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006357 src++;
6358 }
6359 dest->numchildren = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006360 dest->children = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006361 }
6362 else {
6363 unsigned int i;
6364 int cn;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006365 dest->numchildren = dtd->scaffold[src_node].childcnt;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006366 dest->children = *contpos;
6367 *contpos += dest->numchildren;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006368 for (i = 0, cn = dtd->scaffold[src_node].firstchild;
6369 i < dest->numchildren;
6370 i++, cn = dtd->scaffold[cn].nextsib) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006371 build_node(parser, cn, &(dest->children[i]), contpos, strpos);
6372 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006373 dest->name = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006374 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006375}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006376
6377static XML_Content *
6378build_model (XML_Parser parser)
6379{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006380 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006381 XML_Content *ret;
6382 XML_Content *cpos;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006383 XML_Char * str;
6384 int allocsize = (dtd->scaffCount * sizeof(XML_Content)
6385 + (dtd->contentStringLen * sizeof(XML_Char)));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006386
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006387 ret = (XML_Content *)MALLOC(allocsize);
6388 if (!ret)
6389 return NULL;
6390
6391 str = (XML_Char *) (&ret[dtd->scaffCount]);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006392 cpos = &ret[1];
6393
6394 build_node(parser, 0, ret, &cpos, &str);
6395 return ret;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006396}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006397
6398static ELEMENT_TYPE *
6399getElementType(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006400 const ENCODING *enc,
6401 const char *ptr,
6402 const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006403{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006404 DTD * const dtd = _dtd; /* save one level of indirection */
6405 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006406 ELEMENT_TYPE *ret;
6407
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006408 if (!name)
6409 return NULL;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006410 ret = (ELEMENT_TYPE *) lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006411 if (!ret)
6412 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006413 if (ret->name != name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006414 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006415 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006416 poolFinish(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006417 if (!setElementTypePrefix(parser, ret))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006418 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006419 }
6420 return ret;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006421}