blob: b308e67e2d390d244df194ac0eaca949b91c0462 [file] [log] [blame]
/* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
   See the file COPYING for copying permission.
*/
4
Victor Stinner23ec4b52017-06-15 00:54:36 +02005#include <stddef.h>
6#include <string.h> /* memset(), memcpy() */
7#include <assert.h>
8#include <limits.h> /* UINT_MAX */
9
10#ifdef WIN32
11#define getpid GetCurrentProcessId
12#else
13#include <sys/time.h> /* gettimeofday() */
14#include <sys/types.h> /* getpid() */
15#include <unistd.h> /* getpid() */
16#endif
17
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070018#define XML_BUILDING_EXPAT 1
19
Victor Stinner23ec4b52017-06-15 00:54:36 +020020#ifdef WIN32
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070021#include "winconfig.h"
22#elif defined(MACOS_CLASSIC)
23#include "macconfig.h"
24#elif defined(__amigaos__)
25#include "amigaconfig.h"
26#elif defined(__WATCOMC__)
27#include "watcomconfig.h"
28#elif defined(HAVE_EXPAT_CONFIG_H)
29#include <expat_config.h>
Victor Stinner23ec4b52017-06-15 00:54:36 +020030#endif /* ndef WIN32 */
Christian Heimesaa152762013-12-06 23:43:50 +010031
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070032#include "ascii.h"
Fred Drake08317ae2003-10-21 15:38:55 +000033#include "expat.h"
34
/* Bind the generic Xml* names to the UTF-16 or UTF-8 variants of the
   tokenizer entry points, depending on whether XML_UNICODE is defined.
   ICHAR is the matching internal character unit. */
#ifdef XML_UNICODE
#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#define XmlConvert XmlUtf16Convert
#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
#define XmlEncode XmlUtf16Encode
/* Conversion is required unless the input encoding is UTF-16 and 's' lies
   on an even address.  The address is examined through an integer cast;
   subtracting a null pointer from 's' (the previous formulation) is
   undefined behavior. */
#define MUST_CONVERT(enc, s) \
  (!(enc)->isUtf16 || (((size_t)(const void *)(s)) & 1))
typedef unsigned short ICHAR;
#else
#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#define XmlConvert XmlUtf8Convert
#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
#define XmlEncode XmlUtf8Encode
/* Conversion is required whenever the input encoding is not UTF-8. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
typedef char ICHAR;
#endif
53
54
/* When XML_NS is not defined, the "NS" entry-point names are aliased to
   their plain counterparts. */
#ifndef XML_NS

#define XmlInitEncodingNS XmlInitEncoding
#define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
#undef XmlGetInternalEncodingNS
#define XmlGetInternalEncodingNS XmlGetInternalEncoding
#define XmlParseXmlDeclNS XmlParseXmlDecl

#endif
64
/* XML_T(x) wraps a character constant and XML_L(x) a string literal so that
   both have the element type selected by XML_UNICODE / XML_UNICODE_WCHAR_T.
   The cast forms parenthesize their argument so the cast applies to the
   whole expression. */
#ifdef XML_UNICODE

#ifdef XML_UNICODE_WCHAR_T
#define XML_T(x) (const wchar_t)(x)
#define XML_L(x) L ## x
#else
#define XML_T(x) (const unsigned short)(x)
#define XML_L(x) x
#endif

#else

#define XML_T(x) x
#define XML_L(x) x

#endif
81
/* Round up n to be a multiple of sz, where sz is a power of 2.
   A value that is already a multiple of sz is returned unchanged. */
#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
84
Fred Drake08317ae2003-10-21 15:38:55 +000085/* Handle the case where memmove() doesn't exist. */
86#ifndef HAVE_MEMMOVE
87#ifdef HAVE_BCOPY
88#define memmove(d,s,l) bcopy((s),(d),(l))
89#else
90#error memmove does not exist on this platform, nor is a substitute available
91#endif /* HAVE_BCOPY */
92#endif /* HAVE_MEMMOVE */
93
Martin v. Löwisfc03a942003-01-25 22:41:29 +000094#include "internal.h"
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +000095#include "xmltok.h"
96#include "xmlrole.h"
97
98typedef const XML_Char *KEY;
99
100typedef struct {
101 KEY name;
102} NAMED;
103
104typedef struct {
105 NAMED **v;
Fred Drake08317ae2003-10-21 15:38:55 +0000106 unsigned char power;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000107 size_t size;
108 size_t used;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000109 const XML_Memory_Handling_Suite *mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000110} HASH_TABLE;
111
/* Basic character hash algorithm, taken from Python's string hash:
   h = h * 1000003 ^ character, the constant being a prime number
   (0xF4243 == 1000003).  The character is first converted to the unsigned
   code-unit type so that negative values do not sign-extend into the hash.
*/
#ifdef XML_UNICODE
#define CHAR_HASH(h, c) \
  (((h) * 0xF4243) ^ (unsigned short)(c))
#else
#define CHAR_HASH(h, c) \
  (((h) * 0xF4243) ^ (unsigned char)(c))
#endif
123
/* For probing (after a collision) we need a step size relative prime
   to the hash table size, which is a power of 2. We use double-hashing,
   since we can calculate a second hash value cheaply by taking those bits
   of the first hash value that were discarded (masked out) when the table
   index was calculated: index = hash & mask, where mask = table->size - 1.
   We limit the maximum step size to table->size / 4 (mask >> 2) and make
   it odd, since odd numbers are always relative prime to a power of 2.
*/
#define SECOND_HASH(hash, mask, power) \
  ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
/* Forces the low bit on so the step is odd; the result is narrowed to
   unsigned char. */
#define PROBE_STEP(hash, mask, power) \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
136
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000137typedef struct {
138 NAMED **p;
139 NAMED **end;
140} HASH_TABLE_ITER;
141
/* Initial sizes for the parser's internal buffers and tables. */
#define INIT_TAG_BUF_SIZE 32  /* must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE 1024
#define INIT_ATTS_SIZE 16
#define INIT_ATTS_VERSION 0xFFFFFFFF
#define INIT_BLOCK_SIZE 1024
#define INIT_BUFFER_SIZE 1024

#define EXPAND_SPARE 24
150
151typedef struct binding {
152 struct prefix *prefix;
153 struct binding *nextTagBinding;
154 struct binding *prevPrefixBinding;
155 const struct attribute_id *attId;
156 XML_Char *uri;
157 int uriLen;
158 int uriAlloc;
159} BINDING;
160
161typedef struct prefix {
162 const XML_Char *name;
163 BINDING *binding;
164} PREFIX;
165
166typedef struct {
167 const XML_Char *str;
168 const XML_Char *localPart;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000169 const XML_Char *prefix;
170 int strLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000171 int uriLen;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000172 int prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000173} TAG_NAME;
174
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000175/* TAG represents an open element.
176 The name of the element is stored in both the document and API
177 encodings. The memory buffer 'buf' is a separately-allocated
178 memory area which stores the name. During the XML_Parse()/
179 XMLParseBuffer() when the element is open, the memory for the 'raw'
180 version of the name (in the document encoding) is shared with the
181 document buffer. If the element is open across calls to
182 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
183 contain the 'raw' name as well.
184
185 A parser re-uses these structures, maintaining a list of allocated
186 TAG objects in a free list.
187*/
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000188typedef struct tag {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000189 struct tag *parent; /* parent of this element */
190 const char *rawName; /* tagName in the original encoding */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000191 int rawNameLength;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000192 TAG_NAME name; /* tagName in the API encoding */
193 char *buf; /* buffer for name components */
194 char *bufEnd; /* end of the buffer */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000195 BINDING *bindings;
196} TAG;
197
198typedef struct {
199 const XML_Char *name;
200 const XML_Char *textPtr;
Fred Drake31d485c2004-08-03 07:06:22 +0000201 int textLen; /* length in XML_Chars */
202 int processed; /* # of processed bytes - when suspended */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000203 const XML_Char *systemId;
204 const XML_Char *base;
205 const XML_Char *publicId;
206 const XML_Char *notation;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000207 XML_Bool open;
208 XML_Bool is_param;
209 XML_Bool is_internal; /* true if declared in internal subset outside PE */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000210} ENTITY;
211
212typedef struct {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000213 enum XML_Content_Type type;
214 enum XML_Content_Quant quant;
215 const XML_Char * name;
216 int firstchild;
217 int lastchild;
218 int childcnt;
219 int nextsib;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000220} CONTENT_SCAFFOLD;
221
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000222#define INIT_SCAFFOLD_ELEMENTS 32
223
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000224typedef struct block {
225 struct block *next;
226 int size;
227 XML_Char s[1];
228} BLOCK;
229
230typedef struct {
231 BLOCK *blocks;
232 BLOCK *freeBlocks;
233 const XML_Char *end;
234 XML_Char *ptr;
235 XML_Char *start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000236 const XML_Memory_Handling_Suite *mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000237} STRING_POOL;
238
239/* The XML_Char before the name is used to determine whether
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000240 an attribute has been specified. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000241typedef struct attribute_id {
242 XML_Char *name;
243 PREFIX *prefix;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000244 XML_Bool maybeTokenized;
245 XML_Bool xmlns;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000246} ATTRIBUTE_ID;
247
248typedef struct {
249 const ATTRIBUTE_ID *id;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000250 XML_Bool isCdata;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000251 const XML_Char *value;
252} DEFAULT_ATTRIBUTE;
253
254typedef struct {
Fred Drake08317ae2003-10-21 15:38:55 +0000255 unsigned long version;
256 unsigned long hash;
257 const XML_Char *uriName;
258} NS_ATT;
259
260typedef struct {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000261 const XML_Char *name;
262 PREFIX *prefix;
263 const ATTRIBUTE_ID *idAtt;
264 int nDefaultAtts;
265 int allocDefaultAtts;
266 DEFAULT_ATTRIBUTE *defaultAtts;
267} ELEMENT_TYPE;
268
269typedef struct {
270 HASH_TABLE generalEntities;
271 HASH_TABLE elementTypes;
272 HASH_TABLE attributeIds;
273 HASH_TABLE prefixes;
274 STRING_POOL pool;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000275 STRING_POOL entityValuePool;
276 /* false once a parameter entity reference has been skipped */
277 XML_Bool keepProcessing;
278 /* true once an internal or external PE reference has been encountered;
279 this includes the reference to an external subset */
280 XML_Bool hasParamEntityRefs;
281 XML_Bool standalone;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000282#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000283 /* indicates if external PE has been read */
284 XML_Bool paramEntityRead;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000285 HASH_TABLE paramEntities;
286#endif /* XML_DTD */
287 PREFIX defaultPrefix;
288 /* === scaffolding for building content model === */
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000289 XML_Bool in_eldecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000290 CONTENT_SCAFFOLD *scaffold;
291 unsigned contentStringLen;
292 unsigned scaffSize;
293 unsigned scaffCount;
294 int scaffLevel;
295 int *scaffIndex;
296} DTD;
297
298typedef struct open_internal_entity {
299 const char *internalEventPtr;
300 const char *internalEventEndPtr;
301 struct open_internal_entity *next;
302 ENTITY *entity;
Fred Drake31d485c2004-08-03 07:06:22 +0000303 int startTagLevel;
304 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000305} OPEN_INTERNAL_ENTITY;
306
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000307typedef enum XML_Error PTRCALL Processor(XML_Parser parser,
308 const char *start,
309 const char *end,
310 const char **endPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000311
312static Processor prologProcessor;
313static Processor prologInitProcessor;
314static Processor contentProcessor;
315static Processor cdataSectionProcessor;
316#ifdef XML_DTD
317static Processor ignoreSectionProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000318static Processor externalParEntProcessor;
319static Processor externalParEntInitProcessor;
320static Processor entityValueProcessor;
321static Processor entityValueInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000322#endif /* XML_DTD */
323static Processor epilogProcessor;
324static Processor errorProcessor;
325static Processor externalEntityInitProcessor;
326static Processor externalEntityInitProcessor2;
327static Processor externalEntityInitProcessor3;
328static Processor externalEntityContentProcessor;
Fred Drake31d485c2004-08-03 07:06:22 +0000329static Processor internalEntityProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000330
331static enum XML_Error
332handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
333static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000334processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
Fred Drake31d485c2004-08-03 07:06:22 +0000335 const char *s, const char *next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000336static enum XML_Error
337initializeEncoding(XML_Parser parser);
338static enum XML_Error
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700339doProlog(XML_Parser parser, const ENCODING *enc, const char *s,
340 const char *end, int tok, const char *next, const char **nextPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000341 XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000342static enum XML_Error
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700343processInternalEntity(XML_Parser parser, ENTITY *entity,
Fred Drake31d485c2004-08-03 07:06:22 +0000344 XML_Bool betweenDecl);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000345static enum XML_Error
346doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700347 const char *start, const char *end, const char **endPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000348 XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000349static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000350doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000351 const char *end, const char **nextPtr, XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000352#ifdef XML_DTD
353static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000354doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000355 const char *end, const char **nextPtr, XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000356#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000357
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000358static enum XML_Error
Fred Drake4faea012003-01-28 06:42:40 +0000359storeAtts(XML_Parser parser, const ENCODING *, const char *s,
360 TAG_NAME *tagNamePtr, BINDING **bindingsPtr);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000361static enum XML_Error
362addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
363 const XML_Char *uri, BINDING **bindingsPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000364static int
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700365defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
Fred Drake31d485c2004-08-03 07:06:22 +0000366 XML_Bool isId, const XML_Char *dfltValue, XML_Parser parser);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000367static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000368storeAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
369 const char *, const char *, STRING_POOL *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000370static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000371appendAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
372 const char *, const char *, STRING_POOL *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000373static ATTRIBUTE_ID *
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000374getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
375 const char *end);
376static int
377setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000378static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000379storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start,
380 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000381static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000382reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
383 const char *start, const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000384static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000385reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
386 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000387static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000388reportDefault(XML_Parser parser, const ENCODING *enc, const char *start,
389 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000390
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000391static const XML_Char * getContext(XML_Parser parser);
392static XML_Bool
393setContext(XML_Parser parser, const XML_Char *context);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000394
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000395static void FASTCALL normalizePublicId(XML_Char *s);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000396
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000397static DTD * dtdCreate(const XML_Memory_Handling_Suite *ms);
398/* do not call if parentParser != NULL */
399static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
400static void
401dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms);
402static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700403dtdCopy(XML_Parser oldParser,
404 DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000405static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700406copyEntityTable(XML_Parser oldParser,
407 HASH_TABLE *, STRING_POOL *, const HASH_TABLE *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000408static NAMED *
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700409lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000410static void FASTCALL
411hashTableInit(HASH_TABLE *, const XML_Memory_Handling_Suite *ms);
412static void FASTCALL hashTableClear(HASH_TABLE *);
413static void FASTCALL hashTableDestroy(HASH_TABLE *);
414static void FASTCALL
415hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
416static NAMED * FASTCALL hashTableIterNext(HASH_TABLE_ITER *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000417
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000418static void FASTCALL
419poolInit(STRING_POOL *, const XML_Memory_Handling_Suite *ms);
420static void FASTCALL poolClear(STRING_POOL *);
421static void FASTCALL poolDestroy(STRING_POOL *);
422static XML_Char *
423poolAppend(STRING_POOL *pool, const ENCODING *enc,
424 const char *ptr, const char *end);
425static XML_Char *
426poolStoreString(STRING_POOL *pool, const ENCODING *enc,
427 const char *ptr, const char *end);
428static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
429static const XML_Char * FASTCALL
430poolCopyString(STRING_POOL *pool, const XML_Char *s);
431static const XML_Char *
432poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
433static const XML_Char * FASTCALL
434poolAppendString(STRING_POOL *pool, const XML_Char *s);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000435
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000436static int FASTCALL nextScaffoldPart(XML_Parser parser);
437static XML_Content * build_model(XML_Parser parser);
438static ELEMENT_TYPE *
439getElementType(XML_Parser parser, const ENCODING *enc,
440 const char *ptr, const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000441
Victor Stinner23ec4b52017-06-15 00:54:36 +0200442static unsigned long generate_hash_secret_salt(XML_Parser parser);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700443static XML_Bool startParsing(XML_Parser parser);
444
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000445static XML_Parser
446parserCreate(const XML_Char *encodingName,
447 const XML_Memory_Handling_Suite *memsuite,
448 const XML_Char *nameSep,
449 DTD *dtd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700450
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000451static void
452parserInit(XML_Parser parser, const XML_Char *encodingName);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000453
/* Accessors for the string under construction in a STRING_POOL.
   The string occupies [start, ptr); 'pool' is always a pointer expression.

   poolStart      - first character of the current string
   poolEnd        - one past its last character
   poolLength     - number of characters written so far
   poolChop       - drop the last character
   poolLastChar   - the last character written (lvalue)
   poolDiscard    - abandon the current string (ptr rewinds to start)
   poolFinish     - commit the current string (start advances to ptr)
   poolAppendChar - append c; yields 1 on success, or 0 when the pool is
                    full and poolGrow() reports failure

   Every macro parenthesizes its arguments so that arguments such as
   "&somePool" or a conditional expression expand correctly. */
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c) \
  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
   ? 0 \
   : ((*((pool)->ptr)++ = (c)), 1))
465
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000466struct XML_ParserStruct {
467 /* The first member must be userData so that the XML_GetUserData
468 macro works. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000469 void *m_userData;
470 void *m_handlerArg;
471 char *m_buffer;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000472 const XML_Memory_Handling_Suite m_mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000473 /* first character to be parsed */
474 const char *m_bufferPtr;
475 /* past last character to be parsed */
476 char *m_bufferEnd;
477 /* allocated end of buffer */
478 const char *m_bufferLim;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000479 XML_Index m_parseEndByteIndex;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000480 const char *m_parseEndPtr;
481 XML_Char *m_dataBuf;
482 XML_Char *m_dataBufEnd;
483 XML_StartElementHandler m_startElementHandler;
484 XML_EndElementHandler m_endElementHandler;
485 XML_CharacterDataHandler m_characterDataHandler;
486 XML_ProcessingInstructionHandler m_processingInstructionHandler;
487 XML_CommentHandler m_commentHandler;
488 XML_StartCdataSectionHandler m_startCdataSectionHandler;
489 XML_EndCdataSectionHandler m_endCdataSectionHandler;
490 XML_DefaultHandler m_defaultHandler;
491 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
492 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
493 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
494 XML_NotationDeclHandler m_notationDeclHandler;
495 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
496 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
497 XML_NotStandaloneHandler m_notStandaloneHandler;
498 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000499 XML_Parser m_externalEntityRefHandlerArg;
500 XML_SkippedEntityHandler m_skippedEntityHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000501 XML_UnknownEncodingHandler m_unknownEncodingHandler;
502 XML_ElementDeclHandler m_elementDeclHandler;
503 XML_AttlistDeclHandler m_attlistDeclHandler;
504 XML_EntityDeclHandler m_entityDeclHandler;
505 XML_XmlDeclHandler m_xmlDeclHandler;
506 const ENCODING *m_encoding;
507 INIT_ENCODING m_initEncoding;
508 const ENCODING *m_internalEncoding;
509 const XML_Char *m_protocolEncodingName;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000510 XML_Bool m_ns;
511 XML_Bool m_ns_triplets;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000512 void *m_unknownEncodingMem;
513 void *m_unknownEncodingData;
514 void *m_unknownEncodingHandlerData;
Fred Drake31d485c2004-08-03 07:06:22 +0000515 void (XMLCALL *m_unknownEncodingRelease)(void *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000516 PROLOG_STATE m_prologState;
517 Processor *m_processor;
518 enum XML_Error m_errorCode;
519 const char *m_eventPtr;
520 const char *m_eventEndPtr;
521 const char *m_positionPtr;
522 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
Fred Drake31d485c2004-08-03 07:06:22 +0000523 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000524 XML_Bool m_defaultExpandInternalEntities;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000525 int m_tagLevel;
526 ENTITY *m_declEntity;
527 const XML_Char *m_doctypeName;
528 const XML_Char *m_doctypeSysid;
529 const XML_Char *m_doctypePubid;
530 const XML_Char *m_declAttributeType;
531 const XML_Char *m_declNotationName;
532 const XML_Char *m_declNotationPublicId;
533 ELEMENT_TYPE *m_declElementType;
534 ATTRIBUTE_ID *m_declAttributeId;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000535 XML_Bool m_declAttributeIsCdata;
536 XML_Bool m_declAttributeIsId;
537 DTD *m_dtd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000538 const XML_Char *m_curBase;
539 TAG *m_tagStack;
540 TAG *m_freeTagList;
541 BINDING *m_inheritedBindings;
542 BINDING *m_freeBindingList;
543 int m_attsSize;
544 int m_nSpecifiedAtts;
545 int m_idAttIndex;
546 ATTRIBUTE *m_atts;
Fred Drake08317ae2003-10-21 15:38:55 +0000547 NS_ATT *m_nsAtts;
548 unsigned long m_nsAttsVersion;
549 unsigned char m_nsAttsPower;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700550#ifdef XML_ATTR_INFO
551 XML_AttrInfo *m_attInfo;
552#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000553 POSITION m_position;
554 STRING_POOL m_tempPool;
555 STRING_POOL m_temp2Pool;
556 char *m_groupConnector;
Fred Drake08317ae2003-10-21 15:38:55 +0000557 unsigned int m_groupSize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000558 XML_Char m_namespaceSeparator;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000559 XML_Parser m_parentParser;
Fred Drake31d485c2004-08-03 07:06:22 +0000560 XML_ParsingStatus m_parsingStatus;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000561#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000562 XML_Bool m_isParamEntity;
563 XML_Bool m_useForeignDTD;
564 enum XML_ParamEntityParsing m_paramEntityParsing;
565#endif
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700566 unsigned long m_hash_secret_salt;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000567};
568
/* Allocation shorthands that route through the parser's own memory suite.
   They rely on a variable named 'parser' being in scope at the point of
   use. */
#define MALLOC(s) (parser->m_mem.malloc_fcn((s)))
#define REALLOC(p,s) (parser->m_mem.realloc_fcn((p),(s)))
#define FREE(p) (parser->m_mem.free_fcn((p)))
572
/* Shorthand for the members of struct XML_ParserStruct.  Every macro
   expands to "parser->m_<name>", so a variable named 'parser' must be in
   scope wherever one is used.

   NOTE(review): internalEntityRefHandler names m_internalEntityRefHandler,
   which is not a member of struct XML_ParserStruct as declared in this
   file; any use of that macro would fail to compile.  It is kept
   unchanged here. */
#define userData (parser->m_userData)
#define handlerArg (parser->m_handlerArg)
#define startElementHandler (parser->m_startElementHandler)
#define endElementHandler (parser->m_endElementHandler)
#define characterDataHandler (parser->m_characterDataHandler)
#define processingInstructionHandler \
        (parser->m_processingInstructionHandler)
#define commentHandler (parser->m_commentHandler)
#define startCdataSectionHandler \
        (parser->m_startCdataSectionHandler)
#define endCdataSectionHandler (parser->m_endCdataSectionHandler)
#define defaultHandler (parser->m_defaultHandler)
#define startDoctypeDeclHandler (parser->m_startDoctypeDeclHandler)
#define endDoctypeDeclHandler (parser->m_endDoctypeDeclHandler)
#define unparsedEntityDeclHandler \
        (parser->m_unparsedEntityDeclHandler)
#define notationDeclHandler (parser->m_notationDeclHandler)
#define startNamespaceDeclHandler \
        (parser->m_startNamespaceDeclHandler)
#define endNamespaceDeclHandler (parser->m_endNamespaceDeclHandler)
#define notStandaloneHandler (parser->m_notStandaloneHandler)
#define externalEntityRefHandler \
        (parser->m_externalEntityRefHandler)
#define externalEntityRefHandlerArg \
        (parser->m_externalEntityRefHandlerArg)
#define internalEntityRefHandler \
        (parser->m_internalEntityRefHandler)
#define skippedEntityHandler (parser->m_skippedEntityHandler)
#define unknownEncodingHandler (parser->m_unknownEncodingHandler)
#define elementDeclHandler (parser->m_elementDeclHandler)
#define attlistDeclHandler (parser->m_attlistDeclHandler)
#define entityDeclHandler (parser->m_entityDeclHandler)
#define xmlDeclHandler (parser->m_xmlDeclHandler)
#define encoding (parser->m_encoding)
#define initEncoding (parser->m_initEncoding)
#define internalEncoding (parser->m_internalEncoding)
#define unknownEncodingMem (parser->m_unknownEncodingMem)
#define unknownEncodingData (parser->m_unknownEncodingData)
#define unknownEncodingHandlerData \
  (parser->m_unknownEncodingHandlerData)
#define unknownEncodingRelease (parser->m_unknownEncodingRelease)
#define protocolEncodingName (parser->m_protocolEncodingName)
#define ns (parser->m_ns)
#define ns_triplets (parser->m_ns_triplets)
#define prologState (parser->m_prologState)
#define processor (parser->m_processor)
#define errorCode (parser->m_errorCode)
#define eventPtr (parser->m_eventPtr)
#define eventEndPtr (parser->m_eventEndPtr)
#define positionPtr (parser->m_positionPtr)
#define position (parser->m_position)
#define openInternalEntities (parser->m_openInternalEntities)
#define freeInternalEntities (parser->m_freeInternalEntities)
#define defaultExpandInternalEntities \
        (parser->m_defaultExpandInternalEntities)
#define tagLevel (parser->m_tagLevel)
#define buffer (parser->m_buffer)
#define bufferPtr (parser->m_bufferPtr)
#define bufferEnd (parser->m_bufferEnd)
#define parseEndByteIndex (parser->m_parseEndByteIndex)
#define parseEndPtr (parser->m_parseEndPtr)
#define bufferLim (parser->m_bufferLim)
#define dataBuf (parser->m_dataBuf)
#define dataBufEnd (parser->m_dataBufEnd)
#define _dtd (parser->m_dtd)
#define curBase (parser->m_curBase)
#define declEntity (parser->m_declEntity)
#define doctypeName (parser->m_doctypeName)
#define doctypeSysid (parser->m_doctypeSysid)
#define doctypePubid (parser->m_doctypePubid)
#define declAttributeType (parser->m_declAttributeType)
#define declNotationName (parser->m_declNotationName)
#define declNotationPublicId (parser->m_declNotationPublicId)
#define declElementType (parser->m_declElementType)
#define declAttributeId (parser->m_declAttributeId)
#define declAttributeIsCdata (parser->m_declAttributeIsCdata)
#define declAttributeIsId (parser->m_declAttributeIsId)
#define freeTagList (parser->m_freeTagList)
#define freeBindingList (parser->m_freeBindingList)
#define inheritedBindings (parser->m_inheritedBindings)
#define tagStack (parser->m_tagStack)
#define atts (parser->m_atts)
#define attsSize (parser->m_attsSize)
#define nSpecifiedAtts (parser->m_nSpecifiedAtts)
#define idAttIndex (parser->m_idAttIndex)
#define nsAtts (parser->m_nsAtts)
#define nsAttsVersion (parser->m_nsAttsVersion)
#define nsAttsPower (parser->m_nsAttsPower)
#define attInfo (parser->m_attInfo)
#define tempPool (parser->m_tempPool)
#define temp2Pool (parser->m_temp2Pool)
#define groupConnector (parser->m_groupConnector)
#define groupSize (parser->m_groupSize)
#define namespaceSeparator (parser->m_namespaceSeparator)
#define parentParser (parser->m_parentParser)
#define ps_parsing (parser->m_parsingStatus.parsing)
#define ps_finalBuffer (parser->m_parsingStatus.finalBuffer)
#ifdef XML_DTD
#define isParamEntity (parser->m_isParamEntity)
#define useForeignDTD (parser->m_useForeignDTD)
#define paramEntityParsing (parser->m_paramEntityParsing)
#endif /* XML_DTD */
#define hash_secret_salt (parser->m_hash_secret_salt)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000676
Fred Drake08317ae2003-10-21 15:38:55 +0000677XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000678XML_ParserCreate(const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000679{
680 return XML_ParserCreate_MM(encodingName, NULL, NULL);
681}
682
Fred Drake08317ae2003-10-21 15:38:55 +0000683XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000684XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000685{
686 XML_Char tmp[2];
687 *tmp = nsSep;
688 return XML_ParserCreate_MM(encodingName, NULL, tmp);
689}
690
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000691static const XML_Char implicitContext[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700692 ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, ASCII_t, ASCII_t, ASCII_p,
693 ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
694 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g,
695 ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9,
696 ASCII_9, ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
697 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0'
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000698};
699
/* Return a small amount of clock-derived entropy for seeding the hash
   salt.  On WIN32 this is the XOR of the two halves of the current
   FILETIME; elsewhere it is the microseconds field reported by
   gettimeofday().
*/
static unsigned long
gather_time_entropy(void)
{
#ifdef WIN32
  FILETIME ft;
  GetSystemTimeAsFileTime(&ft); /* never fails */
  return ft.dwHighDateTime ^ ft.dwLowDateTime;
#else
  struct timeval tv;
  int gettimeofday_res;

  /* Start from defined values so that a failing gettimeofday() in an
     NDEBUG build cannot make us return indeterminate stack contents. */
  tv.tv_sec = 0;
  tv.tv_usec = 0;

  gettimeofday_res = gettimeofday(&tv, NULL);
  assert (gettimeofday_res == 0);
  /* Only assert() reads the result; keep NDEBUG builds warning-free. */
  (void)gettimeofday_res;

  /* Microseconds time is <20 bits entropy */
  return (unsigned long)tv.tv_usec;
#endif
}
718
719static unsigned long
720generate_hash_secret_salt(XML_Parser parser)
721{
722 /* Process ID is 0 bits entropy if attacker has local access
723 * XML_Parser address is few bits of entropy if attacker has local access */
724 const unsigned long entropy =
725 gather_time_entropy() ^ getpid() ^ (unsigned long)parser;
726
727 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
728 if (sizeof(unsigned long) == 4) {
729 return entropy * 2147483647;
730 } else {
731 return entropy * (unsigned long)2305843009213693951;
732 }
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700733}
734
735static XML_Bool /* only valid for root parser */
736startParsing(XML_Parser parser)
737{
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700738 /* hash functions must be initialized before setContext() is called */
739 if (hash_secret_salt == 0)
Victor Stinner23ec4b52017-06-15 00:54:36 +0200740 hash_secret_salt = generate_hash_secret_salt(parser);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700741 if (ns) {
742 /* implicit context only set for root parser, since child
743 parsers (i.e. external entity parsers) will inherit it
744 */
745 return setContext(parser, implicitContext);
746 }
747 return XML_TRUE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700748}
749
750XML_Parser XMLCALL
751XML_ParserCreate_MM(const XML_Char *encodingName,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700752 const XML_Memory_Handling_Suite *memsuite,
753 const XML_Char *nameSep)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700754{
755 return parserCreate(encodingName, memsuite, nameSep, NULL);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000756}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000757
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000758static XML_Parser
759parserCreate(const XML_Char *encodingName,
760 const XML_Memory_Handling_Suite *memsuite,
761 const XML_Char *nameSep,
762 DTD *dtd)
763{
764 XML_Parser parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000765
766 if (memsuite) {
767 XML_Memory_Handling_Suite *mtemp;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000768 parser = (XML_Parser)
769 memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
770 if (parser != NULL) {
771 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
772 mtemp->malloc_fcn = memsuite->malloc_fcn;
773 mtemp->realloc_fcn = memsuite->realloc_fcn;
774 mtemp->free_fcn = memsuite->free_fcn;
775 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000776 }
777 else {
778 XML_Memory_Handling_Suite *mtemp;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000779 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
780 if (parser != NULL) {
781 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
782 mtemp->malloc_fcn = malloc;
783 mtemp->realloc_fcn = realloc;
784 mtemp->free_fcn = free;
785 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000786 }
787
788 if (!parser)
789 return parser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000790
791 buffer = NULL;
792 bufferLim = NULL;
793
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000794 attsSize = INIT_ATTS_SIZE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000795 atts = (ATTRIBUTE *)MALLOC(attsSize * sizeof(ATTRIBUTE));
796 if (atts == NULL) {
797 FREE(parser);
798 return NULL;
799 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700800#ifdef XML_ATTR_INFO
801 attInfo = (XML_AttrInfo*)MALLOC(attsSize * sizeof(XML_AttrInfo));
802 if (attInfo == NULL) {
803 FREE(atts);
804 FREE(parser);
805 return NULL;
806 }
807#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000808 dataBuf = (XML_Char *)MALLOC(INIT_DATA_BUF_SIZE * sizeof(XML_Char));
809 if (dataBuf == NULL) {
810 FREE(atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700811#ifdef XML_ATTR_INFO
812 FREE(attInfo);
813#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000814 FREE(parser);
815 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000816 }
817 dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE;
818
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000819 if (dtd)
820 _dtd = dtd;
821 else {
822 _dtd = dtdCreate(&parser->m_mem);
823 if (_dtd == NULL) {
824 FREE(dataBuf);
825 FREE(atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700826#ifdef XML_ATTR_INFO
827 FREE(attInfo);
828#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000829 FREE(parser);
830 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000831 }
832 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000833
834 freeBindingList = NULL;
835 freeTagList = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +0000836 freeInternalEntities = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000837
838 groupSize = 0;
839 groupConnector = NULL;
840
841 unknownEncodingHandler = NULL;
842 unknownEncodingHandlerData = NULL;
843
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700844 namespaceSeparator = ASCII_EXCL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000845 ns = XML_FALSE;
846 ns_triplets = XML_FALSE;
847
Fred Drake08317ae2003-10-21 15:38:55 +0000848 nsAtts = NULL;
849 nsAttsVersion = 0;
850 nsAttsPower = 0;
851
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000852 poolInit(&tempPool, &(parser->m_mem));
853 poolInit(&temp2Pool, &(parser->m_mem));
854 parserInit(parser, encodingName);
855
856 if (encodingName && !protocolEncodingName) {
857 XML_ParserFree(parser);
858 return NULL;
859 }
860
861 if (nameSep) {
862 ns = XML_TRUE;
863 internalEncoding = XmlGetInternalEncodingNS();
864 namespaceSeparator = *nameSep;
865 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000866 else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000867 internalEncoding = XmlGetInternalEncoding();
868 }
869
870 return parser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000871}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000872
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000873static void
874parserInit(XML_Parser parser, const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000875{
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000876 processor = prologInitProcessor;
877 XmlPrologStateInit(&prologState);
878 protocolEncodingName = (encodingName != NULL
879 ? poolCopyString(&tempPool, encodingName)
880 : NULL);
881 curBase = NULL;
882 XmlInitEncoding(&initEncoding, &encoding, 0);
883 userData = NULL;
884 handlerArg = NULL;
885 startElementHandler = NULL;
886 endElementHandler = NULL;
887 characterDataHandler = NULL;
888 processingInstructionHandler = NULL;
889 commentHandler = NULL;
890 startCdataSectionHandler = NULL;
891 endCdataSectionHandler = NULL;
892 defaultHandler = NULL;
893 startDoctypeDeclHandler = NULL;
894 endDoctypeDeclHandler = NULL;
895 unparsedEntityDeclHandler = NULL;
896 notationDeclHandler = NULL;
897 startNamespaceDeclHandler = NULL;
898 endNamespaceDeclHandler = NULL;
899 notStandaloneHandler = NULL;
900 externalEntityRefHandler = NULL;
901 externalEntityRefHandlerArg = parser;
902 skippedEntityHandler = NULL;
903 elementDeclHandler = NULL;
904 attlistDeclHandler = NULL;
905 entityDeclHandler = NULL;
906 xmlDeclHandler = NULL;
907 bufferPtr = buffer;
908 bufferEnd = buffer;
909 parseEndByteIndex = 0;
910 parseEndPtr = NULL;
911 declElementType = NULL;
912 declAttributeId = NULL;
913 declEntity = NULL;
914 doctypeName = NULL;
915 doctypeSysid = NULL;
916 doctypePubid = NULL;
917 declAttributeType = NULL;
918 declNotationName = NULL;
919 declNotationPublicId = NULL;
920 declAttributeIsCdata = XML_FALSE;
921 declAttributeIsId = XML_FALSE;
922 memset(&position, 0, sizeof(POSITION));
923 errorCode = XML_ERROR_NONE;
924 eventPtr = NULL;
925 eventEndPtr = NULL;
926 positionPtr = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +0000927 openInternalEntities = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000928 defaultExpandInternalEntities = XML_TRUE;
929 tagLevel = 0;
930 tagStack = NULL;
931 inheritedBindings = NULL;
932 nSpecifiedAtts = 0;
933 unknownEncodingMem = NULL;
934 unknownEncodingRelease = NULL;
935 unknownEncodingData = NULL;
936 parentParser = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000937 ps_parsing = XML_INITIALIZED;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000938#ifdef XML_DTD
939 isParamEntity = XML_FALSE;
940 useForeignDTD = XML_FALSE;
941 paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
942#endif
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700943 hash_secret_salt = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000944}
945
946/* moves list of bindings to freeBindingList */
947static void FASTCALL
948moveToFreeBindingList(XML_Parser parser, BINDING *bindings)
949{
950 while (bindings) {
951 BINDING *b = bindings;
952 bindings = bindings->nextTagBinding;
953 b->nextTagBinding = freeBindingList;
954 freeBindingList = b;
955 }
956}
957
Fred Drake08317ae2003-10-21 15:38:55 +0000958XML_Bool XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000959XML_ParserReset(XML_Parser parser, const XML_Char *encodingName)
960{
961 TAG *tStk;
Fred Drake31d485c2004-08-03 07:06:22 +0000962 OPEN_INTERNAL_ENTITY *openEntityList;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000963 if (parentParser)
964 return XML_FALSE;
965 /* move tagStack to freeTagList */
966 tStk = tagStack;
967 while (tStk) {
968 TAG *tag = tStk;
969 tStk = tStk->parent;
970 tag->parent = freeTagList;
971 moveToFreeBindingList(parser, tag->bindings);
972 tag->bindings = NULL;
973 freeTagList = tag;
974 }
Fred Drake31d485c2004-08-03 07:06:22 +0000975 /* move openInternalEntities to freeInternalEntities */
976 openEntityList = openInternalEntities;
977 while (openEntityList) {
978 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
979 openEntityList = openEntity->next;
980 openEntity->next = freeInternalEntities;
981 freeInternalEntities = openEntity;
982 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000983 moveToFreeBindingList(parser, inheritedBindings);
Fred Drake08317ae2003-10-21 15:38:55 +0000984 FREE(unknownEncodingMem);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000985 if (unknownEncodingRelease)
986 unknownEncodingRelease(unknownEncodingData);
987 poolClear(&tempPool);
988 poolClear(&temp2Pool);
989 parserInit(parser, encodingName);
990 dtdReset(_dtd, &parser->m_mem);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700991 return XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000992}
993
Fred Drake08317ae2003-10-21 15:38:55 +0000994enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000995XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
996{
997 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
998 XXX There's no way for the caller to determine which of the
999 XXX possible error cases caused the XML_STATUS_ERROR return.
1000 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001001 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001002 return XML_STATUS_ERROR;
1003 if (encodingName == NULL)
1004 protocolEncodingName = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001005 else {
1006 protocolEncodingName = poolCopyString(&tempPool, encodingName);
1007 if (!protocolEncodingName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001008 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001009 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001010 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001011}
1012
Fred Drake08317ae2003-10-21 15:38:55 +00001013XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001014XML_ExternalEntityParserCreate(XML_Parser oldParser,
1015 const XML_Char *context,
1016 const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001017{
1018 XML_Parser parser = oldParser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001019 DTD *newDtd = NULL;
1020 DTD *oldDtd = _dtd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001021 XML_StartElementHandler oldStartElementHandler = startElementHandler;
1022 XML_EndElementHandler oldEndElementHandler = endElementHandler;
1023 XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001024 XML_ProcessingInstructionHandler oldProcessingInstructionHandler
1025 = processingInstructionHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001026 XML_CommentHandler oldCommentHandler = commentHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001027 XML_StartCdataSectionHandler oldStartCdataSectionHandler
1028 = startCdataSectionHandler;
1029 XML_EndCdataSectionHandler oldEndCdataSectionHandler
1030 = endCdataSectionHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001031 XML_DefaultHandler oldDefaultHandler = defaultHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001032 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler
1033 = unparsedEntityDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001034 XML_NotationDeclHandler oldNotationDeclHandler = notationDeclHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001035 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler
1036 = startNamespaceDeclHandler;
1037 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler
1038 = endNamespaceDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001039 XML_NotStandaloneHandler oldNotStandaloneHandler = notStandaloneHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001040 XML_ExternalEntityRefHandler oldExternalEntityRefHandler
1041 = externalEntityRefHandler;
1042 XML_SkippedEntityHandler oldSkippedEntityHandler = skippedEntityHandler;
1043 XML_UnknownEncodingHandler oldUnknownEncodingHandler
1044 = unknownEncodingHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001045 XML_ElementDeclHandler oldElementDeclHandler = elementDeclHandler;
1046 XML_AttlistDeclHandler oldAttlistDeclHandler = attlistDeclHandler;
1047 XML_EntityDeclHandler oldEntityDeclHandler = entityDeclHandler;
1048 XML_XmlDeclHandler oldXmlDeclHandler = xmlDeclHandler;
1049 ELEMENT_TYPE * oldDeclElementType = declElementType;
1050
1051 void *oldUserData = userData;
1052 void *oldHandlerArg = handlerArg;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001053 XML_Bool oldDefaultExpandInternalEntities = defaultExpandInternalEntities;
1054 XML_Parser oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001055#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001056 enum XML_ParamEntityParsing oldParamEntityParsing = paramEntityParsing;
1057 int oldInEntityValue = prologState.inEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001058#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001059 XML_Bool oldns_triplets = ns_triplets;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001060 /* Note that the new parser shares the same hash secret as the old
1061 parser, so that dtdCopy and copyEntityTable can lookup values
1062 from hash tables associated with either parser without us having
1063 to worry which hash secrets each table has.
1064 */
1065 unsigned long oldhash_secret_salt = hash_secret_salt;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001066
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001067#ifdef XML_DTD
1068 if (!context)
1069 newDtd = oldDtd;
1070#endif /* XML_DTD */
1071
1072 /* Note that the magical uses of the pre-processor to make field
1073 access look more like C++ require that `parser' be overwritten
1074 here. This makes this function more painful to follow than it
1075 would be otherwise.
1076 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001077 if (ns) {
1078 XML_Char tmp[2];
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001079 *tmp = namespaceSeparator;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001080 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001081 }
1082 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001083 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001084 }
1085
1086 if (!parser)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001087 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001088
1089 startElementHandler = oldStartElementHandler;
1090 endElementHandler = oldEndElementHandler;
1091 characterDataHandler = oldCharacterDataHandler;
1092 processingInstructionHandler = oldProcessingInstructionHandler;
1093 commentHandler = oldCommentHandler;
1094 startCdataSectionHandler = oldStartCdataSectionHandler;
1095 endCdataSectionHandler = oldEndCdataSectionHandler;
1096 defaultHandler = oldDefaultHandler;
1097 unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1098 notationDeclHandler = oldNotationDeclHandler;
1099 startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1100 endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1101 notStandaloneHandler = oldNotStandaloneHandler;
1102 externalEntityRefHandler = oldExternalEntityRefHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001103 skippedEntityHandler = oldSkippedEntityHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001104 unknownEncodingHandler = oldUnknownEncodingHandler;
1105 elementDeclHandler = oldElementDeclHandler;
1106 attlistDeclHandler = oldAttlistDeclHandler;
1107 entityDeclHandler = oldEntityDeclHandler;
1108 xmlDeclHandler = oldXmlDeclHandler;
1109 declElementType = oldDeclElementType;
1110 userData = oldUserData;
1111 if (oldUserData == oldHandlerArg)
1112 handlerArg = userData;
1113 else
1114 handlerArg = parser;
1115 if (oldExternalEntityRefHandlerArg != oldParser)
1116 externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1117 defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1118 ns_triplets = oldns_triplets;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001119 hash_secret_salt = oldhash_secret_salt;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001120 parentParser = oldParser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001121#ifdef XML_DTD
1122 paramEntityParsing = oldParamEntityParsing;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001123 prologState.inEntityValue = oldInEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001124 if (context) {
1125#endif /* XML_DTD */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001126 if (!dtdCopy(oldParser, _dtd, oldDtd, &parser->m_mem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001127 || !setContext(parser, context)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001128 XML_ParserFree(parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001129 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001130 }
1131 processor = externalEntityInitProcessor;
1132#ifdef XML_DTD
1133 }
1134 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001135 /* The DTD instance referenced by _dtd is shared between the document's
1136 root parser and external PE parsers, therefore one does not need to
1137 call setContext. In addition, one also *must* not call setContext,
1138 because this would overwrite existing prefix->binding pointers in
1139 _dtd with ones that get destroyed with the external PE parser.
1140 This would leave those prefixes with dangling pointers.
1141 */
1142 isParamEntity = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001143 XmlPrologStateInitExternalEntity(&prologState);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001144 processor = externalParEntInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001145 }
1146#endif /* XML_DTD */
1147 return parser;
1148}
1149
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001150static void FASTCALL
1151destroyBindings(BINDING *bindings, XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001152{
1153 for (;;) {
1154 BINDING *b = bindings;
1155 if (!b)
1156 break;
1157 bindings = b->nextTagBinding;
1158 FREE(b->uri);
1159 FREE(b);
1160 }
1161}
1162
Fred Drake08317ae2003-10-21 15:38:55 +00001163void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001164XML_ParserFree(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001165{
Fred Drake31d485c2004-08-03 07:06:22 +00001166 TAG *tagList;
1167 OPEN_INTERNAL_ENTITY *entityList;
1168 if (parser == NULL)
1169 return;
1170 /* free tagStack and freeTagList */
1171 tagList = tagStack;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001172 for (;;) {
1173 TAG *p;
Fred Drake31d485c2004-08-03 07:06:22 +00001174 if (tagList == NULL) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001175 if (freeTagList == NULL)
1176 break;
Fred Drake31d485c2004-08-03 07:06:22 +00001177 tagList = freeTagList;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001178 freeTagList = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001179 }
Fred Drake31d485c2004-08-03 07:06:22 +00001180 p = tagList;
1181 tagList = tagList->parent;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001182 FREE(p->buf);
1183 destroyBindings(p->bindings, parser);
1184 FREE(p);
1185 }
Fred Drake31d485c2004-08-03 07:06:22 +00001186 /* free openInternalEntities and freeInternalEntities */
1187 entityList = openInternalEntities;
1188 for (;;) {
1189 OPEN_INTERNAL_ENTITY *openEntity;
1190 if (entityList == NULL) {
1191 if (freeInternalEntities == NULL)
1192 break;
1193 entityList = freeInternalEntities;
1194 freeInternalEntities = NULL;
1195 }
1196 openEntity = entityList;
1197 entityList = entityList->next;
1198 FREE(openEntity);
1199 }
1200
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001201 destroyBindings(freeBindingList, parser);
1202 destroyBindings(inheritedBindings, parser);
1203 poolDestroy(&tempPool);
1204 poolDestroy(&temp2Pool);
1205#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001206 /* external parameter entity parsers share the DTD structure
1207 parser->m_dtd with the root parser, so we must not destroy it
1208 */
1209 if (!isParamEntity && _dtd)
1210#else
1211 if (_dtd)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001212#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001213 dtdDestroy(_dtd, (XML_Bool)!parentParser, &parser->m_mem);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001214 FREE((void *)atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001215#ifdef XML_ATTR_INFO
1216 FREE((void *)attInfo);
1217#endif
Fred Drake08317ae2003-10-21 15:38:55 +00001218 FREE(groupConnector);
1219 FREE(buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001220 FREE(dataBuf);
Fred Drake08317ae2003-10-21 15:38:55 +00001221 FREE(nsAtts);
1222 FREE(unknownEncodingMem);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001223 if (unknownEncodingRelease)
1224 unknownEncodingRelease(unknownEncodingData);
1225 FREE(parser);
1226}
1227
Fred Drake08317ae2003-10-21 15:38:55 +00001228void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001229XML_UseParserAsHandlerArg(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001230{
1231 handlerArg = parser;
1232}
1233
Fred Drake08317ae2003-10-21 15:38:55 +00001234enum XML_Error XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001235XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD)
1236{
1237#ifdef XML_DTD
1238 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001239 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001240 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1241 useForeignDTD = useDTD;
1242 return XML_ERROR_NONE;
1243#else
1244 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1245#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001246}
1247
Fred Drake08317ae2003-10-21 15:38:55 +00001248void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001249XML_SetReturnNSTriplet(XML_Parser parser, int do_nst)
1250{
1251 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001252 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001253 return;
1254 ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1255}
1256
Fred Drake08317ae2003-10-21 15:38:55 +00001257void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001258XML_SetUserData(XML_Parser parser, void *p)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001259{
1260 if (handlerArg == userData)
1261 handlerArg = userData = p;
1262 else
1263 userData = p;
1264}
1265
Fred Drake08317ae2003-10-21 15:38:55 +00001266enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001267XML_SetBase(XML_Parser parser, const XML_Char *p)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001268{
1269 if (p) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001270 p = poolCopyString(&_dtd->pool, p);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001271 if (!p)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001272 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001273 curBase = p;
1274 }
1275 else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001276 curBase = NULL;
1277 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001278}
1279
Fred Drake08317ae2003-10-21 15:38:55 +00001280const XML_Char * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001281XML_GetBase(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001282{
1283 return curBase;
1284}
1285
Fred Drake08317ae2003-10-21 15:38:55 +00001286int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001287XML_GetSpecifiedAttributeCount(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001288{
1289 return nSpecifiedAtts;
1290}
1291
Fred Drake08317ae2003-10-21 15:38:55 +00001292int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001293XML_GetIdAttributeIndex(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001294{
1295 return idAttIndex;
1296}
1297
#ifdef XML_ATTR_INFO
/* Return the parser's attInfo array (only built with XML_ATTR_INFO), or
   NULL when parser is NULL.
*/
const XML_AttrInfo * XMLCALL
XML_GetAttributeInfo(XML_Parser parser)
{
  if (parser == NULL)
    return NULL;
  return attInfo;
}
#endif
1305
Fred Drake08317ae2003-10-21 15:38:55 +00001306void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001307XML_SetElementHandler(XML_Parser parser,
1308 XML_StartElementHandler start,
1309 XML_EndElementHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001310{
1311 startElementHandler = start;
1312 endElementHandler = end;
1313}
1314
Fred Drake08317ae2003-10-21 15:38:55 +00001315void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001316XML_SetStartElementHandler(XML_Parser parser,
1317 XML_StartElementHandler start) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001318 startElementHandler = start;
1319}
1320
Fred Drake08317ae2003-10-21 15:38:55 +00001321void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001322XML_SetEndElementHandler(XML_Parser parser,
1323 XML_EndElementHandler end) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001324 endElementHandler = end;
1325}
1326
Fred Drake08317ae2003-10-21 15:38:55 +00001327void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001328XML_SetCharacterDataHandler(XML_Parser parser,
1329 XML_CharacterDataHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001330{
1331 characterDataHandler = handler;
1332}
1333
Fred Drake08317ae2003-10-21 15:38:55 +00001334void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001335XML_SetProcessingInstructionHandler(XML_Parser parser,
1336 XML_ProcessingInstructionHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001337{
1338 processingInstructionHandler = handler;
1339}
1340
Fred Drake08317ae2003-10-21 15:38:55 +00001341void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001342XML_SetCommentHandler(XML_Parser parser,
1343 XML_CommentHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001344{
1345 commentHandler = handler;
1346}
1347
Fred Drake08317ae2003-10-21 15:38:55 +00001348void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001349XML_SetCdataSectionHandler(XML_Parser parser,
1350 XML_StartCdataSectionHandler start,
1351 XML_EndCdataSectionHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001352{
1353 startCdataSectionHandler = start;
1354 endCdataSectionHandler = end;
1355}
1356
Fred Drake08317ae2003-10-21 15:38:55 +00001357void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001358XML_SetStartCdataSectionHandler(XML_Parser parser,
1359 XML_StartCdataSectionHandler start) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001360 startCdataSectionHandler = start;
1361}
1362
Fred Drake08317ae2003-10-21 15:38:55 +00001363void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001364XML_SetEndCdataSectionHandler(XML_Parser parser,
1365 XML_EndCdataSectionHandler end) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001366 endCdataSectionHandler = end;
1367}
1368
Fred Drake08317ae2003-10-21 15:38:55 +00001369void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001370XML_SetDefaultHandler(XML_Parser parser,
1371 XML_DefaultHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001372{
1373 defaultHandler = handler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001374 defaultExpandInternalEntities = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001375}
1376
Fred Drake08317ae2003-10-21 15:38:55 +00001377void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001378XML_SetDefaultHandlerExpand(XML_Parser parser,
1379 XML_DefaultHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001380{
1381 defaultHandler = handler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001382 defaultExpandInternalEntities = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001383}
1384
Fred Drake08317ae2003-10-21 15:38:55 +00001385void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001386XML_SetDoctypeDeclHandler(XML_Parser parser,
1387 XML_StartDoctypeDeclHandler start,
1388 XML_EndDoctypeDeclHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001389{
1390 startDoctypeDeclHandler = start;
1391 endDoctypeDeclHandler = end;
1392}
1393
Fred Drake08317ae2003-10-21 15:38:55 +00001394void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001395XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1396 XML_StartDoctypeDeclHandler start) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001397 startDoctypeDeclHandler = start;
1398}
1399
Fred Drake08317ae2003-10-21 15:38:55 +00001400void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001401XML_SetEndDoctypeDeclHandler(XML_Parser parser,
1402 XML_EndDoctypeDeclHandler end) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001403 endDoctypeDeclHandler = end;
1404}
1405
Fred Drake08317ae2003-10-21 15:38:55 +00001406void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001407XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1408 XML_UnparsedEntityDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001409{
1410 unparsedEntityDeclHandler = handler;
1411}
1412
Fred Drake08317ae2003-10-21 15:38:55 +00001413void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001414XML_SetNotationDeclHandler(XML_Parser parser,
1415 XML_NotationDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001416{
1417 notationDeclHandler = handler;
1418}
1419
Fred Drake08317ae2003-10-21 15:38:55 +00001420void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001421XML_SetNamespaceDeclHandler(XML_Parser parser,
1422 XML_StartNamespaceDeclHandler start,
1423 XML_EndNamespaceDeclHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001424{
1425 startNamespaceDeclHandler = start;
1426 endNamespaceDeclHandler = end;
1427}
1428
Fred Drake08317ae2003-10-21 15:38:55 +00001429void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001430XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1431 XML_StartNamespaceDeclHandler start) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001432 startNamespaceDeclHandler = start;
1433}
1434
Fred Drake08317ae2003-10-21 15:38:55 +00001435void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001436XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1437 XML_EndNamespaceDeclHandler end) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001438 endNamespaceDeclHandler = end;
1439}
1440
Fred Drake08317ae2003-10-21 15:38:55 +00001441void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001442XML_SetNotStandaloneHandler(XML_Parser parser,
1443 XML_NotStandaloneHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001444{
1445 notStandaloneHandler = handler;
1446}
1447
Fred Drake08317ae2003-10-21 15:38:55 +00001448void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001449XML_SetExternalEntityRefHandler(XML_Parser parser,
1450 XML_ExternalEntityRefHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001451{
1452 externalEntityRefHandler = handler;
1453}
1454
Fred Drake08317ae2003-10-21 15:38:55 +00001455void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001456XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001457{
1458 if (arg)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001459 externalEntityRefHandlerArg = (XML_Parser)arg;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001460 else
1461 externalEntityRefHandlerArg = parser;
1462}
1463
Fred Drake08317ae2003-10-21 15:38:55 +00001464void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001465XML_SetSkippedEntityHandler(XML_Parser parser,
1466 XML_SkippedEntityHandler handler)
1467{
1468 skippedEntityHandler = handler;
1469}
1470
Fred Drake08317ae2003-10-21 15:38:55 +00001471void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001472XML_SetUnknownEncodingHandler(XML_Parser parser,
1473 XML_UnknownEncodingHandler handler,
1474 void *data)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001475{
1476 unknownEncodingHandler = handler;
1477 unknownEncodingHandlerData = data;
1478}
1479
Fred Drake08317ae2003-10-21 15:38:55 +00001480void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001481XML_SetElementDeclHandler(XML_Parser parser,
1482 XML_ElementDeclHandler eldecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001483{
1484 elementDeclHandler = eldecl;
1485}
1486
Fred Drake08317ae2003-10-21 15:38:55 +00001487void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001488XML_SetAttlistDeclHandler(XML_Parser parser,
1489 XML_AttlistDeclHandler attdecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001490{
1491 attlistDeclHandler = attdecl;
1492}
1493
Fred Drake08317ae2003-10-21 15:38:55 +00001494void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001495XML_SetEntityDeclHandler(XML_Parser parser,
1496 XML_EntityDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001497{
1498 entityDeclHandler = handler;
1499}
1500
Fred Drake08317ae2003-10-21 15:38:55 +00001501void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001502XML_SetXmlDeclHandler(XML_Parser parser,
1503 XML_XmlDeclHandler handler) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001504 xmlDeclHandler = handler;
1505}
1506
Fred Drake08317ae2003-10-21 15:38:55 +00001507int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001508XML_SetParamEntityParsing(XML_Parser parser,
1509 enum XML_ParamEntityParsing peParsing)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001510{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001511 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001512 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001513 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001514#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001515 paramEntityParsing = peParsing;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001516 return 1;
1517#else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001518 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001519#endif
1520}
1521
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001522int XMLCALL
1523XML_SetHashSalt(XML_Parser parser,
1524 unsigned long hash_salt)
1525{
1526 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1527 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
1528 return 0;
1529 hash_secret_salt = hash_salt;
1530 return 1;
1531}
1532
Fred Drake08317ae2003-10-21 15:38:55 +00001533enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001534XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001535{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001536 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001537 case XML_SUSPENDED:
1538 errorCode = XML_ERROR_SUSPENDED;
1539 return XML_STATUS_ERROR;
1540 case XML_FINISHED:
1541 errorCode = XML_ERROR_FINISHED;
1542 return XML_STATUS_ERROR;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001543 case XML_INITIALIZED:
1544 if (parentParser == NULL && !startParsing(parser)) {
1545 errorCode = XML_ERROR_NO_MEMORY;
1546 return XML_STATUS_ERROR;
1547 }
Fred Drake31d485c2004-08-03 07:06:22 +00001548 default:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001549 ps_parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00001550 }
1551
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001552 if (len == 0) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001553 ps_finalBuffer = (XML_Bool)isFinal;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001554 if (!isFinal)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001555 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001556 positionPtr = bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00001557 parseEndPtr = bufferEnd;
1558
1559 /* If data are left over from last buffer, and we now know that these
1560 data are the final chunk of input, then we have to check them again
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001561 to detect errors based on that fact.
Fred Drake31d485c2004-08-03 07:06:22 +00001562 */
1563 errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr);
1564
1565 if (errorCode == XML_ERROR_NONE) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001566 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001567 case XML_SUSPENDED:
1568 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
1569 positionPtr = bufferPtr;
1570 return XML_STATUS_SUSPENDED;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001571 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00001572 case XML_PARSING:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001573 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001574 /* fall through */
1575 default:
1576 return XML_STATUS_OK;
1577 }
1578 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001579 eventEndPtr = eventPtr;
1580 processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001581 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001582 }
1583#ifndef XML_CONTEXT_BYTES
1584 else if (bufferPtr == bufferEnd) {
1585 const char *end;
1586 int nLeftOver;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001587 enum XML_Status result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001588 parseEndByteIndex += len;
1589 positionPtr = s;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001590 ps_finalBuffer = (XML_Bool)isFinal;
Fred Drake31d485c2004-08-03 07:06:22 +00001591
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001592 errorCode = processor(parser, s, parseEndPtr = s + len, &end);
Fred Drake31d485c2004-08-03 07:06:22 +00001593
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001594 if (errorCode != XML_ERROR_NONE) {
1595 eventEndPtr = eventPtr;
1596 processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001597 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001598 }
Fred Drake31d485c2004-08-03 07:06:22 +00001599 else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001600 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001601 case XML_SUSPENDED:
1602 result = XML_STATUS_SUSPENDED;
1603 break;
1604 case XML_INITIALIZED:
1605 case XML_PARSING:
Fred Drake31d485c2004-08-03 07:06:22 +00001606 if (isFinal) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001607 ps_parsing = XML_FINISHED;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001608 return XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001609 }
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001610 /* fall through */
1611 default:
1612 result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001613 }
1614 }
1615
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001616 XmlUpdatePosition(encoding, positionPtr, end, &position);
1617 nLeftOver = s + len - end;
1618 if (nLeftOver) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001619 if (buffer == NULL || nLeftOver > bufferLim - buffer) {
1620 /* FIXME avoid integer overflow */
1621 char *temp;
1622 temp = (buffer == NULL
1623 ? (char *)MALLOC(len * 2)
1624 : (char *)REALLOC(buffer, len * 2));
1625 if (temp == NULL) {
1626 errorCode = XML_ERROR_NO_MEMORY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001627 eventPtr = eventEndPtr = NULL;
1628 processor = errorProcessor;
1629 return XML_STATUS_ERROR;
1630 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001631 buffer = temp;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001632 bufferLim = buffer + len * 2;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001633 }
1634 memcpy(buffer, end, nLeftOver);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001635 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001636 bufferPtr = buffer;
1637 bufferEnd = buffer + nLeftOver;
1638 positionPtr = bufferPtr;
1639 parseEndPtr = bufferEnd;
1640 eventPtr = bufferPtr;
1641 eventEndPtr = bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00001642 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001643 }
1644#endif /* not defined XML_CONTEXT_BYTES */
1645 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001646 void *buff = XML_GetBuffer(parser, len);
1647 if (buff == NULL)
1648 return XML_STATUS_ERROR;
1649 else {
1650 memcpy(buff, s, len);
1651 return XML_ParseBuffer(parser, len, isFinal);
1652 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001653 }
1654}
1655
Fred Drake08317ae2003-10-21 15:38:55 +00001656enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001657XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001658{
Fred Drake31d485c2004-08-03 07:06:22 +00001659 const char *start;
Neal Norwitz52ca0dd2006-01-07 21:21:16 +00001660 enum XML_Status result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001661
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001662 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001663 case XML_SUSPENDED:
1664 errorCode = XML_ERROR_SUSPENDED;
1665 return XML_STATUS_ERROR;
1666 case XML_FINISHED:
1667 errorCode = XML_ERROR_FINISHED;
1668 return XML_STATUS_ERROR;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001669 case XML_INITIALIZED:
1670 if (parentParser == NULL && !startParsing(parser)) {
1671 errorCode = XML_ERROR_NO_MEMORY;
1672 return XML_STATUS_ERROR;
1673 }
Fred Drake31d485c2004-08-03 07:06:22 +00001674 default:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001675 ps_parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00001676 }
1677
1678 start = bufferPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001679 positionPtr = start;
1680 bufferEnd += len;
Fred Drake31d485c2004-08-03 07:06:22 +00001681 parseEndPtr = bufferEnd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001682 parseEndByteIndex += len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001683 ps_finalBuffer = (XML_Bool)isFinal;
Fred Drake31d485c2004-08-03 07:06:22 +00001684
1685 errorCode = processor(parser, start, parseEndPtr, &bufferPtr);
1686
1687 if (errorCode != XML_ERROR_NONE) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001688 eventEndPtr = eventPtr;
1689 processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001690 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001691 }
Fred Drake31d485c2004-08-03 07:06:22 +00001692 else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001693 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001694 case XML_SUSPENDED:
1695 result = XML_STATUS_SUSPENDED;
1696 break;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001697 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00001698 case XML_PARSING:
1699 if (isFinal) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001700 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001701 return result;
1702 }
1703 default: ; /* should not happen */
1704 }
1705 }
1706
1707 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
1708 positionPtr = bufferPtr;
1709 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001710}
1711
Fred Drake08317ae2003-10-21 15:38:55 +00001712void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001713XML_GetBuffer(XML_Parser parser, int len)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001714{
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001715 if (len < 0) {
1716 errorCode = XML_ERROR_NO_MEMORY;
1717 return NULL;
1718 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001719 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001720 case XML_SUSPENDED:
1721 errorCode = XML_ERROR_SUSPENDED;
1722 return NULL;
1723 case XML_FINISHED:
1724 errorCode = XML_ERROR_FINISHED;
1725 return NULL;
1726 default: ;
1727 }
1728
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001729 if (len > bufferLim - bufferEnd) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001730#ifdef XML_CONTEXT_BYTES
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001731 int keep;
Victor Stinner23ec4b52017-06-15 00:54:36 +02001732#endif /* defined XML_CONTEXT_BYTES */
1733 /* Do not invoke signed arithmetic overflow: */
1734 int neededSize = (int) ((unsigned)len + (unsigned)(bufferEnd - bufferPtr));
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001735 if (neededSize < 0) {
1736 errorCode = XML_ERROR_NO_MEMORY;
1737 return NULL;
1738 }
1739#ifdef XML_CONTEXT_BYTES
1740 keep = (int)(bufferPtr - buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001741 if (keep > XML_CONTEXT_BYTES)
1742 keep = XML_CONTEXT_BYTES;
1743 neededSize += keep;
1744#endif /* defined XML_CONTEXT_BYTES */
1745 if (neededSize <= bufferLim - buffer) {
1746#ifdef XML_CONTEXT_BYTES
1747 if (keep < bufferPtr - buffer) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001748 int offset = (int)(bufferPtr - buffer) - keep;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001749 memmove(buffer, &buffer[offset], bufferEnd - bufferPtr + keep);
1750 bufferEnd -= offset;
1751 bufferPtr -= offset;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001752 }
1753#else
1754 memmove(buffer, bufferPtr, bufferEnd - bufferPtr);
1755 bufferEnd = buffer + (bufferEnd - bufferPtr);
1756 bufferPtr = buffer;
1757#endif /* not defined XML_CONTEXT_BYTES */
1758 }
1759 else {
1760 char *newBuf;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001761 int bufferSize = (int)(bufferLim - bufferPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001762 if (bufferSize == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001763 bufferSize = INIT_BUFFER_SIZE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001764 do {
Victor Stinner23ec4b52017-06-15 00:54:36 +02001765 /* Do not invoke signed arithmetic overflow: */
1766 bufferSize = (int) (2U * (unsigned) bufferSize);
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001767 } while (bufferSize < neededSize && bufferSize > 0);
1768 if (bufferSize <= 0) {
1769 errorCode = XML_ERROR_NO_MEMORY;
1770 return NULL;
1771 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001772 newBuf = (char *)MALLOC(bufferSize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001773 if (newBuf == 0) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001774 errorCode = XML_ERROR_NO_MEMORY;
1775 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001776 }
1777 bufferLim = newBuf + bufferSize;
1778#ifdef XML_CONTEXT_BYTES
1779 if (bufferPtr) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001780 int keep = (int)(bufferPtr - buffer);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001781 if (keep > XML_CONTEXT_BYTES)
1782 keep = XML_CONTEXT_BYTES;
1783 memcpy(newBuf, &bufferPtr[-keep], bufferEnd - bufferPtr + keep);
1784 FREE(buffer);
1785 buffer = newBuf;
1786 bufferEnd = buffer + (bufferEnd - bufferPtr) + keep;
1787 bufferPtr = buffer + keep;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001788 }
1789 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001790 bufferEnd = newBuf + (bufferEnd - bufferPtr);
1791 bufferPtr = buffer = newBuf;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001792 }
1793#else
1794 if (bufferPtr) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001795 memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr);
1796 FREE(buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001797 }
1798 bufferEnd = newBuf + (bufferEnd - bufferPtr);
1799 bufferPtr = buffer = newBuf;
1800#endif /* not defined XML_CONTEXT_BYTES */
1801 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001802 eventPtr = eventEndPtr = NULL;
1803 positionPtr = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001804 }
1805 return bufferEnd;
1806}
1807
Fred Drake31d485c2004-08-03 07:06:22 +00001808enum XML_Status XMLCALL
1809XML_StopParser(XML_Parser parser, XML_Bool resumable)
1810{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001811 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001812 case XML_SUSPENDED:
1813 if (resumable) {
1814 errorCode = XML_ERROR_SUSPENDED;
1815 return XML_STATUS_ERROR;
1816 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001817 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001818 break;
1819 case XML_FINISHED:
1820 errorCode = XML_ERROR_FINISHED;
1821 return XML_STATUS_ERROR;
1822 default:
1823 if (resumable) {
1824#ifdef XML_DTD
1825 if (isParamEntity) {
1826 errorCode = XML_ERROR_SUSPEND_PE;
1827 return XML_STATUS_ERROR;
1828 }
1829#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001830 ps_parsing = XML_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00001831 }
1832 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001833 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001834 }
1835 return XML_STATUS_OK;
1836}
1837
1838enum XML_Status XMLCALL
1839XML_ResumeParser(XML_Parser parser)
1840{
Neal Norwitz52ca0dd2006-01-07 21:21:16 +00001841 enum XML_Status result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001842
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001843 if (ps_parsing != XML_SUSPENDED) {
Fred Drake31d485c2004-08-03 07:06:22 +00001844 errorCode = XML_ERROR_NOT_SUSPENDED;
1845 return XML_STATUS_ERROR;
1846 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001847 ps_parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00001848
1849 errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr);
1850
1851 if (errorCode != XML_ERROR_NONE) {
1852 eventEndPtr = eventPtr;
1853 processor = errorProcessor;
1854 return XML_STATUS_ERROR;
1855 }
1856 else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001857 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001858 case XML_SUSPENDED:
1859 result = XML_STATUS_SUSPENDED;
1860 break;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001861 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00001862 case XML_PARSING:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001863 if (ps_finalBuffer) {
1864 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001865 return result;
1866 }
1867 default: ;
1868 }
1869 }
1870
1871 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
1872 positionPtr = bufferPtr;
1873 return result;
1874}
1875
1876void XMLCALL
1877XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status)
1878{
1879 assert(status != NULL);
1880 *status = parser->m_parsingStatus;
1881}
1882
Fred Drake08317ae2003-10-21 15:38:55 +00001883enum XML_Error XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001884XML_GetErrorCode(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001885{
1886 return errorCode;
1887}
1888
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001889XML_Index XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001890XML_GetCurrentByteIndex(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001891{
1892 if (eventPtr)
Victor Stinner23ec4b52017-06-15 00:54:36 +02001893 return (XML_Index)(parseEndByteIndex - (parseEndPtr - eventPtr));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001894 return -1;
1895}
1896
Fred Drake08317ae2003-10-21 15:38:55 +00001897int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001898XML_GetCurrentByteCount(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001899{
1900 if (eventEndPtr && eventPtr)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001901 return (int)(eventEndPtr - eventPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001902 return 0;
1903}
1904
Fred Drake08317ae2003-10-21 15:38:55 +00001905const char * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001906XML_GetInputContext(XML_Parser parser, int *offset, int *size)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001907{
1908#ifdef XML_CONTEXT_BYTES
1909 if (eventPtr && buffer) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001910 *offset = (int)(eventPtr - buffer);
1911 *size = (int)(bufferEnd - buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001912 return buffer;
1913 }
1914#endif /* defined XML_CONTEXT_BYTES */
1915 return (char *) 0;
1916}
1917
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001918XML_Size XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001919XML_GetCurrentLineNumber(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001920{
Fred Drake31d485c2004-08-03 07:06:22 +00001921 if (eventPtr && eventPtr >= positionPtr) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001922 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
1923 positionPtr = eventPtr;
1924 }
1925 return position.lineNumber + 1;
1926}
1927
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001928XML_Size XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001929XML_GetCurrentColumnNumber(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001930{
Fred Drake31d485c2004-08-03 07:06:22 +00001931 if (eventPtr && eventPtr >= positionPtr) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001932 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
1933 positionPtr = eventPtr;
1934 }
1935 return position.columnNumber;
1936}
1937
Fred Drake08317ae2003-10-21 15:38:55 +00001938void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001939XML_FreeContentModel(XML_Parser parser, XML_Content *model)
1940{
1941 FREE(model);
1942}
1943
Fred Drake08317ae2003-10-21 15:38:55 +00001944void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001945XML_MemMalloc(XML_Parser parser, size_t size)
1946{
1947 return MALLOC(size);
1948}
1949
Fred Drake08317ae2003-10-21 15:38:55 +00001950void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001951XML_MemRealloc(XML_Parser parser, void *ptr, size_t size)
1952{
1953 return REALLOC(ptr, size);
1954}
1955
Fred Drake08317ae2003-10-21 15:38:55 +00001956void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001957XML_MemFree(XML_Parser parser, void *ptr)
1958{
1959 FREE(ptr);
1960}
1961
Fred Drake08317ae2003-10-21 15:38:55 +00001962void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001963XML_DefaultCurrent(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001964{
1965 if (defaultHandler) {
1966 if (openInternalEntities)
1967 reportDefault(parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001968 internalEncoding,
1969 openInternalEntities->internalEventPtr,
1970 openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001971 else
1972 reportDefault(parser, encoding, eventPtr, eventEndPtr);
1973 }
1974}
1975
Fred Drake08317ae2003-10-21 15:38:55 +00001976const XML_LChar * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001977XML_ErrorString(enum XML_Error code)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001978{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001979 static const XML_LChar* const message[] = {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001980 0,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001981 XML_L("out of memory"),
1982 XML_L("syntax error"),
1983 XML_L("no element found"),
1984 XML_L("not well-formed (invalid token)"),
1985 XML_L("unclosed token"),
1986 XML_L("partial character"),
1987 XML_L("mismatched tag"),
1988 XML_L("duplicate attribute"),
1989 XML_L("junk after document element"),
1990 XML_L("illegal parameter entity reference"),
1991 XML_L("undefined entity"),
1992 XML_L("recursive entity reference"),
1993 XML_L("asynchronous entity"),
1994 XML_L("reference to invalid character number"),
1995 XML_L("reference to binary entity"),
1996 XML_L("reference to external entity in attribute"),
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001997 XML_L("XML or text declaration not at start of entity"),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001998 XML_L("unknown encoding"),
1999 XML_L("encoding specified in XML declaration is incorrect"),
2000 XML_L("unclosed CDATA section"),
2001 XML_L("error in processing external entity reference"),
2002 XML_L("document is not standalone"),
2003 XML_L("unexpected parser state - please send a bug report"),
2004 XML_L("entity declared in parameter entity"),
2005 XML_L("requested feature requires XML_DTD support in Expat"),
Fred Drake08317ae2003-10-21 15:38:55 +00002006 XML_L("cannot change setting once parsing has begun"),
Fred Drake31d485c2004-08-03 07:06:22 +00002007 XML_L("unbound prefix"),
2008 XML_L("must not undeclare prefix"),
2009 XML_L("incomplete markup in parameter entity"),
2010 XML_L("XML declaration not well-formed"),
2011 XML_L("text declaration not well-formed"),
2012 XML_L("illegal character(s) in public id"),
2013 XML_L("parser suspended"),
2014 XML_L("parser not suspended"),
2015 XML_L("parsing aborted"),
2016 XML_L("parsing finished"),
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002017 XML_L("cannot suspend in external parameter entity"),
2018 XML_L("reserved prefix (xml) must not be undeclared or bound to another namespace name"),
2019 XML_L("reserved prefix (xmlns) must not be declared or undeclared"),
2020 XML_L("prefix must not be bound to one of the reserved namespace names")
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002021 };
2022 if (code > 0 && code < sizeof(message)/sizeof(message[0]))
2023 return message[code];
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002024 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002025}
2026
Fred Drake08317ae2003-10-21 15:38:55 +00002027const XML_LChar * XMLCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002028XML_ExpatVersion(void) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002029
2030 /* V1 is used to string-ize the version number. However, it would
2031 string-ize the actual version macro *names* unless we get them
2032 substituted before being passed to V1. CPP is defined to expand
2033 a macro, then rescan for more expansions. Thus, we use V2 to expand
2034 the version macros, then CPP will expand the resulting V1() macro
2035 with the correct numerals. */
2036 /* ### I'm assuming cpp is portable in this respect... */
2037
2038#define V1(a,b,c) XML_L(#a)XML_L(".")XML_L(#b)XML_L(".")XML_L(#c)
2039#define V2(a,b,c) XML_L("expat_")V1(a,b,c)
2040
2041 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2042
2043#undef V1
2044#undef V2
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002045}
2046
Fred Drake08317ae2003-10-21 15:38:55 +00002047XML_Expat_Version XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002048XML_ExpatVersionInfo(void)
2049{
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002050 XML_Expat_Version version;
2051
2052 version.major = XML_MAJOR_VERSION;
2053 version.minor = XML_MINOR_VERSION;
2054 version.micro = XML_MICRO_VERSION;
2055
2056 return version;
2057}
2058
Fred Drake08317ae2003-10-21 15:38:55 +00002059const XML_Feature * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002060XML_GetFeatureList(void)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002061{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002062 static const XML_Feature features[] = {
2063 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2064 sizeof(XML_Char)},
2065 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2066 sizeof(XML_LChar)},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002067#ifdef XML_UNICODE
Fred Drake08317ae2003-10-21 15:38:55 +00002068 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002069#endif
2070#ifdef XML_UNICODE_WCHAR_T
Fred Drake08317ae2003-10-21 15:38:55 +00002071 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002072#endif
2073#ifdef XML_DTD
Fred Drake08317ae2003-10-21 15:38:55 +00002074 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002075#endif
2076#ifdef XML_CONTEXT_BYTES
2077 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2078 XML_CONTEXT_BYTES},
2079#endif
2080#ifdef XML_MIN_SIZE
Fred Drake08317ae2003-10-21 15:38:55 +00002081 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002082#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002083#ifdef XML_NS
2084 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2085#endif
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002086#ifdef XML_LARGE_SIZE
2087 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2088#endif
2089#ifdef XML_ATTR_INFO
2090 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2091#endif
Fred Drake08317ae2003-10-21 15:38:55 +00002092 {XML_FEATURE_END, NULL, 0}
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002093 };
2094
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002095 return features;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002096}
2097
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002098/* Initially tag->rawName always points into the parse buffer;
2099 for those TAG instances opened while the current parse buffer was
2100 processed, and not yet closed, we need to store tag->rawName in a more
2101 permanent location, since the parse buffer is about to be discarded.
2102*/
2103static XML_Bool
2104storeRawNames(XML_Parser parser)
2105{
2106 TAG *tag = tagStack;
2107 while (tag) {
2108 int bufSize;
2109 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2110 char *rawNameBuf = tag->buf + nameLen;
2111 /* Stop if already stored. Since tagStack is a stack, we can stop
2112 at the first entry that has already been copied; everything
2113 below it in the stack is already been accounted for in a
2114 previous call to this function.
2115 */
2116 if (tag->rawName == rawNameBuf)
2117 break;
2118 /* For re-use purposes we need to ensure that the
2119 size of tag->buf is a multiple of sizeof(XML_Char).
2120 */
2121 bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2122 if (bufSize > tag->bufEnd - tag->buf) {
2123 char *temp = (char *)REALLOC(tag->buf, bufSize);
2124 if (temp == NULL)
2125 return XML_FALSE;
2126 /* if tag->name.str points to tag->buf (only when namespace
2127 processing is off) then we have to update it
2128 */
2129 if (tag->name.str == (XML_Char *)tag->buf)
2130 tag->name.str = (XML_Char *)temp;
2131 /* if tag->name.localPart is set (when namespace processing is on)
2132 then update it as well, since it will always point into tag->buf
2133 */
2134 if (tag->name.localPart)
2135 tag->name.localPart = (XML_Char *)temp + (tag->name.localPart -
2136 (XML_Char *)tag->buf);
2137 tag->buf = temp;
2138 tag->bufEnd = temp + bufSize;
2139 rawNameBuf = temp + nameLen;
2140 }
2141 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2142 tag->rawName = rawNameBuf;
2143 tag = tag->parent;
2144 }
2145 return XML_TRUE;
2146}
2147
2148static enum XML_Error PTRCALL
2149contentProcessor(XML_Parser parser,
2150 const char *start,
2151 const char *end,
2152 const char **endPtr)
2153{
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002154 enum XML_Error result = doContent(parser, 0, encoding, start, end,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002155 endPtr, (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00002156 if (result == XML_ERROR_NONE) {
2157 if (!storeRawNames(parser))
2158 return XML_ERROR_NO_MEMORY;
2159 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002160 return result;
2161}
2162
2163static enum XML_Error PTRCALL
2164externalEntityInitProcessor(XML_Parser parser,
2165 const char *start,
2166 const char *end,
2167 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002168{
2169 enum XML_Error result = initializeEncoding(parser);
2170 if (result != XML_ERROR_NONE)
2171 return result;
2172 processor = externalEntityInitProcessor2;
2173 return externalEntityInitProcessor2(parser, start, end, endPtr);
2174}
2175
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002176static enum XML_Error PTRCALL
2177externalEntityInitProcessor2(XML_Parser parser,
2178 const char *start,
2179 const char *end,
2180 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002181{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002182 const char *next = start; /* XmlContentTok doesn't always set the last arg */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002183 int tok = XmlContentTok(encoding, start, end, &next);
2184 switch (tok) {
2185 case XML_TOK_BOM:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002186 /* If we are at the end of the buffer, this would cause the next stage,
2187 i.e. externalEntityInitProcessor3, to pass control directly to
2188 doContent (by detecting XML_TOK_NONE) without processing any xml text
2189 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2190 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002191 if (next == end && !ps_finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002192 *endPtr = next;
2193 return XML_ERROR_NONE;
2194 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002195 start = next;
2196 break;
2197 case XML_TOK_PARTIAL:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002198 if (!ps_finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002199 *endPtr = start;
2200 return XML_ERROR_NONE;
2201 }
2202 eventPtr = start;
2203 return XML_ERROR_UNCLOSED_TOKEN;
2204 case XML_TOK_PARTIAL_CHAR:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002205 if (!ps_finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002206 *endPtr = start;
2207 return XML_ERROR_NONE;
2208 }
2209 eventPtr = start;
2210 return XML_ERROR_PARTIAL_CHAR;
2211 }
2212 processor = externalEntityInitProcessor3;
2213 return externalEntityInitProcessor3(parser, start, end, endPtr);
2214}
2215
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002216static enum XML_Error PTRCALL
2217externalEntityInitProcessor3(XML_Parser parser,
2218 const char *start,
2219 const char *end,
2220 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002221{
Fred Drake31d485c2004-08-03 07:06:22 +00002222 int tok;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002223 const char *next = start; /* XmlContentTok doesn't always set the last arg */
Fred Drake31d485c2004-08-03 07:06:22 +00002224 eventPtr = start;
2225 tok = XmlContentTok(encoding, start, end, &next);
2226 eventEndPtr = next;
2227
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002228 switch (tok) {
2229 case XML_TOK_XML_DECL:
2230 {
Fred Drake31d485c2004-08-03 07:06:22 +00002231 enum XML_Error result;
2232 result = processXmlDecl(parser, 1, start, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002233 if (result != XML_ERROR_NONE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002234 return result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002235 switch (ps_parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002236 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00002237 *endPtr = next;
2238 return XML_ERROR_NONE;
2239 case XML_FINISHED:
2240 return XML_ERROR_ABORTED;
2241 default:
2242 start = next;
2243 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002244 }
2245 break;
2246 case XML_TOK_PARTIAL:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002247 if (!ps_finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002248 *endPtr = start;
2249 return XML_ERROR_NONE;
2250 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002251 return XML_ERROR_UNCLOSED_TOKEN;
2252 case XML_TOK_PARTIAL_CHAR:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002253 if (!ps_finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002254 *endPtr = start;
2255 return XML_ERROR_NONE;
2256 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002257 return XML_ERROR_PARTIAL_CHAR;
2258 }
2259 processor = externalEntityContentProcessor;
2260 tagLevel = 1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002261 return externalEntityContentProcessor(parser, start, end, endPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002262}
2263
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002264static enum XML_Error PTRCALL
2265externalEntityContentProcessor(XML_Parser parser,
2266 const char *start,
2267 const char *end,
2268 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002269{
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002270 enum XML_Error result = doContent(parser, 1, encoding, start, end,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002271 endPtr, (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00002272 if (result == XML_ERROR_NONE) {
2273 if (!storeRawNames(parser))
2274 return XML_ERROR_NO_MEMORY;
2275 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002276 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002277}
2278
2279static enum XML_Error
2280doContent(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002281 int startTagLevel,
2282 const ENCODING *enc,
2283 const char *s,
2284 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00002285 const char **nextPtr,
2286 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002287{
Fred Drake31d485c2004-08-03 07:06:22 +00002288 /* save one level of indirection */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002289 DTD * const dtd = _dtd;
Fred Drake31d485c2004-08-03 07:06:22 +00002290
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002291 const char **eventPP;
2292 const char **eventEndPP;
2293 if (enc == encoding) {
2294 eventPP = &eventPtr;
2295 eventEndPP = &eventEndPtr;
2296 }
2297 else {
2298 eventPP = &(openInternalEntities->internalEventPtr);
2299 eventEndPP = &(openInternalEntities->internalEventEndPtr);
2300 }
2301 *eventPP = s;
Fred Drake31d485c2004-08-03 07:06:22 +00002302
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002303 for (;;) {
2304 const char *next = s; /* XmlContentTok doesn't always set the last arg */
2305 int tok = XmlContentTok(enc, s, end, &next);
2306 *eventEndPP = next;
2307 switch (tok) {
2308 case XML_TOK_TRAILING_CR:
Fred Drake31d485c2004-08-03 07:06:22 +00002309 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002310 *nextPtr = s;
2311 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002312 }
2313 *eventEndPP = end;
2314 if (characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002315 XML_Char c = 0xA;
2316 characterDataHandler(handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002317 }
2318 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002319 reportDefault(parser, enc, s, end);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002320 /* We are at the end of the final buffer, should we check for
2321 XML_SUSPENDED, XML_FINISHED?
Fred Drake31d485c2004-08-03 07:06:22 +00002322 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002323 if (startTagLevel == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002324 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002325 if (tagLevel != startTagLevel)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002326 return XML_ERROR_ASYNC_ENTITY;
Fred Drake31d485c2004-08-03 07:06:22 +00002327 *nextPtr = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002328 return XML_ERROR_NONE;
2329 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00002330 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002331 *nextPtr = s;
2332 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002333 }
2334 if (startTagLevel > 0) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002335 if (tagLevel != startTagLevel)
2336 return XML_ERROR_ASYNC_ENTITY;
Fred Drake31d485c2004-08-03 07:06:22 +00002337 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002338 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002339 }
2340 return XML_ERROR_NO_ELEMENTS;
2341 case XML_TOK_INVALID:
2342 *eventPP = next;
2343 return XML_ERROR_INVALID_TOKEN;
2344 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00002345 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002346 *nextPtr = s;
2347 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002348 }
2349 return XML_ERROR_UNCLOSED_TOKEN;
2350 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00002351 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002352 *nextPtr = s;
2353 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002354 }
2355 return XML_ERROR_PARTIAL_CHAR;
2356 case XML_TOK_ENTITY_REF:
2357 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002358 const XML_Char *name;
2359 ENTITY *entity;
2360 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
2361 s + enc->minBytesPerChar,
2362 next - enc->minBytesPerChar);
2363 if (ch) {
2364 if (characterDataHandler)
2365 characterDataHandler(handlerArg, &ch, 1);
2366 else if (defaultHandler)
2367 reportDefault(parser, enc, s, next);
2368 break;
2369 }
2370 name = poolStoreString(&dtd->pool, enc,
2371 s + enc->minBytesPerChar,
2372 next - enc->minBytesPerChar);
2373 if (!name)
2374 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07002375 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002376 poolDiscard(&dtd->pool);
2377 /* First, determine if a check for an existing declaration is needed;
2378 if yes, check that the entity exists, and that it is internal,
2379 otherwise call the skipped entity or default handler.
2380 */
2381 if (!dtd->hasParamEntityRefs || dtd->standalone) {
2382 if (!entity)
2383 return XML_ERROR_UNDEFINED_ENTITY;
2384 else if (!entity->is_internal)
2385 return XML_ERROR_ENTITY_DECLARED_IN_PE;
2386 }
2387 else if (!entity) {
2388 if (skippedEntityHandler)
2389 skippedEntityHandler(handlerArg, name, 0);
2390 else if (defaultHandler)
2391 reportDefault(parser, enc, s, next);
2392 break;
2393 }
2394 if (entity->open)
2395 return XML_ERROR_RECURSIVE_ENTITY_REF;
2396 if (entity->notation)
2397 return XML_ERROR_BINARY_ENTITY_REF;
2398 if (entity->textPtr) {
2399 enum XML_Error result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002400 if (!defaultExpandInternalEntities) {
2401 if (skippedEntityHandler)
2402 skippedEntityHandler(handlerArg, entity->name, 0);
2403 else if (defaultHandler)
2404 reportDefault(parser, enc, s, next);
2405 break;
2406 }
Fred Drake31d485c2004-08-03 07:06:22 +00002407 result = processInternalEntity(parser, entity, XML_FALSE);
2408 if (result != XML_ERROR_NONE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002409 return result;
2410 }
2411 else if (externalEntityRefHandler) {
2412 const XML_Char *context;
2413 entity->open = XML_TRUE;
2414 context = getContext(parser);
2415 entity->open = XML_FALSE;
2416 if (!context)
2417 return XML_ERROR_NO_MEMORY;
Fred Drake31d485c2004-08-03 07:06:22 +00002418 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002419 context,
2420 entity->base,
2421 entity->systemId,
2422 entity->publicId))
2423 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2424 poolDiscard(&tempPool);
2425 }
2426 else if (defaultHandler)
2427 reportDefault(parser, enc, s, next);
2428 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002429 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002430 case XML_TOK_START_TAG_NO_ATTS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002431 /* fall through */
2432 case XML_TOK_START_TAG_WITH_ATTS:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002433 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002434 TAG *tag;
2435 enum XML_Error result;
2436 XML_Char *toPtr;
2437 if (freeTagList) {
2438 tag = freeTagList;
2439 freeTagList = freeTagList->parent;
2440 }
2441 else {
2442 tag = (TAG *)MALLOC(sizeof(TAG));
2443 if (!tag)
2444 return XML_ERROR_NO_MEMORY;
2445 tag->buf = (char *)MALLOC(INIT_TAG_BUF_SIZE);
2446 if (!tag->buf) {
2447 FREE(tag);
2448 return XML_ERROR_NO_MEMORY;
2449 }
2450 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
2451 }
2452 tag->bindings = NULL;
2453 tag->parent = tagStack;
2454 tagStack = tag;
2455 tag->name.localPart = NULL;
2456 tag->name.prefix = NULL;
2457 tag->rawName = s + enc->minBytesPerChar;
2458 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
2459 ++tagLevel;
2460 {
2461 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
2462 const char *fromPtr = tag->rawName;
2463 toPtr = (XML_Char *)tag->buf;
2464 for (;;) {
2465 int bufSize;
2466 int convLen;
Victor Stinner23ec4b52017-06-15 00:54:36 +02002467 const enum XML_Convert_Result convert_res = XmlConvert(enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002468 &fromPtr, rawNameEnd,
2469 (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002470 convLen = (int)(toPtr - (XML_Char *)tag->buf);
Victor Stinner23ec4b52017-06-15 00:54:36 +02002471 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002472 tag->name.strLen = convLen;
2473 break;
2474 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002475 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002476 {
2477 char *temp = (char *)REALLOC(tag->buf, bufSize);
2478 if (temp == NULL)
2479 return XML_ERROR_NO_MEMORY;
2480 tag->buf = temp;
2481 tag->bufEnd = temp + bufSize;
2482 toPtr = (XML_Char *)temp + convLen;
2483 }
2484 }
2485 }
2486 tag->name.str = (XML_Char *)tag->buf;
2487 *toPtr = XML_T('\0');
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002488 result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
2489 if (result)
2490 return result;
2491 if (startElementHandler)
2492 startElementHandler(handlerArg, tag->name.str,
2493 (const XML_Char **)atts);
2494 else if (defaultHandler)
2495 reportDefault(parser, enc, s, next);
2496 poolClear(&tempPool);
2497 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002498 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002499 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002500 /* fall through */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002501 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
2502 {
2503 const char *rawName = s + enc->minBytesPerChar;
2504 enum XML_Error result;
2505 BINDING *bindings = NULL;
2506 XML_Bool noElmHandlers = XML_TRUE;
2507 TAG_NAME name;
2508 name.str = poolStoreString(&tempPool, enc, rawName,
2509 rawName + XmlNameLength(enc, rawName));
2510 if (!name.str)
2511 return XML_ERROR_NO_MEMORY;
2512 poolFinish(&tempPool);
Fred Drake4faea012003-01-28 06:42:40 +00002513 result = storeAtts(parser, enc, s, &name, &bindings);
2514 if (result)
2515 return result;
2516 poolFinish(&tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002517 if (startElementHandler) {
2518 startElementHandler(handlerArg, name.str, (const XML_Char **)atts);
2519 noElmHandlers = XML_FALSE;
2520 }
2521 if (endElementHandler) {
2522 if (startElementHandler)
2523 *eventPP = *eventEndPP;
2524 endElementHandler(handlerArg, name.str);
2525 noElmHandlers = XML_FALSE;
2526 }
2527 if (noElmHandlers && defaultHandler)
2528 reportDefault(parser, enc, s, next);
2529 poolClear(&tempPool);
2530 while (bindings) {
2531 BINDING *b = bindings;
2532 if (endNamespaceDeclHandler)
2533 endNamespaceDeclHandler(handlerArg, b->prefix->name);
2534 bindings = bindings->nextTagBinding;
2535 b->nextTagBinding = freeBindingList;
2536 freeBindingList = b;
2537 b->prefix->binding = b->prevPrefixBinding;
2538 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002539 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002540 if (tagLevel == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002541 return epilogProcessor(parser, next, end, nextPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002542 break;
2543 case XML_TOK_END_TAG:
2544 if (tagLevel == startTagLevel)
2545 return XML_ERROR_ASYNC_ENTITY;
2546 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002547 int len;
2548 const char *rawName;
2549 TAG *tag = tagStack;
2550 tagStack = tag->parent;
2551 tag->parent = freeTagList;
2552 freeTagList = tag;
2553 rawName = s + enc->minBytesPerChar*2;
2554 len = XmlNameLength(enc, rawName);
2555 if (len != tag->rawNameLength
2556 || memcmp(tag->rawName, rawName, len) != 0) {
2557 *eventPP = rawName;
2558 return XML_ERROR_TAG_MISMATCH;
2559 }
2560 --tagLevel;
2561 if (endElementHandler) {
2562 const XML_Char *localPart;
2563 const XML_Char *prefix;
2564 XML_Char *uri;
2565 localPart = tag->name.localPart;
2566 if (ns && localPart) {
2567 /* localPart and prefix may have been overwritten in
2568 tag->name.str, since this points to the binding->uri
2569 buffer which gets re-used; so we have to add them again
2570 */
2571 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
2572 /* don't need to check for space - already done in storeAtts() */
2573 while (*localPart) *uri++ = *localPart++;
2574 prefix = (XML_Char *)tag->name.prefix;
2575 if (ns_triplets && prefix) {
2576 *uri++ = namespaceSeparator;
2577 while (*prefix) *uri++ = *prefix++;
2578 }
2579 *uri = XML_T('\0');
2580 }
2581 endElementHandler(handlerArg, tag->name.str);
2582 }
2583 else if (defaultHandler)
2584 reportDefault(parser, enc, s, next);
2585 while (tag->bindings) {
2586 BINDING *b = tag->bindings;
2587 if (endNamespaceDeclHandler)
2588 endNamespaceDeclHandler(handlerArg, b->prefix->name);
2589 tag->bindings = tag->bindings->nextTagBinding;
2590 b->nextTagBinding = freeBindingList;
2591 freeBindingList = b;
2592 b->prefix->binding = b->prevPrefixBinding;
2593 }
2594 if (tagLevel == 0)
2595 return epilogProcessor(parser, next, end, nextPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002596 }
2597 break;
2598 case XML_TOK_CHAR_REF:
2599 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002600 int n = XmlCharRefNumber(enc, s);
2601 if (n < 0)
2602 return XML_ERROR_BAD_CHAR_REF;
2603 if (characterDataHandler) {
2604 XML_Char buf[XML_ENCODE_MAX];
2605 characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
2606 }
2607 else if (defaultHandler)
2608 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002609 }
2610 break;
2611 case XML_TOK_XML_DECL:
2612 return XML_ERROR_MISPLACED_XML_PI;
2613 case XML_TOK_DATA_NEWLINE:
2614 if (characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002615 XML_Char c = 0xA;
2616 characterDataHandler(handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002617 }
2618 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002619 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002620 break;
2621 case XML_TOK_CDATA_SECT_OPEN:
2622 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002623 enum XML_Error result;
2624 if (startCdataSectionHandler)
2625 startCdataSectionHandler(handlerArg);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002626#if 0
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002627 /* Suppose you doing a transformation on a document that involves
2628 changing only the character data. You set up a defaultHandler
2629 and a characterDataHandler. The defaultHandler simply copies
2630 characters through. The characterDataHandler does the
2631 transformation and writes the characters out escaping them as
2632 necessary. This case will fail to work if we leave out the
2633 following two lines (because & and < inside CDATA sections will
2634 be incorrectly escaped).
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002635
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002636 However, now we have a start/endCdataSectionHandler, so it seems
2637 easier to let the user deal with this.
2638 */
2639 else if (characterDataHandler)
2640 characterDataHandler(handlerArg, dataBuf, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002641#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002642 else if (defaultHandler)
2643 reportDefault(parser, enc, s, next);
Fred Drake31d485c2004-08-03 07:06:22 +00002644 result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore);
2645 if (result != XML_ERROR_NONE)
2646 return result;
2647 else if (!next) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002648 processor = cdataSectionProcessor;
2649 return result;
2650 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002651 }
2652 break;
2653 case XML_TOK_TRAILING_RSQB:
Fred Drake31d485c2004-08-03 07:06:22 +00002654 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002655 *nextPtr = s;
2656 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002657 }
2658 if (characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002659 if (MUST_CONVERT(enc, s)) {
2660 ICHAR *dataPtr = (ICHAR *)dataBuf;
2661 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
2662 characterDataHandler(handlerArg, dataBuf,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002663 (int)(dataPtr - (ICHAR *)dataBuf));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002664 }
2665 else
2666 characterDataHandler(handlerArg,
2667 (XML_Char *)s,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002668 (int)((XML_Char *)end - (XML_Char *)s));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002669 }
2670 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002671 reportDefault(parser, enc, s, end);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002672 /* We are at the end of the final buffer, should we check for
2673 XML_SUSPENDED, XML_FINISHED?
Fred Drake31d485c2004-08-03 07:06:22 +00002674 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002675 if (startTagLevel == 0) {
2676 *eventPP = end;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002677 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002678 }
2679 if (tagLevel != startTagLevel) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002680 *eventPP = end;
2681 return XML_ERROR_ASYNC_ENTITY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002682 }
Fred Drake31d485c2004-08-03 07:06:22 +00002683 *nextPtr = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002684 return XML_ERROR_NONE;
2685 case XML_TOK_DATA_CHARS:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002686 {
2687 XML_CharacterDataHandler charDataHandler = characterDataHandler;
2688 if (charDataHandler) {
2689 if (MUST_CONVERT(enc, s)) {
2690 for (;;) {
2691 ICHAR *dataPtr = (ICHAR *)dataBuf;
Victor Stinner23ec4b52017-06-15 00:54:36 +02002692 const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002693 *eventEndPP = s;
2694 charDataHandler(handlerArg, dataBuf,
2695 (int)(dataPtr - (ICHAR *)dataBuf));
Victor Stinner23ec4b52017-06-15 00:54:36 +02002696 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002697 break;
2698 *eventPP = s;
2699 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002700 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002701 else
2702 charDataHandler(handlerArg,
2703 (XML_Char *)s,
2704 (int)((XML_Char *)next - (XML_Char *)s));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002705 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002706 else if (defaultHandler)
2707 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002708 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002709 break;
2710 case XML_TOK_PI:
2711 if (!reportProcessingInstruction(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002712 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002713 break;
2714 case XML_TOK_COMMENT:
2715 if (!reportComment(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002716 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002717 break;
2718 default:
2719 if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002720 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002721 break;
2722 }
2723 *eventPP = s = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002724 switch (ps_parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002725 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00002726 *nextPtr = next;
2727 return XML_ERROR_NONE;
2728 case XML_FINISHED:
2729 return XML_ERROR_ABORTED;
2730 default: ;
2731 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002732 }
2733 /* not reached */
2734}
2735
Fred Drake4faea012003-01-28 06:42:40 +00002736/* Precondition: all arguments must be non-NULL;
2737 Purpose:
2738 - normalize attributes
2739 - check attributes for well-formedness
2740 - generate namespace aware attribute names (URI, prefix)
2741 - build list of attributes for startElementHandler
2742 - default attributes
2743 - process namespace declarations (check and report them)
2744 - generate namespace aware element name (URI, prefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002745*/
2746static enum XML_Error
2747storeAtts(XML_Parser parser, const ENCODING *enc,
2748 const char *attStr, TAG_NAME *tagNamePtr,
2749 BINDING **bindingsPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002750{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002751 DTD * const dtd = _dtd; /* save one level of indirection */
Fred Drake08317ae2003-10-21 15:38:55 +00002752 ELEMENT_TYPE *elementType;
2753 int nDefaultAtts;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002754 const XML_Char **appAtts; /* the attribute list for the application */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002755 int attIndex = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002756 int prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002757 int i;
2758 int n;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002759 XML_Char *uri;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002760 int nPrefixes = 0;
2761 BINDING *binding;
2762 const XML_Char *localPart;
2763
2764 /* lookup the element type name */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07002765 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str,0);
Fred Drake4faea012003-01-28 06:42:40 +00002766 if (!elementType) {
2767 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
2768 if (!name)
2769 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07002770 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
Fred Drake4faea012003-01-28 06:42:40 +00002771 sizeof(ELEMENT_TYPE));
2772 if (!elementType)
2773 return XML_ERROR_NO_MEMORY;
2774 if (ns && !setElementTypePrefix(parser, elementType))
2775 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002776 }
Fred Drake4faea012003-01-28 06:42:40 +00002777 nDefaultAtts = elementType->nDefaultAtts;
2778
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002779 /* get the attributes from the tokenizer */
2780 n = XmlGetAttributes(enc, attStr, attsSize, atts);
2781 if (n + nDefaultAtts > attsSize) {
2782 int oldAttsSize = attsSize;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002783 ATTRIBUTE *temp;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002784#ifdef XML_ATTR_INFO
2785 XML_AttrInfo *temp2;
2786#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002787 attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002788 temp = (ATTRIBUTE *)REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE));
2789 if (temp == NULL)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002790 return XML_ERROR_NO_MEMORY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002791 atts = temp;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002792#ifdef XML_ATTR_INFO
2793 temp2 = (XML_AttrInfo *)REALLOC((void *)attInfo, attsSize * sizeof(XML_AttrInfo));
2794 if (temp2 == NULL)
2795 return XML_ERROR_NO_MEMORY;
2796 attInfo = temp2;
2797#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002798 if (n > oldAttsSize)
2799 XmlGetAttributes(enc, attStr, n, atts);
2800 }
Fred Drake4faea012003-01-28 06:42:40 +00002801
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002802 appAtts = (const XML_Char **)atts;
2803 for (i = 0; i < n; i++) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002804 ATTRIBUTE *currAtt = &atts[i];
2805#ifdef XML_ATTR_INFO
2806 XML_AttrInfo *currAttInfo = &attInfo[i];
2807#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002808 /* add the name and value to the attribute list */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002809 ATTRIBUTE_ID *attId = getAttributeId(parser, enc, currAtt->name,
2810 currAtt->name
2811 + XmlNameLength(enc, currAtt->name));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002812 if (!attId)
2813 return XML_ERROR_NO_MEMORY;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002814#ifdef XML_ATTR_INFO
2815 currAttInfo->nameStart = parseEndByteIndex - (parseEndPtr - currAtt->name);
2816 currAttInfo->nameEnd = currAttInfo->nameStart +
2817 XmlNameLength(enc, currAtt->name);
2818 currAttInfo->valueStart = parseEndByteIndex -
2819 (parseEndPtr - currAtt->valuePtr);
2820 currAttInfo->valueEnd = parseEndByteIndex - (parseEndPtr - currAtt->valueEnd);
2821#endif
Fred Drake08317ae2003-10-21 15:38:55 +00002822 /* Detect duplicate attributes by their QNames. This does not work when
2823 namespace processing is turned on and different prefixes for the same
2824 namespace are used. For this case we have a check further down.
2825 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002826 if ((attId->name)[-1]) {
2827 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002828 eventPtr = atts[i].name;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002829 return XML_ERROR_DUPLICATE_ATTRIBUTE;
2830 }
2831 (attId->name)[-1] = 1;
2832 appAtts[attIndex++] = attId->name;
2833 if (!atts[i].normalized) {
2834 enum XML_Error result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002835 XML_Bool isCdata = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002836
2837 /* figure out whether declared as other than CDATA */
2838 if (attId->maybeTokenized) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002839 int j;
2840 for (j = 0; j < nDefaultAtts; j++) {
2841 if (attId == elementType->defaultAtts[j].id) {
2842 isCdata = elementType->defaultAtts[j].isCdata;
2843 break;
2844 }
2845 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002846 }
2847
2848 /* normalize the attribute value */
2849 result = storeAttributeValue(parser, enc, isCdata,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002850 atts[i].valuePtr, atts[i].valueEnd,
2851 &tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002852 if (result)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002853 return result;
Fred Drake4faea012003-01-28 06:42:40 +00002854 appAtts[attIndex] = poolStart(&tempPool);
2855 poolFinish(&tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002856 }
Fred Drake4faea012003-01-28 06:42:40 +00002857 else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002858 /* the value did not need normalizing */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002859 appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr,
2860 atts[i].valueEnd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002861 if (appAtts[attIndex] == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002862 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002863 poolFinish(&tempPool);
2864 }
2865 /* handle prefixed attribute names */
Fred Drake4faea012003-01-28 06:42:40 +00002866 if (attId->prefix) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002867 if (attId->xmlns) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002868 /* deal with namespace declarations here */
2869 enum XML_Error result = addBinding(parser, attId->prefix, attId,
2870 appAtts[attIndex], bindingsPtr);
2871 if (result)
2872 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002873 --attIndex;
2874 }
2875 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002876 /* deal with other prefixed names later */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002877 attIndex++;
2878 nPrefixes++;
2879 (attId->name)[-1] = 2;
2880 }
2881 }
2882 else
2883 attIndex++;
2884 }
Fred Drake4faea012003-01-28 06:42:40 +00002885
2886 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
2887 nSpecifiedAtts = attIndex;
2888 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
2889 for (i = 0; i < attIndex; i += 2)
2890 if (appAtts[i] == elementType->idAtt->name) {
2891 idAttIndex = i;
2892 break;
2893 }
2894 }
2895 else
2896 idAttIndex = -1;
2897
2898 /* do attribute defaulting */
2899 for (i = 0; i < nDefaultAtts; i++) {
2900 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
2901 if (!(da->id->name)[-1] && da->value) {
2902 if (da->id->prefix) {
2903 if (da->id->xmlns) {
2904 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
2905 da->value, bindingsPtr);
2906 if (result)
2907 return result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002908 }
2909 else {
Fred Drake4faea012003-01-28 06:42:40 +00002910 (da->id->name)[-1] = 2;
2911 nPrefixes++;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002912 appAtts[attIndex++] = da->id->name;
2913 appAtts[attIndex++] = da->value;
2914 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002915 }
Fred Drake4faea012003-01-28 06:42:40 +00002916 else {
2917 (da->id->name)[-1] = 1;
2918 appAtts[attIndex++] = da->id->name;
2919 appAtts[attIndex++] = da->value;
2920 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002921 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002922 }
Fred Drake4faea012003-01-28 06:42:40 +00002923 appAtts[attIndex] = 0;
2924
Fred Drake08317ae2003-10-21 15:38:55 +00002925 /* expand prefixed attribute names, check for duplicates,
2926 and clear flags that say whether attributes were specified */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002927 i = 0;
2928 if (nPrefixes) {
Fred Drake08317ae2003-10-21 15:38:55 +00002929 int j; /* hash table index */
2930 unsigned long version = nsAttsVersion;
2931 int nsAttsSize = (int)1 << nsAttsPower;
2932 /* size of hash table must be at least 2 * (# of prefixed attributes) */
2933 if ((nPrefixes << 1) >> nsAttsPower) { /* true for nsAttsPower = 0 */
2934 NS_ATT *temp;
2935 /* hash table size must also be a power of 2 and >= 8 */
2936 while (nPrefixes >> nsAttsPower++);
2937 if (nsAttsPower < 3)
2938 nsAttsPower = 3;
2939 nsAttsSize = (int)1 << nsAttsPower;
2940 temp = (NS_ATT *)REALLOC(nsAtts, nsAttsSize * sizeof(NS_ATT));
2941 if (!temp)
2942 return XML_ERROR_NO_MEMORY;
2943 nsAtts = temp;
2944 version = 0; /* force re-initialization of nsAtts hash table */
2945 }
2946 /* using a version flag saves us from initializing nsAtts every time */
2947 if (!version) { /* initialize version flags when version wraps around */
2948 version = INIT_ATTS_VERSION;
2949 for (j = nsAttsSize; j != 0; )
2950 nsAtts[--j].version = version;
2951 }
2952 nsAttsVersion = --version;
2953
2954 /* expand prefixed names and check for duplicates */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002955 for (; i < attIndex; i += 2) {
Fred Drake08317ae2003-10-21 15:38:55 +00002956 const XML_Char *s = appAtts[i];
2957 if (s[-1] == 2) { /* prefixed */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002958 ATTRIBUTE_ID *id;
Fred Drake08317ae2003-10-21 15:38:55 +00002959 const BINDING *b;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07002960 unsigned long uriHash = hash_secret_salt;
Fred Drake08317ae2003-10-21 15:38:55 +00002961 ((XML_Char *)s)[-1] = 0; /* clear flag */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07002962 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002963 if (!id || !id->prefix)
2964 return XML_ERROR_NO_MEMORY;
Fred Drake08317ae2003-10-21 15:38:55 +00002965 b = id->prefix->binding;
2966 if (!b)
2967 return XML_ERROR_UNBOUND_PREFIX;
2968
2969 /* as we expand the name we also calculate its hash value */
2970 for (j = 0; j < b->uriLen; j++) {
2971 const XML_Char c = b->uri[j];
2972 if (!poolAppendChar(&tempPool, c))
2973 return XML_ERROR_NO_MEMORY;
2974 uriHash = CHAR_HASH(uriHash, c);
2975 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002976 while (*s++ != XML_T(ASCII_COLON))
Fred Drake08317ae2003-10-21 15:38:55 +00002977 ;
2978 do { /* copies null terminator */
2979 const XML_Char c = *s;
2980 if (!poolAppendChar(&tempPool, *s))
2981 return XML_ERROR_NO_MEMORY;
2982 uriHash = CHAR_HASH(uriHash, c);
2983 } while (*s++);
2984
2985 { /* Check hash table for duplicate of expanded name (uriName).
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07002986 Derived from code in lookup(parser, HASH_TABLE *table, ...).
Fred Drake08317ae2003-10-21 15:38:55 +00002987 */
2988 unsigned char step = 0;
2989 unsigned long mask = nsAttsSize - 1;
2990 j = uriHash & mask; /* index into hash table */
2991 while (nsAtts[j].version == version) {
2992 /* for speed we compare stored hash values first */
2993 if (uriHash == nsAtts[j].hash) {
2994 const XML_Char *s1 = poolStart(&tempPool);
2995 const XML_Char *s2 = nsAtts[j].uriName;
2996 /* s1 is null terminated, but not s2 */
2997 for (; *s1 == *s2 && *s1 != 0; s1++, s2++);
2998 if (*s1 == 0)
2999 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3000 }
3001 if (!step)
3002 step = PROBE_STEP(uriHash, mask, nsAttsPower);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003003 j < step ? (j += nsAttsSize - step) : (j -= step);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003004 }
Fred Drake08317ae2003-10-21 15:38:55 +00003005 }
3006
3007 if (ns_triplets) { /* append namespace separator and prefix */
3008 tempPool.ptr[-1] = namespaceSeparator;
3009 s = b->prefix->name;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003010 do {
3011 if (!poolAppendChar(&tempPool, *s))
3012 return XML_ERROR_NO_MEMORY;
3013 } while (*s++);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003014 }
Fred Drake08317ae2003-10-21 15:38:55 +00003015
3016 /* store expanded name in attribute list */
3017 s = poolStart(&tempPool);
3018 poolFinish(&tempPool);
3019 appAtts[i] = s;
3020
3021 /* fill empty slot with new version, uriName and hash value */
3022 nsAtts[j].version = version;
3023 nsAtts[j].hash = uriHash;
3024 nsAtts[j].uriName = s;
3025
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003026 if (!--nPrefixes) {
3027 i += 2;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003028 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003029 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003030 }
Fred Drake08317ae2003-10-21 15:38:55 +00003031 else /* not prefixed */
3032 ((XML_Char *)s)[-1] = 0; /* clear flag */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003033 }
3034 }
Fred Drake08317ae2003-10-21 15:38:55 +00003035 /* clear flags for the remaining attributes */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003036 for (; i < attIndex; i += 2)
3037 ((XML_Char *)(appAtts[i]))[-1] = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003038 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3039 binding->attId->name[-1] = 0;
Fred Drake4faea012003-01-28 06:42:40 +00003040
Fred Drake08317ae2003-10-21 15:38:55 +00003041 if (!ns)
3042 return XML_ERROR_NONE;
3043
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003044 /* expand the element type name */
3045 if (elementType->prefix) {
3046 binding = elementType->prefix->binding;
3047 if (!binding)
Fred Drake08317ae2003-10-21 15:38:55 +00003048 return XML_ERROR_UNBOUND_PREFIX;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003049 localPart = tagNamePtr->str;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003050 while (*localPart++ != XML_T(ASCII_COLON))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003051 ;
3052 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003053 else if (dtd->defaultPrefix.binding) {
3054 binding = dtd->defaultPrefix.binding;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003055 localPart = tagNamePtr->str;
3056 }
3057 else
3058 return XML_ERROR_NONE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003059 prefixLen = 0;
Fred Drake08317ae2003-10-21 15:38:55 +00003060 if (ns_triplets && binding->prefix->name) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003061 for (; binding->prefix->name[prefixLen++];)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003062 ; /* prefixLen includes null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003063 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003064 tagNamePtr->localPart = localPart;
3065 tagNamePtr->uriLen = binding->uriLen;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003066 tagNamePtr->prefix = binding->prefix->name;
3067 tagNamePtr->prefixLen = prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003068 for (i = 0; localPart[i++];)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003069 ; /* i includes null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003070 n = i + binding->uriLen + prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003071 if (n > binding->uriAlloc) {
3072 TAG *p;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003073 uri = (XML_Char *)MALLOC((n + EXPAND_SPARE) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003074 if (!uri)
3075 return XML_ERROR_NO_MEMORY;
3076 binding->uriAlloc = n + EXPAND_SPARE;
3077 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3078 for (p = tagStack; p; p = p->parent)
3079 if (p->name.str == binding->uri)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003080 p->name.str = uri;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003081 FREE(binding->uri);
3082 binding->uri = uri;
3083 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003084 /* if namespaceSeparator != '\0' then uri includes it already */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003085 uri = binding->uri + binding->uriLen;
3086 memcpy(uri, localPart, i * sizeof(XML_Char));
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003087 /* we always have a namespace separator between localPart and prefix */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003088 if (prefixLen) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003089 uri += i - 1;
3090 *uri = namespaceSeparator; /* replace null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003091 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3092 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003093 tagNamePtr->str = binding->uri;
3094 return XML_ERROR_NONE;
3095}
3096
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003097/* addBinding() overwrites the value of prefix->binding without checking.
3098 Therefore one must keep track of the old value outside of addBinding().
3099*/
3100static enum XML_Error
3101addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3102 const XML_Char *uri, BINDING **bindingsPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003103{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003104 static const XML_Char xmlNamespace[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003105 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3106 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3107 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
3108 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, ASCII_SLASH,
3109 ASCII_n, ASCII_a, ASCII_m, ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3110 ASCII_e, '\0'
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003111 };
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003112 static const int xmlLen =
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003113 (int)sizeof(xmlNamespace)/sizeof(XML_Char) - 1;
3114 static const XML_Char xmlnsNamespace[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003115 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3116 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3117 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_2, ASCII_0, ASCII_0,
3118 ASCII_0, ASCII_SLASH, ASCII_x, ASCII_m, ASCII_l, ASCII_n, ASCII_s,
3119 ASCII_SLASH, '\0'
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003120 };
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003121 static const int xmlnsLen =
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003122 (int)sizeof(xmlnsNamespace)/sizeof(XML_Char) - 1;
3123
3124 XML_Bool mustBeXML = XML_FALSE;
3125 XML_Bool isXML = XML_TRUE;
3126 XML_Bool isXMLNS = XML_TRUE;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003127
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003128 BINDING *b;
3129 int len;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003130
Fred Drake31d485c2004-08-03 07:06:22 +00003131 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003132 if (*uri == XML_T('\0') && prefix->name)
Fred Drake31d485c2004-08-03 07:06:22 +00003133 return XML_ERROR_UNDECLARING_PREFIX;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003134
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003135 if (prefix->name
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003136 && prefix->name[0] == XML_T(ASCII_x)
3137 && prefix->name[1] == XML_T(ASCII_m)
3138 && prefix->name[2] == XML_T(ASCII_l)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003139
3140 /* Not allowed to bind xmlns */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003141 if (prefix->name[3] == XML_T(ASCII_n)
3142 && prefix->name[4] == XML_T(ASCII_s)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003143 && prefix->name[5] == XML_T('\0'))
3144 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3145
3146 if (prefix->name[3] == XML_T('\0'))
3147 mustBeXML = XML_TRUE;
3148 }
3149
3150 for (len = 0; uri[len]; len++) {
3151 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3152 isXML = XML_FALSE;
3153
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003154 if (!mustBeXML && isXMLNS
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003155 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3156 isXMLNS = XML_FALSE;
3157 }
3158 isXML = isXML && len == xmlLen;
3159 isXMLNS = isXMLNS && len == xmlnsLen;
3160
3161 if (mustBeXML != isXML)
3162 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3163 : XML_ERROR_RESERVED_NAMESPACE_URI;
3164
3165 if (isXMLNS)
3166 return XML_ERROR_RESERVED_NAMESPACE_URI;
3167
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003168 if (namespaceSeparator)
3169 len++;
3170 if (freeBindingList) {
3171 b = freeBindingList;
3172 if (len > b->uriAlloc) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003173 XML_Char *temp = (XML_Char *)REALLOC(b->uri,
3174 sizeof(XML_Char) * (len + EXPAND_SPARE));
3175 if (temp == NULL)
3176 return XML_ERROR_NO_MEMORY;
3177 b->uri = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003178 b->uriAlloc = len + EXPAND_SPARE;
3179 }
3180 freeBindingList = b->nextTagBinding;
3181 }
3182 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003183 b = (BINDING *)MALLOC(sizeof(BINDING));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003184 if (!b)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003185 return XML_ERROR_NO_MEMORY;
3186 b->uri = (XML_Char *)MALLOC(sizeof(XML_Char) * (len + EXPAND_SPARE));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003187 if (!b->uri) {
3188 FREE(b);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003189 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003190 }
3191 b->uriAlloc = len + EXPAND_SPARE;
3192 }
3193 b->uriLen = len;
3194 memcpy(b->uri, uri, len * sizeof(XML_Char));
3195 if (namespaceSeparator)
3196 b->uri[len - 1] = namespaceSeparator;
3197 b->prefix = prefix;
3198 b->attId = attId;
3199 b->prevPrefixBinding = prefix->binding;
Fred Drake08317ae2003-10-21 15:38:55 +00003200 /* NULL binding when default namespace undeclared */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003201 if (*uri == XML_T('\0') && prefix == &_dtd->defaultPrefix)
3202 prefix->binding = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003203 else
3204 prefix->binding = b;
3205 b->nextTagBinding = *bindingsPtr;
3206 *bindingsPtr = b;
Fred Drake31d485c2004-08-03 07:06:22 +00003207 /* if attId == NULL then we are not starting a namespace scope */
3208 if (attId && startNamespaceDeclHandler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003209 startNamespaceDeclHandler(handlerArg, prefix->name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003210 prefix->binding ? uri : 0);
3211 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003212}
3213
3214/* The idea here is to avoid using stack for each CDATA section when
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003215 the whole file is parsed with one call.
3216*/
3217static enum XML_Error PTRCALL
3218cdataSectionProcessor(XML_Parser parser,
3219 const char *start,
3220 const char *end,
3221 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003222{
Fred Drake31d485c2004-08-03 07:06:22 +00003223 enum XML_Error result = doCdataSection(parser, encoding, &start, end,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003224 endPtr, (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00003225 if (result != XML_ERROR_NONE)
3226 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003227 if (start) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003228 if (parentParser) { /* we are parsing an external entity */
3229 processor = externalEntityContentProcessor;
3230 return externalEntityContentProcessor(parser, start, end, endPtr);
3231 }
3232 else {
3233 processor = contentProcessor;
3234 return contentProcessor(parser, start, end, endPtr);
3235 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003236 }
3237 return result;
3238}
3239
Fred Drake31d485c2004-08-03 07:06:22 +00003240/* startPtr gets set to non-null if the section is closed, and to null if
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003241 the section is not yet closed.
3242*/
3243static enum XML_Error
3244doCdataSection(XML_Parser parser,
3245 const ENCODING *enc,
3246 const char **startPtr,
3247 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00003248 const char **nextPtr,
3249 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003250{
3251 const char *s = *startPtr;
3252 const char **eventPP;
3253 const char **eventEndPP;
3254 if (enc == encoding) {
3255 eventPP = &eventPtr;
3256 *eventPP = s;
3257 eventEndPP = &eventEndPtr;
3258 }
3259 else {
3260 eventPP = &(openInternalEntities->internalEventPtr);
3261 eventEndPP = &(openInternalEntities->internalEventEndPtr);
3262 }
3263 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003264 *startPtr = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00003265
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003266 for (;;) {
3267 const char *next;
3268 int tok = XmlCdataSectionTok(enc, s, end, &next);
3269 *eventEndPP = next;
3270 switch (tok) {
3271 case XML_TOK_CDATA_SECT_CLOSE:
3272 if (endCdataSectionHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003273 endCdataSectionHandler(handlerArg);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003274#if 0
3275 /* see comment under XML_TOK_CDATA_SECT_OPEN */
3276 else if (characterDataHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003277 characterDataHandler(handlerArg, dataBuf, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003278#endif
3279 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003280 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003281 *startPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00003282 *nextPtr = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003283 if (ps_parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00003284 return XML_ERROR_ABORTED;
3285 else
3286 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003287 case XML_TOK_DATA_NEWLINE:
3288 if (characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003289 XML_Char c = 0xA;
3290 characterDataHandler(handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003291 }
3292 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003293 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003294 break;
3295 case XML_TOK_DATA_CHARS:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003296 {
3297 XML_CharacterDataHandler charDataHandler = characterDataHandler;
3298 if (charDataHandler) {
3299 if (MUST_CONVERT(enc, s)) {
3300 for (;;) {
3301 ICHAR *dataPtr = (ICHAR *)dataBuf;
Victor Stinner23ec4b52017-06-15 00:54:36 +02003302 const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003303 *eventEndPP = next;
3304 charDataHandler(handlerArg, dataBuf,
3305 (int)(dataPtr - (ICHAR *)dataBuf));
Victor Stinner23ec4b52017-06-15 00:54:36 +02003306 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003307 break;
3308 *eventPP = s;
3309 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003310 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003311 else
3312 charDataHandler(handlerArg,
3313 (XML_Char *)s,
3314 (int)((XML_Char *)next - (XML_Char *)s));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003315 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003316 else if (defaultHandler)
3317 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003318 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003319 break;
3320 case XML_TOK_INVALID:
3321 *eventPP = next;
3322 return XML_ERROR_INVALID_TOKEN;
3323 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003324 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003325 *nextPtr = s;
3326 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003327 }
3328 return XML_ERROR_PARTIAL_CHAR;
3329 case XML_TOK_PARTIAL:
3330 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00003331 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003332 *nextPtr = s;
3333 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003334 }
3335 return XML_ERROR_UNCLOSED_CDATA_SECTION;
3336 default:
3337 *eventPP = next;
3338 return XML_ERROR_UNEXPECTED_STATE;
3339 }
Fred Drake31d485c2004-08-03 07:06:22 +00003340
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003341 *eventPP = s = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003342 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00003343 case XML_SUSPENDED:
3344 *nextPtr = next;
3345 return XML_ERROR_NONE;
3346 case XML_FINISHED:
3347 return XML_ERROR_ABORTED;
3348 default: ;
3349 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003350 }
3351 /* not reached */
3352}
3353
3354#ifdef XML_DTD
3355
3356/* The idea here is to avoid using stack for each IGNORE section when
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003357 the whole file is parsed with one call.
3358*/
3359static enum XML_Error PTRCALL
3360ignoreSectionProcessor(XML_Parser parser,
3361 const char *start,
3362 const char *end,
3363 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003364{
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003365 enum XML_Error result = doIgnoreSection(parser, encoding, &start, end,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003366 endPtr, (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00003367 if (result != XML_ERROR_NONE)
3368 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003369 if (start) {
3370 processor = prologProcessor;
3371 return prologProcessor(parser, start, end, endPtr);
3372 }
3373 return result;
3374}
3375
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003376/* startPtr gets set to non-null is the section is closed, and to null
3377 if the section is not yet closed.
3378*/
3379static enum XML_Error
3380doIgnoreSection(XML_Parser parser,
3381 const ENCODING *enc,
3382 const char **startPtr,
3383 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00003384 const char **nextPtr,
3385 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003386{
3387 const char *next;
3388 int tok;
3389 const char *s = *startPtr;
3390 const char **eventPP;
3391 const char **eventEndPP;
3392 if (enc == encoding) {
3393 eventPP = &eventPtr;
3394 *eventPP = s;
3395 eventEndPP = &eventEndPtr;
3396 }
3397 else {
3398 eventPP = &(openInternalEntities->internalEventPtr);
3399 eventEndPP = &(openInternalEntities->internalEventEndPtr);
3400 }
3401 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003402 *startPtr = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003403 tok = XmlIgnoreSectionTok(enc, s, end, &next);
3404 *eventEndPP = next;
3405 switch (tok) {
3406 case XML_TOK_IGNORE_SECT:
3407 if (defaultHandler)
3408 reportDefault(parser, enc, s, next);
3409 *startPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00003410 *nextPtr = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003411 if (ps_parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00003412 return XML_ERROR_ABORTED;
3413 else
3414 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003415 case XML_TOK_INVALID:
3416 *eventPP = next;
3417 return XML_ERROR_INVALID_TOKEN;
3418 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003419 if (haveMore) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003420 *nextPtr = s;
3421 return XML_ERROR_NONE;
3422 }
3423 return XML_ERROR_PARTIAL_CHAR;
3424 case XML_TOK_PARTIAL:
3425 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00003426 if (haveMore) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003427 *nextPtr = s;
3428 return XML_ERROR_NONE;
3429 }
3430 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
3431 default:
3432 *eventPP = next;
3433 return XML_ERROR_UNEXPECTED_STATE;
3434 }
3435 /* not reached */
3436}
3437
3438#endif /* XML_DTD */
3439
3440static enum XML_Error
3441initializeEncoding(XML_Parser parser)
3442{
3443 const char *s;
3444#ifdef XML_UNICODE
3445 char encodingBuf[128];
3446 if (!protocolEncodingName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003447 s = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003448 else {
3449 int i;
3450 for (i = 0; protocolEncodingName[i]; i++) {
3451 if (i == sizeof(encodingBuf) - 1
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003452 || (protocolEncodingName[i] & ~0x7f) != 0) {
3453 encodingBuf[0] = '\0';
3454 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003455 }
3456 encodingBuf[i] = (char)protocolEncodingName[i];
3457 }
3458 encodingBuf[i] = '\0';
3459 s = encodingBuf;
3460 }
3461#else
3462 s = protocolEncodingName;
3463#endif
3464 if ((ns ? XmlInitEncodingNS : XmlInitEncoding)(&initEncoding, &encoding, s))
3465 return XML_ERROR_NONE;
3466 return handleUnknownEncoding(parser, protocolEncodingName);
3467}
3468
3469static enum XML_Error
3470processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003471 const char *s, const char *next)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003472{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003473 const char *encodingName = NULL;
3474 const XML_Char *storedEncName = NULL;
3475 const ENCODING *newEncoding = NULL;
3476 const char *version = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003477 const char *versionend;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003478 const XML_Char *storedversion = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003479 int standalone = -1;
3480 if (!(ns
3481 ? XmlParseXmlDeclNS
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003482 : XmlParseXmlDecl)(isGeneralTextEntity,
3483 encoding,
3484 s,
3485 next,
3486 &eventPtr,
3487 &version,
3488 &versionend,
3489 &encodingName,
3490 &newEncoding,
Fred Drake31d485c2004-08-03 07:06:22 +00003491 &standalone)) {
3492 if (isGeneralTextEntity)
3493 return XML_ERROR_TEXT_DECL;
3494 else
3495 return XML_ERROR_XML_DECL;
3496 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003497 if (!isGeneralTextEntity && standalone == 1) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003498 _dtd->standalone = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003499#ifdef XML_DTD
3500 if (paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
3501 paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
3502#endif /* XML_DTD */
3503 }
3504 if (xmlDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003505 if (encodingName != NULL) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003506 storedEncName = poolStoreString(&temp2Pool,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003507 encoding,
3508 encodingName,
3509 encodingName
3510 + XmlNameLength(encoding, encodingName));
3511 if (!storedEncName)
3512 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003513 poolFinish(&temp2Pool);
3514 }
3515 if (version) {
3516 storedversion = poolStoreString(&temp2Pool,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003517 encoding,
3518 version,
3519 versionend - encoding->minBytesPerChar);
3520 if (!storedversion)
3521 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003522 }
3523 xmlDeclHandler(handlerArg, storedversion, storedEncName, standalone);
3524 }
3525 else if (defaultHandler)
3526 reportDefault(parser, encoding, s, next);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003527 if (protocolEncodingName == NULL) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003528 if (newEncoding) {
3529 if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003530 eventPtr = encodingName;
3531 return XML_ERROR_INCORRECT_ENCODING;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003532 }
3533 encoding = newEncoding;
3534 }
3535 else if (encodingName) {
3536 enum XML_Error result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003537 if (!storedEncName) {
3538 storedEncName = poolStoreString(
3539 &temp2Pool, encoding, encodingName,
3540 encodingName + XmlNameLength(encoding, encodingName));
3541 if (!storedEncName)
3542 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003543 }
3544 result = handleUnknownEncoding(parser, storedEncName);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003545 poolClear(&temp2Pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003546 if (result == XML_ERROR_UNKNOWN_ENCODING)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003547 eventPtr = encodingName;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003548 return result;
3549 }
3550 }
3551
3552 if (storedEncName || storedversion)
3553 poolClear(&temp2Pool);
3554
3555 return XML_ERROR_NONE;
3556}
3557
3558static enum XML_Error
3559handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
3560{
3561 if (unknownEncodingHandler) {
3562 XML_Encoding info;
3563 int i;
3564 for (i = 0; i < 256; i++)
3565 info.map[i] = -1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003566 info.convert = NULL;
3567 info.data = NULL;
3568 info.release = NULL;
3569 if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName,
3570 &info)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003571 ENCODING *enc;
3572 unknownEncodingMem = MALLOC(XmlSizeOfUnknownEncoding());
3573 if (!unknownEncodingMem) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003574 if (info.release)
3575 info.release(info.data);
3576 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003577 }
3578 enc = (ns
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003579 ? XmlInitUnknownEncodingNS
3580 : XmlInitUnknownEncoding)(unknownEncodingMem,
3581 info.map,
3582 info.convert,
3583 info.data);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003584 if (enc) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003585 unknownEncodingData = info.data;
3586 unknownEncodingRelease = info.release;
3587 encoding = enc;
3588 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003589 }
3590 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003591 if (info.release != NULL)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003592 info.release(info.data);
3593 }
3594 return XML_ERROR_UNKNOWN_ENCODING;
3595}
3596
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003597static enum XML_Error PTRCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003598prologInitProcessor(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003599 const char *s,
3600 const char *end,
3601 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003602{
3603 enum XML_Error result = initializeEncoding(parser);
3604 if (result != XML_ERROR_NONE)
3605 return result;
3606 processor = prologProcessor;
3607 return prologProcessor(parser, s, end, nextPtr);
3608}
3609
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003610#ifdef XML_DTD
3611
3612static enum XML_Error PTRCALL
3613externalParEntInitProcessor(XML_Parser parser,
3614 const char *s,
3615 const char *end,
3616 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003617{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003618 enum XML_Error result = initializeEncoding(parser);
3619 if (result != XML_ERROR_NONE)
3620 return result;
3621
3622 /* we know now that XML_Parse(Buffer) has been called,
3623 so we consider the external parameter entity read */
3624 _dtd->paramEntityRead = XML_TRUE;
3625
3626 if (prologState.inEntityValue) {
3627 processor = entityValueInitProcessor;
3628 return entityValueInitProcessor(parser, s, end, nextPtr);
3629 }
3630 else {
3631 processor = externalParEntProcessor;
3632 return externalParEntProcessor(parser, s, end, nextPtr);
3633 }
3634}
3635
3636static enum XML_Error PTRCALL
3637entityValueInitProcessor(XML_Parser parser,
3638 const char *s,
3639 const char *end,
3640 const char **nextPtr)
3641{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003642 int tok;
Fred Drake31d485c2004-08-03 07:06:22 +00003643 const char *start = s;
3644 const char *next = start;
3645 eventPtr = start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003646
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003647 for (;;) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003648 tok = XmlPrologTok(encoding, start, end, &next);
Fred Drake31d485c2004-08-03 07:06:22 +00003649 eventEndPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003650 if (tok <= 0) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003651 if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
Fred Drake31d485c2004-08-03 07:06:22 +00003652 *nextPtr = s;
3653 return XML_ERROR_NONE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003654 }
3655 switch (tok) {
3656 case XML_TOK_INVALID:
Fred Drake31d485c2004-08-03 07:06:22 +00003657 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003658 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00003659 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003660 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003661 return XML_ERROR_PARTIAL_CHAR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003662 case XML_TOK_NONE: /* start == end */
3663 default:
3664 break;
3665 }
Fred Drake31d485c2004-08-03 07:06:22 +00003666 /* found end of entity value - can store it now */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003667 return storeEntityValue(parser, encoding, s, end);
3668 }
3669 else if (tok == XML_TOK_XML_DECL) {
Fred Drake31d485c2004-08-03 07:06:22 +00003670 enum XML_Error result;
3671 result = processXmlDecl(parser, 0, start, next);
3672 if (result != XML_ERROR_NONE)
3673 return result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003674 switch (ps_parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003675 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00003676 *nextPtr = next;
3677 return XML_ERROR_NONE;
3678 case XML_FINISHED:
3679 return XML_ERROR_ABORTED;
3680 default:
3681 *nextPtr = next;
3682 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003683 /* stop scanning for text declaration - we found one */
3684 processor = entityValueProcessor;
3685 return entityValueProcessor(parser, next, end, nextPtr);
3686 }
3687 /* If we are at the end of the buffer, this would cause XmlPrologTok to
3688 return XML_TOK_NONE on the next call, which would then cause the
3689 function to exit with *nextPtr set to s - that is what we want for other
3690 tokens, but not for the BOM - we would rather like to skip it;
3691 then, when this routine is entered the next time, XmlPrologTok will
3692 return XML_TOK_INVALID, since the BOM is still in the buffer
3693 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003694 else if (tok == XML_TOK_BOM && next == end && !ps_finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003695 *nextPtr = next;
3696 return XML_ERROR_NONE;
3697 }
3698 start = next;
Fred Drake31d485c2004-08-03 07:06:22 +00003699 eventPtr = start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003700 }
3701}
3702
3703static enum XML_Error PTRCALL
3704externalParEntProcessor(XML_Parser parser,
3705 const char *s,
3706 const char *end,
3707 const char **nextPtr)
3708{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003709 const char *next = s;
3710 int tok;
3711
Fred Drake31d485c2004-08-03 07:06:22 +00003712 tok = XmlPrologTok(encoding, s, end, &next);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003713 if (tok <= 0) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003714 if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003715 *nextPtr = s;
3716 return XML_ERROR_NONE;
3717 }
3718 switch (tok) {
3719 case XML_TOK_INVALID:
3720 return XML_ERROR_INVALID_TOKEN;
3721 case XML_TOK_PARTIAL:
3722 return XML_ERROR_UNCLOSED_TOKEN;
3723 case XML_TOK_PARTIAL_CHAR:
3724 return XML_ERROR_PARTIAL_CHAR;
3725 case XML_TOK_NONE: /* start == end */
3726 default:
3727 break;
3728 }
3729 }
3730 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
3731 However, when parsing an external subset, doProlog will not accept a BOM
3732 as valid, and report a syntax error, so we have to skip the BOM
3733 */
3734 else if (tok == XML_TOK_BOM) {
3735 s = next;
3736 tok = XmlPrologTok(encoding, s, end, &next);
3737 }
3738
3739 processor = prologProcessor;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003740 return doProlog(parser, encoding, s, end, tok, next,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003741 nextPtr, (XML_Bool)!ps_finalBuffer);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003742}
3743
3744static enum XML_Error PTRCALL
3745entityValueProcessor(XML_Parser parser,
3746 const char *s,
3747 const char *end,
3748 const char **nextPtr)
3749{
3750 const char *start = s;
3751 const char *next = s;
3752 const ENCODING *enc = encoding;
3753 int tok;
3754
3755 for (;;) {
3756 tok = XmlPrologTok(enc, start, end, &next);
3757 if (tok <= 0) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003758 if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003759 *nextPtr = s;
3760 return XML_ERROR_NONE;
3761 }
3762 switch (tok) {
3763 case XML_TOK_INVALID:
Fred Drake31d485c2004-08-03 07:06:22 +00003764 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003765 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00003766 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003767 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003768 return XML_ERROR_PARTIAL_CHAR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003769 case XML_TOK_NONE: /* start == end */
3770 default:
3771 break;
3772 }
Fred Drake31d485c2004-08-03 07:06:22 +00003773 /* found end of entity value - can store it now */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003774 return storeEntityValue(parser, enc, s, end);
3775 }
3776 start = next;
3777 }
3778}
3779
3780#endif /* XML_DTD */
3781
3782static enum XML_Error PTRCALL
3783prologProcessor(XML_Parser parser,
3784 const char *s,
3785 const char *end,
3786 const char **nextPtr)
3787{
3788 const char *next = s;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003789 int tok = XmlPrologTok(encoding, s, end, &next);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003790 return doProlog(parser, encoding, s, end, tok, next,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003791 nextPtr, (XML_Bool)!ps_finalBuffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003792}
3793
3794static enum XML_Error
3795doProlog(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003796 const ENCODING *enc,
3797 const char *s,
3798 const char *end,
3799 int tok,
3800 const char *next,
Fred Drake31d485c2004-08-03 07:06:22 +00003801 const char **nextPtr,
3802 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003803{
3804#ifdef XML_DTD
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003805 static const XML_Char externalSubsetName[] = { ASCII_HASH , '\0' };
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003806#endif /* XML_DTD */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003807 static const XML_Char atypeCDATA[] =
3808 { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
3809 static const XML_Char atypeID[] = { ASCII_I, ASCII_D, '\0' };
3810 static const XML_Char atypeIDREF[] =
3811 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
3812 static const XML_Char atypeIDREFS[] =
3813 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
3814 static const XML_Char atypeENTITY[] =
3815 { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
3816 static const XML_Char atypeENTITIES[] = { ASCII_E, ASCII_N,
3817 ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' };
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003818 static const XML_Char atypeNMTOKEN[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003819 ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
3820 static const XML_Char atypeNMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T,
3821 ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' };
3822 static const XML_Char notationPrefix[] = { ASCII_N, ASCII_O, ASCII_T,
3823 ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0' };
3824 static const XML_Char enumValueSep[] = { ASCII_PIPE, '\0' };
3825 static const XML_Char enumValueStart[] = { ASCII_LPAREN, '\0' };
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003826
Fred Drake31d485c2004-08-03 07:06:22 +00003827 /* save one level of indirection */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003828 DTD * const dtd = _dtd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003829
3830 const char **eventPP;
3831 const char **eventEndPP;
3832 enum XML_Content_Quant quant;
3833
3834 if (enc == encoding) {
3835 eventPP = &eventPtr;
3836 eventEndPP = &eventEndPtr;
3837 }
3838 else {
3839 eventPP = &(openInternalEntities->internalEventPtr);
3840 eventEndPP = &(openInternalEntities->internalEventEndPtr);
3841 }
Fred Drake31d485c2004-08-03 07:06:22 +00003842
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003843 for (;;) {
3844 int role;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003845 XML_Bool handleDefault = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003846 *eventPP = s;
3847 *eventEndPP = next;
3848 if (tok <= 0) {
Fred Drake31d485c2004-08-03 07:06:22 +00003849 if (haveMore && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003850 *nextPtr = s;
3851 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003852 }
3853 switch (tok) {
3854 case XML_TOK_INVALID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003855 *eventPP = next;
3856 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003857 case XML_TOK_PARTIAL:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003858 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003859 case XML_TOK_PARTIAL_CHAR:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003860 return XML_ERROR_PARTIAL_CHAR;
Matthias Klose865e33b2010-01-22 01:13:15 +00003861 case -XML_TOK_PROLOG_S:
3862 tok = -tok;
3863 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003864 case XML_TOK_NONE:
3865#ifdef XML_DTD
Fred Drake31d485c2004-08-03 07:06:22 +00003866 /* for internal PE NOT referenced between declarations */
3867 if (enc != encoding && !openInternalEntities->betweenDecl) {
3868 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003869 return XML_ERROR_NONE;
Fred Drake31d485c2004-08-03 07:06:22 +00003870 }
3871 /* WFC: PE Between Declarations - must check that PE contains
3872 complete markup, not only for external PEs, but also for
3873 internal PEs if the reference occurs between declarations.
3874 */
3875 if (isParamEntity || enc != encoding) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003876 if (XmlTokenRole(&prologState, XML_TOK_NONE, end, end, enc)
3877 == XML_ROLE_ERROR)
Fred Drake31d485c2004-08-03 07:06:22 +00003878 return XML_ERROR_INCOMPLETE_PE;
3879 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003880 return XML_ERROR_NONE;
3881 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003882#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003883 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003884 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003885 tok = -tok;
3886 next = end;
3887 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003888 }
3889 }
3890 role = XmlTokenRole(&prologState, tok, s, next, enc);
3891 switch (role) {
3892 case XML_ROLE_XML_DECL:
3893 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003894 enum XML_Error result = processXmlDecl(parser, 0, s, next);
3895 if (result != XML_ERROR_NONE)
3896 return result;
3897 enc = encoding;
3898 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003899 }
3900 break;
3901 case XML_ROLE_DOCTYPE_NAME:
3902 if (startDoctypeDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003903 doctypeName = poolStoreString(&tempPool, enc, s, next);
3904 if (!doctypeName)
3905 return XML_ERROR_NO_MEMORY;
3906 poolFinish(&tempPool);
3907 doctypePubid = NULL;
3908 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003909 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003910 doctypeSysid = NULL; /* always initialize to NULL */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003911 break;
3912 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
3913 if (startDoctypeDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003914 startDoctypeDeclHandler(handlerArg, doctypeName, doctypeSysid,
3915 doctypePubid, 1);
3916 doctypeName = NULL;
3917 poolClear(&tempPool);
3918 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003919 }
3920 break;
3921#ifdef XML_DTD
3922 case XML_ROLE_TEXT_DECL:
3923 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003924 enum XML_Error result = processXmlDecl(parser, 1, s, next);
3925 if (result != XML_ERROR_NONE)
3926 return result;
3927 enc = encoding;
3928 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003929 }
3930 break;
3931#endif /* XML_DTD */
3932 case XML_ROLE_DOCTYPE_PUBLIC_ID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003933#ifdef XML_DTD
3934 useForeignDTD = XML_FALSE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003935 declEntity = (ENTITY *)lookup(parser,
3936 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003937 externalSubsetName,
3938 sizeof(ENTITY));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003939 if (!declEntity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003940 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003941#endif /* XML_DTD */
Fred Drake31d485c2004-08-03 07:06:22 +00003942 dtd->hasParamEntityRefs = XML_TRUE;
3943 if (startDoctypeDeclHandler) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003944 XML_Char *pubId;
Fred Drake31d485c2004-08-03 07:06:22 +00003945 if (!XmlIsPublicId(enc, s, next, eventPP))
3946 return XML_ERROR_PUBLICID;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003947 pubId = poolStoreString(&tempPool, enc,
3948 s + enc->minBytesPerChar,
3949 next - enc->minBytesPerChar);
3950 if (!pubId)
Fred Drake31d485c2004-08-03 07:06:22 +00003951 return XML_ERROR_NO_MEMORY;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003952 normalizePublicId(pubId);
Fred Drake31d485c2004-08-03 07:06:22 +00003953 poolFinish(&tempPool);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003954 doctypePubid = pubId;
Fred Drake31d485c2004-08-03 07:06:22 +00003955 handleDefault = XML_FALSE;
3956 goto alreadyChecked;
3957 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003958 /* fall through */
3959 case XML_ROLE_ENTITY_PUBLIC_ID:
3960 if (!XmlIsPublicId(enc, s, next, eventPP))
Fred Drake31d485c2004-08-03 07:06:22 +00003961 return XML_ERROR_PUBLICID;
3962 alreadyChecked:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003963 if (dtd->keepProcessing && declEntity) {
3964 XML_Char *tem = poolStoreString(&dtd->pool,
3965 enc,
3966 s + enc->minBytesPerChar,
3967 next - enc->minBytesPerChar);
3968 if (!tem)
3969 return XML_ERROR_NO_MEMORY;
3970 normalizePublicId(tem);
3971 declEntity->publicId = tem;
3972 poolFinish(&dtd->pool);
3973 if (entityDeclHandler)
3974 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003975 }
3976 break;
3977 case XML_ROLE_DOCTYPE_CLOSE:
3978 if (doctypeName) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003979 startDoctypeDeclHandler(handlerArg, doctypeName,
3980 doctypeSysid, doctypePubid, 0);
3981 poolClear(&tempPool);
3982 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003983 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003984 /* doctypeSysid will be non-NULL in the case of a previous
3985 XML_ROLE_DOCTYPE_SYSTEM_ID, even if startDoctypeDeclHandler
3986 was not set, indicating an external subset
3987 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003988#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003989 if (doctypeSysid || useForeignDTD) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003990 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
3991 dtd->hasParamEntityRefs = XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003992 if (paramEntityParsing && externalEntityRefHandler) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003993 ENTITY *entity = (ENTITY *)lookup(parser,
3994 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003995 externalSubsetName,
3996 sizeof(ENTITY));
3997 if (!entity)
3998 return XML_ERROR_NO_MEMORY;
3999 if (useForeignDTD)
4000 entity->base = curBase;
4001 dtd->paramEntityRead = XML_FALSE;
4002 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
4003 0,
4004 entity->base,
4005 entity->systemId,
4006 entity->publicId))
4007 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004008 if (dtd->paramEntityRead) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004009 if (!dtd->standalone &&
4010 notStandaloneHandler &&
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004011 !notStandaloneHandler(handlerArg))
4012 return XML_ERROR_NOT_STANDALONE;
4013 }
4014 /* if we didn't read the foreign DTD then this means that there
4015 is no external subset and we must reset dtd->hasParamEntityRefs
4016 */
4017 else if (!doctypeSysid)
4018 dtd->hasParamEntityRefs = hadParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004019 /* end of DTD - no need to update dtd->keepProcessing */
4020 }
4021 useForeignDTD = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004022 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004023#endif /* XML_DTD */
4024 if (endDoctypeDeclHandler) {
4025 endDoctypeDeclHandler(handlerArg);
4026 handleDefault = XML_FALSE;
4027 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004028 break;
4029 case XML_ROLE_INSTANCE_START:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004030#ifdef XML_DTD
4031 /* if there is no DOCTYPE declaration then now is the
4032 last chance to read the foreign DTD
4033 */
4034 if (useForeignDTD) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004035 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004036 dtd->hasParamEntityRefs = XML_TRUE;
4037 if (paramEntityParsing && externalEntityRefHandler) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004038 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004039 externalSubsetName,
4040 sizeof(ENTITY));
4041 if (!entity)
4042 return XML_ERROR_NO_MEMORY;
4043 entity->base = curBase;
4044 dtd->paramEntityRead = XML_FALSE;
4045 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
4046 0,
4047 entity->base,
4048 entity->systemId,
4049 entity->publicId))
4050 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004051 if (dtd->paramEntityRead) {
4052 if (!dtd->standalone &&
4053 notStandaloneHandler &&
4054 !notStandaloneHandler(handlerArg))
4055 return XML_ERROR_NOT_STANDALONE;
4056 }
4057 /* if we didn't read the foreign DTD then this means that there
4058 is no external subset and we must reset dtd->hasParamEntityRefs
4059 */
4060 else
4061 dtd->hasParamEntityRefs = hadParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004062 /* end of DTD - no need to update dtd->keepProcessing */
4063 }
4064 }
4065#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004066 processor = contentProcessor;
4067 return contentProcessor(parser, s, end, nextPtr);
4068 case XML_ROLE_ATTLIST_ELEMENT_NAME:
4069 declElementType = getElementType(parser, enc, s, next);
4070 if (!declElementType)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004071 return XML_ERROR_NO_MEMORY;
4072 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004073 case XML_ROLE_ATTRIBUTE_NAME:
4074 declAttributeId = getAttributeId(parser, enc, s, next);
4075 if (!declAttributeId)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004076 return XML_ERROR_NO_MEMORY;
4077 declAttributeIsCdata = XML_FALSE;
4078 declAttributeType = NULL;
4079 declAttributeIsId = XML_FALSE;
4080 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004081 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004082 declAttributeIsCdata = XML_TRUE;
4083 declAttributeType = atypeCDATA;
4084 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004085 case XML_ROLE_ATTRIBUTE_TYPE_ID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004086 declAttributeIsId = XML_TRUE;
4087 declAttributeType = atypeID;
4088 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004089 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004090 declAttributeType = atypeIDREF;
4091 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004092 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004093 declAttributeType = atypeIDREFS;
4094 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004095 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004096 declAttributeType = atypeENTITY;
4097 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004098 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004099 declAttributeType = atypeENTITIES;
4100 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004101 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004102 declAttributeType = atypeNMTOKEN;
4103 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004104 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004105 declAttributeType = atypeNMTOKENS;
4106 checkAttListDeclHandler:
4107 if (dtd->keepProcessing && attlistDeclHandler)
4108 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004109 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004110 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
4111 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004112 if (dtd->keepProcessing && attlistDeclHandler) {
4113 const XML_Char *prefix;
4114 if (declAttributeType) {
4115 prefix = enumValueSep;
4116 }
4117 else {
4118 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE
4119 ? notationPrefix
4120 : enumValueStart);
4121 }
4122 if (!poolAppendString(&tempPool, prefix))
4123 return XML_ERROR_NO_MEMORY;
4124 if (!poolAppend(&tempPool, enc, s, next))
4125 return XML_ERROR_NO_MEMORY;
4126 declAttributeType = tempPool.start;
4127 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004128 }
4129 break;
4130 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
4131 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004132 if (dtd->keepProcessing) {
4133 if (!defineAttribute(declElementType, declAttributeId,
Fred Drake08317ae2003-10-21 15:38:55 +00004134 declAttributeIsCdata, declAttributeIsId,
4135 0, parser))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004136 return XML_ERROR_NO_MEMORY;
4137 if (attlistDeclHandler && declAttributeType) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004138 if (*declAttributeType == XML_T(ASCII_LPAREN)
4139 || (*declAttributeType == XML_T(ASCII_N)
4140 && declAttributeType[1] == XML_T(ASCII_O))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004141 /* Enumerated or Notation type */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004142 if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004143 || !poolAppendChar(&tempPool, XML_T('\0')))
4144 return XML_ERROR_NO_MEMORY;
4145 declAttributeType = tempPool.start;
4146 poolFinish(&tempPool);
4147 }
4148 *eventEndPP = s;
4149 attlistDeclHandler(handlerArg, declElementType->name,
4150 declAttributeId->name, declAttributeType,
4151 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
4152 poolClear(&tempPool);
4153 handleDefault = XML_FALSE;
4154 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004155 }
4156 break;
4157 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
4158 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004159 if (dtd->keepProcessing) {
4160 const XML_Char *attVal;
Fred Drake08317ae2003-10-21 15:38:55 +00004161 enum XML_Error result =
4162 storeAttributeValue(parser, enc, declAttributeIsCdata,
4163 s + enc->minBytesPerChar,
4164 next - enc->minBytesPerChar,
4165 &dtd->pool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004166 if (result)
4167 return result;
4168 attVal = poolStart(&dtd->pool);
4169 poolFinish(&dtd->pool);
4170 /* ID attributes aren't allowed to have a default */
4171 if (!defineAttribute(declElementType, declAttributeId,
4172 declAttributeIsCdata, XML_FALSE, attVal, parser))
4173 return XML_ERROR_NO_MEMORY;
4174 if (attlistDeclHandler && declAttributeType) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004175 if (*declAttributeType == XML_T(ASCII_LPAREN)
4176 || (*declAttributeType == XML_T(ASCII_N)
4177 && declAttributeType[1] == XML_T(ASCII_O))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004178 /* Enumerated or Notation type */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004179 if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004180 || !poolAppendChar(&tempPool, XML_T('\0')))
4181 return XML_ERROR_NO_MEMORY;
4182 declAttributeType = tempPool.start;
4183 poolFinish(&tempPool);
4184 }
4185 *eventEndPP = s;
4186 attlistDeclHandler(handlerArg, declElementType->name,
4187 declAttributeId->name, declAttributeType,
4188 attVal,
4189 role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
4190 poolClear(&tempPool);
4191 handleDefault = XML_FALSE;
4192 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004193 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004194 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004195 case XML_ROLE_ENTITY_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004196 if (dtd->keepProcessing) {
4197 enum XML_Error result = storeEntityValue(parser, enc,
4198 s + enc->minBytesPerChar,
4199 next - enc->minBytesPerChar);
4200 if (declEntity) {
4201 declEntity->textPtr = poolStart(&dtd->entityValuePool);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004202 declEntity->textLen = (int)(poolLength(&dtd->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004203 poolFinish(&dtd->entityValuePool);
4204 if (entityDeclHandler) {
4205 *eventEndPP = s;
4206 entityDeclHandler(handlerArg,
4207 declEntity->name,
4208 declEntity->is_param,
4209 declEntity->textPtr,
4210 declEntity->textLen,
4211 curBase, 0, 0, 0);
4212 handleDefault = XML_FALSE;
4213 }
4214 }
4215 else
4216 poolDiscard(&dtd->entityValuePool);
4217 if (result != XML_ERROR_NONE)
4218 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004219 }
4220 break;
4221 case XML_ROLE_DOCTYPE_SYSTEM_ID:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004222#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004223 useForeignDTD = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004224#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004225 dtd->hasParamEntityRefs = XML_TRUE;
4226 if (startDoctypeDeclHandler) {
4227 doctypeSysid = poolStoreString(&tempPool, enc,
4228 s + enc->minBytesPerChar,
4229 next - enc->minBytesPerChar);
4230 if (doctypeSysid == NULL)
4231 return XML_ERROR_NO_MEMORY;
4232 poolFinish(&tempPool);
4233 handleDefault = XML_FALSE;
4234 }
4235#ifdef XML_DTD
4236 else
4237 /* use externalSubsetName to make doctypeSysid non-NULL
4238 for the case where no startDoctypeDeclHandler is set */
4239 doctypeSysid = externalSubsetName;
4240#endif /* XML_DTD */
4241 if (!dtd->standalone
4242#ifdef XML_DTD
4243 && !paramEntityParsing
4244#endif /* XML_DTD */
4245 && notStandaloneHandler
4246 && !notStandaloneHandler(handlerArg))
4247 return XML_ERROR_NOT_STANDALONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004248#ifndef XML_DTD
4249 break;
4250#else /* XML_DTD */
4251 if (!declEntity) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004252 declEntity = (ENTITY *)lookup(parser,
4253 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004254 externalSubsetName,
4255 sizeof(ENTITY));
4256 if (!declEntity)
4257 return XML_ERROR_NO_MEMORY;
4258 declEntity->publicId = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004259 }
4260 /* fall through */
4261#endif /* XML_DTD */
4262 case XML_ROLE_ENTITY_SYSTEM_ID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004263 if (dtd->keepProcessing && declEntity) {
4264 declEntity->systemId = poolStoreString(&dtd->pool, enc,
4265 s + enc->minBytesPerChar,
4266 next - enc->minBytesPerChar);
4267 if (!declEntity->systemId)
4268 return XML_ERROR_NO_MEMORY;
4269 declEntity->base = curBase;
4270 poolFinish(&dtd->pool);
4271 if (entityDeclHandler)
4272 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004273 }
4274 break;
4275 case XML_ROLE_ENTITY_COMPLETE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004276 if (dtd->keepProcessing && declEntity && entityDeclHandler) {
4277 *eventEndPP = s;
4278 entityDeclHandler(handlerArg,
4279 declEntity->name,
4280 declEntity->is_param,
4281 0,0,
4282 declEntity->base,
4283 declEntity->systemId,
4284 declEntity->publicId,
4285 0);
4286 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004287 }
4288 break;
4289 case XML_ROLE_ENTITY_NOTATION_NAME:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004290 if (dtd->keepProcessing && declEntity) {
4291 declEntity->notation = poolStoreString(&dtd->pool, enc, s, next);
4292 if (!declEntity->notation)
4293 return XML_ERROR_NO_MEMORY;
4294 poolFinish(&dtd->pool);
4295 if (unparsedEntityDeclHandler) {
4296 *eventEndPP = s;
4297 unparsedEntityDeclHandler(handlerArg,
4298 declEntity->name,
4299 declEntity->base,
4300 declEntity->systemId,
4301 declEntity->publicId,
4302 declEntity->notation);
4303 handleDefault = XML_FALSE;
4304 }
4305 else if (entityDeclHandler) {
4306 *eventEndPP = s;
4307 entityDeclHandler(handlerArg,
4308 declEntity->name,
4309 0,0,0,
4310 declEntity->base,
4311 declEntity->systemId,
4312 declEntity->publicId,
4313 declEntity->notation);
4314 handleDefault = XML_FALSE;
4315 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004316 }
4317 break;
4318 case XML_ROLE_GENERAL_ENTITY_NAME:
4319 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004320 if (XmlPredefinedEntityName(enc, s, next)) {
4321 declEntity = NULL;
4322 break;
4323 }
4324 if (dtd->keepProcessing) {
4325 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4326 if (!name)
4327 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004328 declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004329 sizeof(ENTITY));
4330 if (!declEntity)
4331 return XML_ERROR_NO_MEMORY;
4332 if (declEntity->name != name) {
4333 poolDiscard(&dtd->pool);
4334 declEntity = NULL;
4335 }
4336 else {
4337 poolFinish(&dtd->pool);
4338 declEntity->publicId = NULL;
4339 declEntity->is_param = XML_FALSE;
4340 /* if we have a parent parser or are reading an internal parameter
4341 entity, then the entity declaration is not considered "internal"
4342 */
4343 declEntity->is_internal = !(parentParser || openInternalEntities);
4344 if (entityDeclHandler)
4345 handleDefault = XML_FALSE;
4346 }
4347 }
4348 else {
4349 poolDiscard(&dtd->pool);
4350 declEntity = NULL;
4351 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004352 }
4353 break;
4354 case XML_ROLE_PARAM_ENTITY_NAME:
4355#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004356 if (dtd->keepProcessing) {
4357 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4358 if (!name)
4359 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004360 declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004361 name, sizeof(ENTITY));
4362 if (!declEntity)
4363 return XML_ERROR_NO_MEMORY;
4364 if (declEntity->name != name) {
4365 poolDiscard(&dtd->pool);
4366 declEntity = NULL;
4367 }
4368 else {
4369 poolFinish(&dtd->pool);
4370 declEntity->publicId = NULL;
4371 declEntity->is_param = XML_TRUE;
4372 /* if we have a parent parser or are reading an internal parameter
4373 entity, then the entity declaration is not considered "internal"
4374 */
4375 declEntity->is_internal = !(parentParser || openInternalEntities);
4376 if (entityDeclHandler)
4377 handleDefault = XML_FALSE;
4378 }
4379 }
4380 else {
4381 poolDiscard(&dtd->pool);
4382 declEntity = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004383 }
4384#else /* not XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004385 declEntity = NULL;
4386#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004387 break;
4388 case XML_ROLE_NOTATION_NAME:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004389 declNotationPublicId = NULL;
4390 declNotationName = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004391 if (notationDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004392 declNotationName = poolStoreString(&tempPool, enc, s, next);
4393 if (!declNotationName)
4394 return XML_ERROR_NO_MEMORY;
4395 poolFinish(&tempPool);
4396 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004397 }
4398 break;
4399 case XML_ROLE_NOTATION_PUBLIC_ID:
4400 if (!XmlIsPublicId(enc, s, next, eventPP))
Fred Drake31d485c2004-08-03 07:06:22 +00004401 return XML_ERROR_PUBLICID;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004402 if (declNotationName) { /* means notationDeclHandler != NULL */
4403 XML_Char *tem = poolStoreString(&tempPool,
4404 enc,
4405 s + enc->minBytesPerChar,
4406 next - enc->minBytesPerChar);
4407 if (!tem)
4408 return XML_ERROR_NO_MEMORY;
4409 normalizePublicId(tem);
4410 declNotationPublicId = tem;
4411 poolFinish(&tempPool);
4412 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004413 }
4414 break;
4415 case XML_ROLE_NOTATION_SYSTEM_ID:
4416 if (declNotationName && notationDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004417 const XML_Char *systemId
4418 = poolStoreString(&tempPool, enc,
4419 s + enc->minBytesPerChar,
4420 next - enc->minBytesPerChar);
4421 if (!systemId)
4422 return XML_ERROR_NO_MEMORY;
4423 *eventEndPP = s;
4424 notationDeclHandler(handlerArg,
4425 declNotationName,
4426 curBase,
4427 systemId,
4428 declNotationPublicId);
4429 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004430 }
4431 poolClear(&tempPool);
4432 break;
4433 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
4434 if (declNotationPublicId && notationDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004435 *eventEndPP = s;
4436 notationDeclHandler(handlerArg,
4437 declNotationName,
4438 curBase,
4439 0,
4440 declNotationPublicId);
4441 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004442 }
4443 poolClear(&tempPool);
4444 break;
4445 case XML_ROLE_ERROR:
4446 switch (tok) {
4447 case XML_TOK_PARAM_ENTITY_REF:
Fred Drake31d485c2004-08-03 07:06:22 +00004448 /* PE references in internal subset are
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004449 not allowed within declarations. */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004450 return XML_ERROR_PARAM_ENTITY_REF;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004451 case XML_TOK_XML_DECL:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004452 return XML_ERROR_MISPLACED_XML_PI;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004453 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004454 return XML_ERROR_SYNTAX;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004455 }
4456#ifdef XML_DTD
4457 case XML_ROLE_IGNORE_SECT:
4458 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004459 enum XML_Error result;
4460 if (defaultHandler)
4461 reportDefault(parser, enc, s, next);
4462 handleDefault = XML_FALSE;
Fred Drake31d485c2004-08-03 07:06:22 +00004463 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
4464 if (result != XML_ERROR_NONE)
4465 return result;
4466 else if (!next) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004467 processor = ignoreSectionProcessor;
4468 return result;
4469 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004470 }
4471 break;
4472#endif /* XML_DTD */
4473 case XML_ROLE_GROUP_OPEN:
4474 if (prologState.level >= groupSize) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004475 if (groupSize) {
4476 char *temp = (char *)REALLOC(groupConnector, groupSize *= 2);
4477 if (temp == NULL)
4478 return XML_ERROR_NO_MEMORY;
4479 groupConnector = temp;
4480 if (dtd->scaffIndex) {
4481 int *temp = (int *)REALLOC(dtd->scaffIndex,
4482 groupSize * sizeof(int));
4483 if (temp == NULL)
4484 return XML_ERROR_NO_MEMORY;
4485 dtd->scaffIndex = temp;
4486 }
4487 }
4488 else {
4489 groupConnector = (char *)MALLOC(groupSize = 32);
4490 if (!groupConnector)
4491 return XML_ERROR_NO_MEMORY;
4492 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004493 }
4494 groupConnector[prologState.level] = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004495 if (dtd->in_eldecl) {
4496 int myindex = nextScaffoldPart(parser);
4497 if (myindex < 0)
4498 return XML_ERROR_NO_MEMORY;
4499 dtd->scaffIndex[dtd->scaffLevel] = myindex;
4500 dtd->scaffLevel++;
4501 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
4502 if (elementDeclHandler)
4503 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004504 }
4505 break;
4506 case XML_ROLE_GROUP_SEQUENCE:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004507 if (groupConnector[prologState.level] == ASCII_PIPE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004508 return XML_ERROR_SYNTAX;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004509 groupConnector[prologState.level] = ASCII_COMMA;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004510 if (dtd->in_eldecl && elementDeclHandler)
4511 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004512 break;
4513 case XML_ROLE_GROUP_CHOICE:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004514 if (groupConnector[prologState.level] == ASCII_COMMA)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004515 return XML_ERROR_SYNTAX;
4516 if (dtd->in_eldecl
4517 && !groupConnector[prologState.level]
4518 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
4519 != XML_CTYPE_MIXED)
4520 ) {
4521 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
4522 = XML_CTYPE_CHOICE;
4523 if (elementDeclHandler)
4524 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004525 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004526 groupConnector[prologState.level] = ASCII_PIPE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004527 break;
4528 case XML_ROLE_PARAM_ENTITY_REF:
4529#ifdef XML_DTD
4530 case XML_ROLE_INNER_PARAM_ENTITY_REF:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004531 dtd->hasParamEntityRefs = XML_TRUE;
4532 if (!paramEntityParsing)
4533 dtd->keepProcessing = dtd->standalone;
4534 else {
4535 const XML_Char *name;
4536 ENTITY *entity;
4537 name = poolStoreString(&dtd->pool, enc,
4538 s + enc->minBytesPerChar,
4539 next - enc->minBytesPerChar);
4540 if (!name)
4541 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004542 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004543 poolDiscard(&dtd->pool);
4544 /* first, determine if a check for an existing declaration is needed;
4545 if yes, check that the entity exists, and that it is internal,
4546 otherwise call the skipped entity handler
4547 */
4548 if (prologState.documentEntity &&
4549 (dtd->standalone
4550 ? !openInternalEntities
4551 : !dtd->hasParamEntityRefs)) {
4552 if (!entity)
4553 return XML_ERROR_UNDEFINED_ENTITY;
4554 else if (!entity->is_internal)
4555 return XML_ERROR_ENTITY_DECLARED_IN_PE;
4556 }
4557 else if (!entity) {
4558 dtd->keepProcessing = dtd->standalone;
4559 /* cannot report skipped entities in declarations */
4560 if ((role == XML_ROLE_PARAM_ENTITY_REF) && skippedEntityHandler) {
4561 skippedEntityHandler(handlerArg, name, 1);
4562 handleDefault = XML_FALSE;
4563 }
4564 break;
4565 }
4566 if (entity->open)
4567 return XML_ERROR_RECURSIVE_ENTITY_REF;
4568 if (entity->textPtr) {
4569 enum XML_Error result;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004570 XML_Bool betweenDecl =
Fred Drake31d485c2004-08-03 07:06:22 +00004571 (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
4572 result = processInternalEntity(parser, entity, betweenDecl);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004573 if (result != XML_ERROR_NONE)
4574 return result;
4575 handleDefault = XML_FALSE;
4576 break;
4577 }
4578 if (externalEntityRefHandler) {
4579 dtd->paramEntityRead = XML_FALSE;
4580 entity->open = XML_TRUE;
4581 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
4582 0,
4583 entity->base,
4584 entity->systemId,
4585 entity->publicId)) {
4586 entity->open = XML_FALSE;
4587 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4588 }
4589 entity->open = XML_FALSE;
4590 handleDefault = XML_FALSE;
4591 if (!dtd->paramEntityRead) {
4592 dtd->keepProcessing = dtd->standalone;
4593 break;
4594 }
4595 }
4596 else {
4597 dtd->keepProcessing = dtd->standalone;
4598 break;
4599 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004600 }
4601#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004602 if (!dtd->standalone &&
4603 notStandaloneHandler &&
4604 !notStandaloneHandler(handlerArg))
4605 return XML_ERROR_NOT_STANDALONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004606 break;
4607
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004608 /* Element declaration stuff */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004609
4610 case XML_ROLE_ELEMENT_NAME:
4611 if (elementDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004612 declElementType = getElementType(parser, enc, s, next);
4613 if (!declElementType)
4614 return XML_ERROR_NO_MEMORY;
4615 dtd->scaffLevel = 0;
4616 dtd->scaffCount = 0;
4617 dtd->in_eldecl = XML_TRUE;
4618 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004619 }
4620 break;
4621
4622 case XML_ROLE_CONTENT_ANY:
4623 case XML_ROLE_CONTENT_EMPTY:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004624 if (dtd->in_eldecl) {
4625 if (elementDeclHandler) {
4626 XML_Content * content = (XML_Content *) MALLOC(sizeof(XML_Content));
4627 if (!content)
4628 return XML_ERROR_NO_MEMORY;
4629 content->quant = XML_CQUANT_NONE;
4630 content->name = NULL;
4631 content->numchildren = 0;
4632 content->children = NULL;
4633 content->type = ((role == XML_ROLE_CONTENT_ANY) ?
4634 XML_CTYPE_ANY :
4635 XML_CTYPE_EMPTY);
4636 *eventEndPP = s;
4637 elementDeclHandler(handlerArg, declElementType->name, content);
4638 handleDefault = XML_FALSE;
4639 }
4640 dtd->in_eldecl = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004641 }
4642 break;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004643
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004644 case XML_ROLE_CONTENT_PCDATA:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004645 if (dtd->in_eldecl) {
4646 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
4647 = XML_CTYPE_MIXED;
4648 if (elementDeclHandler)
4649 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004650 }
4651 break;
4652
4653 case XML_ROLE_CONTENT_ELEMENT:
4654 quant = XML_CQUANT_NONE;
4655 goto elementContent;
4656 case XML_ROLE_CONTENT_ELEMENT_OPT:
4657 quant = XML_CQUANT_OPT;
4658 goto elementContent;
4659 case XML_ROLE_CONTENT_ELEMENT_REP:
4660 quant = XML_CQUANT_REP;
4661 goto elementContent;
4662 case XML_ROLE_CONTENT_ELEMENT_PLUS:
4663 quant = XML_CQUANT_PLUS;
4664 elementContent:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004665 if (dtd->in_eldecl) {
4666 ELEMENT_TYPE *el;
4667 const XML_Char *name;
4668 int nameLen;
4669 const char *nxt = (quant == XML_CQUANT_NONE
4670 ? next
4671 : next - enc->minBytesPerChar);
4672 int myindex = nextScaffoldPart(parser);
4673 if (myindex < 0)
4674 return XML_ERROR_NO_MEMORY;
4675 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
4676 dtd->scaffold[myindex].quant = quant;
4677 el = getElementType(parser, enc, s, nxt);
4678 if (!el)
4679 return XML_ERROR_NO_MEMORY;
4680 name = el->name;
4681 dtd->scaffold[myindex].name = name;
4682 nameLen = 0;
4683 for (; name[nameLen++]; );
4684 dtd->contentStringLen += nameLen;
4685 if (elementDeclHandler)
4686 handleDefault = XML_FALSE;
4687 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004688 break;
4689
4690 case XML_ROLE_GROUP_CLOSE:
4691 quant = XML_CQUANT_NONE;
4692 goto closeGroup;
4693 case XML_ROLE_GROUP_CLOSE_OPT:
4694 quant = XML_CQUANT_OPT;
4695 goto closeGroup;
4696 case XML_ROLE_GROUP_CLOSE_REP:
4697 quant = XML_CQUANT_REP;
4698 goto closeGroup;
4699 case XML_ROLE_GROUP_CLOSE_PLUS:
4700 quant = XML_CQUANT_PLUS;
4701 closeGroup:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004702 if (dtd->in_eldecl) {
4703 if (elementDeclHandler)
4704 handleDefault = XML_FALSE;
4705 dtd->scaffLevel--;
4706 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
4707 if (dtd->scaffLevel == 0) {
4708 if (!handleDefault) {
4709 XML_Content *model = build_model(parser);
4710 if (!model)
4711 return XML_ERROR_NO_MEMORY;
4712 *eventEndPP = s;
4713 elementDeclHandler(handlerArg, declElementType->name, model);
4714 }
4715 dtd->in_eldecl = XML_FALSE;
4716 dtd->contentStringLen = 0;
4717 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004718 }
4719 break;
4720 /* End element declaration stuff */
4721
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004722 case XML_ROLE_PI:
4723 if (!reportProcessingInstruction(parser, enc, s, next))
4724 return XML_ERROR_NO_MEMORY;
4725 handleDefault = XML_FALSE;
4726 break;
4727 case XML_ROLE_COMMENT:
4728 if (!reportComment(parser, enc, s, next))
4729 return XML_ERROR_NO_MEMORY;
4730 handleDefault = XML_FALSE;
4731 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004732 case XML_ROLE_NONE:
4733 switch (tok) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004734 case XML_TOK_BOM:
4735 handleDefault = XML_FALSE;
4736 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004737 }
4738 break;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004739 case XML_ROLE_DOCTYPE_NONE:
4740 if (startDoctypeDeclHandler)
4741 handleDefault = XML_FALSE;
4742 break;
4743 case XML_ROLE_ENTITY_NONE:
4744 if (dtd->keepProcessing && entityDeclHandler)
4745 handleDefault = XML_FALSE;
4746 break;
4747 case XML_ROLE_NOTATION_NONE:
4748 if (notationDeclHandler)
4749 handleDefault = XML_FALSE;
4750 break;
4751 case XML_ROLE_ATTLIST_NONE:
4752 if (dtd->keepProcessing && attlistDeclHandler)
4753 handleDefault = XML_FALSE;
4754 break;
4755 case XML_ROLE_ELEMENT_NONE:
4756 if (elementDeclHandler)
4757 handleDefault = XML_FALSE;
4758 break;
4759 } /* end of big switch */
4760
4761 if (handleDefault && defaultHandler)
4762 reportDefault(parser, enc, s, next);
4763
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004764 switch (ps_parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004765 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00004766 *nextPtr = next;
4767 return XML_ERROR_NONE;
4768 case XML_FINISHED:
4769 return XML_ERROR_ABORTED;
4770 default:
4771 s = next;
4772 tok = XmlPrologTok(enc, s, end, &next);
4773 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004774 }
4775 /* not reached */
4776}
4777
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004778static enum XML_Error PTRCALL
4779epilogProcessor(XML_Parser parser,
4780 const char *s,
4781 const char *end,
4782 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004783{
4784 processor = epilogProcessor;
4785 eventPtr = s;
4786 for (;;) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004787 const char *next = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004788 int tok = XmlPrologTok(encoding, s, end, &next);
4789 eventEndPtr = next;
4790 switch (tok) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004791 /* report partial linebreak - it might be the last token */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004792 case -XML_TOK_PROLOG_S:
4793 if (defaultHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004794 reportDefault(parser, encoding, s, next);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004795 if (ps_parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00004796 return XML_ERROR_ABORTED;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004797 }
Fred Drake31d485c2004-08-03 07:06:22 +00004798 *nextPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004799 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004800 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00004801 *nextPtr = s;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004802 return XML_ERROR_NONE;
4803 case XML_TOK_PROLOG_S:
4804 if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004805 reportDefault(parser, encoding, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004806 break;
4807 case XML_TOK_PI:
4808 if (!reportProcessingInstruction(parser, encoding, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004809 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004810 break;
4811 case XML_TOK_COMMENT:
4812 if (!reportComment(parser, encoding, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004813 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004814 break;
4815 case XML_TOK_INVALID:
4816 eventPtr = next;
4817 return XML_ERROR_INVALID_TOKEN;
4818 case XML_TOK_PARTIAL:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004819 if (!ps_finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004820 *nextPtr = s;
4821 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004822 }
4823 return XML_ERROR_UNCLOSED_TOKEN;
4824 case XML_TOK_PARTIAL_CHAR:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004825 if (!ps_finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004826 *nextPtr = s;
4827 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004828 }
4829 return XML_ERROR_PARTIAL_CHAR;
4830 default:
4831 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
4832 }
4833 eventPtr = s = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004834 switch (ps_parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004835 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00004836 *nextPtr = next;
4837 return XML_ERROR_NONE;
4838 case XML_FINISHED:
4839 return XML_ERROR_ABORTED;
4840 default: ;
4841 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004842 }
4843}
4844
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004845static enum XML_Error
Fred Drake31d485c2004-08-03 07:06:22 +00004846processInternalEntity(XML_Parser parser, ENTITY *entity,
4847 XML_Bool betweenDecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004848{
Fred Drake31d485c2004-08-03 07:06:22 +00004849 const char *textStart, *textEnd;
4850 const char *next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004851 enum XML_Error result;
Fred Drake31d485c2004-08-03 07:06:22 +00004852 OPEN_INTERNAL_ENTITY *openEntity;
4853
4854 if (freeInternalEntities) {
4855 openEntity = freeInternalEntities;
4856 freeInternalEntities = openEntity->next;
4857 }
4858 else {
4859 openEntity = (OPEN_INTERNAL_ENTITY *)MALLOC(sizeof(OPEN_INTERNAL_ENTITY));
4860 if (!openEntity)
4861 return XML_ERROR_NO_MEMORY;
4862 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004863 entity->open = XML_TRUE;
Fred Drake31d485c2004-08-03 07:06:22 +00004864 entity->processed = 0;
4865 openEntity->next = openInternalEntities;
4866 openInternalEntities = openEntity;
4867 openEntity->entity = entity;
4868 openEntity->startTagLevel = tagLevel;
4869 openEntity->betweenDecl = betweenDecl;
4870 openEntity->internalEventPtr = NULL;
4871 openEntity->internalEventEndPtr = NULL;
4872 textStart = (char *)entity->textPtr;
4873 textEnd = (char *)(entity->textPtr + entity->textLen);
4874
4875#ifdef XML_DTD
4876 if (entity->is_param) {
4877 int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004878 result = doProlog(parser, internalEncoding, textStart, textEnd, tok,
Fred Drake31d485c2004-08-03 07:06:22 +00004879 next, &next, XML_FALSE);
4880 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004881 else
Fred Drake31d485c2004-08-03 07:06:22 +00004882#endif /* XML_DTD */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004883 result = doContent(parser, tagLevel, internalEncoding, textStart,
Fred Drake31d485c2004-08-03 07:06:22 +00004884 textEnd, &next, XML_FALSE);
4885
4886 if (result == XML_ERROR_NONE) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004887 if (textEnd != next && ps_parsing == XML_SUSPENDED) {
4888 entity->processed = (int)(next - textStart);
Fred Drake31d485c2004-08-03 07:06:22 +00004889 processor = internalEntityProcessor;
4890 }
4891 else {
4892 entity->open = XML_FALSE;
4893 openInternalEntities = openEntity->next;
4894 /* put openEntity back in list of free instances */
4895 openEntity->next = freeInternalEntities;
4896 freeInternalEntities = openEntity;
4897 }
4898 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004899 return result;
4900}
4901
Fred Drake31d485c2004-08-03 07:06:22 +00004902static enum XML_Error PTRCALL
4903internalEntityProcessor(XML_Parser parser,
4904 const char *s,
4905 const char *end,
4906 const char **nextPtr)
4907{
4908 ENTITY *entity;
4909 const char *textStart, *textEnd;
4910 const char *next;
4911 enum XML_Error result;
4912 OPEN_INTERNAL_ENTITY *openEntity = openInternalEntities;
4913 if (!openEntity)
4914 return XML_ERROR_UNEXPECTED_STATE;
4915
4916 entity = openEntity->entity;
4917 textStart = ((char *)entity->textPtr) + entity->processed;
4918 textEnd = (char *)(entity->textPtr + entity->textLen);
4919
4920#ifdef XML_DTD
4921 if (entity->is_param) {
4922 int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004923 result = doProlog(parser, internalEncoding, textStart, textEnd, tok,
Fred Drake31d485c2004-08-03 07:06:22 +00004924 next, &next, XML_FALSE);
4925 }
4926 else
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004927#endif /* XML_DTD */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004928 result = doContent(parser, openEntity->startTagLevel, internalEncoding,
4929 textStart, textEnd, &next, XML_FALSE);
Fred Drake31d485c2004-08-03 07:06:22 +00004930
4931 if (result != XML_ERROR_NONE)
4932 return result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004933 else if (textEnd != next && ps_parsing == XML_SUSPENDED) {
4934 entity->processed = (int)(next - (char *)entity->textPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00004935 return result;
4936 }
4937 else {
4938 entity->open = XML_FALSE;
4939 openInternalEntities = openEntity->next;
4940 /* put openEntity back in list of free instances */
4941 openEntity->next = freeInternalEntities;
4942 freeInternalEntities = openEntity;
4943 }
4944
4945#ifdef XML_DTD
4946 if (entity->is_param) {
4947 int tok;
4948 processor = prologProcessor;
4949 tok = XmlPrologTok(encoding, s, end, &next);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004950 return doProlog(parser, encoding, s, end, tok, next, nextPtr,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004951 (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00004952 }
4953 else
4954#endif /* XML_DTD */
4955 {
4956 processor = contentProcessor;
4957 /* see externalEntityContentProcessor vs contentProcessor */
4958 return doContent(parser, parentParser ? 1 : 0, encoding, s, end,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004959 nextPtr, (XML_Bool)!ps_finalBuffer);
4960 }
Fred Drake31d485c2004-08-03 07:06:22 +00004961}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004962
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004963static enum XML_Error PTRCALL
4964errorProcessor(XML_Parser parser,
Victor Stinner23ec4b52017-06-15 00:54:36 +02004965 const char *UNUSED_P(s),
4966 const char *UNUSED_P(end),
4967 const char **UNUSED_P(nextPtr))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004968{
4969 return errorCode;
4970}
4971
4972static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004973storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
4974 const char *ptr, const char *end,
4975 STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004976{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004977 enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr,
4978 end, pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004979 if (result)
4980 return result;
4981 if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
4982 poolChop(pool);
4983 if (!poolAppendChar(pool, XML_T('\0')))
4984 return XML_ERROR_NO_MEMORY;
4985 return XML_ERROR_NONE;
4986}
4987
4988static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004989appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
4990 const char *ptr, const char *end,
4991 STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004992{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004993 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004994 for (;;) {
4995 const char *next;
4996 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
4997 switch (tok) {
4998 case XML_TOK_NONE:
4999 return XML_ERROR_NONE;
5000 case XML_TOK_INVALID:
5001 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005002 eventPtr = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005003 return XML_ERROR_INVALID_TOKEN;
5004 case XML_TOK_PARTIAL:
5005 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005006 eventPtr = ptr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005007 return XML_ERROR_INVALID_TOKEN;
5008 case XML_TOK_CHAR_REF:
5009 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005010 XML_Char buf[XML_ENCODE_MAX];
5011 int i;
5012 int n = XmlCharRefNumber(enc, ptr);
5013 if (n < 0) {
5014 if (enc == encoding)
5015 eventPtr = ptr;
5016 return XML_ERROR_BAD_CHAR_REF;
5017 }
5018 if (!isCdata
5019 && n == 0x20 /* space */
5020 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5021 break;
5022 n = XmlEncode(n, (ICHAR *)buf);
5023 if (!n) {
5024 if (enc == encoding)
5025 eventPtr = ptr;
5026 return XML_ERROR_BAD_CHAR_REF;
5027 }
5028 for (i = 0; i < n; i++) {
5029 if (!poolAppendChar(pool, buf[i]))
5030 return XML_ERROR_NO_MEMORY;
5031 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005032 }
5033 break;
5034 case XML_TOK_DATA_CHARS:
5035 if (!poolAppend(pool, enc, ptr, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005036 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005037 break;
5038 case XML_TOK_TRAILING_CR:
5039 next = ptr + enc->minBytesPerChar;
5040 /* fall through */
5041 case XML_TOK_ATTRIBUTE_VALUE_S:
5042 case XML_TOK_DATA_NEWLINE:
5043 if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005044 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005045 if (!poolAppendChar(pool, 0x20))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005046 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005047 break;
5048 case XML_TOK_ENTITY_REF:
5049 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005050 const XML_Char *name;
5051 ENTITY *entity;
5052 char checkEntityDecl;
5053 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
5054 ptr + enc->minBytesPerChar,
5055 next - enc->minBytesPerChar);
5056 if (ch) {
5057 if (!poolAppendChar(pool, ch))
5058 return XML_ERROR_NO_MEMORY;
5059 break;
5060 }
5061 name = poolStoreString(&temp2Pool, enc,
5062 ptr + enc->minBytesPerChar,
5063 next - enc->minBytesPerChar);
5064 if (!name)
5065 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005066 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005067 poolDiscard(&temp2Pool);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005068 /* First, determine if a check for an existing declaration is needed;
5069 if yes, check that the entity exists, and that it is internal.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005070 */
5071 if (pool == &dtd->pool) /* are we called from prolog? */
5072 checkEntityDecl =
5073#ifdef XML_DTD
5074 prologState.documentEntity &&
5075#endif /* XML_DTD */
5076 (dtd->standalone
5077 ? !openInternalEntities
5078 : !dtd->hasParamEntityRefs);
5079 else /* if (pool == &tempPool): we are called from content */
5080 checkEntityDecl = !dtd->hasParamEntityRefs || dtd->standalone;
5081 if (checkEntityDecl) {
5082 if (!entity)
5083 return XML_ERROR_UNDEFINED_ENTITY;
5084 else if (!entity->is_internal)
5085 return XML_ERROR_ENTITY_DECLARED_IN_PE;
5086 }
5087 else if (!entity) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005088 /* Cannot report skipped entity here - see comments on
5089 skippedEntityHandler.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005090 if (skippedEntityHandler)
5091 skippedEntityHandler(handlerArg, name, 0);
5092 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005093 /* Cannot call the default handler because this would be
5094 out of sync with the call to the startElementHandler.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005095 if ((pool == &tempPool) && defaultHandler)
5096 reportDefault(parser, enc, ptr, next);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005097 */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005098 break;
5099 }
5100 if (entity->open) {
5101 if (enc == encoding)
5102 eventPtr = ptr;
5103 return XML_ERROR_RECURSIVE_ENTITY_REF;
5104 }
5105 if (entity->notation) {
5106 if (enc == encoding)
5107 eventPtr = ptr;
5108 return XML_ERROR_BINARY_ENTITY_REF;
5109 }
5110 if (!entity->textPtr) {
5111 if (enc == encoding)
5112 eventPtr = ptr;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005113 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005114 }
5115 else {
5116 enum XML_Error result;
5117 const XML_Char *textEnd = entity->textPtr + entity->textLen;
5118 entity->open = XML_TRUE;
5119 result = appendAttributeValue(parser, internalEncoding, isCdata,
5120 (char *)entity->textPtr,
5121 (char *)textEnd, pool);
5122 entity->open = XML_FALSE;
5123 if (result)
5124 return result;
5125 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005126 }
5127 break;
5128 default:
5129 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005130 eventPtr = ptr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005131 return XML_ERROR_UNEXPECTED_STATE;
5132 }
5133 ptr = next;
5134 }
5135 /* not reached */
5136}
5137
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005138static enum XML_Error
5139storeEntityValue(XML_Parser parser,
5140 const ENCODING *enc,
5141 const char *entityTextPtr,
5142 const char *entityTextEnd)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005143{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005144 DTD * const dtd = _dtd; /* save one level of indirection */
5145 STRING_POOL *pool = &(dtd->entityValuePool);
5146 enum XML_Error result = XML_ERROR_NONE;
5147#ifdef XML_DTD
5148 int oldInEntityValue = prologState.inEntityValue;
5149 prologState.inEntityValue = 1;
5150#endif /* XML_DTD */
5151 /* never return Null for the value argument in EntityDeclHandler,
5152 since this would indicate an external entity; therefore we
5153 have to make sure that entityValuePool.start is not null */
5154 if (!pool->blocks) {
5155 if (!poolGrow(pool))
5156 return XML_ERROR_NO_MEMORY;
5157 }
5158
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005159 for (;;) {
5160 const char *next;
5161 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
5162 switch (tok) {
5163 case XML_TOK_PARAM_ENTITY_REF:
5164#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005165 if (isParamEntity || enc != encoding) {
5166 const XML_Char *name;
5167 ENTITY *entity;
5168 name = poolStoreString(&tempPool, enc,
5169 entityTextPtr + enc->minBytesPerChar,
5170 next - enc->minBytesPerChar);
5171 if (!name) {
5172 result = XML_ERROR_NO_MEMORY;
5173 goto endEntityValue;
5174 }
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005175 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005176 poolDiscard(&tempPool);
5177 if (!entity) {
5178 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
5179 /* cannot report skipped entity here - see comments on
5180 skippedEntityHandler
5181 if (skippedEntityHandler)
5182 skippedEntityHandler(handlerArg, name, 0);
5183 */
5184 dtd->keepProcessing = dtd->standalone;
5185 goto endEntityValue;
5186 }
5187 if (entity->open) {
5188 if (enc == encoding)
5189 eventPtr = entityTextPtr;
5190 result = XML_ERROR_RECURSIVE_ENTITY_REF;
5191 goto endEntityValue;
5192 }
5193 if (entity->systemId) {
5194 if (externalEntityRefHandler) {
5195 dtd->paramEntityRead = XML_FALSE;
5196 entity->open = XML_TRUE;
5197 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
5198 0,
5199 entity->base,
5200 entity->systemId,
5201 entity->publicId)) {
5202 entity->open = XML_FALSE;
5203 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5204 goto endEntityValue;
5205 }
5206 entity->open = XML_FALSE;
5207 if (!dtd->paramEntityRead)
5208 dtd->keepProcessing = dtd->standalone;
5209 }
5210 else
5211 dtd->keepProcessing = dtd->standalone;
5212 }
5213 else {
5214 entity->open = XML_TRUE;
5215 result = storeEntityValue(parser,
5216 internalEncoding,
5217 (char *)entity->textPtr,
5218 (char *)(entity->textPtr
5219 + entity->textLen));
5220 entity->open = XML_FALSE;
5221 if (result)
5222 goto endEntityValue;
5223 }
5224 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005225 }
5226#endif /* XML_DTD */
Fred Drake31d485c2004-08-03 07:06:22 +00005227 /* In the internal subset, PE references are not legal
5228 within markup declarations, e.g entity values in this case. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005229 eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005230 result = XML_ERROR_PARAM_ENTITY_REF;
5231 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005232 case XML_TOK_NONE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005233 result = XML_ERROR_NONE;
5234 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005235 case XML_TOK_ENTITY_REF:
5236 case XML_TOK_DATA_CHARS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005237 if (!poolAppend(pool, enc, entityTextPtr, next)) {
5238 result = XML_ERROR_NO_MEMORY;
5239 goto endEntityValue;
5240 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005241 break;
5242 case XML_TOK_TRAILING_CR:
5243 next = entityTextPtr + enc->minBytesPerChar;
5244 /* fall through */
5245 case XML_TOK_DATA_NEWLINE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005246 if (pool->end == pool->ptr && !poolGrow(pool)) {
5247 result = XML_ERROR_NO_MEMORY;
5248 goto endEntityValue;
5249 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005250 *(pool->ptr)++ = 0xA;
5251 break;
5252 case XML_TOK_CHAR_REF:
5253 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005254 XML_Char buf[XML_ENCODE_MAX];
5255 int i;
5256 int n = XmlCharRefNumber(enc, entityTextPtr);
5257 if (n < 0) {
5258 if (enc == encoding)
5259 eventPtr = entityTextPtr;
5260 result = XML_ERROR_BAD_CHAR_REF;
5261 goto endEntityValue;
5262 }
5263 n = XmlEncode(n, (ICHAR *)buf);
5264 if (!n) {
5265 if (enc == encoding)
5266 eventPtr = entityTextPtr;
5267 result = XML_ERROR_BAD_CHAR_REF;
5268 goto endEntityValue;
5269 }
5270 for (i = 0; i < n; i++) {
5271 if (pool->end == pool->ptr && !poolGrow(pool)) {
5272 result = XML_ERROR_NO_MEMORY;
5273 goto endEntityValue;
5274 }
5275 *(pool->ptr)++ = buf[i];
5276 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005277 }
5278 break;
5279 case XML_TOK_PARTIAL:
5280 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005281 eventPtr = entityTextPtr;
5282 result = XML_ERROR_INVALID_TOKEN;
5283 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005284 case XML_TOK_INVALID:
5285 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005286 eventPtr = next;
5287 result = XML_ERROR_INVALID_TOKEN;
5288 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005289 default:
5290 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005291 eventPtr = entityTextPtr;
5292 result = XML_ERROR_UNEXPECTED_STATE;
5293 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005294 }
5295 entityTextPtr = next;
5296 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005297endEntityValue:
5298#ifdef XML_DTD
5299 prologState.inEntityValue = oldInEntityValue;
5300#endif /* XML_DTD */
5301 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005302}
5303
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005304static void FASTCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005305normalizeLines(XML_Char *s)
5306{
5307 XML_Char *p;
5308 for (;; s++) {
5309 if (*s == XML_T('\0'))
5310 return;
5311 if (*s == 0xD)
5312 break;
5313 }
5314 p = s;
5315 do {
5316 if (*s == 0xD) {
5317 *p++ = 0xA;
5318 if (*++s == 0xA)
5319 s++;
5320 }
5321 else
5322 *p++ = *s++;
5323 } while (*s);
5324 *p = XML_T('\0');
5325}
5326
5327static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005328reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
5329 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005330{
5331 const XML_Char *target;
5332 XML_Char *data;
5333 const char *tem;
5334 if (!processingInstructionHandler) {
5335 if (defaultHandler)
5336 reportDefault(parser, enc, start, end);
5337 return 1;
5338 }
5339 start += enc->minBytesPerChar * 2;
5340 tem = start + XmlNameLength(enc, start);
5341 target = poolStoreString(&tempPool, enc, start, tem);
5342 if (!target)
5343 return 0;
5344 poolFinish(&tempPool);
5345 data = poolStoreString(&tempPool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005346 XmlSkipS(enc, tem),
5347 end - enc->minBytesPerChar*2);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005348 if (!data)
5349 return 0;
5350 normalizeLines(data);
5351 processingInstructionHandler(handlerArg, target, data);
5352 poolClear(&tempPool);
5353 return 1;
5354}
5355
5356static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005357reportComment(XML_Parser parser, const ENCODING *enc,
5358 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005359{
5360 XML_Char *data;
5361 if (!commentHandler) {
5362 if (defaultHandler)
5363 reportDefault(parser, enc, start, end);
5364 return 1;
5365 }
5366 data = poolStoreString(&tempPool,
5367 enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005368 start + enc->minBytesPerChar * 4,
5369 end - enc->minBytesPerChar * 3);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005370 if (!data)
5371 return 0;
5372 normalizeLines(data);
5373 commentHandler(handlerArg, data);
5374 poolClear(&tempPool);
5375 return 1;
5376}
5377
5378static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005379reportDefault(XML_Parser parser, const ENCODING *enc,
5380 const char *s, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005381{
5382 if (MUST_CONVERT(enc, s)) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02005383 enum XML_Convert_Result convert_res;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005384 const char **eventPP;
5385 const char **eventEndPP;
5386 if (enc == encoding) {
5387 eventPP = &eventPtr;
5388 eventEndPP = &eventEndPtr;
5389 }
5390 else {
5391 eventPP = &(openInternalEntities->internalEventPtr);
5392 eventEndPP = &(openInternalEntities->internalEventEndPtr);
5393 }
5394 do {
5395 ICHAR *dataPtr = (ICHAR *)dataBuf;
Victor Stinner23ec4b52017-06-15 00:54:36 +02005396 convert_res = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005397 *eventEndPP = s;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005398 defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005399 *eventPP = s;
Victor Stinner23ec4b52017-06-15 00:54:36 +02005400 } while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005401 }
5402 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005403 defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005404}
5405
5406
5407static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005408defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
5409 XML_Bool isId, const XML_Char *value, XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005410{
5411 DEFAULT_ATTRIBUTE *att;
5412 if (value || isId) {
5413 /* The handling of default attributes gets messed up if we have
5414 a default which duplicates a non-default. */
5415 int i;
5416 for (i = 0; i < type->nDefaultAtts; i++)
5417 if (attId == type->defaultAtts[i].id)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005418 return 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005419 if (isId && !type->idAtt && !attId->xmlns)
5420 type->idAtt = attId;
5421 }
5422 if (type->nDefaultAtts == type->allocDefaultAtts) {
5423 if (type->allocDefaultAtts == 0) {
5424 type->allocDefaultAtts = 8;
Fred Drake08317ae2003-10-21 15:38:55 +00005425 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(type->allocDefaultAtts
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005426 * sizeof(DEFAULT_ATTRIBUTE));
5427 if (!type->defaultAtts)
5428 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005429 }
5430 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005431 DEFAULT_ATTRIBUTE *temp;
5432 int count = type->allocDefaultAtts * 2;
5433 temp = (DEFAULT_ATTRIBUTE *)
5434 REALLOC(type->defaultAtts, (count * sizeof(DEFAULT_ATTRIBUTE)));
5435 if (temp == NULL)
5436 return 0;
5437 type->allocDefaultAtts = count;
5438 type->defaultAtts = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005439 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005440 }
5441 att = type->defaultAtts + type->nDefaultAtts;
5442 att->id = attId;
5443 att->value = value;
5444 att->isCdata = isCdata;
5445 if (!isCdata)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005446 attId->maybeTokenized = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005447 type->nDefaultAtts += 1;
5448 return 1;
5449}
5450
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005451static int
5452setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005453{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005454 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005455 const XML_Char *name;
5456 for (name = elementType->name; *name; name++) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005457 if (*name == XML_T(ASCII_COLON)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005458 PREFIX *prefix;
5459 const XML_Char *s;
5460 for (s = elementType->name; s != name; s++) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005461 if (!poolAppendChar(&dtd->pool, *s))
5462 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005463 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005464 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
5465 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005466 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005467 sizeof(PREFIX));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005468 if (!prefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005469 return 0;
5470 if (prefix->name == poolStart(&dtd->pool))
5471 poolFinish(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005472 else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005473 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005474 elementType->prefix = prefix;
5475
5476 }
5477 }
5478 return 1;
5479}
5480
5481static ATTRIBUTE_ID *
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005482getAttributeId(XML_Parser parser, const ENCODING *enc,
5483 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005484{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005485 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005486 ATTRIBUTE_ID *id;
5487 const XML_Char *name;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005488 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
5489 return NULL;
5490 name = poolStoreString(&dtd->pool, enc, start, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005491 if (!name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005492 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00005493 /* skip quotation mark - its storage will be re-used (like in name[-1]) */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005494 ++name;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005495 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005496 if (!id)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005497 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005498 if (id->name != name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005499 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005500 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005501 poolFinish(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005502 if (!ns)
5503 ;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005504 else if (name[0] == XML_T(ASCII_x)
5505 && name[1] == XML_T(ASCII_m)
5506 && name[2] == XML_T(ASCII_l)
5507 && name[3] == XML_T(ASCII_n)
5508 && name[4] == XML_T(ASCII_s)
5509 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005510 if (name[5] == XML_T('\0'))
5511 id->prefix = &dtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005512 else
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005513 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, sizeof(PREFIX));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005514 id->xmlns = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005515 }
5516 else {
5517 int i;
5518 for (i = 0; name[i]; i++) {
Fred Drake08317ae2003-10-21 15:38:55 +00005519 /* attributes without prefix are *not* in the default namespace */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005520 if (name[i] == XML_T(ASCII_COLON)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005521 int j;
5522 for (j = 0; j < i; j++) {
5523 if (!poolAppendChar(&dtd->pool, name[j]))
5524 return NULL;
5525 }
5526 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
5527 return NULL;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005528 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005529 sizeof(PREFIX));
Benjamin Peterson196d7db2016-06-11 13:28:56 -07005530 if (!id->prefix)
5531 return NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005532 if (id->prefix->name == poolStart(&dtd->pool))
5533 poolFinish(&dtd->pool);
5534 else
5535 poolDiscard(&dtd->pool);
5536 break;
5537 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005538 }
5539 }
5540 }
5541 return id;
5542}
5543
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005544#define CONTEXT_SEP XML_T(ASCII_FF)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005545
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005546static const XML_Char *
5547getContext(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005548{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005549 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005550 HASH_TABLE_ITER iter;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005551 XML_Bool needSep = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005552
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005553 if (dtd->defaultPrefix.binding) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005554 int i;
5555 int len;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005556 if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS)))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005557 return NULL;
5558 len = dtd->defaultPrefix.binding->uriLen;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005559 if (namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005560 len--;
5561 for (i = 0; i < len; i++)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005562 if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i]))
5563 return NULL;
5564 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005565 }
5566
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005567 hashTableIterInit(&iter, &(dtd->prefixes));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005568 for (;;) {
5569 int i;
5570 int len;
5571 const XML_Char *s;
5572 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
5573 if (!prefix)
5574 break;
5575 if (!prefix->binding)
5576 continue;
5577 if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005578 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005579 for (s = prefix->name; *s; s++)
5580 if (!poolAppendChar(&tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005581 return NULL;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005582 if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS)))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005583 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005584 len = prefix->binding->uriLen;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005585 if (namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005586 len--;
5587 for (i = 0; i < len; i++)
5588 if (!poolAppendChar(&tempPool, prefix->binding->uri[i]))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005589 return NULL;
5590 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005591 }
5592
5593
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005594 hashTableIterInit(&iter, &(dtd->generalEntities));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005595 for (;;) {
5596 const XML_Char *s;
5597 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
5598 if (!e)
5599 break;
5600 if (!e->open)
5601 continue;
5602 if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005603 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005604 for (s = e->name; *s; s++)
5605 if (!poolAppendChar(&tempPool, *s))
5606 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005607 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005608 }
5609
5610 if (!poolAppendChar(&tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005611 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005612 return tempPool.start;
5613}
5614
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005615static XML_Bool
5616setContext(XML_Parser parser, const XML_Char *context)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005617{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005618 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005619 const XML_Char *s = context;
5620
5621 while (*context != XML_T('\0')) {
5622 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
5623 ENTITY *e;
5624 if (!poolAppendChar(&tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005625 return XML_FALSE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005626 e = (ENTITY *)lookup(parser, &dtd->generalEntities, poolStart(&tempPool), 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005627 if (e)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005628 e->open = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005629 if (*s != XML_T('\0'))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005630 s++;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005631 context = s;
5632 poolDiscard(&tempPool);
5633 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005634 else if (*s == XML_T(ASCII_EQUALS)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005635 PREFIX *prefix;
5636 if (poolLength(&tempPool) == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005637 prefix = &dtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005638 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005639 if (!poolAppendChar(&tempPool, XML_T('\0')))
5640 return XML_FALSE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005641 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&tempPool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005642 sizeof(PREFIX));
5643 if (!prefix)
5644 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005645 if (prefix->name == poolStart(&tempPool)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005646 prefix->name = poolCopyString(&dtd->pool, prefix->name);
5647 if (!prefix->name)
5648 return XML_FALSE;
5649 }
5650 poolDiscard(&tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005651 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005652 for (context = s + 1;
5653 *context != CONTEXT_SEP && *context != XML_T('\0');
5654 context++)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005655 if (!poolAppendChar(&tempPool, *context))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005656 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005657 if (!poolAppendChar(&tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005658 return XML_FALSE;
Fred Drake31d485c2004-08-03 07:06:22 +00005659 if (addBinding(parser, prefix, NULL, poolStart(&tempPool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005660 &inheritedBindings) != XML_ERROR_NONE)
5661 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005662 poolDiscard(&tempPool);
5663 if (*context != XML_T('\0'))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005664 ++context;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005665 s = context;
5666 }
5667 else {
5668 if (!poolAppendChar(&tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005669 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005670 s++;
5671 }
5672 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005673 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005674}
5675
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005676static void FASTCALL
5677normalizePublicId(XML_Char *publicId)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005678{
5679 XML_Char *p = publicId;
5680 XML_Char *s;
5681 for (s = publicId; *s; s++) {
5682 switch (*s) {
5683 case 0x20:
5684 case 0xD:
5685 case 0xA:
5686 if (p != publicId && p[-1] != 0x20)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005687 *p++ = 0x20;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005688 break;
5689 default:
5690 *p++ = *s;
5691 }
5692 }
5693 if (p != publicId && p[-1] == 0x20)
5694 --p;
5695 *p = XML_T('\0');
5696}
5697
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005698static DTD *
5699dtdCreate(const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005700{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005701 DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD));
5702 if (p == NULL)
5703 return p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005704 poolInit(&(p->pool), ms);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005705 poolInit(&(p->entityValuePool), ms);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005706 hashTableInit(&(p->generalEntities), ms);
5707 hashTableInit(&(p->elementTypes), ms);
5708 hashTableInit(&(p->attributeIds), ms);
5709 hashTableInit(&(p->prefixes), ms);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005710#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005711 p->paramEntityRead = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005712 hashTableInit(&(p->paramEntities), ms);
5713#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005714 p->defaultPrefix.name = NULL;
5715 p->defaultPrefix.binding = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005716
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005717 p->in_eldecl = XML_FALSE;
5718 p->scaffIndex = NULL;
5719 p->scaffold = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005720 p->scaffLevel = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005721 p->scaffSize = 0;
5722 p->scaffCount = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005723 p->contentStringLen = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005724
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005725 p->keepProcessing = XML_TRUE;
5726 p->hasParamEntityRefs = XML_FALSE;
5727 p->standalone = XML_FALSE;
5728 return p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005729}
5730
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005731static void
5732dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005733{
5734 HASH_TABLE_ITER iter;
5735 hashTableIterInit(&iter, &(p->elementTypes));
5736 for (;;) {
5737 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
5738 if (!e)
5739 break;
5740 if (e->allocDefaultAtts != 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005741 ms->free_fcn(e->defaultAtts);
5742 }
5743 hashTableClear(&(p->generalEntities));
5744#ifdef XML_DTD
5745 p->paramEntityRead = XML_FALSE;
5746 hashTableClear(&(p->paramEntities));
5747#endif /* XML_DTD */
5748 hashTableClear(&(p->elementTypes));
5749 hashTableClear(&(p->attributeIds));
5750 hashTableClear(&(p->prefixes));
5751 poolClear(&(p->pool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005752 poolClear(&(p->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005753 p->defaultPrefix.name = NULL;
5754 p->defaultPrefix.binding = NULL;
5755
5756 p->in_eldecl = XML_FALSE;
Fred Drake08317ae2003-10-21 15:38:55 +00005757
5758 ms->free_fcn(p->scaffIndex);
5759 p->scaffIndex = NULL;
5760 ms->free_fcn(p->scaffold);
5761 p->scaffold = NULL;
5762
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005763 p->scaffLevel = 0;
5764 p->scaffSize = 0;
5765 p->scaffCount = 0;
5766 p->contentStringLen = 0;
5767
5768 p->keepProcessing = XML_TRUE;
5769 p->hasParamEntityRefs = XML_FALSE;
5770 p->standalone = XML_FALSE;
5771}
5772
5773static void
5774dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms)
5775{
5776 HASH_TABLE_ITER iter;
5777 hashTableIterInit(&iter, &(p->elementTypes));
5778 for (;;) {
5779 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
5780 if (!e)
5781 break;
5782 if (e->allocDefaultAtts != 0)
5783 ms->free_fcn(e->defaultAtts);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005784 }
5785 hashTableDestroy(&(p->generalEntities));
5786#ifdef XML_DTD
5787 hashTableDestroy(&(p->paramEntities));
5788#endif /* XML_DTD */
5789 hashTableDestroy(&(p->elementTypes));
5790 hashTableDestroy(&(p->attributeIds));
5791 hashTableDestroy(&(p->prefixes));
5792 poolDestroy(&(p->pool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005793 poolDestroy(&(p->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005794 if (isDocEntity) {
Fred Drake08317ae2003-10-21 15:38:55 +00005795 ms->free_fcn(p->scaffIndex);
5796 ms->free_fcn(p->scaffold);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005797 }
5798 ms->free_fcn(p);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005799}
5800
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005801/* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
5802 The new DTD has already been initialized.
5803*/
5804static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005805dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005806{
5807 HASH_TABLE_ITER iter;
5808
5809 /* Copy the prefix table. */
5810
5811 hashTableIterInit(&iter, &(oldDtd->prefixes));
5812 for (;;) {
5813 const XML_Char *name;
5814 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
5815 if (!oldP)
5816 break;
5817 name = poolCopyString(&(newDtd->pool), oldP->name);
5818 if (!name)
5819 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005820 if (!lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005821 return 0;
5822 }
5823
5824 hashTableIterInit(&iter, &(oldDtd->attributeIds));
5825
5826 /* Copy the attribute id table. */
5827
5828 for (;;) {
5829 ATTRIBUTE_ID *newA;
5830 const XML_Char *name;
5831 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
5832
5833 if (!oldA)
5834 break;
5835 /* Remember to allocate the scratch byte before the name. */
5836 if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
5837 return 0;
5838 name = poolCopyString(&(newDtd->pool), oldA->name);
5839 if (!name)
5840 return 0;
5841 ++name;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005842 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005843 sizeof(ATTRIBUTE_ID));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005844 if (!newA)
5845 return 0;
5846 newA->maybeTokenized = oldA->maybeTokenized;
5847 if (oldA->prefix) {
5848 newA->xmlns = oldA->xmlns;
5849 if (oldA->prefix == &oldDtd->defaultPrefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005850 newA->prefix = &newDtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005851 else
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005852 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005853 oldA->prefix->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005854 }
5855 }
5856
5857 /* Copy the element type table. */
5858
5859 hashTableIterInit(&iter, &(oldDtd->elementTypes));
5860
5861 for (;;) {
5862 int i;
5863 ELEMENT_TYPE *newE;
5864 const XML_Char *name;
5865 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
5866 if (!oldE)
5867 break;
5868 name = poolCopyString(&(newDtd->pool), oldE->name);
5869 if (!name)
5870 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005871 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005872 sizeof(ELEMENT_TYPE));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005873 if (!newE)
5874 return 0;
5875 if (oldE->nDefaultAtts) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005876 newE->defaultAtts = (DEFAULT_ATTRIBUTE *)
5877 ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
5878 if (!newE->defaultAtts) {
5879 ms->free_fcn(newE);
5880 return 0;
5881 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005882 }
5883 if (oldE->idAtt)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005884 newE->idAtt = (ATTRIBUTE_ID *)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005885 lookup(oldParser, &(newDtd->attributeIds), oldE->idAtt->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005886 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
5887 if (oldE->prefix)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005888 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005889 oldE->prefix->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005890 for (i = 0; i < newE->nDefaultAtts; i++) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005891 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005892 lookup(oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005893 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
5894 if (oldE->defaultAtts[i].value) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005895 newE->defaultAtts[i].value
5896 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
5897 if (!newE->defaultAtts[i].value)
5898 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005899 }
5900 else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005901 newE->defaultAtts[i].value = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005902 }
5903 }
5904
5905 /* Copy the entity tables. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005906 if (!copyEntityTable(oldParser,
5907 &(newDtd->generalEntities),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005908 &(newDtd->pool),
5909 &(oldDtd->generalEntities)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005910 return 0;
5911
5912#ifdef XML_DTD
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005913 if (!copyEntityTable(oldParser,
5914 &(newDtd->paramEntities),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005915 &(newDtd->pool),
5916 &(oldDtd->paramEntities)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005917 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005918 newDtd->paramEntityRead = oldDtd->paramEntityRead;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005919#endif /* XML_DTD */
5920
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005921 newDtd->keepProcessing = oldDtd->keepProcessing;
5922 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005923 newDtd->standalone = oldDtd->standalone;
5924
5925 /* Don't want deep copying for scaffolding */
5926 newDtd->in_eldecl = oldDtd->in_eldecl;
5927 newDtd->scaffold = oldDtd->scaffold;
5928 newDtd->contentStringLen = oldDtd->contentStringLen;
5929 newDtd->scaffSize = oldDtd->scaffSize;
5930 newDtd->scaffLevel = oldDtd->scaffLevel;
5931 newDtd->scaffIndex = oldDtd->scaffIndex;
5932
5933 return 1;
5934} /* End dtdCopy */
5935
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005936static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005937copyEntityTable(XML_Parser oldParser,
5938 HASH_TABLE *newTable,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005939 STRING_POOL *newPool,
5940 const HASH_TABLE *oldTable)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005941{
5942 HASH_TABLE_ITER iter;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005943 const XML_Char *cachedOldBase = NULL;
5944 const XML_Char *cachedNewBase = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005945
5946 hashTableIterInit(&iter, oldTable);
5947
5948 for (;;) {
5949 ENTITY *newE;
5950 const XML_Char *name;
5951 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
5952 if (!oldE)
5953 break;
5954 name = poolCopyString(newPool, oldE->name);
5955 if (!name)
5956 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005957 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005958 if (!newE)
5959 return 0;
5960 if (oldE->systemId) {
5961 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
5962 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005963 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005964 newE->systemId = tem;
5965 if (oldE->base) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005966 if (oldE->base == cachedOldBase)
5967 newE->base = cachedNewBase;
5968 else {
5969 cachedOldBase = oldE->base;
5970 tem = poolCopyString(newPool, cachedOldBase);
5971 if (!tem)
5972 return 0;
5973 cachedNewBase = newE->base = tem;
5974 }
5975 }
5976 if (oldE->publicId) {
5977 tem = poolCopyString(newPool, oldE->publicId);
5978 if (!tem)
5979 return 0;
5980 newE->publicId = tem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005981 }
5982 }
5983 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005984 const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr,
5985 oldE->textLen);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005986 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005987 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005988 newE->textPtr = tem;
5989 newE->textLen = oldE->textLen;
5990 }
5991 if (oldE->notation) {
5992 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
5993 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005994 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005995 newE->notation = tem;
5996 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005997 newE->is_param = oldE->is_param;
5998 newE->is_internal = oldE->is_internal;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005999 }
6000 return 1;
6001}
6002
Fred Drake08317ae2003-10-21 15:38:55 +00006003#define INIT_POWER 6
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006004
Fred Drake08317ae2003-10-21 15:38:55 +00006005static XML_Bool FASTCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006006keyeq(KEY s1, KEY s2)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006007{
6008 for (; *s1 == *s2; s1++, s2++)
6009 if (*s1 == 0)
Fred Drake08317ae2003-10-21 15:38:55 +00006010 return XML_TRUE;
6011 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006012}
6013
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006014static unsigned long FASTCALL
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006015hash(XML_Parser parser, KEY s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006016{
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006017 unsigned long h = hash_secret_salt;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006018 while (*s)
Fred Drake08317ae2003-10-21 15:38:55 +00006019 h = CHAR_HASH(h, *s++);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006020 return h;
6021}
6022
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006023static NAMED *
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006024lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006025{
6026 size_t i;
6027 if (table->size == 0) {
6028 size_t tsize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006029 if (!createSize)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006030 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006031 table->power = INIT_POWER;
6032 /* table->size is a power of 2 */
6033 table->size = (size_t)1 << INIT_POWER;
6034 tsize = table->size * sizeof(NAMED *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006035 table->v = (NAMED **)table->mem->malloc_fcn(tsize);
Fred Drake31d485c2004-08-03 07:06:22 +00006036 if (!table->v) {
6037 table->size = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006038 return NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00006039 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006040 memset(table->v, 0, tsize);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006041 i = hash(parser, name) & ((unsigned long)table->size - 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006042 }
6043 else {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006044 unsigned long h = hash(parser, name);
Fred Drake08317ae2003-10-21 15:38:55 +00006045 unsigned long mask = (unsigned long)table->size - 1;
6046 unsigned char step = 0;
6047 i = h & mask;
6048 while (table->v[i]) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006049 if (keyeq(name, table->v[i]->name))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006050 return table->v[i];
Fred Drake08317ae2003-10-21 15:38:55 +00006051 if (!step)
6052 step = PROBE_STEP(h, mask, table->power);
6053 i < step ? (i += table->size - step) : (i -= step);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006054 }
6055 if (!createSize)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006056 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006057
6058 /* check for overflow (table is half full) */
6059 if (table->used >> (table->power - 1)) {
6060 unsigned char newPower = table->power + 1;
6061 size_t newSize = (size_t)1 << newPower;
6062 unsigned long newMask = (unsigned long)newSize - 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006063 size_t tsize = newSize * sizeof(NAMED *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006064 NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006065 if (!newV)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006066 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006067 memset(newV, 0, tsize);
6068 for (i = 0; i < table->size; i++)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006069 if (table->v[i]) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006070 unsigned long newHash = hash(parser, table->v[i]->name);
Fred Drake08317ae2003-10-21 15:38:55 +00006071 size_t j = newHash & newMask;
6072 step = 0;
6073 while (newV[j]) {
6074 if (!step)
6075 step = PROBE_STEP(newHash, newMask, newPower);
6076 j < step ? (j += newSize - step) : (j -= step);
6077 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006078 newV[j] = table->v[i];
6079 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006080 table->mem->free_fcn(table->v);
6081 table->v = newV;
Fred Drake08317ae2003-10-21 15:38:55 +00006082 table->power = newPower;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006083 table->size = newSize;
Fred Drake08317ae2003-10-21 15:38:55 +00006084 i = h & newMask;
6085 step = 0;
6086 while (table->v[i]) {
6087 if (!step)
6088 step = PROBE_STEP(h, newMask, newPower);
6089 i < step ? (i += newSize - step) : (i -= step);
6090 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006091 }
6092 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006093 table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006094 if (!table->v[i])
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006095 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006096 memset(table->v[i], 0, createSize);
6097 table->v[i]->name = name;
6098 (table->used)++;
6099 return table->v[i];
6100}
6101
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006102static void FASTCALL
6103hashTableClear(HASH_TABLE *table)
6104{
6105 size_t i;
6106 for (i = 0; i < table->size; i++) {
Fred Drake08317ae2003-10-21 15:38:55 +00006107 table->mem->free_fcn(table->v[i]);
6108 table->v[i] = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006109 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006110 table->used = 0;
6111}
6112
6113static void FASTCALL
6114hashTableDestroy(HASH_TABLE *table)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006115{
6116 size_t i;
Fred Drake08317ae2003-10-21 15:38:55 +00006117 for (i = 0; i < table->size; i++)
6118 table->mem->free_fcn(table->v[i]);
6119 table->mem->free_fcn(table->v);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006120}
6121
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006122static void FASTCALL
6123hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006124{
Fred Drake08317ae2003-10-21 15:38:55 +00006125 p->power = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006126 p->size = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006127 p->used = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006128 p->v = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006129 p->mem = ms;
6130}
6131
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006132static void FASTCALL
6133hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006134{
6135 iter->p = table->v;
6136 iter->end = iter->p + table->size;
6137}
6138
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006139static NAMED * FASTCALL
6140hashTableIterNext(HASH_TABLE_ITER *iter)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006141{
6142 while (iter->p != iter->end) {
6143 NAMED *tem = *(iter->p)++;
6144 if (tem)
6145 return tem;
6146 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006147 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006148}
6149
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006150static void FASTCALL
6151poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006152{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006153 pool->blocks = NULL;
6154 pool->freeBlocks = NULL;
6155 pool->start = NULL;
6156 pool->ptr = NULL;
6157 pool->end = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006158 pool->mem = ms;
6159}
6160
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006161static void FASTCALL
6162poolClear(STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006163{
6164 if (!pool->freeBlocks)
6165 pool->freeBlocks = pool->blocks;
6166 else {
6167 BLOCK *p = pool->blocks;
6168 while (p) {
6169 BLOCK *tem = p->next;
6170 p->next = pool->freeBlocks;
6171 pool->freeBlocks = p;
6172 p = tem;
6173 }
6174 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006175 pool->blocks = NULL;
6176 pool->start = NULL;
6177 pool->ptr = NULL;
6178 pool->end = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006179}
6180
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006181static void FASTCALL
6182poolDestroy(STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006183{
6184 BLOCK *p = pool->blocks;
6185 while (p) {
6186 BLOCK *tem = p->next;
6187 pool->mem->free_fcn(p);
6188 p = tem;
6189 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006190 p = pool->freeBlocks;
6191 while (p) {
6192 BLOCK *tem = p->next;
6193 pool->mem->free_fcn(p);
6194 p = tem;
6195 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006196}
6197
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006198static XML_Char *
6199poolAppend(STRING_POOL *pool, const ENCODING *enc,
6200 const char *ptr, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006201{
6202 if (!pool->ptr && !poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006203 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006204 for (;;) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02006205 const enum XML_Convert_Result convert_res = XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
6206 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006207 break;
6208 if (!poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006209 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006210 }
6211 return pool->start;
6212}
6213
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006214static const XML_Char * FASTCALL
6215poolCopyString(STRING_POOL *pool, const XML_Char *s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006216{
6217 do {
6218 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006219 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006220 } while (*s++);
6221 s = pool->start;
6222 poolFinish(pool);
6223 return s;
6224}
6225
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006226static const XML_Char *
6227poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006228{
6229 if (!pool->ptr && !poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006230 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006231 for (; n > 0; --n, s++) {
6232 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006233 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006234 }
6235 s = pool->start;
6236 poolFinish(pool);
6237 return s;
6238}
6239
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006240static const XML_Char * FASTCALL
6241poolAppendString(STRING_POOL *pool, const XML_Char *s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006242{
6243 while (*s) {
6244 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006245 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006246 s++;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006247 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006248 return pool->start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006249}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006250
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006251static XML_Char *
6252poolStoreString(STRING_POOL *pool, const ENCODING *enc,
6253 const char *ptr, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006254{
6255 if (!poolAppend(pool, enc, ptr, end))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006256 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006257 if (pool->ptr == pool->end && !poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006258 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006259 *(pool->ptr)++ = 0;
6260 return pool->start;
6261}
6262
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006263static XML_Bool FASTCALL
6264poolGrow(STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006265{
6266 if (pool->freeBlocks) {
6267 if (pool->start == 0) {
6268 pool->blocks = pool->freeBlocks;
6269 pool->freeBlocks = pool->freeBlocks->next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006270 pool->blocks->next = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006271 pool->start = pool->blocks->s;
6272 pool->end = pool->start + pool->blocks->size;
6273 pool->ptr = pool->start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006274 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006275 }
6276 if (pool->end - pool->start < pool->freeBlocks->size) {
6277 BLOCK *tem = pool->freeBlocks->next;
6278 pool->freeBlocks->next = pool->blocks;
6279 pool->blocks = pool->freeBlocks;
6280 pool->freeBlocks = tem;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006281 memcpy(pool->blocks->s, pool->start,
6282 (pool->end - pool->start) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006283 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
6284 pool->start = pool->blocks->s;
6285 pool->end = pool->start + pool->blocks->size;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006286 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006287 }
6288 }
6289 if (pool->blocks && pool->start == pool->blocks->s) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02006290 BLOCK *temp;
6291 int blockSize = (int)((unsigned)(pool->end - pool->start)*2U);
6292
6293 if (blockSize < 0)
6294 return XML_FALSE;
6295
6296 temp = (BLOCK *)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006297 pool->mem->realloc_fcn(pool->blocks,
Fred Drake08317ae2003-10-21 15:38:55 +00006298 (offsetof(BLOCK, s)
6299 + blockSize * sizeof(XML_Char)));
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006300 if (temp == NULL)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006301 return XML_FALSE;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006302 pool->blocks = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006303 pool->blocks->size = blockSize;
6304 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
6305 pool->start = pool->blocks->s;
6306 pool->end = pool->start + blockSize;
6307 }
6308 else {
6309 BLOCK *tem;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00006310 int blockSize = (int)(pool->end - pool->start);
Victor Stinner23ec4b52017-06-15 00:54:36 +02006311
6312 if (blockSize < 0)
6313 return XML_FALSE;
6314
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006315 if (blockSize < INIT_BLOCK_SIZE)
6316 blockSize = INIT_BLOCK_SIZE;
6317 else
6318 blockSize *= 2;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006319 tem = (BLOCK *)pool->mem->malloc_fcn(offsetof(BLOCK, s)
Fred Drake08317ae2003-10-21 15:38:55 +00006320 + blockSize * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006321 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006322 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006323 tem->size = blockSize;
6324 tem->next = pool->blocks;
6325 pool->blocks = tem;
6326 if (pool->ptr != pool->start)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006327 memcpy(tem->s, pool->start,
6328 (pool->ptr - pool->start) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006329 pool->ptr = tem->s + (pool->ptr - pool->start);
6330 pool->start = tem->s;
6331 pool->end = tem->s + blockSize;
6332 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006333 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006334}
6335
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006336static int FASTCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006337nextScaffoldPart(XML_Parser parser)
6338{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006339 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006340 CONTENT_SCAFFOLD * me;
6341 int next;
6342
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006343 if (!dtd->scaffIndex) {
6344 dtd->scaffIndex = (int *)MALLOC(groupSize * sizeof(int));
6345 if (!dtd->scaffIndex)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006346 return -1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006347 dtd->scaffIndex[0] = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006348 }
6349
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006350 if (dtd->scaffCount >= dtd->scaffSize) {
6351 CONTENT_SCAFFOLD *temp;
6352 if (dtd->scaffold) {
6353 temp = (CONTENT_SCAFFOLD *)
6354 REALLOC(dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
6355 if (temp == NULL)
6356 return -1;
6357 dtd->scaffSize *= 2;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006358 }
6359 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006360 temp = (CONTENT_SCAFFOLD *)MALLOC(INIT_SCAFFOLD_ELEMENTS
6361 * sizeof(CONTENT_SCAFFOLD));
6362 if (temp == NULL)
6363 return -1;
6364 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006365 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006366 dtd->scaffold = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006367 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006368 next = dtd->scaffCount++;
6369 me = &dtd->scaffold[next];
6370 if (dtd->scaffLevel) {
6371 CONTENT_SCAFFOLD *parent = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel-1]];
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006372 if (parent->lastchild) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006373 dtd->scaffold[parent->lastchild].nextsib = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006374 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006375 if (!parent->childcnt)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006376 parent->firstchild = next;
6377 parent->lastchild = next;
6378 parent->childcnt++;
6379 }
6380 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
6381 return next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006382}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006383
6384static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006385build_node(XML_Parser parser,
6386 int src_node,
6387 XML_Content *dest,
6388 XML_Content **contpos,
6389 XML_Char **strpos)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006390{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006391 DTD * const dtd = _dtd; /* save one level of indirection */
6392 dest->type = dtd->scaffold[src_node].type;
6393 dest->quant = dtd->scaffold[src_node].quant;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006394 if (dest->type == XML_CTYPE_NAME) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006395 const XML_Char *src;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006396 dest->name = *strpos;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006397 src = dtd->scaffold[src_node].name;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006398 for (;;) {
6399 *(*strpos)++ = *src;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006400 if (!*src)
6401 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006402 src++;
6403 }
6404 dest->numchildren = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006405 dest->children = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006406 }
6407 else {
6408 unsigned int i;
6409 int cn;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006410 dest->numchildren = dtd->scaffold[src_node].childcnt;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006411 dest->children = *contpos;
6412 *contpos += dest->numchildren;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006413 for (i = 0, cn = dtd->scaffold[src_node].firstchild;
6414 i < dest->numchildren;
6415 i++, cn = dtd->scaffold[cn].nextsib) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006416 build_node(parser, cn, &(dest->children[i]), contpos, strpos);
6417 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006418 dest->name = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006419 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006420}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006421
6422static XML_Content *
6423build_model (XML_Parser parser)
6424{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006425 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006426 XML_Content *ret;
6427 XML_Content *cpos;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006428 XML_Char * str;
6429 int allocsize = (dtd->scaffCount * sizeof(XML_Content)
6430 + (dtd->contentStringLen * sizeof(XML_Char)));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006431
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006432 ret = (XML_Content *)MALLOC(allocsize);
6433 if (!ret)
6434 return NULL;
6435
6436 str = (XML_Char *) (&ret[dtd->scaffCount]);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006437 cpos = &ret[1];
6438
6439 build_node(parser, 0, ret, &cpos, &str);
6440 return ret;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006441}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006442
6443static ELEMENT_TYPE *
6444getElementType(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006445 const ENCODING *enc,
6446 const char *ptr,
6447 const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006448{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006449 DTD * const dtd = _dtd; /* save one level of indirection */
6450 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006451 ELEMENT_TYPE *ret;
6452
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006453 if (!name)
6454 return NULL;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006455 ret = (ELEMENT_TYPE *) lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006456 if (!ret)
6457 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006458 if (ret->name != name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006459 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006460 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006461 poolFinish(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006462 if (!setElementTypePrefix(parser, ret))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006463 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006464 }
6465 return ret;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006466}