blob: 76f078e2505f90bbf78a66df14e3a7fec0f46540 [file] [log] [blame]
/* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
   See the file COPYING for copying permission.

   77fea421d361dca90041d0040ecf1dca651167fadf2af79e990e35168d70d933 (2.2.1+)
*/
6
#define _GNU_SOURCE                     /* syscall prototype */

#include <stddef.h>
#include <stdint.h>                     /* uintptr_t */
#include <string.h>                     /* memset(), memcpy() */
#include <assert.h>
#include <limits.h>                     /* UINT_MAX */
#include <stdio.h>                      /* fprintf */
#include <stdlib.h>                     /* getenv */

#ifdef _WIN32
#define getpid GetCurrentProcessId
#else
#include <sys/time.h>                   /* gettimeofday() */
#include <sys/types.h>                  /* getpid() */
#include <unistd.h>                     /* getpid() */
#endif

#define XML_BUILDING_EXPAT 1

#ifdef _WIN32
#include "winconfig.h"
#elif defined(HAVE_EXPAT_CONFIG_H)
#include <expat_config.h>
#endif /* ndef _WIN32 */

#include "ascii.h"
#include "expat.h"
#include "siphash.h"

/* Map the generic converter/encoder names onto the UTF-16 or UTF-8
   flavour, depending on whether XML_UNICODE is defined, and pick the
   matching internal character type ICHAR. */
#ifdef XML_UNICODE
#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#define XmlConvert XmlUtf16Convert
#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
#define XmlEncode XmlUtf16Encode
/* True unless enc is flagged as UTF-16 and the address s has its low bit
   clear.  The address is inspected through uintptr_t: subtracting a null
   pointer from s, as was done previously, is undefined behaviour. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((uintptr_t)(s)) & 1))
typedef unsigned short ICHAR;
#else
#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#define XmlConvert XmlUtf8Convert
#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
#define XmlEncode XmlUtf8Encode
/* True unless enc is flagged as UTF-8; s is not examined. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
typedef char ICHAR;
#endif

#ifndef XML_NS

/* When XML_NS is not defined, the *NS names are plain aliases for their
   non-namespace counterparts. */
#define XmlInitEncodingNS XmlInitEncoding
#define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
#undef XmlGetInternalEncodingNS
#define XmlGetInternalEncodingNS XmlGetInternalEncoding
#define XmlParseXmlDeclNS XmlParseXmlDecl

#endif

/* XML_T(x) yields x cast to the API character type (no cast when
   XML_UNICODE is not defined); XML_L(x) yields x with an L prefix pasted
   on when XML_UNICODE_WCHAR_T is in effect, and x unchanged otherwise. */
#ifdef XML_UNICODE

#ifdef XML_UNICODE_WCHAR_T
#define XML_T(x) (const wchar_t)x
#define XML_L(x) L ## x
#else
#define XML_T(x) (const unsigned short)x
#define XML_L(x) x
#endif

#else

#define XML_T(x) x
#define XML_L(x) x

#endif

/* Round up n to be a multiple of sz, where sz is a power of 2.
   Note that sz is evaluated twice. */
#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))

Fred Drake08317ae2003-10-21 15:38:55 +000086/* Handle the case where memmove() doesn't exist. */
87#ifndef HAVE_MEMMOVE
88#ifdef HAVE_BCOPY
89#define memmove(d,s,l) bcopy((s),(d),(l))
90#else
91#error memmove does not exist on this platform, nor is a substitute available
92#endif /* HAVE_BCOPY */
93#endif /* HAVE_MEMMOVE */
94
Martin v. Löwisfc03a942003-01-25 22:41:29 +000095#include "internal.h"
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +000096#include "xmltok.h"
97#include "xmlrole.h"
98
99typedef const XML_Char *KEY;
100
101typedef struct {
102 KEY name;
103} NAMED;
104
105typedef struct {
106 NAMED **v;
Fred Drake08317ae2003-10-21 15:38:55 +0000107 unsigned char power;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000108 size_t size;
109 size_t used;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000110 const XML_Memory_Handling_Suite *mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000111} HASH_TABLE;
112
Victor Stinner5ff71322017-06-21 14:39:22 +0200113static size_t
114keylen(KEY s);
Fred Drake08317ae2003-10-21 15:38:55 +0000115
Victor Stinner5ff71322017-06-21 14:39:22 +0200116static void
117copy_salt_to_sipkey(XML_Parser parser, struct sipkey * key);
Fred Drake08317ae2003-10-21 15:38:55 +0000118
/* For probing (after a collision) we need a step size relative prime
   to the hash table size, which is a power of 2. We use double-hashing,
   since we can calculate a second hash value cheaply by taking those bits
   of the first hash value that were discarded (masked out) when the table
   index was calculated: index = hash & mask, where mask = table->size - 1.
   We limit the maximum step size to table->size / 4 (mask >> 2) and make
   it odd, since odd numbers are always relative prime to a power of 2.
*/
#define SECOND_HASH(hash, mask, power) \
  ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power) \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))

Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000132typedef struct {
133 NAMED **p;
134 NAMED **end;
135} HASH_TABLE_ITER;
136
/* Initial sizes used by the parser. */
#define INIT_TAG_BUF_SIZE 32  /* must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE 1024
#define INIT_ATTS_SIZE 16
#define INIT_ATTS_VERSION 0xFFFFFFFF
#define INIT_BLOCK_SIZE 1024
#define INIT_BUFFER_SIZE 1024

#define EXPAND_SPARE 24

146typedef struct binding {
147 struct prefix *prefix;
148 struct binding *nextTagBinding;
149 struct binding *prevPrefixBinding;
150 const struct attribute_id *attId;
151 XML_Char *uri;
152 int uriLen;
153 int uriAlloc;
154} BINDING;
155
156typedef struct prefix {
157 const XML_Char *name;
158 BINDING *binding;
159} PREFIX;
160
161typedef struct {
162 const XML_Char *str;
163 const XML_Char *localPart;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000164 const XML_Char *prefix;
165 int strLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000166 int uriLen;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000167 int prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000168} TAG_NAME;
169
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000170/* TAG represents an open element.
171 The name of the element is stored in both the document and API
172 encodings. The memory buffer 'buf' is a separately-allocated
173 memory area which stores the name. During the XML_Parse()/
174 XMLParseBuffer() when the element is open, the memory for the 'raw'
175 version of the name (in the document encoding) is shared with the
176 document buffer. If the element is open across calls to
177 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
178 contain the 'raw' name as well.
179
180 A parser re-uses these structures, maintaining a list of allocated
181 TAG objects in a free list.
182*/
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000183typedef struct tag {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000184 struct tag *parent; /* parent of this element */
185 const char *rawName; /* tagName in the original encoding */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000186 int rawNameLength;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000187 TAG_NAME name; /* tagName in the API encoding */
188 char *buf; /* buffer for name components */
189 char *bufEnd; /* end of the buffer */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000190 BINDING *bindings;
191} TAG;
192
193typedef struct {
194 const XML_Char *name;
195 const XML_Char *textPtr;
Fred Drake31d485c2004-08-03 07:06:22 +0000196 int textLen; /* length in XML_Chars */
197 int processed; /* # of processed bytes - when suspended */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000198 const XML_Char *systemId;
199 const XML_Char *base;
200 const XML_Char *publicId;
201 const XML_Char *notation;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000202 XML_Bool open;
203 XML_Bool is_param;
204 XML_Bool is_internal; /* true if declared in internal subset outside PE */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000205} ENTITY;
206
207typedef struct {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000208 enum XML_Content_Type type;
209 enum XML_Content_Quant quant;
210 const XML_Char * name;
211 int firstchild;
212 int lastchild;
213 int childcnt;
214 int nextsib;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000215} CONTENT_SCAFFOLD;
216
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000217#define INIT_SCAFFOLD_ELEMENTS 32
218
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000219typedef struct block {
220 struct block *next;
221 int size;
222 XML_Char s[1];
223} BLOCK;
224
225typedef struct {
226 BLOCK *blocks;
227 BLOCK *freeBlocks;
228 const XML_Char *end;
229 XML_Char *ptr;
230 XML_Char *start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000231 const XML_Memory_Handling_Suite *mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000232} STRING_POOL;
233
234/* The XML_Char before the name is used to determine whether
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000235 an attribute has been specified. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000236typedef struct attribute_id {
237 XML_Char *name;
238 PREFIX *prefix;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000239 XML_Bool maybeTokenized;
240 XML_Bool xmlns;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000241} ATTRIBUTE_ID;
242
243typedef struct {
244 const ATTRIBUTE_ID *id;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000245 XML_Bool isCdata;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000246 const XML_Char *value;
247} DEFAULT_ATTRIBUTE;
248
249typedef struct {
Fred Drake08317ae2003-10-21 15:38:55 +0000250 unsigned long version;
251 unsigned long hash;
252 const XML_Char *uriName;
253} NS_ATT;
254
255typedef struct {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000256 const XML_Char *name;
257 PREFIX *prefix;
258 const ATTRIBUTE_ID *idAtt;
259 int nDefaultAtts;
260 int allocDefaultAtts;
261 DEFAULT_ATTRIBUTE *defaultAtts;
262} ELEMENT_TYPE;
263
264typedef struct {
265 HASH_TABLE generalEntities;
266 HASH_TABLE elementTypes;
267 HASH_TABLE attributeIds;
268 HASH_TABLE prefixes;
269 STRING_POOL pool;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000270 STRING_POOL entityValuePool;
271 /* false once a parameter entity reference has been skipped */
272 XML_Bool keepProcessing;
273 /* true once an internal or external PE reference has been encountered;
274 this includes the reference to an external subset */
275 XML_Bool hasParamEntityRefs;
276 XML_Bool standalone;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000277#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000278 /* indicates if external PE has been read */
279 XML_Bool paramEntityRead;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000280 HASH_TABLE paramEntities;
281#endif /* XML_DTD */
282 PREFIX defaultPrefix;
283 /* === scaffolding for building content model === */
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000284 XML_Bool in_eldecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000285 CONTENT_SCAFFOLD *scaffold;
286 unsigned contentStringLen;
287 unsigned scaffSize;
288 unsigned scaffCount;
289 int scaffLevel;
290 int *scaffIndex;
291} DTD;
292
293typedef struct open_internal_entity {
294 const char *internalEventPtr;
295 const char *internalEventEndPtr;
296 struct open_internal_entity *next;
297 ENTITY *entity;
Fred Drake31d485c2004-08-03 07:06:22 +0000298 int startTagLevel;
299 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000300} OPEN_INTERNAL_ENTITY;
301
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000302typedef enum XML_Error PTRCALL Processor(XML_Parser parser,
303 const char *start,
304 const char *end,
305 const char **endPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000306
307static Processor prologProcessor;
308static Processor prologInitProcessor;
309static Processor contentProcessor;
310static Processor cdataSectionProcessor;
311#ifdef XML_DTD
312static Processor ignoreSectionProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000313static Processor externalParEntProcessor;
314static Processor externalParEntInitProcessor;
315static Processor entityValueProcessor;
316static Processor entityValueInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000317#endif /* XML_DTD */
318static Processor epilogProcessor;
319static Processor errorProcessor;
320static Processor externalEntityInitProcessor;
321static Processor externalEntityInitProcessor2;
322static Processor externalEntityInitProcessor3;
323static Processor externalEntityContentProcessor;
Fred Drake31d485c2004-08-03 07:06:22 +0000324static Processor internalEntityProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000325
326static enum XML_Error
327handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
328static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000329processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
Fred Drake31d485c2004-08-03 07:06:22 +0000330 const char *s, const char *next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000331static enum XML_Error
332initializeEncoding(XML_Parser parser);
333static enum XML_Error
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700334doProlog(XML_Parser parser, const ENCODING *enc, const char *s,
335 const char *end, int tok, const char *next, const char **nextPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000336 XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000337static enum XML_Error
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700338processInternalEntity(XML_Parser parser, ENTITY *entity,
Fred Drake31d485c2004-08-03 07:06:22 +0000339 XML_Bool betweenDecl);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000340static enum XML_Error
341doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700342 const char *start, const char *end, const char **endPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000343 XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000344static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000345doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000346 const char *end, const char **nextPtr, XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000347#ifdef XML_DTD
348static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000349doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000350 const char *end, const char **nextPtr, XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000351#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000352
Victor Stinner5ff71322017-06-21 14:39:22 +0200353static void
354freeBindings(XML_Parser parser, BINDING *bindings);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000355static enum XML_Error
Fred Drake4faea012003-01-28 06:42:40 +0000356storeAtts(XML_Parser parser, const ENCODING *, const char *s,
357 TAG_NAME *tagNamePtr, BINDING **bindingsPtr);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000358static enum XML_Error
359addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
360 const XML_Char *uri, BINDING **bindingsPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000361static int
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700362defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
Fred Drake31d485c2004-08-03 07:06:22 +0000363 XML_Bool isId, const XML_Char *dfltValue, XML_Parser parser);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000364static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000365storeAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
366 const char *, const char *, STRING_POOL *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000367static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000368appendAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
369 const char *, const char *, STRING_POOL *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000370static ATTRIBUTE_ID *
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000371getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
372 const char *end);
373static int
374setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000375static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000376storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start,
377 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000378static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000379reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
380 const char *start, const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000381static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000382reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
383 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000384static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000385reportDefault(XML_Parser parser, const ENCODING *enc, const char *start,
386 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000387
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000388static const XML_Char * getContext(XML_Parser parser);
389static XML_Bool
390setContext(XML_Parser parser, const XML_Char *context);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000391
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000392static void FASTCALL normalizePublicId(XML_Char *s);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000393
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000394static DTD * dtdCreate(const XML_Memory_Handling_Suite *ms);
395/* do not call if parentParser != NULL */
396static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
397static void
398dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms);
399static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700400dtdCopy(XML_Parser oldParser,
401 DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000402static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700403copyEntityTable(XML_Parser oldParser,
404 HASH_TABLE *, STRING_POOL *, const HASH_TABLE *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000405static NAMED *
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700406lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000407static void FASTCALL
408hashTableInit(HASH_TABLE *, const XML_Memory_Handling_Suite *ms);
409static void FASTCALL hashTableClear(HASH_TABLE *);
410static void FASTCALL hashTableDestroy(HASH_TABLE *);
411static void FASTCALL
412hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
413static NAMED * FASTCALL hashTableIterNext(HASH_TABLE_ITER *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000414
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000415static void FASTCALL
416poolInit(STRING_POOL *, const XML_Memory_Handling_Suite *ms);
417static void FASTCALL poolClear(STRING_POOL *);
418static void FASTCALL poolDestroy(STRING_POOL *);
419static XML_Char *
420poolAppend(STRING_POOL *pool, const ENCODING *enc,
421 const char *ptr, const char *end);
422static XML_Char *
423poolStoreString(STRING_POOL *pool, const ENCODING *enc,
424 const char *ptr, const char *end);
425static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
426static const XML_Char * FASTCALL
427poolCopyString(STRING_POOL *pool, const XML_Char *s);
428static const XML_Char *
429poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
430static const XML_Char * FASTCALL
431poolAppendString(STRING_POOL *pool, const XML_Char *s);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000432
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000433static int FASTCALL nextScaffoldPart(XML_Parser parser);
434static XML_Content * build_model(XML_Parser parser);
435static ELEMENT_TYPE *
436getElementType(XML_Parser parser, const ENCODING *enc,
437 const char *ptr, const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000438
Victor Stinner23ec4b52017-06-15 00:54:36 +0200439static unsigned long generate_hash_secret_salt(XML_Parser parser);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700440static XML_Bool startParsing(XML_Parser parser);
441
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000442static XML_Parser
443parserCreate(const XML_Char *encodingName,
444 const XML_Memory_Handling_Suite *memsuite,
445 const XML_Char *nameSep,
446 DTD *dtd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700447
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000448static void
449parserInit(XML_Parser parser, const XML_Char *encodingName);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000450
/* Accessors for a STRING_POOL's start/ptr cursors.  Every use of the
   macro arguments is parenthesised, so expressions such as `&pool` or
   `pools + i` may be passed; note that `pool` is evaluated more than once
   by several of these macros. */
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Step ptr back by one character. */
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
/* Reset ptr to start. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Advance start to ptr. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Store c at ptr and advance; when ptr has reached end, poolGrow() is
   called first.  Yields 0 if poolGrow() returns false, 1 otherwise. */
#define poolAppendChar(pool, c) \
  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
   ? 0 \
   : ((*((pool)->ptr)++ = (c)), 1))

Martin v. Löwisfc03a942003-01-25 22:41:29 +0000463struct XML_ParserStruct {
464 /* The first member must be userData so that the XML_GetUserData
465 macro works. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000466 void *m_userData;
467 void *m_handlerArg;
468 char *m_buffer;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000469 const XML_Memory_Handling_Suite m_mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000470 /* first character to be parsed */
471 const char *m_bufferPtr;
472 /* past last character to be parsed */
473 char *m_bufferEnd;
474 /* allocated end of buffer */
475 const char *m_bufferLim;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000476 XML_Index m_parseEndByteIndex;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000477 const char *m_parseEndPtr;
478 XML_Char *m_dataBuf;
479 XML_Char *m_dataBufEnd;
480 XML_StartElementHandler m_startElementHandler;
481 XML_EndElementHandler m_endElementHandler;
482 XML_CharacterDataHandler m_characterDataHandler;
483 XML_ProcessingInstructionHandler m_processingInstructionHandler;
484 XML_CommentHandler m_commentHandler;
485 XML_StartCdataSectionHandler m_startCdataSectionHandler;
486 XML_EndCdataSectionHandler m_endCdataSectionHandler;
487 XML_DefaultHandler m_defaultHandler;
488 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
489 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
490 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
491 XML_NotationDeclHandler m_notationDeclHandler;
492 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
493 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
494 XML_NotStandaloneHandler m_notStandaloneHandler;
495 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000496 XML_Parser m_externalEntityRefHandlerArg;
497 XML_SkippedEntityHandler m_skippedEntityHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000498 XML_UnknownEncodingHandler m_unknownEncodingHandler;
499 XML_ElementDeclHandler m_elementDeclHandler;
500 XML_AttlistDeclHandler m_attlistDeclHandler;
501 XML_EntityDeclHandler m_entityDeclHandler;
502 XML_XmlDeclHandler m_xmlDeclHandler;
503 const ENCODING *m_encoding;
504 INIT_ENCODING m_initEncoding;
505 const ENCODING *m_internalEncoding;
506 const XML_Char *m_protocolEncodingName;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000507 XML_Bool m_ns;
508 XML_Bool m_ns_triplets;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000509 void *m_unknownEncodingMem;
510 void *m_unknownEncodingData;
511 void *m_unknownEncodingHandlerData;
Fred Drake31d485c2004-08-03 07:06:22 +0000512 void (XMLCALL *m_unknownEncodingRelease)(void *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000513 PROLOG_STATE m_prologState;
514 Processor *m_processor;
515 enum XML_Error m_errorCode;
516 const char *m_eventPtr;
517 const char *m_eventEndPtr;
518 const char *m_positionPtr;
519 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
Fred Drake31d485c2004-08-03 07:06:22 +0000520 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000521 XML_Bool m_defaultExpandInternalEntities;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000522 int m_tagLevel;
523 ENTITY *m_declEntity;
524 const XML_Char *m_doctypeName;
525 const XML_Char *m_doctypeSysid;
526 const XML_Char *m_doctypePubid;
527 const XML_Char *m_declAttributeType;
528 const XML_Char *m_declNotationName;
529 const XML_Char *m_declNotationPublicId;
530 ELEMENT_TYPE *m_declElementType;
531 ATTRIBUTE_ID *m_declAttributeId;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000532 XML_Bool m_declAttributeIsCdata;
533 XML_Bool m_declAttributeIsId;
534 DTD *m_dtd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000535 const XML_Char *m_curBase;
536 TAG *m_tagStack;
537 TAG *m_freeTagList;
538 BINDING *m_inheritedBindings;
539 BINDING *m_freeBindingList;
540 int m_attsSize;
541 int m_nSpecifiedAtts;
542 int m_idAttIndex;
543 ATTRIBUTE *m_atts;
Fred Drake08317ae2003-10-21 15:38:55 +0000544 NS_ATT *m_nsAtts;
545 unsigned long m_nsAttsVersion;
546 unsigned char m_nsAttsPower;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700547#ifdef XML_ATTR_INFO
548 XML_AttrInfo *m_attInfo;
549#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000550 POSITION m_position;
551 STRING_POOL m_tempPool;
552 STRING_POOL m_temp2Pool;
553 char *m_groupConnector;
Fred Drake08317ae2003-10-21 15:38:55 +0000554 unsigned int m_groupSize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000555 XML_Char m_namespaceSeparator;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000556 XML_Parser m_parentParser;
Fred Drake31d485c2004-08-03 07:06:22 +0000557 XML_ParsingStatus m_parsingStatus;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000558#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000559 XML_Bool m_isParamEntity;
560 XML_Bool m_useForeignDTD;
561 enum XML_ParamEntityParsing m_paramEntityParsing;
562#endif
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700563 unsigned long m_hash_secret_salt;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000564};
565
/* Allocate, resize and free through the parser's own memory suite.
   These expand to uses of a variable named `parser`, which must be in
   scope wherever they are used. */
#define MALLOC(s) (parser->m_mem.malloc_fcn((s)))
#define REALLOC(p,s) (parser->m_mem.realloc_fcn((p),(s)))
#define FREE(p) (parser->m_mem.free_fcn((p)))

/* Shorthand for the fields of the parser structure.  Each macro expands
   to a member access through a variable named `parser`, which must be in
   scope wherever the shorthand is used. */
#define userData (parser->m_userData)
#define handlerArg (parser->m_handlerArg)
#define startElementHandler (parser->m_startElementHandler)
#define endElementHandler (parser->m_endElementHandler)
#define characterDataHandler (parser->m_characterDataHandler)
#define processingInstructionHandler \
        (parser->m_processingInstructionHandler)
#define commentHandler (parser->m_commentHandler)
#define startCdataSectionHandler \
        (parser->m_startCdataSectionHandler)
#define endCdataSectionHandler (parser->m_endCdataSectionHandler)
#define defaultHandler (parser->m_defaultHandler)
#define startDoctypeDeclHandler (parser->m_startDoctypeDeclHandler)
#define endDoctypeDeclHandler (parser->m_endDoctypeDeclHandler)
#define unparsedEntityDeclHandler \
        (parser->m_unparsedEntityDeclHandler)
#define notationDeclHandler (parser->m_notationDeclHandler)
#define startNamespaceDeclHandler \
        (parser->m_startNamespaceDeclHandler)
#define endNamespaceDeclHandler (parser->m_endNamespaceDeclHandler)
#define notStandaloneHandler (parser->m_notStandaloneHandler)
#define externalEntityRefHandler \
        (parser->m_externalEntityRefHandler)
#define externalEntityRefHandlerArg \
        (parser->m_externalEntityRefHandlerArg)
/* NOTE(review): struct XML_ParserStruct declares no member named
   m_internalEntityRefHandler, so any use of this macro would not compile. */
#define internalEntityRefHandler \
        (parser->m_internalEntityRefHandler)
#define skippedEntityHandler (parser->m_skippedEntityHandler)
#define unknownEncodingHandler (parser->m_unknownEncodingHandler)
#define elementDeclHandler (parser->m_elementDeclHandler)
#define attlistDeclHandler (parser->m_attlistDeclHandler)
#define entityDeclHandler (parser->m_entityDeclHandler)
#define xmlDeclHandler (parser->m_xmlDeclHandler)
#define encoding (parser->m_encoding)
#define initEncoding (parser->m_initEncoding)
#define internalEncoding (parser->m_internalEncoding)
#define unknownEncodingMem (parser->m_unknownEncodingMem)
#define unknownEncodingData (parser->m_unknownEncodingData)
#define unknownEncodingHandlerData \
  (parser->m_unknownEncodingHandlerData)
#define unknownEncodingRelease (parser->m_unknownEncodingRelease)
#define protocolEncodingName (parser->m_protocolEncodingName)
#define ns (parser->m_ns)
#define ns_triplets (parser->m_ns_triplets)
#define prologState (parser->m_prologState)
#define processor (parser->m_processor)
#define errorCode (parser->m_errorCode)
#define eventPtr (parser->m_eventPtr)
#define eventEndPtr (parser->m_eventEndPtr)
#define positionPtr (parser->m_positionPtr)
#define position (parser->m_position)
#define openInternalEntities (parser->m_openInternalEntities)
#define freeInternalEntities (parser->m_freeInternalEntities)
#define defaultExpandInternalEntities \
        (parser->m_defaultExpandInternalEntities)
#define tagLevel (parser->m_tagLevel)
#define buffer (parser->m_buffer)
#define bufferPtr (parser->m_bufferPtr)
#define bufferEnd (parser->m_bufferEnd)
#define parseEndByteIndex (parser->m_parseEndByteIndex)
#define parseEndPtr (parser->m_parseEndPtr)
#define bufferLim (parser->m_bufferLim)
#define dataBuf (parser->m_dataBuf)
#define dataBufEnd (parser->m_dataBufEnd)
#define _dtd (parser->m_dtd)
#define curBase (parser->m_curBase)
#define declEntity (parser->m_declEntity)
#define doctypeName (parser->m_doctypeName)
#define doctypeSysid (parser->m_doctypeSysid)
#define doctypePubid (parser->m_doctypePubid)
#define declAttributeType (parser->m_declAttributeType)
#define declNotationName (parser->m_declNotationName)
#define declNotationPublicId (parser->m_declNotationPublicId)
#define declElementType (parser->m_declElementType)
#define declAttributeId (parser->m_declAttributeId)
#define declAttributeIsCdata (parser->m_declAttributeIsCdata)
#define declAttributeIsId (parser->m_declAttributeIsId)
#define freeTagList (parser->m_freeTagList)
#define freeBindingList (parser->m_freeBindingList)
#define inheritedBindings (parser->m_inheritedBindings)
#define tagStack (parser->m_tagStack)
#define atts (parser->m_atts)
#define attsSize (parser->m_attsSize)
#define nSpecifiedAtts (parser->m_nSpecifiedAtts)
#define idAttIndex (parser->m_idAttIndex)
#define nsAtts (parser->m_nsAtts)
#define nsAttsVersion (parser->m_nsAttsVersion)
#define nsAttsPower (parser->m_nsAttsPower)
#define attInfo (parser->m_attInfo)
#define tempPool (parser->m_tempPool)
#define temp2Pool (parser->m_temp2Pool)
#define groupConnector (parser->m_groupConnector)
#define groupSize (parser->m_groupSize)
#define namespaceSeparator (parser->m_namespaceSeparator)
#define parentParser (parser->m_parentParser)
#define ps_parsing (parser->m_parsingStatus.parsing)
#define ps_finalBuffer (parser->m_parsingStatus.finalBuffer)
#ifdef XML_DTD
#define isParamEntity (parser->m_isParamEntity)
#define useForeignDTD (parser->m_useForeignDTD)
#define paramEntityParsing (parser->m_paramEntityParsing)
#endif /* XML_DTD */
#define hash_secret_salt (parser->m_hash_secret_salt)

Fred Drake08317ae2003-10-21 15:38:55 +0000674XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000675XML_ParserCreate(const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000676{
677 return XML_ParserCreate_MM(encodingName, NULL, NULL);
678}
679
Fred Drake08317ae2003-10-21 15:38:55 +0000680XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000681XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000682{
683 XML_Char tmp[2];
684 *tmp = nsSep;
685 return XML_ParserCreate_MM(encodingName, NULL, tmp);
686}
687
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000688static const XML_Char implicitContext[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700689 ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, ASCII_t, ASCII_t, ASCII_p,
690 ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
691 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g,
692 ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9,
693 ASCII_9, ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
694 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0'
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000695};
696
Victor Stinner5ff71322017-06-21 14:39:22 +0200697
#if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
# include <errno.h>

# if defined(HAVE_GETRANDOM)
#  include <sys/random.h>    /* getrandom */
# else
#  include <unistd.h>        /* syscall */
#  include <sys/syscall.h>   /* SYS_getrandom */
# endif

/* Obtain entropy on Linux 3.17+
 *
 * Fills target[0 .. count-1] with bytes from getrandom().
 * Returns 1 once all count bytes have been written (trivially so for
 * count == 0) and 0 if the kernel reports an error other than
 * EINTR / EAGAIN or returns no data.
 *
 * A short read is not an error: the loop simply asks for the remaining
 * bytes.  errno is only consulted after a call that actually failed, and
 * is cleared before each call so a stale value from earlier code can
 * neither end the loop early nor keep it spinning.
 */
static int
writeRandomBytes_getrandom(void * target, size_t count) {
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = 0;

  while (bytesWrittenTotal < count) {
    void * const currentTarget = (void*)((char*)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;
    long bytesWrittenMore;  /* wide enough for ssize_t / syscall() result */

    errno = 0;
    bytesWrittenMore =
#if defined(HAVE_GETRANDOM)
        (long)getrandom(currentTarget, bytesToWrite, getrandomFlags);
#else
        syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
#endif

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
    } else if (errno != EINTR && errno != EAGAIN) {
      return 0;  /* hard failure, or no progress without a retryable error */
    }
  }

  return 1;
}

#endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
737
738
#ifdef _WIN32

typedef BOOLEAN (APIENTRY *RTLGENRANDOM_FUNC)(PVOID, ULONG);

/* Obtain entropy on Windows XP / Windows Server 2003 and later.
 * Hint on RtlGenRandom and the following article from libsodium.
 *
 * Michael Howard: Cryptographically Secure Random number on Windows without using CryptoAPI
 * https://blogs.msdn.microsoft.com/michael_howard/2005/01/14/cryptographically-secure-random-number-on-windows-without-using-cryptoapi/
 *
 * Fills target[0 .. count-1] via RtlGenRandom (exported from ADVAPI32.DLL
 * as "SystemFunction036").  Returns 1 on success, 0 if the library or the
 * symbol cannot be obtained or the call itself fails.
 */
static int
writeRandomBytes_RtlGenRandom(void * target, size_t count) {
  int success = 0;  /* full count bytes written? */
  /* Use the ANSI entry point explicitly: the generic LoadLibrary macro
     maps to LoadLibraryW in UNICODE builds, where a narrow string
     literal is the wrong argument type. */
  /* NOTE(review): loading by bare file name relies on the DLL search
     order of the process -- confirm this is acceptable for the target
     deployment. */
  const HMODULE advapi32 = LoadLibraryA("ADVAPI32.DLL");

  if (advapi32) {
    const RTLGENRANDOM_FUNC RtlGenRandom
        = (RTLGENRANDOM_FUNC)GetProcAddress(advapi32, "SystemFunction036");
    if (RtlGenRandom) {
      if (RtlGenRandom((PVOID)target, (ULONG)count) == TRUE) {
        success = 1;
      }
    }
    FreeLibrary(advapi32);
  }

  return success;
}

#endif /* _WIN32 */
769
770
/* Derive a small amount of entropy from the current time.
 *
 * Windows: XOR of the two halves of the system FILETIME.
 * Elsewhere: the microsecond part of gettimeofday().
 *
 * This is a low-quality source; generate_hash_secret_salt() only uses it
 * as a fallback.
 */
static unsigned long
gather_time_entropy(void)
{
#ifdef _WIN32
  FILETIME ft;
  GetSystemTimeAsFileTime(&ft); /* never fails */
  return ft.dwHighDateTime ^ ft.dwLowDateTime;
#else
  struct timeval tv;
  int gettimeofday_res;

  /* Start from a defined value so that a failing call in a build with
     assertions disabled cannot make us read an indeterminate tv. */
  memset(&tv, 0, sizeof(tv));

  gettimeofday_res = gettimeofday(&tv, NULL);
#if defined(NDEBUG)
  (void)gettimeofday_res;  /* assert() compiles away; avoid unused warning */
#else
  assert (gettimeofday_res == 0);
#endif

  /* Microseconds time is <20 bits entropy */
  return (unsigned long)tv.tv_usec;
#endif
}
789
Victor Stinner5ff71322017-06-21 14:39:22 +0200790#if defined(HAVE_ARC4RANDOM_BUF) && defined(HAVE_LIBBSD)
791# include <bsd/stdlib.h>
792#endif
793
/* Pass-through helper: returns `entropy' unchanged.  When the environment
   variable EXPAT_ENTROPY_DEBUG is set to exactly "1", it also reports the
   value and the name of its source (`label') on stderr.
*/
static unsigned long
ENTROPY_DEBUG(const char * label, unsigned long entropy) {
  const char * const debugSetting = getenv("EXPAT_ENTROPY_DEBUG");
  const int reportRequested
      = (debugSetting != NULL) && (strcmp(debugSetting, "1") == 0);

  if (reportRequested) {
    const int hexDigits = (int)sizeof(entropy) * 2;
    fprintf(stderr, "Entropy: %s --> 0x%0*lx (%lu bytes)\n",
        label,
        hexDigits, entropy,
        (unsigned long)sizeof(entropy));
  }
  return entropy;
}
805
Victor Stinner23ec4b52017-06-15 00:54:36 +0200806static unsigned long
807generate_hash_secret_salt(XML_Parser parser)
808{
Victor Stinner5ff71322017-06-21 14:39:22 +0200809 unsigned long entropy;
810 (void)parser;
811#if defined(HAVE_ARC4RANDOM_BUF) || defined(__CloudABI__)
812 (void)gather_time_entropy;
813 arc4random_buf(&entropy, sizeof(entropy));
814 return ENTROPY_DEBUG("arc4random_buf", entropy);
815#else
816 /* Try high quality providers first .. */
817#ifdef _WIN32
818 if (writeRandomBytes_RtlGenRandom((void *)&entropy, sizeof(entropy))) {
819 return ENTROPY_DEBUG("RtlGenRandom", entropy);
820 }
821#elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
822 if (writeRandomBytes_getrandom((void *)&entropy, sizeof(entropy))) {
823 return ENTROPY_DEBUG("getrandom", entropy);
824 }
825#endif
826 /* .. and self-made low quality for backup: */
827
828 /* Process ID is 0 bits entropy if attacker has local access */
829 entropy = gather_time_entropy() ^ getpid();
Victor Stinner23ec4b52017-06-15 00:54:36 +0200830
831 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
832 if (sizeof(unsigned long) == 4) {
Victor Stinner5ff71322017-06-21 14:39:22 +0200833 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200834 } else {
Victor Stinner5ff71322017-06-21 14:39:22 +0200835 return ENTROPY_DEBUG("fallback(8)",
836 entropy * (unsigned long)2305843009213693951);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200837 }
Victor Stinner5ff71322017-06-21 14:39:22 +0200838#endif
839}
840
841static unsigned long
842get_hash_secret_salt(XML_Parser parser) {
843 if (parser->m_parentParser != NULL)
844 return get_hash_secret_salt(parser->m_parentParser);
845 return parser->m_hash_secret_salt;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700846}
847
848static XML_Bool /* only valid for root parser */
849startParsing(XML_Parser parser)
850{
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700851 /* hash functions must be initialized before setContext() is called */
852 if (hash_secret_salt == 0)
Victor Stinner23ec4b52017-06-15 00:54:36 +0200853 hash_secret_salt = generate_hash_secret_salt(parser);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700854 if (ns) {
855 /* implicit context only set for root parser, since child
856 parsers (i.e. external entity parsers) will inherit it
857 */
858 return setContext(parser, implicitContext);
859 }
860 return XML_TRUE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700861}
862
863XML_Parser XMLCALL
864XML_ParserCreate_MM(const XML_Char *encodingName,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700865 const XML_Memory_Handling_Suite *memsuite,
866 const XML_Char *nameSep)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700867{
868 return parserCreate(encodingName, memsuite, nameSep, NULL);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000869}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000870
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000871static XML_Parser
872parserCreate(const XML_Char *encodingName,
873 const XML_Memory_Handling_Suite *memsuite,
874 const XML_Char *nameSep,
875 DTD *dtd)
876{
877 XML_Parser parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000878
879 if (memsuite) {
880 XML_Memory_Handling_Suite *mtemp;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000881 parser = (XML_Parser)
882 memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
883 if (parser != NULL) {
884 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
885 mtemp->malloc_fcn = memsuite->malloc_fcn;
886 mtemp->realloc_fcn = memsuite->realloc_fcn;
887 mtemp->free_fcn = memsuite->free_fcn;
888 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000889 }
890 else {
891 XML_Memory_Handling_Suite *mtemp;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000892 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
893 if (parser != NULL) {
894 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
895 mtemp->malloc_fcn = malloc;
896 mtemp->realloc_fcn = realloc;
897 mtemp->free_fcn = free;
898 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000899 }
900
901 if (!parser)
902 return parser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000903
904 buffer = NULL;
905 bufferLim = NULL;
906
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000907 attsSize = INIT_ATTS_SIZE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000908 atts = (ATTRIBUTE *)MALLOC(attsSize * sizeof(ATTRIBUTE));
909 if (atts == NULL) {
910 FREE(parser);
911 return NULL;
912 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700913#ifdef XML_ATTR_INFO
914 attInfo = (XML_AttrInfo*)MALLOC(attsSize * sizeof(XML_AttrInfo));
915 if (attInfo == NULL) {
916 FREE(atts);
917 FREE(parser);
918 return NULL;
919 }
920#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000921 dataBuf = (XML_Char *)MALLOC(INIT_DATA_BUF_SIZE * sizeof(XML_Char));
922 if (dataBuf == NULL) {
923 FREE(atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700924#ifdef XML_ATTR_INFO
925 FREE(attInfo);
926#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000927 FREE(parser);
928 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000929 }
930 dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE;
931
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000932 if (dtd)
933 _dtd = dtd;
934 else {
935 _dtd = dtdCreate(&parser->m_mem);
936 if (_dtd == NULL) {
937 FREE(dataBuf);
938 FREE(atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700939#ifdef XML_ATTR_INFO
940 FREE(attInfo);
941#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000942 FREE(parser);
943 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000944 }
945 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000946
947 freeBindingList = NULL;
948 freeTagList = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +0000949 freeInternalEntities = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000950
951 groupSize = 0;
952 groupConnector = NULL;
953
954 unknownEncodingHandler = NULL;
955 unknownEncodingHandlerData = NULL;
956
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700957 namespaceSeparator = ASCII_EXCL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000958 ns = XML_FALSE;
959 ns_triplets = XML_FALSE;
960
Fred Drake08317ae2003-10-21 15:38:55 +0000961 nsAtts = NULL;
962 nsAttsVersion = 0;
963 nsAttsPower = 0;
964
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000965 poolInit(&tempPool, &(parser->m_mem));
966 poolInit(&temp2Pool, &(parser->m_mem));
967 parserInit(parser, encodingName);
968
969 if (encodingName && !protocolEncodingName) {
970 XML_ParserFree(parser);
971 return NULL;
972 }
973
974 if (nameSep) {
975 ns = XML_TRUE;
976 internalEncoding = XmlGetInternalEncodingNS();
977 namespaceSeparator = *nameSep;
978 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000979 else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000980 internalEncoding = XmlGetInternalEncoding();
981 }
982
983 return parser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000984}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000985
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000986static void
987parserInit(XML_Parser parser, const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000988{
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000989 processor = prologInitProcessor;
990 XmlPrologStateInit(&prologState);
991 protocolEncodingName = (encodingName != NULL
992 ? poolCopyString(&tempPool, encodingName)
993 : NULL);
994 curBase = NULL;
995 XmlInitEncoding(&initEncoding, &encoding, 0);
996 userData = NULL;
997 handlerArg = NULL;
998 startElementHandler = NULL;
999 endElementHandler = NULL;
1000 characterDataHandler = NULL;
1001 processingInstructionHandler = NULL;
1002 commentHandler = NULL;
1003 startCdataSectionHandler = NULL;
1004 endCdataSectionHandler = NULL;
1005 defaultHandler = NULL;
1006 startDoctypeDeclHandler = NULL;
1007 endDoctypeDeclHandler = NULL;
1008 unparsedEntityDeclHandler = NULL;
1009 notationDeclHandler = NULL;
1010 startNamespaceDeclHandler = NULL;
1011 endNamespaceDeclHandler = NULL;
1012 notStandaloneHandler = NULL;
1013 externalEntityRefHandler = NULL;
1014 externalEntityRefHandlerArg = parser;
1015 skippedEntityHandler = NULL;
1016 elementDeclHandler = NULL;
1017 attlistDeclHandler = NULL;
1018 entityDeclHandler = NULL;
1019 xmlDeclHandler = NULL;
1020 bufferPtr = buffer;
1021 bufferEnd = buffer;
1022 parseEndByteIndex = 0;
1023 parseEndPtr = NULL;
1024 declElementType = NULL;
1025 declAttributeId = NULL;
1026 declEntity = NULL;
1027 doctypeName = NULL;
1028 doctypeSysid = NULL;
1029 doctypePubid = NULL;
1030 declAttributeType = NULL;
1031 declNotationName = NULL;
1032 declNotationPublicId = NULL;
1033 declAttributeIsCdata = XML_FALSE;
1034 declAttributeIsId = XML_FALSE;
1035 memset(&position, 0, sizeof(POSITION));
1036 errorCode = XML_ERROR_NONE;
1037 eventPtr = NULL;
1038 eventEndPtr = NULL;
1039 positionPtr = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00001040 openInternalEntities = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001041 defaultExpandInternalEntities = XML_TRUE;
1042 tagLevel = 0;
1043 tagStack = NULL;
1044 inheritedBindings = NULL;
1045 nSpecifiedAtts = 0;
1046 unknownEncodingMem = NULL;
1047 unknownEncodingRelease = NULL;
1048 unknownEncodingData = NULL;
1049 parentParser = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001050 ps_parsing = XML_INITIALIZED;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001051#ifdef XML_DTD
1052 isParamEntity = XML_FALSE;
1053 useForeignDTD = XML_FALSE;
1054 paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1055#endif
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001056 hash_secret_salt = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001057}
1058
1059/* moves list of bindings to freeBindingList */
1060static void FASTCALL
1061moveToFreeBindingList(XML_Parser parser, BINDING *bindings)
1062{
1063 while (bindings) {
1064 BINDING *b = bindings;
1065 bindings = bindings->nextTagBinding;
1066 b->nextTagBinding = freeBindingList;
1067 freeBindingList = b;
1068 }
1069}
1070
Fred Drake08317ae2003-10-21 15:38:55 +00001071XML_Bool XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001072XML_ParserReset(XML_Parser parser, const XML_Char *encodingName)
1073{
1074 TAG *tStk;
Fred Drake31d485c2004-08-03 07:06:22 +00001075 OPEN_INTERNAL_ENTITY *openEntityList;
Victor Stinner5ff71322017-06-21 14:39:22 +02001076
1077 if (parser == NULL)
1078 return XML_FALSE;
1079
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001080 if (parentParser)
1081 return XML_FALSE;
1082 /* move tagStack to freeTagList */
1083 tStk = tagStack;
1084 while (tStk) {
1085 TAG *tag = tStk;
1086 tStk = tStk->parent;
1087 tag->parent = freeTagList;
1088 moveToFreeBindingList(parser, tag->bindings);
1089 tag->bindings = NULL;
1090 freeTagList = tag;
1091 }
Fred Drake31d485c2004-08-03 07:06:22 +00001092 /* move openInternalEntities to freeInternalEntities */
1093 openEntityList = openInternalEntities;
1094 while (openEntityList) {
1095 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1096 openEntityList = openEntity->next;
1097 openEntity->next = freeInternalEntities;
1098 freeInternalEntities = openEntity;
1099 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001100 moveToFreeBindingList(parser, inheritedBindings);
Fred Drake08317ae2003-10-21 15:38:55 +00001101 FREE(unknownEncodingMem);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001102 if (unknownEncodingRelease)
1103 unknownEncodingRelease(unknownEncodingData);
1104 poolClear(&tempPool);
1105 poolClear(&temp2Pool);
1106 parserInit(parser, encodingName);
1107 dtdReset(_dtd, &parser->m_mem);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001108 return XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001109}
1110
Fred Drake08317ae2003-10-21 15:38:55 +00001111enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001112XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
1113{
Victor Stinner5ff71322017-06-21 14:39:22 +02001114 if (parser == NULL)
1115 return XML_STATUS_ERROR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001116 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1117 XXX There's no way for the caller to determine which of the
1118 XXX possible error cases caused the XML_STATUS_ERROR return.
1119 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001120 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001121 return XML_STATUS_ERROR;
1122 if (encodingName == NULL)
1123 protocolEncodingName = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001124 else {
1125 protocolEncodingName = poolCopyString(&tempPool, encodingName);
1126 if (!protocolEncodingName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001127 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001128 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001129 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001130}
1131
Fred Drake08317ae2003-10-21 15:38:55 +00001132XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001133XML_ExternalEntityParserCreate(XML_Parser oldParser,
1134 const XML_Char *context,
1135 const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001136{
1137 XML_Parser parser = oldParser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001138 DTD *newDtd = NULL;
Victor Stinner5ff71322017-06-21 14:39:22 +02001139 DTD *oldDtd;
1140 XML_StartElementHandler oldStartElementHandler;
1141 XML_EndElementHandler oldEndElementHandler;
1142 XML_CharacterDataHandler oldCharacterDataHandler;
1143 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1144 XML_CommentHandler oldCommentHandler;
1145 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1146 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1147 XML_DefaultHandler oldDefaultHandler;
1148 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1149 XML_NotationDeclHandler oldNotationDeclHandler;
1150 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1151 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1152 XML_NotStandaloneHandler oldNotStandaloneHandler;
1153 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1154 XML_SkippedEntityHandler oldSkippedEntityHandler;
1155 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1156 XML_ElementDeclHandler oldElementDeclHandler;
1157 XML_AttlistDeclHandler oldAttlistDeclHandler;
1158 XML_EntityDeclHandler oldEntityDeclHandler;
1159 XML_XmlDeclHandler oldXmlDeclHandler;
1160 ELEMENT_TYPE * oldDeclElementType;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001161
Victor Stinner5ff71322017-06-21 14:39:22 +02001162 void *oldUserData;
1163 void *oldHandlerArg;
1164 XML_Bool oldDefaultExpandInternalEntities;
1165 XML_Parser oldExternalEntityRefHandlerArg;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001166#ifdef XML_DTD
Victor Stinner5ff71322017-06-21 14:39:22 +02001167 enum XML_ParamEntityParsing oldParamEntityParsing;
1168 int oldInEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001169#endif
Victor Stinner5ff71322017-06-21 14:39:22 +02001170 XML_Bool oldns_triplets;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001171 /* Note that the new parser shares the same hash secret as the old
1172 parser, so that dtdCopy and copyEntityTable can lookup values
1173 from hash tables associated with either parser without us having
1174 to worry which hash secrets each table has.
1175 */
Victor Stinner5ff71322017-06-21 14:39:22 +02001176 unsigned long oldhash_secret_salt;
1177
1178 /* Validate the oldParser parameter before we pull everything out of it */
1179 if (oldParser == NULL)
1180 return NULL;
1181
1182 /* Stash the original parser contents on the stack */
1183 oldDtd = _dtd;
1184 oldStartElementHandler = startElementHandler;
1185 oldEndElementHandler = endElementHandler;
1186 oldCharacterDataHandler = characterDataHandler;
1187 oldProcessingInstructionHandler = processingInstructionHandler;
1188 oldCommentHandler = commentHandler;
1189 oldStartCdataSectionHandler = startCdataSectionHandler;
1190 oldEndCdataSectionHandler = endCdataSectionHandler;
1191 oldDefaultHandler = defaultHandler;
1192 oldUnparsedEntityDeclHandler = unparsedEntityDeclHandler;
1193 oldNotationDeclHandler = notationDeclHandler;
1194 oldStartNamespaceDeclHandler = startNamespaceDeclHandler;
1195 oldEndNamespaceDeclHandler = endNamespaceDeclHandler;
1196 oldNotStandaloneHandler = notStandaloneHandler;
1197 oldExternalEntityRefHandler = externalEntityRefHandler;
1198 oldSkippedEntityHandler = skippedEntityHandler;
1199 oldUnknownEncodingHandler = unknownEncodingHandler;
1200 oldElementDeclHandler = elementDeclHandler;
1201 oldAttlistDeclHandler = attlistDeclHandler;
1202 oldEntityDeclHandler = entityDeclHandler;
1203 oldXmlDeclHandler = xmlDeclHandler;
1204 oldDeclElementType = declElementType;
1205
1206 oldUserData = userData;
1207 oldHandlerArg = handlerArg;
1208 oldDefaultExpandInternalEntities = defaultExpandInternalEntities;
1209 oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg;
1210#ifdef XML_DTD
1211 oldParamEntityParsing = paramEntityParsing;
1212 oldInEntityValue = prologState.inEntityValue;
1213#endif
1214 oldns_triplets = ns_triplets;
1215 /* Note that the new parser shares the same hash secret as the old
1216 parser, so that dtdCopy and copyEntityTable can lookup values
1217 from hash tables associated with either parser without us having
1218 to worry which hash secrets each table has.
1219 */
1220 oldhash_secret_salt = hash_secret_salt;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001221
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001222#ifdef XML_DTD
1223 if (!context)
1224 newDtd = oldDtd;
1225#endif /* XML_DTD */
1226
1227 /* Note that the magical uses of the pre-processor to make field
1228 access look more like C++ require that `parser' be overwritten
1229 here. This makes this function more painful to follow than it
1230 would be otherwise.
1231 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001232 if (ns) {
1233 XML_Char tmp[2];
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001234 *tmp = namespaceSeparator;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001235 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001236 }
1237 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001238 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001239 }
1240
1241 if (!parser)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001242 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001243
1244 startElementHandler = oldStartElementHandler;
1245 endElementHandler = oldEndElementHandler;
1246 characterDataHandler = oldCharacterDataHandler;
1247 processingInstructionHandler = oldProcessingInstructionHandler;
1248 commentHandler = oldCommentHandler;
1249 startCdataSectionHandler = oldStartCdataSectionHandler;
1250 endCdataSectionHandler = oldEndCdataSectionHandler;
1251 defaultHandler = oldDefaultHandler;
1252 unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1253 notationDeclHandler = oldNotationDeclHandler;
1254 startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1255 endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1256 notStandaloneHandler = oldNotStandaloneHandler;
1257 externalEntityRefHandler = oldExternalEntityRefHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001258 skippedEntityHandler = oldSkippedEntityHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001259 unknownEncodingHandler = oldUnknownEncodingHandler;
1260 elementDeclHandler = oldElementDeclHandler;
1261 attlistDeclHandler = oldAttlistDeclHandler;
1262 entityDeclHandler = oldEntityDeclHandler;
1263 xmlDeclHandler = oldXmlDeclHandler;
1264 declElementType = oldDeclElementType;
1265 userData = oldUserData;
1266 if (oldUserData == oldHandlerArg)
1267 handlerArg = userData;
1268 else
1269 handlerArg = parser;
1270 if (oldExternalEntityRefHandlerArg != oldParser)
1271 externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1272 defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1273 ns_triplets = oldns_triplets;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001274 hash_secret_salt = oldhash_secret_salt;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001275 parentParser = oldParser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001276#ifdef XML_DTD
1277 paramEntityParsing = oldParamEntityParsing;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001278 prologState.inEntityValue = oldInEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001279 if (context) {
1280#endif /* XML_DTD */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001281 if (!dtdCopy(oldParser, _dtd, oldDtd, &parser->m_mem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001282 || !setContext(parser, context)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001283 XML_ParserFree(parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001284 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001285 }
1286 processor = externalEntityInitProcessor;
1287#ifdef XML_DTD
1288 }
1289 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001290 /* The DTD instance referenced by _dtd is shared between the document's
1291 root parser and external PE parsers, therefore one does not need to
1292 call setContext. In addition, one also *must* not call setContext,
1293 because this would overwrite existing prefix->binding pointers in
1294 _dtd with ones that get destroyed with the external PE parser.
1295 This would leave those prefixes with dangling pointers.
1296 */
1297 isParamEntity = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001298 XmlPrologStateInitExternalEntity(&prologState);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001299 processor = externalParEntInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001300 }
1301#endif /* XML_DTD */
1302 return parser;
1303}
1304
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001305static void FASTCALL
1306destroyBindings(BINDING *bindings, XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001307{
1308 for (;;) {
1309 BINDING *b = bindings;
1310 if (!b)
1311 break;
1312 bindings = b->nextTagBinding;
1313 FREE(b->uri);
1314 FREE(b);
1315 }
1316}
1317
Fred Drake08317ae2003-10-21 15:38:55 +00001318void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001319XML_ParserFree(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001320{
Fred Drake31d485c2004-08-03 07:06:22 +00001321 TAG *tagList;
1322 OPEN_INTERNAL_ENTITY *entityList;
1323 if (parser == NULL)
1324 return;
1325 /* free tagStack and freeTagList */
1326 tagList = tagStack;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001327 for (;;) {
1328 TAG *p;
Fred Drake31d485c2004-08-03 07:06:22 +00001329 if (tagList == NULL) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001330 if (freeTagList == NULL)
1331 break;
Fred Drake31d485c2004-08-03 07:06:22 +00001332 tagList = freeTagList;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001333 freeTagList = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001334 }
Fred Drake31d485c2004-08-03 07:06:22 +00001335 p = tagList;
1336 tagList = tagList->parent;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001337 FREE(p->buf);
1338 destroyBindings(p->bindings, parser);
1339 FREE(p);
1340 }
Fred Drake31d485c2004-08-03 07:06:22 +00001341 /* free openInternalEntities and freeInternalEntities */
1342 entityList = openInternalEntities;
1343 for (;;) {
1344 OPEN_INTERNAL_ENTITY *openEntity;
1345 if (entityList == NULL) {
1346 if (freeInternalEntities == NULL)
1347 break;
1348 entityList = freeInternalEntities;
1349 freeInternalEntities = NULL;
1350 }
1351 openEntity = entityList;
1352 entityList = entityList->next;
1353 FREE(openEntity);
1354 }
1355
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001356 destroyBindings(freeBindingList, parser);
1357 destroyBindings(inheritedBindings, parser);
1358 poolDestroy(&tempPool);
1359 poolDestroy(&temp2Pool);
1360#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001361 /* external parameter entity parsers share the DTD structure
1362 parser->m_dtd with the root parser, so we must not destroy it
1363 */
1364 if (!isParamEntity && _dtd)
1365#else
1366 if (_dtd)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001367#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001368 dtdDestroy(_dtd, (XML_Bool)!parentParser, &parser->m_mem);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001369 FREE((void *)atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001370#ifdef XML_ATTR_INFO
1371 FREE((void *)attInfo);
1372#endif
Fred Drake08317ae2003-10-21 15:38:55 +00001373 FREE(groupConnector);
1374 FREE(buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001375 FREE(dataBuf);
Fred Drake08317ae2003-10-21 15:38:55 +00001376 FREE(nsAtts);
1377 FREE(unknownEncodingMem);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001378 if (unknownEncodingRelease)
1379 unknownEncodingRelease(unknownEncodingData);
1380 FREE(parser);
1381}
1382
Fred Drake08317ae2003-10-21 15:38:55 +00001383void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001384XML_UseParserAsHandlerArg(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001385{
Victor Stinner5ff71322017-06-21 14:39:22 +02001386 if (parser != NULL)
1387 handlerArg = parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001388}
1389
Fred Drake08317ae2003-10-21 15:38:55 +00001390enum XML_Error XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001391XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD)
1392{
Victor Stinner5ff71322017-06-21 14:39:22 +02001393 if (parser == NULL)
1394 return XML_ERROR_INVALID_ARGUMENT;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001395#ifdef XML_DTD
1396 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001397 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001398 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1399 useForeignDTD = useDTD;
1400 return XML_ERROR_NONE;
1401#else
1402 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1403#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001404}
1405
Fred Drake08317ae2003-10-21 15:38:55 +00001406void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001407XML_SetReturnNSTriplet(XML_Parser parser, int do_nst)
1408{
Victor Stinner5ff71322017-06-21 14:39:22 +02001409 if (parser == NULL)
1410 return;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001411 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001412 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001413 return;
1414 ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1415}
1416
Fred Drake08317ae2003-10-21 15:38:55 +00001417void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001418XML_SetUserData(XML_Parser parser, void *p)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001419{
Victor Stinner5ff71322017-06-21 14:39:22 +02001420 if (parser == NULL)
1421 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001422 if (handlerArg == userData)
1423 handlerArg = userData = p;
1424 else
1425 userData = p;
1426}
1427
Fred Drake08317ae2003-10-21 15:38:55 +00001428enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001429XML_SetBase(XML_Parser parser, const XML_Char *p)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001430{
Victor Stinner5ff71322017-06-21 14:39:22 +02001431 if (parser == NULL)
1432 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001433 if (p) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001434 p = poolCopyString(&_dtd->pool, p);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001435 if (!p)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001436 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001437 curBase = p;
1438 }
1439 else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001440 curBase = NULL;
1441 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001442}
1443
Fred Drake08317ae2003-10-21 15:38:55 +00001444const XML_Char * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001445XML_GetBase(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001446{
Victor Stinner5ff71322017-06-21 14:39:22 +02001447 if (parser == NULL)
1448 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001449 return curBase;
1450}
1451
Fred Drake08317ae2003-10-21 15:38:55 +00001452int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001453XML_GetSpecifiedAttributeCount(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001454{
Victor Stinner5ff71322017-06-21 14:39:22 +02001455 if (parser == NULL)
1456 return -1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001457 return nSpecifiedAtts;
1458}
1459
Fred Drake08317ae2003-10-21 15:38:55 +00001460int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001461XML_GetIdAttributeIndex(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001462{
Victor Stinner5ff71322017-06-21 14:39:22 +02001463 if (parser == NULL)
1464 return -1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001465 return idAttIndex;
1466}
1467
#ifdef XML_ATTR_INFO
/* Return the parser's attribute-info pointer, or NULL when parser is NULL.
   Only compiled when XML_ATTR_INFO is defined. */
const XML_AttrInfo * XMLCALL
XML_GetAttributeInfo(XML_Parser parser)
{
  if (parser != NULL)
    return attInfo;
  return NULL;
}
#endif
1477
Fred Drake08317ae2003-10-21 15:38:55 +00001478void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001479XML_SetElementHandler(XML_Parser parser,
1480 XML_StartElementHandler start,
1481 XML_EndElementHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001482{
Victor Stinner5ff71322017-06-21 14:39:22 +02001483 if (parser == NULL)
1484 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001485 startElementHandler = start;
1486 endElementHandler = end;
1487}
1488
Fred Drake08317ae2003-10-21 15:38:55 +00001489void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001490XML_SetStartElementHandler(XML_Parser parser,
1491 XML_StartElementHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001492 if (parser != NULL)
1493 startElementHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001494}
1495
Fred Drake08317ae2003-10-21 15:38:55 +00001496void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001497XML_SetEndElementHandler(XML_Parser parser,
1498 XML_EndElementHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001499 if (parser != NULL)
1500 endElementHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001501}
1502
Fred Drake08317ae2003-10-21 15:38:55 +00001503void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001504XML_SetCharacterDataHandler(XML_Parser parser,
1505 XML_CharacterDataHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001506{
Victor Stinner5ff71322017-06-21 14:39:22 +02001507 if (parser != NULL)
1508 characterDataHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001509}
1510
Fred Drake08317ae2003-10-21 15:38:55 +00001511void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001512XML_SetProcessingInstructionHandler(XML_Parser parser,
1513 XML_ProcessingInstructionHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001514{
Victor Stinner5ff71322017-06-21 14:39:22 +02001515 if (parser != NULL)
1516 processingInstructionHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001517}
1518
Fred Drake08317ae2003-10-21 15:38:55 +00001519void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001520XML_SetCommentHandler(XML_Parser parser,
1521 XML_CommentHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001522{
Victor Stinner5ff71322017-06-21 14:39:22 +02001523 if (parser != NULL)
1524 commentHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001525}
1526
Fred Drake08317ae2003-10-21 15:38:55 +00001527void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001528XML_SetCdataSectionHandler(XML_Parser parser,
1529 XML_StartCdataSectionHandler start,
1530 XML_EndCdataSectionHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001531{
Victor Stinner5ff71322017-06-21 14:39:22 +02001532 if (parser == NULL)
1533 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001534 startCdataSectionHandler = start;
1535 endCdataSectionHandler = end;
1536}
1537
Fred Drake08317ae2003-10-21 15:38:55 +00001538void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001539XML_SetStartCdataSectionHandler(XML_Parser parser,
1540 XML_StartCdataSectionHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001541 if (parser != NULL)
1542 startCdataSectionHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001543}
1544
Fred Drake08317ae2003-10-21 15:38:55 +00001545void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001546XML_SetEndCdataSectionHandler(XML_Parser parser,
1547 XML_EndCdataSectionHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001548 if (parser != NULL)
1549 endCdataSectionHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001550}
1551
Fred Drake08317ae2003-10-21 15:38:55 +00001552void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001553XML_SetDefaultHandler(XML_Parser parser,
1554 XML_DefaultHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001555{
Victor Stinner5ff71322017-06-21 14:39:22 +02001556 if (parser == NULL)
1557 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001558 defaultHandler = handler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001559 defaultExpandInternalEntities = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001560}
1561
Fred Drake08317ae2003-10-21 15:38:55 +00001562void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001563XML_SetDefaultHandlerExpand(XML_Parser parser,
1564 XML_DefaultHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001565{
Victor Stinner5ff71322017-06-21 14:39:22 +02001566 if (parser == NULL)
1567 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001568 defaultHandler = handler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001569 defaultExpandInternalEntities = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001570}
1571
Fred Drake08317ae2003-10-21 15:38:55 +00001572void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001573XML_SetDoctypeDeclHandler(XML_Parser parser,
1574 XML_StartDoctypeDeclHandler start,
1575 XML_EndDoctypeDeclHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001576{
Victor Stinner5ff71322017-06-21 14:39:22 +02001577 if (parser == NULL)
1578 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001579 startDoctypeDeclHandler = start;
1580 endDoctypeDeclHandler = end;
1581}
1582
Fred Drake08317ae2003-10-21 15:38:55 +00001583void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001584XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1585 XML_StartDoctypeDeclHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001586 if (parser != NULL)
1587 startDoctypeDeclHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001588}
1589
Fred Drake08317ae2003-10-21 15:38:55 +00001590void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001591XML_SetEndDoctypeDeclHandler(XML_Parser parser,
1592 XML_EndDoctypeDeclHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001593 if (parser != NULL)
1594 endDoctypeDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001595}
1596
Fred Drake08317ae2003-10-21 15:38:55 +00001597void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001598XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1599 XML_UnparsedEntityDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001600{
Victor Stinner5ff71322017-06-21 14:39:22 +02001601 if (parser != NULL)
1602 unparsedEntityDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001603}
1604
Fred Drake08317ae2003-10-21 15:38:55 +00001605void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001606XML_SetNotationDeclHandler(XML_Parser parser,
1607 XML_NotationDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001608{
Victor Stinner5ff71322017-06-21 14:39:22 +02001609 if (parser != NULL)
1610 notationDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001611}
1612
Fred Drake08317ae2003-10-21 15:38:55 +00001613void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001614XML_SetNamespaceDeclHandler(XML_Parser parser,
1615 XML_StartNamespaceDeclHandler start,
1616 XML_EndNamespaceDeclHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001617{
Victor Stinner5ff71322017-06-21 14:39:22 +02001618 if (parser == NULL)
1619 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001620 startNamespaceDeclHandler = start;
1621 endNamespaceDeclHandler = end;
1622}
1623
Fred Drake08317ae2003-10-21 15:38:55 +00001624void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001625XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1626 XML_StartNamespaceDeclHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001627 if (parser != NULL)
1628 startNamespaceDeclHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001629}
1630
Fred Drake08317ae2003-10-21 15:38:55 +00001631void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001632XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1633 XML_EndNamespaceDeclHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001634 if (parser != NULL)
1635 endNamespaceDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001636}
1637
Fred Drake08317ae2003-10-21 15:38:55 +00001638void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001639XML_SetNotStandaloneHandler(XML_Parser parser,
1640 XML_NotStandaloneHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001641{
Victor Stinner5ff71322017-06-21 14:39:22 +02001642 if (parser != NULL)
1643 notStandaloneHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001644}
1645
Fred Drake08317ae2003-10-21 15:38:55 +00001646void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001647XML_SetExternalEntityRefHandler(XML_Parser parser,
1648 XML_ExternalEntityRefHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001649{
Victor Stinner5ff71322017-06-21 14:39:22 +02001650 if (parser != NULL)
1651 externalEntityRefHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001652}
1653
Fred Drake08317ae2003-10-21 15:38:55 +00001654void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001655XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001656{
Victor Stinner5ff71322017-06-21 14:39:22 +02001657 if (parser == NULL)
1658 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001659 if (arg)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001660 externalEntityRefHandlerArg = (XML_Parser)arg;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001661 else
1662 externalEntityRefHandlerArg = parser;
1663}
1664
Fred Drake08317ae2003-10-21 15:38:55 +00001665void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001666XML_SetSkippedEntityHandler(XML_Parser parser,
1667 XML_SkippedEntityHandler handler)
1668{
Victor Stinner5ff71322017-06-21 14:39:22 +02001669 if (parser != NULL)
1670 skippedEntityHandler = handler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001671}
1672
Fred Drake08317ae2003-10-21 15:38:55 +00001673void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001674XML_SetUnknownEncodingHandler(XML_Parser parser,
1675 XML_UnknownEncodingHandler handler,
1676 void *data)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001677{
Victor Stinner5ff71322017-06-21 14:39:22 +02001678 if (parser == NULL)
1679 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001680 unknownEncodingHandler = handler;
1681 unknownEncodingHandlerData = data;
1682}
1683
Fred Drake08317ae2003-10-21 15:38:55 +00001684void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001685XML_SetElementDeclHandler(XML_Parser parser,
1686 XML_ElementDeclHandler eldecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001687{
Victor Stinner5ff71322017-06-21 14:39:22 +02001688 if (parser != NULL)
1689 elementDeclHandler = eldecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001690}
1691
Fred Drake08317ae2003-10-21 15:38:55 +00001692void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001693XML_SetAttlistDeclHandler(XML_Parser parser,
1694 XML_AttlistDeclHandler attdecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001695{
Victor Stinner5ff71322017-06-21 14:39:22 +02001696 if (parser != NULL)
1697 attlistDeclHandler = attdecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001698}
1699
Fred Drake08317ae2003-10-21 15:38:55 +00001700void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001701XML_SetEntityDeclHandler(XML_Parser parser,
1702 XML_EntityDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001703{
Victor Stinner5ff71322017-06-21 14:39:22 +02001704 if (parser != NULL)
1705 entityDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001706}
1707
Fred Drake08317ae2003-10-21 15:38:55 +00001708void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001709XML_SetXmlDeclHandler(XML_Parser parser,
1710 XML_XmlDeclHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001711 if (parser != NULL)
1712 xmlDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001713}
1714
Fred Drake08317ae2003-10-21 15:38:55 +00001715int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001716XML_SetParamEntityParsing(XML_Parser parser,
1717 enum XML_ParamEntityParsing peParsing)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001718{
Victor Stinner5ff71322017-06-21 14:39:22 +02001719 if (parser == NULL)
1720 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001721 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001722 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001723 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001724#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001725 paramEntityParsing = peParsing;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001726 return 1;
1727#else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001728 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001729#endif
1730}
1731
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001732int XMLCALL
1733XML_SetHashSalt(XML_Parser parser,
1734 unsigned long hash_salt)
1735{
Victor Stinner5ff71322017-06-21 14:39:22 +02001736 if (parser == NULL)
1737 return 0;
1738 if (parser->m_parentParser)
1739 return XML_SetHashSalt(parser->m_parentParser, hash_salt);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001740 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1741 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
1742 return 0;
1743 hash_secret_salt = hash_salt;
1744 return 1;
1745}
1746
Fred Drake08317ae2003-10-21 15:38:55 +00001747enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001748XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001749{
Victor Stinner5ff71322017-06-21 14:39:22 +02001750 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
1751 errorCode = XML_ERROR_INVALID_ARGUMENT;
1752 return XML_STATUS_ERROR;
1753 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001754 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001755 case XML_SUSPENDED:
1756 errorCode = XML_ERROR_SUSPENDED;
1757 return XML_STATUS_ERROR;
1758 case XML_FINISHED:
1759 errorCode = XML_ERROR_FINISHED;
1760 return XML_STATUS_ERROR;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001761 case XML_INITIALIZED:
1762 if (parentParser == NULL && !startParsing(parser)) {
1763 errorCode = XML_ERROR_NO_MEMORY;
1764 return XML_STATUS_ERROR;
1765 }
Fred Drake31d485c2004-08-03 07:06:22 +00001766 default:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001767 ps_parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00001768 }
1769
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001770 if (len == 0) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001771 ps_finalBuffer = (XML_Bool)isFinal;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001772 if (!isFinal)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001773 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001774 positionPtr = bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00001775 parseEndPtr = bufferEnd;
1776
1777 /* If data are left over from last buffer, and we now know that these
1778 data are the final chunk of input, then we have to check them again
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001779 to detect errors based on that fact.
Fred Drake31d485c2004-08-03 07:06:22 +00001780 */
1781 errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr);
1782
1783 if (errorCode == XML_ERROR_NONE) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001784 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001785 case XML_SUSPENDED:
1786 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
1787 positionPtr = bufferPtr;
1788 return XML_STATUS_SUSPENDED;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001789 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00001790 case XML_PARSING:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001791 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001792 /* fall through */
1793 default:
1794 return XML_STATUS_OK;
1795 }
1796 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001797 eventEndPtr = eventPtr;
1798 processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001799 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001800 }
1801#ifndef XML_CONTEXT_BYTES
1802 else if (bufferPtr == bufferEnd) {
1803 const char *end;
1804 int nLeftOver;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001805 enum XML_Status result;
Victor Stinner5ff71322017-06-21 14:39:22 +02001806 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
1807 if (len > ((XML_Size)-1) / 2 - parseEndByteIndex) {
1808 errorCode = XML_ERROR_NO_MEMORY;
1809 eventPtr = eventEndPtr = NULL;
1810 processor = errorProcessor;
1811 return XML_STATUS_ERROR;
1812 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001813 parseEndByteIndex += len;
1814 positionPtr = s;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001815 ps_finalBuffer = (XML_Bool)isFinal;
Fred Drake31d485c2004-08-03 07:06:22 +00001816
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001817 errorCode = processor(parser, s, parseEndPtr = s + len, &end);
Fred Drake31d485c2004-08-03 07:06:22 +00001818
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001819 if (errorCode != XML_ERROR_NONE) {
1820 eventEndPtr = eventPtr;
1821 processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001822 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001823 }
Fred Drake31d485c2004-08-03 07:06:22 +00001824 else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001825 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001826 case XML_SUSPENDED:
1827 result = XML_STATUS_SUSPENDED;
1828 break;
1829 case XML_INITIALIZED:
1830 case XML_PARSING:
Fred Drake31d485c2004-08-03 07:06:22 +00001831 if (isFinal) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001832 ps_parsing = XML_FINISHED;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001833 return XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001834 }
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001835 /* fall through */
1836 default:
1837 result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001838 }
1839 }
1840
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001841 XmlUpdatePosition(encoding, positionPtr, end, &position);
1842 nLeftOver = s + len - end;
1843 if (nLeftOver) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001844 if (buffer == NULL || nLeftOver > bufferLim - buffer) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001845 /* avoid _signed_ integer overflow */
1846 char *temp = NULL;
1847 const int bytesToAllocate = (int)((unsigned)len * 2U);
1848 if (bytesToAllocate > 0) {
1849 temp = (buffer == NULL
1850 ? (char *)MALLOC(bytesToAllocate)
1851 : (char *)REALLOC(buffer, bytesToAllocate));
1852 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001853 if (temp == NULL) {
1854 errorCode = XML_ERROR_NO_MEMORY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001855 eventPtr = eventEndPtr = NULL;
1856 processor = errorProcessor;
1857 return XML_STATUS_ERROR;
1858 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001859 buffer = temp;
Victor Stinner5ff71322017-06-21 14:39:22 +02001860 bufferLim = buffer + bytesToAllocate;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001861 }
1862 memcpy(buffer, end, nLeftOver);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001863 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001864 bufferPtr = buffer;
1865 bufferEnd = buffer + nLeftOver;
1866 positionPtr = bufferPtr;
1867 parseEndPtr = bufferEnd;
1868 eventPtr = bufferPtr;
1869 eventEndPtr = bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00001870 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001871 }
1872#endif /* not defined XML_CONTEXT_BYTES */
1873 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001874 void *buff = XML_GetBuffer(parser, len);
1875 if (buff == NULL)
1876 return XML_STATUS_ERROR;
1877 else {
1878 memcpy(buff, s, len);
1879 return XML_ParseBuffer(parser, len, isFinal);
1880 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001881 }
1882}
1883
Fred Drake08317ae2003-10-21 15:38:55 +00001884enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001885XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001886{
Fred Drake31d485c2004-08-03 07:06:22 +00001887 const char *start;
Neal Norwitz52ca0dd2006-01-07 21:21:16 +00001888 enum XML_Status result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001889
Victor Stinner5ff71322017-06-21 14:39:22 +02001890 if (parser == NULL)
1891 return XML_STATUS_ERROR;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001892 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001893 case XML_SUSPENDED:
1894 errorCode = XML_ERROR_SUSPENDED;
1895 return XML_STATUS_ERROR;
1896 case XML_FINISHED:
1897 errorCode = XML_ERROR_FINISHED;
1898 return XML_STATUS_ERROR;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001899 case XML_INITIALIZED:
1900 if (parentParser == NULL && !startParsing(parser)) {
1901 errorCode = XML_ERROR_NO_MEMORY;
1902 return XML_STATUS_ERROR;
1903 }
Fred Drake31d485c2004-08-03 07:06:22 +00001904 default:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001905 ps_parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00001906 }
1907
1908 start = bufferPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001909 positionPtr = start;
1910 bufferEnd += len;
Fred Drake31d485c2004-08-03 07:06:22 +00001911 parseEndPtr = bufferEnd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001912 parseEndByteIndex += len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001913 ps_finalBuffer = (XML_Bool)isFinal;
Fred Drake31d485c2004-08-03 07:06:22 +00001914
1915 errorCode = processor(parser, start, parseEndPtr, &bufferPtr);
1916
1917 if (errorCode != XML_ERROR_NONE) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001918 eventEndPtr = eventPtr;
1919 processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001920 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001921 }
Fred Drake31d485c2004-08-03 07:06:22 +00001922 else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001923 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001924 case XML_SUSPENDED:
1925 result = XML_STATUS_SUSPENDED;
1926 break;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001927 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00001928 case XML_PARSING:
1929 if (isFinal) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001930 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001931 return result;
1932 }
1933 default: ; /* should not happen */
1934 }
1935 }
1936
1937 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
1938 positionPtr = bufferPtr;
1939 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001940}
1941
Fred Drake08317ae2003-10-21 15:38:55 +00001942void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001943XML_GetBuffer(XML_Parser parser, int len)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001944{
Victor Stinner5ff71322017-06-21 14:39:22 +02001945 if (parser == NULL)
1946 return NULL;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001947 if (len < 0) {
1948 errorCode = XML_ERROR_NO_MEMORY;
1949 return NULL;
1950 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001951 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001952 case XML_SUSPENDED:
1953 errorCode = XML_ERROR_SUSPENDED;
1954 return NULL;
1955 case XML_FINISHED:
1956 errorCode = XML_ERROR_FINISHED;
1957 return NULL;
1958 default: ;
1959 }
1960
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001961 if (len > bufferLim - bufferEnd) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001962#ifdef XML_CONTEXT_BYTES
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001963 int keep;
Victor Stinner23ec4b52017-06-15 00:54:36 +02001964#endif /* defined XML_CONTEXT_BYTES */
1965 /* Do not invoke signed arithmetic overflow: */
1966 int neededSize = (int) ((unsigned)len + (unsigned)(bufferEnd - bufferPtr));
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001967 if (neededSize < 0) {
1968 errorCode = XML_ERROR_NO_MEMORY;
1969 return NULL;
1970 }
1971#ifdef XML_CONTEXT_BYTES
1972 keep = (int)(bufferPtr - buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001973 if (keep > XML_CONTEXT_BYTES)
1974 keep = XML_CONTEXT_BYTES;
1975 neededSize += keep;
1976#endif /* defined XML_CONTEXT_BYTES */
1977 if (neededSize <= bufferLim - buffer) {
1978#ifdef XML_CONTEXT_BYTES
1979 if (keep < bufferPtr - buffer) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001980 int offset = (int)(bufferPtr - buffer) - keep;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001981 memmove(buffer, &buffer[offset], bufferEnd - bufferPtr + keep);
1982 bufferEnd -= offset;
1983 bufferPtr -= offset;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001984 }
1985#else
1986 memmove(buffer, bufferPtr, bufferEnd - bufferPtr);
1987 bufferEnd = buffer + (bufferEnd - bufferPtr);
1988 bufferPtr = buffer;
1989#endif /* not defined XML_CONTEXT_BYTES */
1990 }
1991 else {
1992 char *newBuf;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001993 int bufferSize = (int)(bufferLim - bufferPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001994 if (bufferSize == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001995 bufferSize = INIT_BUFFER_SIZE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001996 do {
Victor Stinner23ec4b52017-06-15 00:54:36 +02001997 /* Do not invoke signed arithmetic overflow: */
1998 bufferSize = (int) (2U * (unsigned) bufferSize);
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001999 } while (bufferSize < neededSize && bufferSize > 0);
2000 if (bufferSize <= 0) {
2001 errorCode = XML_ERROR_NO_MEMORY;
2002 return NULL;
2003 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002004 newBuf = (char *)MALLOC(bufferSize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002005 if (newBuf == 0) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002006 errorCode = XML_ERROR_NO_MEMORY;
2007 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002008 }
2009 bufferLim = newBuf + bufferSize;
2010#ifdef XML_CONTEXT_BYTES
2011 if (bufferPtr) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002012 int keep = (int)(bufferPtr - buffer);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002013 if (keep > XML_CONTEXT_BYTES)
2014 keep = XML_CONTEXT_BYTES;
2015 memcpy(newBuf, &bufferPtr[-keep], bufferEnd - bufferPtr + keep);
2016 FREE(buffer);
2017 buffer = newBuf;
2018 bufferEnd = buffer + (bufferEnd - bufferPtr) + keep;
2019 bufferPtr = buffer + keep;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002020 }
2021 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002022 bufferEnd = newBuf + (bufferEnd - bufferPtr);
2023 bufferPtr = buffer = newBuf;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002024 }
2025#else
2026 if (bufferPtr) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002027 memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr);
2028 FREE(buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002029 }
2030 bufferEnd = newBuf + (bufferEnd - bufferPtr);
2031 bufferPtr = buffer = newBuf;
2032#endif /* not defined XML_CONTEXT_BYTES */
2033 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002034 eventPtr = eventEndPtr = NULL;
2035 positionPtr = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002036 }
2037 return bufferEnd;
2038}
2039
Fred Drake31d485c2004-08-03 07:06:22 +00002040enum XML_Status XMLCALL
2041XML_StopParser(XML_Parser parser, XML_Bool resumable)
2042{
Victor Stinner5ff71322017-06-21 14:39:22 +02002043 if (parser == NULL)
2044 return XML_STATUS_ERROR;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002045 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002046 case XML_SUSPENDED:
2047 if (resumable) {
2048 errorCode = XML_ERROR_SUSPENDED;
2049 return XML_STATUS_ERROR;
2050 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002051 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002052 break;
2053 case XML_FINISHED:
2054 errorCode = XML_ERROR_FINISHED;
2055 return XML_STATUS_ERROR;
2056 default:
2057 if (resumable) {
2058#ifdef XML_DTD
2059 if (isParamEntity) {
2060 errorCode = XML_ERROR_SUSPEND_PE;
2061 return XML_STATUS_ERROR;
2062 }
2063#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002064 ps_parsing = XML_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00002065 }
2066 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002067 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002068 }
2069 return XML_STATUS_OK;
2070}
2071
2072enum XML_Status XMLCALL
2073XML_ResumeParser(XML_Parser parser)
2074{
Neal Norwitz52ca0dd2006-01-07 21:21:16 +00002075 enum XML_Status result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00002076
Victor Stinner5ff71322017-06-21 14:39:22 +02002077 if (parser == NULL)
2078 return XML_STATUS_ERROR;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002079 if (ps_parsing != XML_SUSPENDED) {
Fred Drake31d485c2004-08-03 07:06:22 +00002080 errorCode = XML_ERROR_NOT_SUSPENDED;
2081 return XML_STATUS_ERROR;
2082 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002083 ps_parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00002084
2085 errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr);
2086
2087 if (errorCode != XML_ERROR_NONE) {
2088 eventEndPtr = eventPtr;
2089 processor = errorProcessor;
2090 return XML_STATUS_ERROR;
2091 }
2092 else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002093 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002094 case XML_SUSPENDED:
2095 result = XML_STATUS_SUSPENDED;
2096 break;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002097 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00002098 case XML_PARSING:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002099 if (ps_finalBuffer) {
2100 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002101 return result;
2102 }
2103 default: ;
2104 }
2105 }
2106
2107 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
2108 positionPtr = bufferPtr;
2109 return result;
2110}
2111
2112void XMLCALL
2113XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status)
2114{
Victor Stinner5ff71322017-06-21 14:39:22 +02002115 if (parser == NULL)
2116 return;
Fred Drake31d485c2004-08-03 07:06:22 +00002117 assert(status != NULL);
2118 *status = parser->m_parsingStatus;
2119}
2120
Fred Drake08317ae2003-10-21 15:38:55 +00002121enum XML_Error XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002122XML_GetErrorCode(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002123{
Victor Stinner5ff71322017-06-21 14:39:22 +02002124 if (parser == NULL)
2125 return XML_ERROR_INVALID_ARGUMENT;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002126 return errorCode;
2127}
2128
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002129XML_Index XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002130XML_GetCurrentByteIndex(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002131{
Victor Stinner5ff71322017-06-21 14:39:22 +02002132 if (parser == NULL)
2133 return -1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002134 if (eventPtr)
Victor Stinner23ec4b52017-06-15 00:54:36 +02002135 return (XML_Index)(parseEndByteIndex - (parseEndPtr - eventPtr));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002136 return -1;
2137}
2138
Fred Drake08317ae2003-10-21 15:38:55 +00002139int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002140XML_GetCurrentByteCount(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002141{
Victor Stinner5ff71322017-06-21 14:39:22 +02002142 if (parser == NULL)
2143 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002144 if (eventEndPtr && eventPtr)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002145 return (int)(eventEndPtr - eventPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002146 return 0;
2147}
2148
Fred Drake08317ae2003-10-21 15:38:55 +00002149const char * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002150XML_GetInputContext(XML_Parser parser, int *offset, int *size)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002151{
2152#ifdef XML_CONTEXT_BYTES
Victor Stinner5ff71322017-06-21 14:39:22 +02002153 if (parser == NULL)
2154 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002155 if (eventPtr && buffer) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002156 if (offset != NULL)
2157 *offset = (int)(eventPtr - buffer);
2158 if (size != NULL)
2159 *size = (int)(bufferEnd - buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002160 return buffer;
2161 }
Victor Stinner5ff71322017-06-21 14:39:22 +02002162#else
2163 (void)parser;
2164 (void)offset;
2165 (void)size;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002166#endif /* defined XML_CONTEXT_BYTES */
2167 return (char *) 0;
2168}
2169
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002170XML_Size XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002171XML_GetCurrentLineNumber(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002172{
Victor Stinner5ff71322017-06-21 14:39:22 +02002173 if (parser == NULL)
2174 return 0;
Fred Drake31d485c2004-08-03 07:06:22 +00002175 if (eventPtr && eventPtr >= positionPtr) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002176 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
2177 positionPtr = eventPtr;
2178 }
2179 return position.lineNumber + 1;
2180}
2181
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002182XML_Size XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002183XML_GetCurrentColumnNumber(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002184{
Victor Stinner5ff71322017-06-21 14:39:22 +02002185 if (parser == NULL)
2186 return 0;
Fred Drake31d485c2004-08-03 07:06:22 +00002187 if (eventPtr && eventPtr >= positionPtr) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002188 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
2189 positionPtr = eventPtr;
2190 }
2191 return position.columnNumber;
2192}
2193
Fred Drake08317ae2003-10-21 15:38:55 +00002194void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002195XML_FreeContentModel(XML_Parser parser, XML_Content *model)
2196{
Victor Stinner5ff71322017-06-21 14:39:22 +02002197 if (parser != NULL)
2198 FREE(model);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002199}
2200
Fred Drake08317ae2003-10-21 15:38:55 +00002201void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002202XML_MemMalloc(XML_Parser parser, size_t size)
2203{
Victor Stinner5ff71322017-06-21 14:39:22 +02002204 if (parser == NULL)
2205 return NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002206 return MALLOC(size);
2207}
2208
Fred Drake08317ae2003-10-21 15:38:55 +00002209void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002210XML_MemRealloc(XML_Parser parser, void *ptr, size_t size)
2211{
Victor Stinner5ff71322017-06-21 14:39:22 +02002212 if (parser == NULL)
2213 return NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002214 return REALLOC(ptr, size);
2215}
2216
Fred Drake08317ae2003-10-21 15:38:55 +00002217void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002218XML_MemFree(XML_Parser parser, void *ptr)
2219{
Victor Stinner5ff71322017-06-21 14:39:22 +02002220 if (parser != NULL)
2221 FREE(ptr);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002222}
2223
Fred Drake08317ae2003-10-21 15:38:55 +00002224void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002225XML_DefaultCurrent(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002226{
Victor Stinner5ff71322017-06-21 14:39:22 +02002227 if (parser == NULL)
2228 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002229 if (defaultHandler) {
2230 if (openInternalEntities)
2231 reportDefault(parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002232 internalEncoding,
2233 openInternalEntities->internalEventPtr,
2234 openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002235 else
2236 reportDefault(parser, encoding, eventPtr, eventEndPtr);
2237 }
2238}
2239
Fred Drake08317ae2003-10-21 15:38:55 +00002240const XML_LChar * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002241XML_ErrorString(enum XML_Error code)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002242{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002243 static const XML_LChar* const message[] = {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002244 0,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002245 XML_L("out of memory"),
2246 XML_L("syntax error"),
2247 XML_L("no element found"),
2248 XML_L("not well-formed (invalid token)"),
2249 XML_L("unclosed token"),
2250 XML_L("partial character"),
2251 XML_L("mismatched tag"),
2252 XML_L("duplicate attribute"),
2253 XML_L("junk after document element"),
2254 XML_L("illegal parameter entity reference"),
2255 XML_L("undefined entity"),
2256 XML_L("recursive entity reference"),
2257 XML_L("asynchronous entity"),
2258 XML_L("reference to invalid character number"),
2259 XML_L("reference to binary entity"),
2260 XML_L("reference to external entity in attribute"),
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002261 XML_L("XML or text declaration not at start of entity"),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002262 XML_L("unknown encoding"),
2263 XML_L("encoding specified in XML declaration is incorrect"),
2264 XML_L("unclosed CDATA section"),
2265 XML_L("error in processing external entity reference"),
2266 XML_L("document is not standalone"),
2267 XML_L("unexpected parser state - please send a bug report"),
2268 XML_L("entity declared in parameter entity"),
2269 XML_L("requested feature requires XML_DTD support in Expat"),
Fred Drake08317ae2003-10-21 15:38:55 +00002270 XML_L("cannot change setting once parsing has begun"),
Fred Drake31d485c2004-08-03 07:06:22 +00002271 XML_L("unbound prefix"),
2272 XML_L("must not undeclare prefix"),
2273 XML_L("incomplete markup in parameter entity"),
2274 XML_L("XML declaration not well-formed"),
2275 XML_L("text declaration not well-formed"),
2276 XML_L("illegal character(s) in public id"),
2277 XML_L("parser suspended"),
2278 XML_L("parser not suspended"),
2279 XML_L("parsing aborted"),
2280 XML_L("parsing finished"),
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002281 XML_L("cannot suspend in external parameter entity"),
2282 XML_L("reserved prefix (xml) must not be undeclared or bound to another namespace name"),
2283 XML_L("reserved prefix (xmlns) must not be declared or undeclared"),
2284 XML_L("prefix must not be bound to one of the reserved namespace names")
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002285 };
2286 if (code > 0 && code < sizeof(message)/sizeof(message[0]))
2287 return message[code];
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002288 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002289}
2290
Fred Drake08317ae2003-10-21 15:38:55 +00002291const XML_LChar * XMLCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002292XML_ExpatVersion(void) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002293
2294 /* V1 is used to string-ize the version number. However, it would
2295 string-ize the actual version macro *names* unless we get them
2296 substituted before being passed to V1. CPP is defined to expand
2297 a macro, then rescan for more expansions. Thus, we use V2 to expand
2298 the version macros, then CPP will expand the resulting V1() macro
2299 with the correct numerals. */
2300 /* ### I'm assuming cpp is portable in this respect... */
2301
2302#define V1(a,b,c) XML_L(#a)XML_L(".")XML_L(#b)XML_L(".")XML_L(#c)
2303#define V2(a,b,c) XML_L("expat_")V1(a,b,c)
2304
2305 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2306
2307#undef V1
2308#undef V2
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002309}
2310
Fred Drake08317ae2003-10-21 15:38:55 +00002311XML_Expat_Version XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002312XML_ExpatVersionInfo(void)
2313{
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002314 XML_Expat_Version version;
2315
2316 version.major = XML_MAJOR_VERSION;
2317 version.minor = XML_MINOR_VERSION;
2318 version.micro = XML_MICRO_VERSION;
2319
2320 return version;
2321}
2322
Fred Drake08317ae2003-10-21 15:38:55 +00002323const XML_Feature * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002324XML_GetFeatureList(void)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002325{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002326 static const XML_Feature features[] = {
2327 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2328 sizeof(XML_Char)},
2329 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2330 sizeof(XML_LChar)},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002331#ifdef XML_UNICODE
Fred Drake08317ae2003-10-21 15:38:55 +00002332 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002333#endif
2334#ifdef XML_UNICODE_WCHAR_T
Fred Drake08317ae2003-10-21 15:38:55 +00002335 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002336#endif
2337#ifdef XML_DTD
Fred Drake08317ae2003-10-21 15:38:55 +00002338 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002339#endif
2340#ifdef XML_CONTEXT_BYTES
2341 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2342 XML_CONTEXT_BYTES},
2343#endif
2344#ifdef XML_MIN_SIZE
Fred Drake08317ae2003-10-21 15:38:55 +00002345 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002346#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002347#ifdef XML_NS
2348 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2349#endif
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002350#ifdef XML_LARGE_SIZE
2351 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2352#endif
2353#ifdef XML_ATTR_INFO
2354 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2355#endif
Fred Drake08317ae2003-10-21 15:38:55 +00002356 {XML_FEATURE_END, NULL, 0}
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002357 };
2358
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002359 return features;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002360}
2361
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002362/* Initially tag->rawName always points into the parse buffer;
2363 for those TAG instances opened while the current parse buffer was
2364 processed, and not yet closed, we need to store tag->rawName in a more
2365 permanent location, since the parse buffer is about to be discarded.
2366*/
2367static XML_Bool
2368storeRawNames(XML_Parser parser)
2369{
2370 TAG *tag = tagStack;
2371 while (tag) {
2372 int bufSize;
2373 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2374 char *rawNameBuf = tag->buf + nameLen;
2375 /* Stop if already stored. Since tagStack is a stack, we can stop
2376 at the first entry that has already been copied; everything
2377 below it in the stack is already been accounted for in a
2378 previous call to this function.
2379 */
2380 if (tag->rawName == rawNameBuf)
2381 break;
2382 /* For re-use purposes we need to ensure that the
2383 size of tag->buf is a multiple of sizeof(XML_Char).
2384 */
2385 bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2386 if (bufSize > tag->bufEnd - tag->buf) {
2387 char *temp = (char *)REALLOC(tag->buf, bufSize);
2388 if (temp == NULL)
2389 return XML_FALSE;
2390 /* if tag->name.str points to tag->buf (only when namespace
2391 processing is off) then we have to update it
2392 */
2393 if (tag->name.str == (XML_Char *)tag->buf)
2394 tag->name.str = (XML_Char *)temp;
2395 /* if tag->name.localPart is set (when namespace processing is on)
2396 then update it as well, since it will always point into tag->buf
2397 */
2398 if (tag->name.localPart)
2399 tag->name.localPart = (XML_Char *)temp + (tag->name.localPart -
2400 (XML_Char *)tag->buf);
2401 tag->buf = temp;
2402 tag->bufEnd = temp + bufSize;
2403 rawNameBuf = temp + nameLen;
2404 }
2405 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2406 tag->rawName = rawNameBuf;
2407 tag = tag->parent;
2408 }
2409 return XML_TRUE;
2410}
2411
2412static enum XML_Error PTRCALL
2413contentProcessor(XML_Parser parser,
2414 const char *start,
2415 const char *end,
2416 const char **endPtr)
2417{
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002418 enum XML_Error result = doContent(parser, 0, encoding, start, end,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002419 endPtr, (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00002420 if (result == XML_ERROR_NONE) {
2421 if (!storeRawNames(parser))
2422 return XML_ERROR_NO_MEMORY;
2423 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002424 return result;
2425}
2426
2427static enum XML_Error PTRCALL
2428externalEntityInitProcessor(XML_Parser parser,
2429 const char *start,
2430 const char *end,
2431 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002432{
2433 enum XML_Error result = initializeEncoding(parser);
2434 if (result != XML_ERROR_NONE)
2435 return result;
2436 processor = externalEntityInitProcessor2;
2437 return externalEntityInitProcessor2(parser, start, end, endPtr);
2438}
2439
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002440static enum XML_Error PTRCALL
2441externalEntityInitProcessor2(XML_Parser parser,
2442 const char *start,
2443 const char *end,
2444 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002445{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002446 const char *next = start; /* XmlContentTok doesn't always set the last arg */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002447 int tok = XmlContentTok(encoding, start, end, &next);
2448 switch (tok) {
2449 case XML_TOK_BOM:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002450 /* If we are at the end of the buffer, this would cause the next stage,
2451 i.e. externalEntityInitProcessor3, to pass control directly to
2452 doContent (by detecting XML_TOK_NONE) without processing any xml text
2453 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2454 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002455 if (next == end && !ps_finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002456 *endPtr = next;
2457 return XML_ERROR_NONE;
2458 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002459 start = next;
2460 break;
2461 case XML_TOK_PARTIAL:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002462 if (!ps_finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002463 *endPtr = start;
2464 return XML_ERROR_NONE;
2465 }
2466 eventPtr = start;
2467 return XML_ERROR_UNCLOSED_TOKEN;
2468 case XML_TOK_PARTIAL_CHAR:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002469 if (!ps_finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002470 *endPtr = start;
2471 return XML_ERROR_NONE;
2472 }
2473 eventPtr = start;
2474 return XML_ERROR_PARTIAL_CHAR;
2475 }
2476 processor = externalEntityInitProcessor3;
2477 return externalEntityInitProcessor3(parser, start, end, endPtr);
2478}
2479
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002480static enum XML_Error PTRCALL
2481externalEntityInitProcessor3(XML_Parser parser,
2482 const char *start,
2483 const char *end,
2484 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002485{
Fred Drake31d485c2004-08-03 07:06:22 +00002486 int tok;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002487 const char *next = start; /* XmlContentTok doesn't always set the last arg */
Fred Drake31d485c2004-08-03 07:06:22 +00002488 eventPtr = start;
2489 tok = XmlContentTok(encoding, start, end, &next);
2490 eventEndPtr = next;
2491
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002492 switch (tok) {
2493 case XML_TOK_XML_DECL:
2494 {
Fred Drake31d485c2004-08-03 07:06:22 +00002495 enum XML_Error result;
2496 result = processXmlDecl(parser, 1, start, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002497 if (result != XML_ERROR_NONE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002498 return result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002499 switch (ps_parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002500 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00002501 *endPtr = next;
2502 return XML_ERROR_NONE;
2503 case XML_FINISHED:
2504 return XML_ERROR_ABORTED;
2505 default:
2506 start = next;
2507 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002508 }
2509 break;
2510 case XML_TOK_PARTIAL:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002511 if (!ps_finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002512 *endPtr = start;
2513 return XML_ERROR_NONE;
2514 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002515 return XML_ERROR_UNCLOSED_TOKEN;
2516 case XML_TOK_PARTIAL_CHAR:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002517 if (!ps_finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002518 *endPtr = start;
2519 return XML_ERROR_NONE;
2520 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002521 return XML_ERROR_PARTIAL_CHAR;
2522 }
2523 processor = externalEntityContentProcessor;
2524 tagLevel = 1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002525 return externalEntityContentProcessor(parser, start, end, endPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002526}
2527
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002528static enum XML_Error PTRCALL
2529externalEntityContentProcessor(XML_Parser parser,
2530 const char *start,
2531 const char *end,
2532 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002533{
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002534 enum XML_Error result = doContent(parser, 1, encoding, start, end,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002535 endPtr, (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00002536 if (result == XML_ERROR_NONE) {
2537 if (!storeRawNames(parser))
2538 return XML_ERROR_NO_MEMORY;
2539 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002540 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002541}
2542
2543static enum XML_Error
2544doContent(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002545 int startTagLevel,
2546 const ENCODING *enc,
2547 const char *s,
2548 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00002549 const char **nextPtr,
2550 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002551{
Fred Drake31d485c2004-08-03 07:06:22 +00002552 /* save one level of indirection */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002553 DTD * const dtd = _dtd;
Fred Drake31d485c2004-08-03 07:06:22 +00002554
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002555 const char **eventPP;
2556 const char **eventEndPP;
2557 if (enc == encoding) {
2558 eventPP = &eventPtr;
2559 eventEndPP = &eventEndPtr;
2560 }
2561 else {
2562 eventPP = &(openInternalEntities->internalEventPtr);
2563 eventEndPP = &(openInternalEntities->internalEventEndPtr);
2564 }
2565 *eventPP = s;
Fred Drake31d485c2004-08-03 07:06:22 +00002566
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002567 for (;;) {
2568 const char *next = s; /* XmlContentTok doesn't always set the last arg */
2569 int tok = XmlContentTok(enc, s, end, &next);
2570 *eventEndPP = next;
2571 switch (tok) {
2572 case XML_TOK_TRAILING_CR:
Fred Drake31d485c2004-08-03 07:06:22 +00002573 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002574 *nextPtr = s;
2575 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002576 }
2577 *eventEndPP = end;
2578 if (characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002579 XML_Char c = 0xA;
2580 characterDataHandler(handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002581 }
2582 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002583 reportDefault(parser, enc, s, end);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002584 /* We are at the end of the final buffer, should we check for
2585 XML_SUSPENDED, XML_FINISHED?
Fred Drake31d485c2004-08-03 07:06:22 +00002586 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002587 if (startTagLevel == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002588 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002589 if (tagLevel != startTagLevel)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002590 return XML_ERROR_ASYNC_ENTITY;
Fred Drake31d485c2004-08-03 07:06:22 +00002591 *nextPtr = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002592 return XML_ERROR_NONE;
2593 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00002594 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002595 *nextPtr = s;
2596 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002597 }
2598 if (startTagLevel > 0) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002599 if (tagLevel != startTagLevel)
2600 return XML_ERROR_ASYNC_ENTITY;
Fred Drake31d485c2004-08-03 07:06:22 +00002601 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002602 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002603 }
2604 return XML_ERROR_NO_ELEMENTS;
2605 case XML_TOK_INVALID:
2606 *eventPP = next;
2607 return XML_ERROR_INVALID_TOKEN;
2608 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00002609 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002610 *nextPtr = s;
2611 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002612 }
2613 return XML_ERROR_UNCLOSED_TOKEN;
2614 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00002615 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002616 *nextPtr = s;
2617 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002618 }
2619 return XML_ERROR_PARTIAL_CHAR;
2620 case XML_TOK_ENTITY_REF:
2621 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002622 const XML_Char *name;
2623 ENTITY *entity;
2624 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
2625 s + enc->minBytesPerChar,
2626 next - enc->minBytesPerChar);
2627 if (ch) {
2628 if (characterDataHandler)
2629 characterDataHandler(handlerArg, &ch, 1);
2630 else if (defaultHandler)
2631 reportDefault(parser, enc, s, next);
2632 break;
2633 }
2634 name = poolStoreString(&dtd->pool, enc,
2635 s + enc->minBytesPerChar,
2636 next - enc->minBytesPerChar);
2637 if (!name)
2638 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07002639 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002640 poolDiscard(&dtd->pool);
2641 /* First, determine if a check for an existing declaration is needed;
2642 if yes, check that the entity exists, and that it is internal,
2643 otherwise call the skipped entity or default handler.
2644 */
2645 if (!dtd->hasParamEntityRefs || dtd->standalone) {
2646 if (!entity)
2647 return XML_ERROR_UNDEFINED_ENTITY;
2648 else if (!entity->is_internal)
2649 return XML_ERROR_ENTITY_DECLARED_IN_PE;
2650 }
2651 else if (!entity) {
2652 if (skippedEntityHandler)
2653 skippedEntityHandler(handlerArg, name, 0);
2654 else if (defaultHandler)
2655 reportDefault(parser, enc, s, next);
2656 break;
2657 }
2658 if (entity->open)
2659 return XML_ERROR_RECURSIVE_ENTITY_REF;
2660 if (entity->notation)
2661 return XML_ERROR_BINARY_ENTITY_REF;
2662 if (entity->textPtr) {
2663 enum XML_Error result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002664 if (!defaultExpandInternalEntities) {
2665 if (skippedEntityHandler)
2666 skippedEntityHandler(handlerArg, entity->name, 0);
2667 else if (defaultHandler)
2668 reportDefault(parser, enc, s, next);
2669 break;
2670 }
Fred Drake31d485c2004-08-03 07:06:22 +00002671 result = processInternalEntity(parser, entity, XML_FALSE);
2672 if (result != XML_ERROR_NONE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002673 return result;
2674 }
2675 else if (externalEntityRefHandler) {
2676 const XML_Char *context;
2677 entity->open = XML_TRUE;
2678 context = getContext(parser);
2679 entity->open = XML_FALSE;
2680 if (!context)
2681 return XML_ERROR_NO_MEMORY;
Fred Drake31d485c2004-08-03 07:06:22 +00002682 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002683 context,
2684 entity->base,
2685 entity->systemId,
2686 entity->publicId))
2687 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2688 poolDiscard(&tempPool);
2689 }
2690 else if (defaultHandler)
2691 reportDefault(parser, enc, s, next);
2692 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002693 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002694 case XML_TOK_START_TAG_NO_ATTS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002695 /* fall through */
2696 case XML_TOK_START_TAG_WITH_ATTS:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002697 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002698 TAG *tag;
2699 enum XML_Error result;
2700 XML_Char *toPtr;
2701 if (freeTagList) {
2702 tag = freeTagList;
2703 freeTagList = freeTagList->parent;
2704 }
2705 else {
2706 tag = (TAG *)MALLOC(sizeof(TAG));
2707 if (!tag)
2708 return XML_ERROR_NO_MEMORY;
2709 tag->buf = (char *)MALLOC(INIT_TAG_BUF_SIZE);
2710 if (!tag->buf) {
2711 FREE(tag);
2712 return XML_ERROR_NO_MEMORY;
2713 }
2714 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
2715 }
2716 tag->bindings = NULL;
2717 tag->parent = tagStack;
2718 tagStack = tag;
2719 tag->name.localPart = NULL;
2720 tag->name.prefix = NULL;
2721 tag->rawName = s + enc->minBytesPerChar;
2722 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
2723 ++tagLevel;
2724 {
2725 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
2726 const char *fromPtr = tag->rawName;
2727 toPtr = (XML_Char *)tag->buf;
2728 for (;;) {
2729 int bufSize;
2730 int convLen;
Victor Stinner23ec4b52017-06-15 00:54:36 +02002731 const enum XML_Convert_Result convert_res = XmlConvert(enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002732 &fromPtr, rawNameEnd,
2733 (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002734 convLen = (int)(toPtr - (XML_Char *)tag->buf);
Victor Stinner5ff71322017-06-21 14:39:22 +02002735 if ((fromPtr >= rawNameEnd) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002736 tag->name.strLen = convLen;
2737 break;
2738 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002739 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002740 {
2741 char *temp = (char *)REALLOC(tag->buf, bufSize);
2742 if (temp == NULL)
2743 return XML_ERROR_NO_MEMORY;
2744 tag->buf = temp;
2745 tag->bufEnd = temp + bufSize;
2746 toPtr = (XML_Char *)temp + convLen;
2747 }
2748 }
2749 }
2750 tag->name.str = (XML_Char *)tag->buf;
2751 *toPtr = XML_T('\0');
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002752 result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
2753 if (result)
2754 return result;
2755 if (startElementHandler)
2756 startElementHandler(handlerArg, tag->name.str,
2757 (const XML_Char **)atts);
2758 else if (defaultHandler)
2759 reportDefault(parser, enc, s, next);
2760 poolClear(&tempPool);
2761 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002762 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002763 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002764 /* fall through */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002765 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
2766 {
2767 const char *rawName = s + enc->minBytesPerChar;
2768 enum XML_Error result;
2769 BINDING *bindings = NULL;
2770 XML_Bool noElmHandlers = XML_TRUE;
2771 TAG_NAME name;
2772 name.str = poolStoreString(&tempPool, enc, rawName,
2773 rawName + XmlNameLength(enc, rawName));
2774 if (!name.str)
2775 return XML_ERROR_NO_MEMORY;
2776 poolFinish(&tempPool);
Fred Drake4faea012003-01-28 06:42:40 +00002777 result = storeAtts(parser, enc, s, &name, &bindings);
Victor Stinner5ff71322017-06-21 14:39:22 +02002778 if (result != XML_ERROR_NONE) {
2779 freeBindings(parser, bindings);
Fred Drake4faea012003-01-28 06:42:40 +00002780 return result;
Victor Stinner5ff71322017-06-21 14:39:22 +02002781 }
Fred Drake4faea012003-01-28 06:42:40 +00002782 poolFinish(&tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002783 if (startElementHandler) {
2784 startElementHandler(handlerArg, name.str, (const XML_Char **)atts);
2785 noElmHandlers = XML_FALSE;
2786 }
2787 if (endElementHandler) {
2788 if (startElementHandler)
2789 *eventPP = *eventEndPP;
2790 endElementHandler(handlerArg, name.str);
2791 noElmHandlers = XML_FALSE;
2792 }
2793 if (noElmHandlers && defaultHandler)
2794 reportDefault(parser, enc, s, next);
2795 poolClear(&tempPool);
Victor Stinner5ff71322017-06-21 14:39:22 +02002796 freeBindings(parser, bindings);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002797 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002798 if (tagLevel == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002799 return epilogProcessor(parser, next, end, nextPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002800 break;
2801 case XML_TOK_END_TAG:
2802 if (tagLevel == startTagLevel)
2803 return XML_ERROR_ASYNC_ENTITY;
2804 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002805 int len;
2806 const char *rawName;
2807 TAG *tag = tagStack;
2808 tagStack = tag->parent;
2809 tag->parent = freeTagList;
2810 freeTagList = tag;
2811 rawName = s + enc->minBytesPerChar*2;
2812 len = XmlNameLength(enc, rawName);
2813 if (len != tag->rawNameLength
2814 || memcmp(tag->rawName, rawName, len) != 0) {
2815 *eventPP = rawName;
2816 return XML_ERROR_TAG_MISMATCH;
2817 }
2818 --tagLevel;
2819 if (endElementHandler) {
2820 const XML_Char *localPart;
2821 const XML_Char *prefix;
2822 XML_Char *uri;
2823 localPart = tag->name.localPart;
2824 if (ns && localPart) {
2825 /* localPart and prefix may have been overwritten in
2826 tag->name.str, since this points to the binding->uri
2827 buffer which gets re-used; so we have to add them again
2828 */
2829 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
2830 /* don't need to check for space - already done in storeAtts() */
2831 while (*localPart) *uri++ = *localPart++;
2832 prefix = (XML_Char *)tag->name.prefix;
2833 if (ns_triplets && prefix) {
2834 *uri++ = namespaceSeparator;
2835 while (*prefix) *uri++ = *prefix++;
2836 }
2837 *uri = XML_T('\0');
2838 }
2839 endElementHandler(handlerArg, tag->name.str);
2840 }
2841 else if (defaultHandler)
2842 reportDefault(parser, enc, s, next);
2843 while (tag->bindings) {
2844 BINDING *b = tag->bindings;
2845 if (endNamespaceDeclHandler)
2846 endNamespaceDeclHandler(handlerArg, b->prefix->name);
2847 tag->bindings = tag->bindings->nextTagBinding;
2848 b->nextTagBinding = freeBindingList;
2849 freeBindingList = b;
2850 b->prefix->binding = b->prevPrefixBinding;
2851 }
2852 if (tagLevel == 0)
2853 return epilogProcessor(parser, next, end, nextPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002854 }
2855 break;
2856 case XML_TOK_CHAR_REF:
2857 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002858 int n = XmlCharRefNumber(enc, s);
2859 if (n < 0)
2860 return XML_ERROR_BAD_CHAR_REF;
2861 if (characterDataHandler) {
2862 XML_Char buf[XML_ENCODE_MAX];
2863 characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
2864 }
2865 else if (defaultHandler)
2866 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002867 }
2868 break;
2869 case XML_TOK_XML_DECL:
2870 return XML_ERROR_MISPLACED_XML_PI;
2871 case XML_TOK_DATA_NEWLINE:
2872 if (characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002873 XML_Char c = 0xA;
2874 characterDataHandler(handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002875 }
2876 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002877 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002878 break;
2879 case XML_TOK_CDATA_SECT_OPEN:
2880 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002881 enum XML_Error result;
2882 if (startCdataSectionHandler)
2883 startCdataSectionHandler(handlerArg);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002884#if 0
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002885 /* Suppose you doing a transformation on a document that involves
2886 changing only the character data. You set up a defaultHandler
2887 and a characterDataHandler. The defaultHandler simply copies
2888 characters through. The characterDataHandler does the
2889 transformation and writes the characters out escaping them as
2890 necessary. This case will fail to work if we leave out the
2891 following two lines (because & and < inside CDATA sections will
2892 be incorrectly escaped).
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002893
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002894 However, now we have a start/endCdataSectionHandler, so it seems
2895 easier to let the user deal with this.
2896 */
2897 else if (characterDataHandler)
2898 characterDataHandler(handlerArg, dataBuf, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002899#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002900 else if (defaultHandler)
2901 reportDefault(parser, enc, s, next);
Fred Drake31d485c2004-08-03 07:06:22 +00002902 result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore);
2903 if (result != XML_ERROR_NONE)
2904 return result;
2905 else if (!next) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002906 processor = cdataSectionProcessor;
2907 return result;
2908 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002909 }
2910 break;
2911 case XML_TOK_TRAILING_RSQB:
Fred Drake31d485c2004-08-03 07:06:22 +00002912 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002913 *nextPtr = s;
2914 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002915 }
2916 if (characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002917 if (MUST_CONVERT(enc, s)) {
2918 ICHAR *dataPtr = (ICHAR *)dataBuf;
2919 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
2920 characterDataHandler(handlerArg, dataBuf,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002921 (int)(dataPtr - (ICHAR *)dataBuf));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002922 }
2923 else
2924 characterDataHandler(handlerArg,
2925 (XML_Char *)s,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002926 (int)((XML_Char *)end - (XML_Char *)s));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002927 }
2928 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002929 reportDefault(parser, enc, s, end);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002930 /* We are at the end of the final buffer, should we check for
2931 XML_SUSPENDED, XML_FINISHED?
Fred Drake31d485c2004-08-03 07:06:22 +00002932 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002933 if (startTagLevel == 0) {
2934 *eventPP = end;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002935 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002936 }
2937 if (tagLevel != startTagLevel) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002938 *eventPP = end;
2939 return XML_ERROR_ASYNC_ENTITY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002940 }
Fred Drake31d485c2004-08-03 07:06:22 +00002941 *nextPtr = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002942 return XML_ERROR_NONE;
2943 case XML_TOK_DATA_CHARS:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002944 {
2945 XML_CharacterDataHandler charDataHandler = characterDataHandler;
2946 if (charDataHandler) {
2947 if (MUST_CONVERT(enc, s)) {
2948 for (;;) {
2949 ICHAR *dataPtr = (ICHAR *)dataBuf;
Victor Stinner23ec4b52017-06-15 00:54:36 +02002950 const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002951 *eventEndPP = s;
2952 charDataHandler(handlerArg, dataBuf,
2953 (int)(dataPtr - (ICHAR *)dataBuf));
Victor Stinner23ec4b52017-06-15 00:54:36 +02002954 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002955 break;
2956 *eventPP = s;
2957 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002958 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002959 else
2960 charDataHandler(handlerArg,
2961 (XML_Char *)s,
2962 (int)((XML_Char *)next - (XML_Char *)s));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002963 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002964 else if (defaultHandler)
2965 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002966 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002967 break;
2968 case XML_TOK_PI:
2969 if (!reportProcessingInstruction(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002970 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002971 break;
2972 case XML_TOK_COMMENT:
2973 if (!reportComment(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002974 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002975 break;
2976 default:
2977 if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002978 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002979 break;
2980 }
2981 *eventPP = s = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002982 switch (ps_parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002983 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00002984 *nextPtr = next;
2985 return XML_ERROR_NONE;
2986 case XML_FINISHED:
2987 return XML_ERROR_ABORTED;
2988 default: ;
2989 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002990 }
2991 /* not reached */
2992}
2993
Victor Stinner5ff71322017-06-21 14:39:22 +02002994/* This function does not call free() on the allocated memory, merely
2995 * moving it to the parser's freeBindingList where it can be freed or
2996 * reused as appropriate.
2997 */
2998static void
2999freeBindings(XML_Parser parser, BINDING *bindings)
3000{
3001 while (bindings) {
3002 BINDING *b = bindings;
3003
3004 /* startNamespaceDeclHandler will have been called for this
3005 * binding in addBindings(), so call the end handler now.
3006 */
3007 if (endNamespaceDeclHandler)
3008 endNamespaceDeclHandler(handlerArg, b->prefix->name);
3009
3010 bindings = bindings->nextTagBinding;
3011 b->nextTagBinding = freeBindingList;
3012 freeBindingList = b;
3013 b->prefix->binding = b->prevPrefixBinding;
3014 }
3015}
3016
Fred Drake4faea012003-01-28 06:42:40 +00003017/* Precondition: all arguments must be non-NULL;
3018 Purpose:
3019 - normalize attributes
3020 - check attributes for well-formedness
3021 - generate namespace aware attribute names (URI, prefix)
3022 - build list of attributes for startElementHandler
3023 - default attributes
3024 - process namespace declarations (check and report them)
3025 - generate namespace aware element name (URI, prefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003026*/
3027static enum XML_Error
3028storeAtts(XML_Parser parser, const ENCODING *enc,
3029 const char *attStr, TAG_NAME *tagNamePtr,
3030 BINDING **bindingsPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003031{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003032 DTD * const dtd = _dtd; /* save one level of indirection */
Fred Drake08317ae2003-10-21 15:38:55 +00003033 ELEMENT_TYPE *elementType;
3034 int nDefaultAtts;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003035 const XML_Char **appAtts; /* the attribute list for the application */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003036 int attIndex = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003037 int prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003038 int i;
3039 int n;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003040 XML_Char *uri;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003041 int nPrefixes = 0;
3042 BINDING *binding;
3043 const XML_Char *localPart;
3044
3045 /* lookup the element type name */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003046 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str,0);
Fred Drake4faea012003-01-28 06:42:40 +00003047 if (!elementType) {
3048 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3049 if (!name)
3050 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003051 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
Fred Drake4faea012003-01-28 06:42:40 +00003052 sizeof(ELEMENT_TYPE));
3053 if (!elementType)
3054 return XML_ERROR_NO_MEMORY;
3055 if (ns && !setElementTypePrefix(parser, elementType))
3056 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003057 }
Fred Drake4faea012003-01-28 06:42:40 +00003058 nDefaultAtts = elementType->nDefaultAtts;
3059
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003060 /* get the attributes from the tokenizer */
3061 n = XmlGetAttributes(enc, attStr, attsSize, atts);
3062 if (n + nDefaultAtts > attsSize) {
3063 int oldAttsSize = attsSize;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003064 ATTRIBUTE *temp;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003065#ifdef XML_ATTR_INFO
3066 XML_AttrInfo *temp2;
3067#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003068 attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003069 temp = (ATTRIBUTE *)REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE));
3070 if (temp == NULL)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003071 return XML_ERROR_NO_MEMORY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003072 atts = temp;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003073#ifdef XML_ATTR_INFO
3074 temp2 = (XML_AttrInfo *)REALLOC((void *)attInfo, attsSize * sizeof(XML_AttrInfo));
3075 if (temp2 == NULL)
3076 return XML_ERROR_NO_MEMORY;
3077 attInfo = temp2;
3078#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003079 if (n > oldAttsSize)
3080 XmlGetAttributes(enc, attStr, n, atts);
3081 }
Fred Drake4faea012003-01-28 06:42:40 +00003082
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003083 appAtts = (const XML_Char **)atts;
3084 for (i = 0; i < n; i++) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003085 ATTRIBUTE *currAtt = &atts[i];
3086#ifdef XML_ATTR_INFO
3087 XML_AttrInfo *currAttInfo = &attInfo[i];
3088#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003089 /* add the name and value to the attribute list */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003090 ATTRIBUTE_ID *attId = getAttributeId(parser, enc, currAtt->name,
3091 currAtt->name
3092 + XmlNameLength(enc, currAtt->name));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003093 if (!attId)
3094 return XML_ERROR_NO_MEMORY;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003095#ifdef XML_ATTR_INFO
3096 currAttInfo->nameStart = parseEndByteIndex - (parseEndPtr - currAtt->name);
3097 currAttInfo->nameEnd = currAttInfo->nameStart +
3098 XmlNameLength(enc, currAtt->name);
3099 currAttInfo->valueStart = parseEndByteIndex -
3100 (parseEndPtr - currAtt->valuePtr);
3101 currAttInfo->valueEnd = parseEndByteIndex - (parseEndPtr - currAtt->valueEnd);
3102#endif
Fred Drake08317ae2003-10-21 15:38:55 +00003103 /* Detect duplicate attributes by their QNames. This does not work when
3104 namespace processing is turned on and different prefixes for the same
3105 namespace are used. For this case we have a check further down.
3106 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003107 if ((attId->name)[-1]) {
3108 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003109 eventPtr = atts[i].name;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003110 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3111 }
3112 (attId->name)[-1] = 1;
3113 appAtts[attIndex++] = attId->name;
3114 if (!atts[i].normalized) {
3115 enum XML_Error result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003116 XML_Bool isCdata = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003117
3118 /* figure out whether declared as other than CDATA */
3119 if (attId->maybeTokenized) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003120 int j;
3121 for (j = 0; j < nDefaultAtts; j++) {
3122 if (attId == elementType->defaultAtts[j].id) {
3123 isCdata = elementType->defaultAtts[j].isCdata;
3124 break;
3125 }
3126 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003127 }
3128
3129 /* normalize the attribute value */
3130 result = storeAttributeValue(parser, enc, isCdata,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003131 atts[i].valuePtr, atts[i].valueEnd,
3132 &tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003133 if (result)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003134 return result;
Fred Drake4faea012003-01-28 06:42:40 +00003135 appAtts[attIndex] = poolStart(&tempPool);
3136 poolFinish(&tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003137 }
Fred Drake4faea012003-01-28 06:42:40 +00003138 else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003139 /* the value did not need normalizing */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003140 appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr,
3141 atts[i].valueEnd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003142 if (appAtts[attIndex] == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003143 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003144 poolFinish(&tempPool);
3145 }
3146 /* handle prefixed attribute names */
Fred Drake4faea012003-01-28 06:42:40 +00003147 if (attId->prefix) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003148 if (attId->xmlns) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003149 /* deal with namespace declarations here */
3150 enum XML_Error result = addBinding(parser, attId->prefix, attId,
3151 appAtts[attIndex], bindingsPtr);
3152 if (result)
3153 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003154 --attIndex;
3155 }
3156 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003157 /* deal with other prefixed names later */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003158 attIndex++;
3159 nPrefixes++;
3160 (attId->name)[-1] = 2;
3161 }
3162 }
3163 else
3164 attIndex++;
3165 }
Fred Drake4faea012003-01-28 06:42:40 +00003166
3167 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3168 nSpecifiedAtts = attIndex;
3169 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3170 for (i = 0; i < attIndex; i += 2)
3171 if (appAtts[i] == elementType->idAtt->name) {
3172 idAttIndex = i;
3173 break;
3174 }
3175 }
3176 else
3177 idAttIndex = -1;
3178
3179 /* do attribute defaulting */
3180 for (i = 0; i < nDefaultAtts; i++) {
3181 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3182 if (!(da->id->name)[-1] && da->value) {
3183 if (da->id->prefix) {
3184 if (da->id->xmlns) {
3185 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3186 da->value, bindingsPtr);
3187 if (result)
3188 return result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003189 }
3190 else {
Fred Drake4faea012003-01-28 06:42:40 +00003191 (da->id->name)[-1] = 2;
3192 nPrefixes++;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003193 appAtts[attIndex++] = da->id->name;
3194 appAtts[attIndex++] = da->value;
3195 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003196 }
Fred Drake4faea012003-01-28 06:42:40 +00003197 else {
3198 (da->id->name)[-1] = 1;
3199 appAtts[attIndex++] = da->id->name;
3200 appAtts[attIndex++] = da->value;
3201 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003202 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003203 }
Fred Drake4faea012003-01-28 06:42:40 +00003204 appAtts[attIndex] = 0;
3205
Fred Drake08317ae2003-10-21 15:38:55 +00003206 /* expand prefixed attribute names, check for duplicates,
3207 and clear flags that say whether attributes were specified */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003208 i = 0;
3209 if (nPrefixes) {
Fred Drake08317ae2003-10-21 15:38:55 +00003210 int j; /* hash table index */
3211 unsigned long version = nsAttsVersion;
3212 int nsAttsSize = (int)1 << nsAttsPower;
3213 /* size of hash table must be at least 2 * (# of prefixed attributes) */
3214 if ((nPrefixes << 1) >> nsAttsPower) { /* true for nsAttsPower = 0 */
3215 NS_ATT *temp;
3216 /* hash table size must also be a power of 2 and >= 8 */
3217 while (nPrefixes >> nsAttsPower++);
3218 if (nsAttsPower < 3)
3219 nsAttsPower = 3;
3220 nsAttsSize = (int)1 << nsAttsPower;
3221 temp = (NS_ATT *)REALLOC(nsAtts, nsAttsSize * sizeof(NS_ATT));
3222 if (!temp)
3223 return XML_ERROR_NO_MEMORY;
3224 nsAtts = temp;
3225 version = 0; /* force re-initialization of nsAtts hash table */
3226 }
3227 /* using a version flag saves us from initializing nsAtts every time */
3228 if (!version) { /* initialize version flags when version wraps around */
3229 version = INIT_ATTS_VERSION;
3230 for (j = nsAttsSize; j != 0; )
3231 nsAtts[--j].version = version;
3232 }
3233 nsAttsVersion = --version;
3234
3235 /* expand prefixed names and check for duplicates */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003236 for (; i < attIndex; i += 2) {
Fred Drake08317ae2003-10-21 15:38:55 +00003237 const XML_Char *s = appAtts[i];
3238 if (s[-1] == 2) { /* prefixed */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003239 ATTRIBUTE_ID *id;
Fred Drake08317ae2003-10-21 15:38:55 +00003240 const BINDING *b;
Victor Stinner5ff71322017-06-21 14:39:22 +02003241 unsigned long uriHash;
3242 struct siphash sip_state;
3243 struct sipkey sip_key;
3244
3245 copy_salt_to_sipkey(parser, &sip_key);
3246 sip24_init(&sip_state, &sip_key);
3247
Fred Drake08317ae2003-10-21 15:38:55 +00003248 ((XML_Char *)s)[-1] = 0; /* clear flag */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003249 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
Benjamin Peterson196d7db2016-06-11 13:28:56 -07003250 if (!id || !id->prefix)
3251 return XML_ERROR_NO_MEMORY;
Fred Drake08317ae2003-10-21 15:38:55 +00003252 b = id->prefix->binding;
3253 if (!b)
3254 return XML_ERROR_UNBOUND_PREFIX;
3255
Fred Drake08317ae2003-10-21 15:38:55 +00003256 for (j = 0; j < b->uriLen; j++) {
3257 const XML_Char c = b->uri[j];
3258 if (!poolAppendChar(&tempPool, c))
3259 return XML_ERROR_NO_MEMORY;
Fred Drake08317ae2003-10-21 15:38:55 +00003260 }
Victor Stinner5ff71322017-06-21 14:39:22 +02003261
3262 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3263
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003264 while (*s++ != XML_T(ASCII_COLON))
Fred Drake08317ae2003-10-21 15:38:55 +00003265 ;
Victor Stinner5ff71322017-06-21 14:39:22 +02003266
3267 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3268
Fred Drake08317ae2003-10-21 15:38:55 +00003269 do { /* copies null terminator */
Fred Drake08317ae2003-10-21 15:38:55 +00003270 if (!poolAppendChar(&tempPool, *s))
3271 return XML_ERROR_NO_MEMORY;
Fred Drake08317ae2003-10-21 15:38:55 +00003272 } while (*s++);
3273
Victor Stinner5ff71322017-06-21 14:39:22 +02003274 uriHash = (unsigned long)sip24_final(&sip_state);
3275
Fred Drake08317ae2003-10-21 15:38:55 +00003276 { /* Check hash table for duplicate of expanded name (uriName).
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003277 Derived from code in lookup(parser, HASH_TABLE *table, ...).
Fred Drake08317ae2003-10-21 15:38:55 +00003278 */
3279 unsigned char step = 0;
3280 unsigned long mask = nsAttsSize - 1;
3281 j = uriHash & mask; /* index into hash table */
3282 while (nsAtts[j].version == version) {
3283 /* for speed we compare stored hash values first */
3284 if (uriHash == nsAtts[j].hash) {
3285 const XML_Char *s1 = poolStart(&tempPool);
3286 const XML_Char *s2 = nsAtts[j].uriName;
3287 /* s1 is null terminated, but not s2 */
3288 for (; *s1 == *s2 && *s1 != 0; s1++, s2++);
3289 if (*s1 == 0)
3290 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3291 }
3292 if (!step)
3293 step = PROBE_STEP(uriHash, mask, nsAttsPower);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003294 j < step ? (j += nsAttsSize - step) : (j -= step);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003295 }
Fred Drake08317ae2003-10-21 15:38:55 +00003296 }
3297
3298 if (ns_triplets) { /* append namespace separator and prefix */
3299 tempPool.ptr[-1] = namespaceSeparator;
3300 s = b->prefix->name;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003301 do {
3302 if (!poolAppendChar(&tempPool, *s))
3303 return XML_ERROR_NO_MEMORY;
3304 } while (*s++);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003305 }
Fred Drake08317ae2003-10-21 15:38:55 +00003306
3307 /* store expanded name in attribute list */
3308 s = poolStart(&tempPool);
3309 poolFinish(&tempPool);
3310 appAtts[i] = s;
3311
3312 /* fill empty slot with new version, uriName and hash value */
3313 nsAtts[j].version = version;
3314 nsAtts[j].hash = uriHash;
3315 nsAtts[j].uriName = s;
3316
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003317 if (!--nPrefixes) {
3318 i += 2;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003319 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003320 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003321 }
Fred Drake08317ae2003-10-21 15:38:55 +00003322 else /* not prefixed */
3323 ((XML_Char *)s)[-1] = 0; /* clear flag */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003324 }
3325 }
Fred Drake08317ae2003-10-21 15:38:55 +00003326 /* clear flags for the remaining attributes */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003327 for (; i < attIndex; i += 2)
3328 ((XML_Char *)(appAtts[i]))[-1] = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003329 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3330 binding->attId->name[-1] = 0;
Fred Drake4faea012003-01-28 06:42:40 +00003331
Fred Drake08317ae2003-10-21 15:38:55 +00003332 if (!ns)
3333 return XML_ERROR_NONE;
3334
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003335 /* expand the element type name */
3336 if (elementType->prefix) {
3337 binding = elementType->prefix->binding;
3338 if (!binding)
Fred Drake08317ae2003-10-21 15:38:55 +00003339 return XML_ERROR_UNBOUND_PREFIX;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003340 localPart = tagNamePtr->str;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003341 while (*localPart++ != XML_T(ASCII_COLON))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003342 ;
3343 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003344 else if (dtd->defaultPrefix.binding) {
3345 binding = dtd->defaultPrefix.binding;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003346 localPart = tagNamePtr->str;
3347 }
3348 else
3349 return XML_ERROR_NONE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003350 prefixLen = 0;
Fred Drake08317ae2003-10-21 15:38:55 +00003351 if (ns_triplets && binding->prefix->name) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003352 for (; binding->prefix->name[prefixLen++];)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003353 ; /* prefixLen includes null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003354 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003355 tagNamePtr->localPart = localPart;
3356 tagNamePtr->uriLen = binding->uriLen;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003357 tagNamePtr->prefix = binding->prefix->name;
3358 tagNamePtr->prefixLen = prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003359 for (i = 0; localPart[i++];)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003360 ; /* i includes null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003361 n = i + binding->uriLen + prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003362 if (n > binding->uriAlloc) {
3363 TAG *p;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003364 uri = (XML_Char *)MALLOC((n + EXPAND_SPARE) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003365 if (!uri)
3366 return XML_ERROR_NO_MEMORY;
3367 binding->uriAlloc = n + EXPAND_SPARE;
3368 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3369 for (p = tagStack; p; p = p->parent)
3370 if (p->name.str == binding->uri)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003371 p->name.str = uri;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003372 FREE(binding->uri);
3373 binding->uri = uri;
3374 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003375 /* if namespaceSeparator != '\0' then uri includes it already */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003376 uri = binding->uri + binding->uriLen;
3377 memcpy(uri, localPart, i * sizeof(XML_Char));
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003378 /* we always have a namespace separator between localPart and prefix */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003379 if (prefixLen) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003380 uri += i - 1;
3381 *uri = namespaceSeparator; /* replace null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003382 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3383 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003384 tagNamePtr->str = binding->uri;
3385 return XML_ERROR_NONE;
3386}
3387
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003388/* addBinding() overwrites the value of prefix->binding without checking.
3389 Therefore one must keep track of the old value outside of addBinding().
3390*/
3391static enum XML_Error
3392addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3393 const XML_Char *uri, BINDING **bindingsPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003394{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003395 static const XML_Char xmlNamespace[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003396 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3397 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3398 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
3399 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, ASCII_SLASH,
3400 ASCII_n, ASCII_a, ASCII_m, ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3401 ASCII_e, '\0'
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003402 };
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003403 static const int xmlLen =
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003404 (int)sizeof(xmlNamespace)/sizeof(XML_Char) - 1;
3405 static const XML_Char xmlnsNamespace[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003406 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3407 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3408 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_2, ASCII_0, ASCII_0,
3409 ASCII_0, ASCII_SLASH, ASCII_x, ASCII_m, ASCII_l, ASCII_n, ASCII_s,
3410 ASCII_SLASH, '\0'
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003411 };
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003412 static const int xmlnsLen =
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003413 (int)sizeof(xmlnsNamespace)/sizeof(XML_Char) - 1;
3414
3415 XML_Bool mustBeXML = XML_FALSE;
3416 XML_Bool isXML = XML_TRUE;
3417 XML_Bool isXMLNS = XML_TRUE;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003418
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003419 BINDING *b;
3420 int len;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003421
Fred Drake31d485c2004-08-03 07:06:22 +00003422 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003423 if (*uri == XML_T('\0') && prefix->name)
Fred Drake31d485c2004-08-03 07:06:22 +00003424 return XML_ERROR_UNDECLARING_PREFIX;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003425
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003426 if (prefix->name
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003427 && prefix->name[0] == XML_T(ASCII_x)
3428 && prefix->name[1] == XML_T(ASCII_m)
3429 && prefix->name[2] == XML_T(ASCII_l)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003430
3431 /* Not allowed to bind xmlns */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003432 if (prefix->name[3] == XML_T(ASCII_n)
3433 && prefix->name[4] == XML_T(ASCII_s)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003434 && prefix->name[5] == XML_T('\0'))
3435 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3436
3437 if (prefix->name[3] == XML_T('\0'))
3438 mustBeXML = XML_TRUE;
3439 }
3440
3441 for (len = 0; uri[len]; len++) {
3442 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3443 isXML = XML_FALSE;
3444
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003445 if (!mustBeXML && isXMLNS
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003446 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3447 isXMLNS = XML_FALSE;
3448 }
3449 isXML = isXML && len == xmlLen;
3450 isXMLNS = isXMLNS && len == xmlnsLen;
3451
3452 if (mustBeXML != isXML)
3453 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3454 : XML_ERROR_RESERVED_NAMESPACE_URI;
3455
3456 if (isXMLNS)
3457 return XML_ERROR_RESERVED_NAMESPACE_URI;
3458
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003459 if (namespaceSeparator)
3460 len++;
3461 if (freeBindingList) {
3462 b = freeBindingList;
3463 if (len > b->uriAlloc) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003464 XML_Char *temp = (XML_Char *)REALLOC(b->uri,
3465 sizeof(XML_Char) * (len + EXPAND_SPARE));
3466 if (temp == NULL)
3467 return XML_ERROR_NO_MEMORY;
3468 b->uri = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003469 b->uriAlloc = len + EXPAND_SPARE;
3470 }
3471 freeBindingList = b->nextTagBinding;
3472 }
3473 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003474 b = (BINDING *)MALLOC(sizeof(BINDING));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003475 if (!b)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003476 return XML_ERROR_NO_MEMORY;
3477 b->uri = (XML_Char *)MALLOC(sizeof(XML_Char) * (len + EXPAND_SPARE));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003478 if (!b->uri) {
3479 FREE(b);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003480 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003481 }
3482 b->uriAlloc = len + EXPAND_SPARE;
3483 }
3484 b->uriLen = len;
3485 memcpy(b->uri, uri, len * sizeof(XML_Char));
3486 if (namespaceSeparator)
3487 b->uri[len - 1] = namespaceSeparator;
3488 b->prefix = prefix;
3489 b->attId = attId;
3490 b->prevPrefixBinding = prefix->binding;
Fred Drake08317ae2003-10-21 15:38:55 +00003491 /* NULL binding when default namespace undeclared */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003492 if (*uri == XML_T('\0') && prefix == &_dtd->defaultPrefix)
3493 prefix->binding = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003494 else
3495 prefix->binding = b;
3496 b->nextTagBinding = *bindingsPtr;
3497 *bindingsPtr = b;
Fred Drake31d485c2004-08-03 07:06:22 +00003498 /* if attId == NULL then we are not starting a namespace scope */
3499 if (attId && startNamespaceDeclHandler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003500 startNamespaceDeclHandler(handlerArg, prefix->name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003501 prefix->binding ? uri : 0);
3502 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003503}
3504
3505/* The idea here is to avoid using stack for each CDATA section when
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003506 the whole file is parsed with one call.
3507*/
3508static enum XML_Error PTRCALL
3509cdataSectionProcessor(XML_Parser parser,
3510 const char *start,
3511 const char *end,
3512 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003513{
Fred Drake31d485c2004-08-03 07:06:22 +00003514 enum XML_Error result = doCdataSection(parser, encoding, &start, end,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003515 endPtr, (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00003516 if (result != XML_ERROR_NONE)
3517 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003518 if (start) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003519 if (parentParser) { /* we are parsing an external entity */
3520 processor = externalEntityContentProcessor;
3521 return externalEntityContentProcessor(parser, start, end, endPtr);
3522 }
3523 else {
3524 processor = contentProcessor;
3525 return contentProcessor(parser, start, end, endPtr);
3526 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003527 }
3528 return result;
3529}
3530
Fred Drake31d485c2004-08-03 07:06:22 +00003531/* startPtr gets set to non-null if the section is closed, and to null if
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003532 the section is not yet closed.
3533*/
3534static enum XML_Error
3535doCdataSection(XML_Parser parser,
3536 const ENCODING *enc,
3537 const char **startPtr,
3538 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00003539 const char **nextPtr,
3540 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003541{
3542 const char *s = *startPtr;
3543 const char **eventPP;
3544 const char **eventEndPP;
3545 if (enc == encoding) {
3546 eventPP = &eventPtr;
3547 *eventPP = s;
3548 eventEndPP = &eventEndPtr;
3549 }
3550 else {
3551 eventPP = &(openInternalEntities->internalEventPtr);
3552 eventEndPP = &(openInternalEntities->internalEventEndPtr);
3553 }
3554 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003555 *startPtr = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00003556
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003557 for (;;) {
3558 const char *next;
3559 int tok = XmlCdataSectionTok(enc, s, end, &next);
3560 *eventEndPP = next;
3561 switch (tok) {
3562 case XML_TOK_CDATA_SECT_CLOSE:
3563 if (endCdataSectionHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003564 endCdataSectionHandler(handlerArg);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003565#if 0
3566 /* see comment under XML_TOK_CDATA_SECT_OPEN */
3567 else if (characterDataHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003568 characterDataHandler(handlerArg, dataBuf, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003569#endif
3570 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003571 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003572 *startPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00003573 *nextPtr = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003574 if (ps_parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00003575 return XML_ERROR_ABORTED;
3576 else
3577 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003578 case XML_TOK_DATA_NEWLINE:
3579 if (characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003580 XML_Char c = 0xA;
3581 characterDataHandler(handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003582 }
3583 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003584 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003585 break;
3586 case XML_TOK_DATA_CHARS:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003587 {
3588 XML_CharacterDataHandler charDataHandler = characterDataHandler;
3589 if (charDataHandler) {
3590 if (MUST_CONVERT(enc, s)) {
3591 for (;;) {
3592 ICHAR *dataPtr = (ICHAR *)dataBuf;
Victor Stinner23ec4b52017-06-15 00:54:36 +02003593 const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003594 *eventEndPP = next;
3595 charDataHandler(handlerArg, dataBuf,
3596 (int)(dataPtr - (ICHAR *)dataBuf));
Victor Stinner23ec4b52017-06-15 00:54:36 +02003597 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003598 break;
3599 *eventPP = s;
3600 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003601 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003602 else
3603 charDataHandler(handlerArg,
3604 (XML_Char *)s,
3605 (int)((XML_Char *)next - (XML_Char *)s));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003606 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003607 else if (defaultHandler)
3608 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003609 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003610 break;
3611 case XML_TOK_INVALID:
3612 *eventPP = next;
3613 return XML_ERROR_INVALID_TOKEN;
3614 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003615 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003616 *nextPtr = s;
3617 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003618 }
3619 return XML_ERROR_PARTIAL_CHAR;
3620 case XML_TOK_PARTIAL:
3621 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00003622 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003623 *nextPtr = s;
3624 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003625 }
3626 return XML_ERROR_UNCLOSED_CDATA_SECTION;
3627 default:
3628 *eventPP = next;
3629 return XML_ERROR_UNEXPECTED_STATE;
3630 }
Fred Drake31d485c2004-08-03 07:06:22 +00003631
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003632 *eventPP = s = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003633 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00003634 case XML_SUSPENDED:
3635 *nextPtr = next;
3636 return XML_ERROR_NONE;
3637 case XML_FINISHED:
3638 return XML_ERROR_ABORTED;
3639 default: ;
3640 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003641 }
3642 /* not reached */
3643}
3644
3645#ifdef XML_DTD
3646
3647/* The idea here is to avoid using stack for each IGNORE section when
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003648 the whole file is parsed with one call.
3649*/
3650static enum XML_Error PTRCALL
3651ignoreSectionProcessor(XML_Parser parser,
3652 const char *start,
3653 const char *end,
3654 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003655{
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003656 enum XML_Error result = doIgnoreSection(parser, encoding, &start, end,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003657 endPtr, (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00003658 if (result != XML_ERROR_NONE)
3659 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003660 if (start) {
3661 processor = prologProcessor;
3662 return prologProcessor(parser, start, end, endPtr);
3663 }
3664 return result;
3665}
3666
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003667/* startPtr gets set to non-null is the section is closed, and to null
3668 if the section is not yet closed.
3669*/
3670static enum XML_Error
3671doIgnoreSection(XML_Parser parser,
3672 const ENCODING *enc,
3673 const char **startPtr,
3674 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00003675 const char **nextPtr,
3676 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003677{
3678 const char *next;
3679 int tok;
3680 const char *s = *startPtr;
3681 const char **eventPP;
3682 const char **eventEndPP;
3683 if (enc == encoding) {
3684 eventPP = &eventPtr;
3685 *eventPP = s;
3686 eventEndPP = &eventEndPtr;
3687 }
3688 else {
3689 eventPP = &(openInternalEntities->internalEventPtr);
3690 eventEndPP = &(openInternalEntities->internalEventEndPtr);
3691 }
3692 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003693 *startPtr = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003694 tok = XmlIgnoreSectionTok(enc, s, end, &next);
3695 *eventEndPP = next;
3696 switch (tok) {
3697 case XML_TOK_IGNORE_SECT:
3698 if (defaultHandler)
3699 reportDefault(parser, enc, s, next);
3700 *startPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00003701 *nextPtr = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003702 if (ps_parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00003703 return XML_ERROR_ABORTED;
3704 else
3705 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003706 case XML_TOK_INVALID:
3707 *eventPP = next;
3708 return XML_ERROR_INVALID_TOKEN;
3709 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003710 if (haveMore) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003711 *nextPtr = s;
3712 return XML_ERROR_NONE;
3713 }
3714 return XML_ERROR_PARTIAL_CHAR;
3715 case XML_TOK_PARTIAL:
3716 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00003717 if (haveMore) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003718 *nextPtr = s;
3719 return XML_ERROR_NONE;
3720 }
3721 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
3722 default:
3723 *eventPP = next;
3724 return XML_ERROR_UNEXPECTED_STATE;
3725 }
3726 /* not reached */
3727}
3728
3729#endif /* XML_DTD */
3730
3731static enum XML_Error
3732initializeEncoding(XML_Parser parser)
3733{
3734 const char *s;
3735#ifdef XML_UNICODE
3736 char encodingBuf[128];
3737 if (!protocolEncodingName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003738 s = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003739 else {
3740 int i;
3741 for (i = 0; protocolEncodingName[i]; i++) {
3742 if (i == sizeof(encodingBuf) - 1
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003743 || (protocolEncodingName[i] & ~0x7f) != 0) {
3744 encodingBuf[0] = '\0';
3745 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003746 }
3747 encodingBuf[i] = (char)protocolEncodingName[i];
3748 }
3749 encodingBuf[i] = '\0';
3750 s = encodingBuf;
3751 }
3752#else
3753 s = protocolEncodingName;
3754#endif
3755 if ((ns ? XmlInitEncodingNS : XmlInitEncoding)(&initEncoding, &encoding, s))
3756 return XML_ERROR_NONE;
3757 return handleUnknownEncoding(parser, protocolEncodingName);
3758}
3759
3760static enum XML_Error
3761processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003762 const char *s, const char *next)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003763{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003764 const char *encodingName = NULL;
3765 const XML_Char *storedEncName = NULL;
3766 const ENCODING *newEncoding = NULL;
3767 const char *version = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003768 const char *versionend;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003769 const XML_Char *storedversion = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003770 int standalone = -1;
3771 if (!(ns
3772 ? XmlParseXmlDeclNS
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003773 : XmlParseXmlDecl)(isGeneralTextEntity,
3774 encoding,
3775 s,
3776 next,
3777 &eventPtr,
3778 &version,
3779 &versionend,
3780 &encodingName,
3781 &newEncoding,
Fred Drake31d485c2004-08-03 07:06:22 +00003782 &standalone)) {
3783 if (isGeneralTextEntity)
3784 return XML_ERROR_TEXT_DECL;
3785 else
3786 return XML_ERROR_XML_DECL;
3787 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003788 if (!isGeneralTextEntity && standalone == 1) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003789 _dtd->standalone = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003790#ifdef XML_DTD
3791 if (paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
3792 paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
3793#endif /* XML_DTD */
3794 }
3795 if (xmlDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003796 if (encodingName != NULL) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003797 storedEncName = poolStoreString(&temp2Pool,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003798 encoding,
3799 encodingName,
3800 encodingName
3801 + XmlNameLength(encoding, encodingName));
3802 if (!storedEncName)
3803 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003804 poolFinish(&temp2Pool);
3805 }
3806 if (version) {
3807 storedversion = poolStoreString(&temp2Pool,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003808 encoding,
3809 version,
3810 versionend - encoding->minBytesPerChar);
3811 if (!storedversion)
3812 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003813 }
3814 xmlDeclHandler(handlerArg, storedversion, storedEncName, standalone);
3815 }
3816 else if (defaultHandler)
3817 reportDefault(parser, encoding, s, next);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003818 if (protocolEncodingName == NULL) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003819 if (newEncoding) {
3820 if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003821 eventPtr = encodingName;
3822 return XML_ERROR_INCORRECT_ENCODING;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003823 }
3824 encoding = newEncoding;
3825 }
3826 else if (encodingName) {
3827 enum XML_Error result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003828 if (!storedEncName) {
3829 storedEncName = poolStoreString(
3830 &temp2Pool, encoding, encodingName,
3831 encodingName + XmlNameLength(encoding, encodingName));
3832 if (!storedEncName)
3833 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003834 }
3835 result = handleUnknownEncoding(parser, storedEncName);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003836 poolClear(&temp2Pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003837 if (result == XML_ERROR_UNKNOWN_ENCODING)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003838 eventPtr = encodingName;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003839 return result;
3840 }
3841 }
3842
3843 if (storedEncName || storedversion)
3844 poolClear(&temp2Pool);
3845
3846 return XML_ERROR_NONE;
3847}
3848
3849static enum XML_Error
3850handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
3851{
3852 if (unknownEncodingHandler) {
3853 XML_Encoding info;
3854 int i;
3855 for (i = 0; i < 256; i++)
3856 info.map[i] = -1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003857 info.convert = NULL;
3858 info.data = NULL;
3859 info.release = NULL;
3860 if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName,
3861 &info)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003862 ENCODING *enc;
3863 unknownEncodingMem = MALLOC(XmlSizeOfUnknownEncoding());
3864 if (!unknownEncodingMem) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003865 if (info.release)
3866 info.release(info.data);
3867 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003868 }
3869 enc = (ns
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003870 ? XmlInitUnknownEncodingNS
3871 : XmlInitUnknownEncoding)(unknownEncodingMem,
3872 info.map,
3873 info.convert,
3874 info.data);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003875 if (enc) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003876 unknownEncodingData = info.data;
3877 unknownEncodingRelease = info.release;
3878 encoding = enc;
3879 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003880 }
3881 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003882 if (info.release != NULL)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003883 info.release(info.data);
3884 }
3885 return XML_ERROR_UNKNOWN_ENCODING;
3886}
3887
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003888static enum XML_Error PTRCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003889prologInitProcessor(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003890 const char *s,
3891 const char *end,
3892 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003893{
3894 enum XML_Error result = initializeEncoding(parser);
3895 if (result != XML_ERROR_NONE)
3896 return result;
3897 processor = prologProcessor;
3898 return prologProcessor(parser, s, end, nextPtr);
3899}
3900
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003901#ifdef XML_DTD
3902
3903static enum XML_Error PTRCALL
3904externalParEntInitProcessor(XML_Parser parser,
3905 const char *s,
3906 const char *end,
3907 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003908{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003909 enum XML_Error result = initializeEncoding(parser);
3910 if (result != XML_ERROR_NONE)
3911 return result;
3912
3913 /* we know now that XML_Parse(Buffer) has been called,
3914 so we consider the external parameter entity read */
3915 _dtd->paramEntityRead = XML_TRUE;
3916
3917 if (prologState.inEntityValue) {
3918 processor = entityValueInitProcessor;
3919 return entityValueInitProcessor(parser, s, end, nextPtr);
3920 }
3921 else {
3922 processor = externalParEntProcessor;
3923 return externalParEntProcessor(parser, s, end, nextPtr);
3924 }
3925}
3926
3927static enum XML_Error PTRCALL
3928entityValueInitProcessor(XML_Parser parser,
3929 const char *s,
3930 const char *end,
3931 const char **nextPtr)
3932{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003933 int tok;
Fred Drake31d485c2004-08-03 07:06:22 +00003934 const char *start = s;
3935 const char *next = start;
3936 eventPtr = start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003937
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003938 for (;;) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003939 tok = XmlPrologTok(encoding, start, end, &next);
Fred Drake31d485c2004-08-03 07:06:22 +00003940 eventEndPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003941 if (tok <= 0) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003942 if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
Fred Drake31d485c2004-08-03 07:06:22 +00003943 *nextPtr = s;
3944 return XML_ERROR_NONE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003945 }
3946 switch (tok) {
3947 case XML_TOK_INVALID:
Fred Drake31d485c2004-08-03 07:06:22 +00003948 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003949 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00003950 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003951 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003952 return XML_ERROR_PARTIAL_CHAR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003953 case XML_TOK_NONE: /* start == end */
3954 default:
3955 break;
3956 }
Fred Drake31d485c2004-08-03 07:06:22 +00003957 /* found end of entity value - can store it now */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003958 return storeEntityValue(parser, encoding, s, end);
3959 }
3960 else if (tok == XML_TOK_XML_DECL) {
Fred Drake31d485c2004-08-03 07:06:22 +00003961 enum XML_Error result;
3962 result = processXmlDecl(parser, 0, start, next);
3963 if (result != XML_ERROR_NONE)
3964 return result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003965 switch (ps_parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003966 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00003967 *nextPtr = next;
3968 return XML_ERROR_NONE;
3969 case XML_FINISHED:
3970 return XML_ERROR_ABORTED;
3971 default:
3972 *nextPtr = next;
3973 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003974 /* stop scanning for text declaration - we found one */
3975 processor = entityValueProcessor;
3976 return entityValueProcessor(parser, next, end, nextPtr);
3977 }
3978 /* If we are at the end of the buffer, this would cause XmlPrologTok to
3979 return XML_TOK_NONE on the next call, which would then cause the
3980 function to exit with *nextPtr set to s - that is what we want for other
3981 tokens, but not for the BOM - we would rather like to skip it;
3982 then, when this routine is entered the next time, XmlPrologTok will
3983 return XML_TOK_INVALID, since the BOM is still in the buffer
3984 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003985 else if (tok == XML_TOK_BOM && next == end && !ps_finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003986 *nextPtr = next;
3987 return XML_ERROR_NONE;
3988 }
Victor Stinner5ff71322017-06-21 14:39:22 +02003989 /* If we get this token, we have the start of what might be a
3990 normal tag, but not a declaration (i.e. it doesn't begin with
3991 "<!"). In a DTD context, that isn't legal.
3992 */
3993 else if (tok == XML_TOK_INSTANCE_START) {
3994 *nextPtr = next;
3995 return XML_ERROR_SYNTAX;
3996 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003997 start = next;
Fred Drake31d485c2004-08-03 07:06:22 +00003998 eventPtr = start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003999 }
4000}
4001
4002static enum XML_Error PTRCALL
4003externalParEntProcessor(XML_Parser parser,
4004 const char *s,
4005 const char *end,
4006 const char **nextPtr)
4007{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004008 const char *next = s;
4009 int tok;
4010
Fred Drake31d485c2004-08-03 07:06:22 +00004011 tok = XmlPrologTok(encoding, s, end, &next);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004012 if (tok <= 0) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004013 if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004014 *nextPtr = s;
4015 return XML_ERROR_NONE;
4016 }
4017 switch (tok) {
4018 case XML_TOK_INVALID:
4019 return XML_ERROR_INVALID_TOKEN;
4020 case XML_TOK_PARTIAL:
4021 return XML_ERROR_UNCLOSED_TOKEN;
4022 case XML_TOK_PARTIAL_CHAR:
4023 return XML_ERROR_PARTIAL_CHAR;
4024 case XML_TOK_NONE: /* start == end */
4025 default:
4026 break;
4027 }
4028 }
4029 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4030 However, when parsing an external subset, doProlog will not accept a BOM
4031 as valid, and report a syntax error, so we have to skip the BOM
4032 */
4033 else if (tok == XML_TOK_BOM) {
4034 s = next;
4035 tok = XmlPrologTok(encoding, s, end, &next);
4036 }
4037
4038 processor = prologProcessor;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004039 return doProlog(parser, encoding, s, end, tok, next,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004040 nextPtr, (XML_Bool)!ps_finalBuffer);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004041}
4042
4043static enum XML_Error PTRCALL
4044entityValueProcessor(XML_Parser parser,
4045 const char *s,
4046 const char *end,
4047 const char **nextPtr)
4048{
4049 const char *start = s;
4050 const char *next = s;
4051 const ENCODING *enc = encoding;
4052 int tok;
4053
4054 for (;;) {
4055 tok = XmlPrologTok(enc, start, end, &next);
4056 if (tok <= 0) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004057 if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004058 *nextPtr = s;
4059 return XML_ERROR_NONE;
4060 }
4061 switch (tok) {
4062 case XML_TOK_INVALID:
Fred Drake31d485c2004-08-03 07:06:22 +00004063 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004064 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00004065 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004066 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00004067 return XML_ERROR_PARTIAL_CHAR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004068 case XML_TOK_NONE: /* start == end */
4069 default:
4070 break;
4071 }
Fred Drake31d485c2004-08-03 07:06:22 +00004072 /* found end of entity value - can store it now */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004073 return storeEntityValue(parser, enc, s, end);
4074 }
4075 start = next;
4076 }
4077}
4078
4079#endif /* XML_DTD */
4080
4081static enum XML_Error PTRCALL
4082prologProcessor(XML_Parser parser,
4083 const char *s,
4084 const char *end,
4085 const char **nextPtr)
4086{
4087 const char *next = s;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004088 int tok = XmlPrologTok(encoding, s, end, &next);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004089 return doProlog(parser, encoding, s, end, tok, next,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004090 nextPtr, (XML_Bool)!ps_finalBuffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004091}
4092
4093static enum XML_Error
4094doProlog(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004095 const ENCODING *enc,
4096 const char *s,
4097 const char *end,
4098 int tok,
4099 const char *next,
Fred Drake31d485c2004-08-03 07:06:22 +00004100 const char **nextPtr,
4101 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004102{
4103#ifdef XML_DTD
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004104 static const XML_Char externalSubsetName[] = { ASCII_HASH , '\0' };
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004105#endif /* XML_DTD */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004106 static const XML_Char atypeCDATA[] =
4107 { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
4108 static const XML_Char atypeID[] = { ASCII_I, ASCII_D, '\0' };
4109 static const XML_Char atypeIDREF[] =
4110 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
4111 static const XML_Char atypeIDREFS[] =
4112 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
4113 static const XML_Char atypeENTITY[] =
4114 { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
4115 static const XML_Char atypeENTITIES[] = { ASCII_E, ASCII_N,
4116 ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' };
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004117 static const XML_Char atypeNMTOKEN[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004118 ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
4119 static const XML_Char atypeNMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T,
4120 ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' };
4121 static const XML_Char notationPrefix[] = { ASCII_N, ASCII_O, ASCII_T,
4122 ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0' };
4123 static const XML_Char enumValueSep[] = { ASCII_PIPE, '\0' };
4124 static const XML_Char enumValueStart[] = { ASCII_LPAREN, '\0' };
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004125
Fred Drake31d485c2004-08-03 07:06:22 +00004126 /* save one level of indirection */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004127 DTD * const dtd = _dtd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004128
4129 const char **eventPP;
4130 const char **eventEndPP;
4131 enum XML_Content_Quant quant;
4132
4133 if (enc == encoding) {
4134 eventPP = &eventPtr;
4135 eventEndPP = &eventEndPtr;
4136 }
4137 else {
4138 eventPP = &(openInternalEntities->internalEventPtr);
4139 eventEndPP = &(openInternalEntities->internalEventEndPtr);
4140 }
Fred Drake31d485c2004-08-03 07:06:22 +00004141
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004142 for (;;) {
4143 int role;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004144 XML_Bool handleDefault = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004145 *eventPP = s;
4146 *eventEndPP = next;
4147 if (tok <= 0) {
Fred Drake31d485c2004-08-03 07:06:22 +00004148 if (haveMore && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004149 *nextPtr = s;
4150 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004151 }
4152 switch (tok) {
4153 case XML_TOK_INVALID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004154 *eventPP = next;
4155 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004156 case XML_TOK_PARTIAL:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004157 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004158 case XML_TOK_PARTIAL_CHAR:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004159 return XML_ERROR_PARTIAL_CHAR;
Matthias Klose865e33b2010-01-22 01:13:15 +00004160 case -XML_TOK_PROLOG_S:
4161 tok = -tok;
4162 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004163 case XML_TOK_NONE:
4164#ifdef XML_DTD
Fred Drake31d485c2004-08-03 07:06:22 +00004165 /* for internal PE NOT referenced between declarations */
4166 if (enc != encoding && !openInternalEntities->betweenDecl) {
4167 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004168 return XML_ERROR_NONE;
Fred Drake31d485c2004-08-03 07:06:22 +00004169 }
4170 /* WFC: PE Between Declarations - must check that PE contains
4171 complete markup, not only for external PEs, but also for
4172 internal PEs if the reference occurs between declarations.
4173 */
4174 if (isParamEntity || enc != encoding) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004175 if (XmlTokenRole(&prologState, XML_TOK_NONE, end, end, enc)
4176 == XML_ROLE_ERROR)
Fred Drake31d485c2004-08-03 07:06:22 +00004177 return XML_ERROR_INCOMPLETE_PE;
4178 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004179 return XML_ERROR_NONE;
4180 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004181#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004182 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004183 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004184 tok = -tok;
4185 next = end;
4186 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004187 }
4188 }
4189 role = XmlTokenRole(&prologState, tok, s, next, enc);
4190 switch (role) {
4191 case XML_ROLE_XML_DECL:
4192 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004193 enum XML_Error result = processXmlDecl(parser, 0, s, next);
4194 if (result != XML_ERROR_NONE)
4195 return result;
4196 enc = encoding;
4197 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004198 }
4199 break;
4200 case XML_ROLE_DOCTYPE_NAME:
4201 if (startDoctypeDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004202 doctypeName = poolStoreString(&tempPool, enc, s, next);
4203 if (!doctypeName)
4204 return XML_ERROR_NO_MEMORY;
4205 poolFinish(&tempPool);
4206 doctypePubid = NULL;
4207 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004208 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004209 doctypeSysid = NULL; /* always initialize to NULL */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004210 break;
4211 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4212 if (startDoctypeDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004213 startDoctypeDeclHandler(handlerArg, doctypeName, doctypeSysid,
4214 doctypePubid, 1);
4215 doctypeName = NULL;
4216 poolClear(&tempPool);
4217 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004218 }
4219 break;
4220#ifdef XML_DTD
4221 case XML_ROLE_TEXT_DECL:
4222 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004223 enum XML_Error result = processXmlDecl(parser, 1, s, next);
4224 if (result != XML_ERROR_NONE)
4225 return result;
4226 enc = encoding;
4227 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004228 }
4229 break;
4230#endif /* XML_DTD */
4231 case XML_ROLE_DOCTYPE_PUBLIC_ID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004232#ifdef XML_DTD
4233 useForeignDTD = XML_FALSE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004234 declEntity = (ENTITY *)lookup(parser,
4235 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004236 externalSubsetName,
4237 sizeof(ENTITY));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004238 if (!declEntity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004239 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004240#endif /* XML_DTD */
Fred Drake31d485c2004-08-03 07:06:22 +00004241 dtd->hasParamEntityRefs = XML_TRUE;
4242 if (startDoctypeDeclHandler) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004243 XML_Char *pubId;
Fred Drake31d485c2004-08-03 07:06:22 +00004244 if (!XmlIsPublicId(enc, s, next, eventPP))
4245 return XML_ERROR_PUBLICID;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004246 pubId = poolStoreString(&tempPool, enc,
4247 s + enc->minBytesPerChar,
4248 next - enc->minBytesPerChar);
4249 if (!pubId)
Fred Drake31d485c2004-08-03 07:06:22 +00004250 return XML_ERROR_NO_MEMORY;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004251 normalizePublicId(pubId);
Fred Drake31d485c2004-08-03 07:06:22 +00004252 poolFinish(&tempPool);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004253 doctypePubid = pubId;
Fred Drake31d485c2004-08-03 07:06:22 +00004254 handleDefault = XML_FALSE;
4255 goto alreadyChecked;
4256 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004257 /* fall through */
4258 case XML_ROLE_ENTITY_PUBLIC_ID:
4259 if (!XmlIsPublicId(enc, s, next, eventPP))
Fred Drake31d485c2004-08-03 07:06:22 +00004260 return XML_ERROR_PUBLICID;
4261 alreadyChecked:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004262 if (dtd->keepProcessing && declEntity) {
4263 XML_Char *tem = poolStoreString(&dtd->pool,
4264 enc,
4265 s + enc->minBytesPerChar,
4266 next - enc->minBytesPerChar);
4267 if (!tem)
4268 return XML_ERROR_NO_MEMORY;
4269 normalizePublicId(tem);
4270 declEntity->publicId = tem;
4271 poolFinish(&dtd->pool);
4272 if (entityDeclHandler)
4273 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004274 }
4275 break;
4276 case XML_ROLE_DOCTYPE_CLOSE:
4277 if (doctypeName) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004278 startDoctypeDeclHandler(handlerArg, doctypeName,
4279 doctypeSysid, doctypePubid, 0);
4280 poolClear(&tempPool);
4281 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004282 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004283 /* doctypeSysid will be non-NULL in the case of a previous
4284 XML_ROLE_DOCTYPE_SYSTEM_ID, even if startDoctypeDeclHandler
4285 was not set, indicating an external subset
4286 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004287#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004288 if (doctypeSysid || useForeignDTD) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004289 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4290 dtd->hasParamEntityRefs = XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004291 if (paramEntityParsing && externalEntityRefHandler) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004292 ENTITY *entity = (ENTITY *)lookup(parser,
4293 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004294 externalSubsetName,
4295 sizeof(ENTITY));
4296 if (!entity)
4297 return XML_ERROR_NO_MEMORY;
4298 if (useForeignDTD)
4299 entity->base = curBase;
4300 dtd->paramEntityRead = XML_FALSE;
4301 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
4302 0,
4303 entity->base,
4304 entity->systemId,
4305 entity->publicId))
4306 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004307 if (dtd->paramEntityRead) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004308 if (!dtd->standalone &&
4309 notStandaloneHandler &&
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004310 !notStandaloneHandler(handlerArg))
4311 return XML_ERROR_NOT_STANDALONE;
4312 }
4313 /* if we didn't read the foreign DTD then this means that there
4314 is no external subset and we must reset dtd->hasParamEntityRefs
4315 */
4316 else if (!doctypeSysid)
4317 dtd->hasParamEntityRefs = hadParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004318 /* end of DTD - no need to update dtd->keepProcessing */
4319 }
4320 useForeignDTD = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004321 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004322#endif /* XML_DTD */
4323 if (endDoctypeDeclHandler) {
4324 endDoctypeDeclHandler(handlerArg);
4325 handleDefault = XML_FALSE;
4326 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004327 break;
4328 case XML_ROLE_INSTANCE_START:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004329#ifdef XML_DTD
4330 /* if there is no DOCTYPE declaration then now is the
4331 last chance to read the foreign DTD
4332 */
4333 if (useForeignDTD) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004334 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004335 dtd->hasParamEntityRefs = XML_TRUE;
4336 if (paramEntityParsing && externalEntityRefHandler) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004337 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004338 externalSubsetName,
4339 sizeof(ENTITY));
4340 if (!entity)
4341 return XML_ERROR_NO_MEMORY;
4342 entity->base = curBase;
4343 dtd->paramEntityRead = XML_FALSE;
4344 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
4345 0,
4346 entity->base,
4347 entity->systemId,
4348 entity->publicId))
4349 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004350 if (dtd->paramEntityRead) {
4351 if (!dtd->standalone &&
4352 notStandaloneHandler &&
4353 !notStandaloneHandler(handlerArg))
4354 return XML_ERROR_NOT_STANDALONE;
4355 }
4356 /* if we didn't read the foreign DTD then this means that there
4357 is no external subset and we must reset dtd->hasParamEntityRefs
4358 */
4359 else
4360 dtd->hasParamEntityRefs = hadParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004361 /* end of DTD - no need to update dtd->keepProcessing */
4362 }
4363 }
4364#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004365 processor = contentProcessor;
4366 return contentProcessor(parser, s, end, nextPtr);
4367 case XML_ROLE_ATTLIST_ELEMENT_NAME:
4368 declElementType = getElementType(parser, enc, s, next);
4369 if (!declElementType)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004370 return XML_ERROR_NO_MEMORY;
4371 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004372 case XML_ROLE_ATTRIBUTE_NAME:
4373 declAttributeId = getAttributeId(parser, enc, s, next);
4374 if (!declAttributeId)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004375 return XML_ERROR_NO_MEMORY;
4376 declAttributeIsCdata = XML_FALSE;
4377 declAttributeType = NULL;
4378 declAttributeIsId = XML_FALSE;
4379 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004380 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004381 declAttributeIsCdata = XML_TRUE;
4382 declAttributeType = atypeCDATA;
4383 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004384 case XML_ROLE_ATTRIBUTE_TYPE_ID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004385 declAttributeIsId = XML_TRUE;
4386 declAttributeType = atypeID;
4387 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004388 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004389 declAttributeType = atypeIDREF;
4390 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004391 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004392 declAttributeType = atypeIDREFS;
4393 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004394 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004395 declAttributeType = atypeENTITY;
4396 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004397 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004398 declAttributeType = atypeENTITIES;
4399 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004400 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004401 declAttributeType = atypeNMTOKEN;
4402 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004403 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004404 declAttributeType = atypeNMTOKENS;
4405 checkAttListDeclHandler:
4406 if (dtd->keepProcessing && attlistDeclHandler)
4407 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004408 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004409 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
4410 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004411 if (dtd->keepProcessing && attlistDeclHandler) {
4412 const XML_Char *prefix;
4413 if (declAttributeType) {
4414 prefix = enumValueSep;
4415 }
4416 else {
4417 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE
4418 ? notationPrefix
4419 : enumValueStart);
4420 }
4421 if (!poolAppendString(&tempPool, prefix))
4422 return XML_ERROR_NO_MEMORY;
4423 if (!poolAppend(&tempPool, enc, s, next))
4424 return XML_ERROR_NO_MEMORY;
4425 declAttributeType = tempPool.start;
4426 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004427 }
4428 break;
4429 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
4430 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004431 if (dtd->keepProcessing) {
4432 if (!defineAttribute(declElementType, declAttributeId,
Fred Drake08317ae2003-10-21 15:38:55 +00004433 declAttributeIsCdata, declAttributeIsId,
4434 0, parser))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004435 return XML_ERROR_NO_MEMORY;
4436 if (attlistDeclHandler && declAttributeType) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004437 if (*declAttributeType == XML_T(ASCII_LPAREN)
4438 || (*declAttributeType == XML_T(ASCII_N)
4439 && declAttributeType[1] == XML_T(ASCII_O))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004440 /* Enumerated or Notation type */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004441 if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004442 || !poolAppendChar(&tempPool, XML_T('\0')))
4443 return XML_ERROR_NO_MEMORY;
4444 declAttributeType = tempPool.start;
4445 poolFinish(&tempPool);
4446 }
4447 *eventEndPP = s;
4448 attlistDeclHandler(handlerArg, declElementType->name,
4449 declAttributeId->name, declAttributeType,
4450 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
4451 poolClear(&tempPool);
4452 handleDefault = XML_FALSE;
4453 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004454 }
4455 break;
4456 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
4457 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004458 if (dtd->keepProcessing) {
4459 const XML_Char *attVal;
Fred Drake08317ae2003-10-21 15:38:55 +00004460 enum XML_Error result =
4461 storeAttributeValue(parser, enc, declAttributeIsCdata,
4462 s + enc->minBytesPerChar,
4463 next - enc->minBytesPerChar,
4464 &dtd->pool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004465 if (result)
4466 return result;
4467 attVal = poolStart(&dtd->pool);
4468 poolFinish(&dtd->pool);
4469 /* ID attributes aren't allowed to have a default */
4470 if (!defineAttribute(declElementType, declAttributeId,
4471 declAttributeIsCdata, XML_FALSE, attVal, parser))
4472 return XML_ERROR_NO_MEMORY;
4473 if (attlistDeclHandler && declAttributeType) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004474 if (*declAttributeType == XML_T(ASCII_LPAREN)
4475 || (*declAttributeType == XML_T(ASCII_N)
4476 && declAttributeType[1] == XML_T(ASCII_O))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004477 /* Enumerated or Notation type */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004478 if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004479 || !poolAppendChar(&tempPool, XML_T('\0')))
4480 return XML_ERROR_NO_MEMORY;
4481 declAttributeType = tempPool.start;
4482 poolFinish(&tempPool);
4483 }
4484 *eventEndPP = s;
4485 attlistDeclHandler(handlerArg, declElementType->name,
4486 declAttributeId->name, declAttributeType,
4487 attVal,
4488 role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
4489 poolClear(&tempPool);
4490 handleDefault = XML_FALSE;
4491 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004492 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004493 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004494 case XML_ROLE_ENTITY_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004495 if (dtd->keepProcessing) {
4496 enum XML_Error result = storeEntityValue(parser, enc,
4497 s + enc->minBytesPerChar,
4498 next - enc->minBytesPerChar);
4499 if (declEntity) {
4500 declEntity->textPtr = poolStart(&dtd->entityValuePool);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004501 declEntity->textLen = (int)(poolLength(&dtd->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004502 poolFinish(&dtd->entityValuePool);
4503 if (entityDeclHandler) {
4504 *eventEndPP = s;
4505 entityDeclHandler(handlerArg,
4506 declEntity->name,
4507 declEntity->is_param,
4508 declEntity->textPtr,
4509 declEntity->textLen,
4510 curBase, 0, 0, 0);
4511 handleDefault = XML_FALSE;
4512 }
4513 }
4514 else
4515 poolDiscard(&dtd->entityValuePool);
4516 if (result != XML_ERROR_NONE)
4517 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004518 }
4519 break;
4520 case XML_ROLE_DOCTYPE_SYSTEM_ID:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004521#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004522 useForeignDTD = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004523#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004524 dtd->hasParamEntityRefs = XML_TRUE;
4525 if (startDoctypeDeclHandler) {
4526 doctypeSysid = poolStoreString(&tempPool, enc,
4527 s + enc->minBytesPerChar,
4528 next - enc->minBytesPerChar);
4529 if (doctypeSysid == NULL)
4530 return XML_ERROR_NO_MEMORY;
4531 poolFinish(&tempPool);
4532 handleDefault = XML_FALSE;
4533 }
4534#ifdef XML_DTD
4535 else
4536 /* use externalSubsetName to make doctypeSysid non-NULL
4537 for the case where no startDoctypeDeclHandler is set */
4538 doctypeSysid = externalSubsetName;
4539#endif /* XML_DTD */
4540 if (!dtd->standalone
4541#ifdef XML_DTD
4542 && !paramEntityParsing
4543#endif /* XML_DTD */
4544 && notStandaloneHandler
4545 && !notStandaloneHandler(handlerArg))
4546 return XML_ERROR_NOT_STANDALONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004547#ifndef XML_DTD
4548 break;
4549#else /* XML_DTD */
4550 if (!declEntity) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004551 declEntity = (ENTITY *)lookup(parser,
4552 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004553 externalSubsetName,
4554 sizeof(ENTITY));
4555 if (!declEntity)
4556 return XML_ERROR_NO_MEMORY;
4557 declEntity->publicId = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004558 }
4559 /* fall through */
4560#endif /* XML_DTD */
4561 case XML_ROLE_ENTITY_SYSTEM_ID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004562 if (dtd->keepProcessing && declEntity) {
4563 declEntity->systemId = poolStoreString(&dtd->pool, enc,
4564 s + enc->minBytesPerChar,
4565 next - enc->minBytesPerChar);
4566 if (!declEntity->systemId)
4567 return XML_ERROR_NO_MEMORY;
4568 declEntity->base = curBase;
4569 poolFinish(&dtd->pool);
4570 if (entityDeclHandler)
4571 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004572 }
4573 break;
4574 case XML_ROLE_ENTITY_COMPLETE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004575 if (dtd->keepProcessing && declEntity && entityDeclHandler) {
4576 *eventEndPP = s;
4577 entityDeclHandler(handlerArg,
4578 declEntity->name,
4579 declEntity->is_param,
4580 0,0,
4581 declEntity->base,
4582 declEntity->systemId,
4583 declEntity->publicId,
4584 0);
4585 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004586 }
4587 break;
4588 case XML_ROLE_ENTITY_NOTATION_NAME:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004589 if (dtd->keepProcessing && declEntity) {
4590 declEntity->notation = poolStoreString(&dtd->pool, enc, s, next);
4591 if (!declEntity->notation)
4592 return XML_ERROR_NO_MEMORY;
4593 poolFinish(&dtd->pool);
4594 if (unparsedEntityDeclHandler) {
4595 *eventEndPP = s;
4596 unparsedEntityDeclHandler(handlerArg,
4597 declEntity->name,
4598 declEntity->base,
4599 declEntity->systemId,
4600 declEntity->publicId,
4601 declEntity->notation);
4602 handleDefault = XML_FALSE;
4603 }
4604 else if (entityDeclHandler) {
4605 *eventEndPP = s;
4606 entityDeclHandler(handlerArg,
4607 declEntity->name,
4608 0,0,0,
4609 declEntity->base,
4610 declEntity->systemId,
4611 declEntity->publicId,
4612 declEntity->notation);
4613 handleDefault = XML_FALSE;
4614 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004615 }
4616 break;
4617 case XML_ROLE_GENERAL_ENTITY_NAME:
4618 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004619 if (XmlPredefinedEntityName(enc, s, next)) {
4620 declEntity = NULL;
4621 break;
4622 }
4623 if (dtd->keepProcessing) {
4624 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4625 if (!name)
4626 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004627 declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004628 sizeof(ENTITY));
4629 if (!declEntity)
4630 return XML_ERROR_NO_MEMORY;
4631 if (declEntity->name != name) {
4632 poolDiscard(&dtd->pool);
4633 declEntity = NULL;
4634 }
4635 else {
4636 poolFinish(&dtd->pool);
4637 declEntity->publicId = NULL;
4638 declEntity->is_param = XML_FALSE;
4639 /* if we have a parent parser or are reading an internal parameter
4640 entity, then the entity declaration is not considered "internal"
4641 */
4642 declEntity->is_internal = !(parentParser || openInternalEntities);
4643 if (entityDeclHandler)
4644 handleDefault = XML_FALSE;
4645 }
4646 }
4647 else {
4648 poolDiscard(&dtd->pool);
4649 declEntity = NULL;
4650 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004651 }
4652 break;
4653 case XML_ROLE_PARAM_ENTITY_NAME:
4654#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004655 if (dtd->keepProcessing) {
4656 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4657 if (!name)
4658 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004659 declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004660 name, sizeof(ENTITY));
4661 if (!declEntity)
4662 return XML_ERROR_NO_MEMORY;
4663 if (declEntity->name != name) {
4664 poolDiscard(&dtd->pool);
4665 declEntity = NULL;
4666 }
4667 else {
4668 poolFinish(&dtd->pool);
4669 declEntity->publicId = NULL;
4670 declEntity->is_param = XML_TRUE;
4671 /* if we have a parent parser or are reading an internal parameter
4672 entity, then the entity declaration is not considered "internal"
4673 */
4674 declEntity->is_internal = !(parentParser || openInternalEntities);
4675 if (entityDeclHandler)
4676 handleDefault = XML_FALSE;
4677 }
4678 }
4679 else {
4680 poolDiscard(&dtd->pool);
4681 declEntity = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004682 }
4683#else /* not XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004684 declEntity = NULL;
4685#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004686 break;
4687 case XML_ROLE_NOTATION_NAME:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004688 declNotationPublicId = NULL;
4689 declNotationName = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004690 if (notationDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004691 declNotationName = poolStoreString(&tempPool, enc, s, next);
4692 if (!declNotationName)
4693 return XML_ERROR_NO_MEMORY;
4694 poolFinish(&tempPool);
4695 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004696 }
4697 break;
4698 case XML_ROLE_NOTATION_PUBLIC_ID:
4699 if (!XmlIsPublicId(enc, s, next, eventPP))
Fred Drake31d485c2004-08-03 07:06:22 +00004700 return XML_ERROR_PUBLICID;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004701 if (declNotationName) { /* means notationDeclHandler != NULL */
4702 XML_Char *tem = poolStoreString(&tempPool,
4703 enc,
4704 s + enc->minBytesPerChar,
4705 next - enc->minBytesPerChar);
4706 if (!tem)
4707 return XML_ERROR_NO_MEMORY;
4708 normalizePublicId(tem);
4709 declNotationPublicId = tem;
4710 poolFinish(&tempPool);
4711 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004712 }
4713 break;
4714 case XML_ROLE_NOTATION_SYSTEM_ID:
4715 if (declNotationName && notationDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004716 const XML_Char *systemId
4717 = poolStoreString(&tempPool, enc,
4718 s + enc->minBytesPerChar,
4719 next - enc->minBytesPerChar);
4720 if (!systemId)
4721 return XML_ERROR_NO_MEMORY;
4722 *eventEndPP = s;
4723 notationDeclHandler(handlerArg,
4724 declNotationName,
4725 curBase,
4726 systemId,
4727 declNotationPublicId);
4728 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004729 }
4730 poolClear(&tempPool);
4731 break;
4732 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
4733 if (declNotationPublicId && notationDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004734 *eventEndPP = s;
4735 notationDeclHandler(handlerArg,
4736 declNotationName,
4737 curBase,
4738 0,
4739 declNotationPublicId);
4740 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004741 }
4742 poolClear(&tempPool);
4743 break;
4744 case XML_ROLE_ERROR:
4745 switch (tok) {
4746 case XML_TOK_PARAM_ENTITY_REF:
Fred Drake31d485c2004-08-03 07:06:22 +00004747 /* PE references in internal subset are
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004748 not allowed within declarations. */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004749 return XML_ERROR_PARAM_ENTITY_REF;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004750 case XML_TOK_XML_DECL:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004751 return XML_ERROR_MISPLACED_XML_PI;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004752 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004753 return XML_ERROR_SYNTAX;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004754 }
4755#ifdef XML_DTD
4756 case XML_ROLE_IGNORE_SECT:
4757 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004758 enum XML_Error result;
4759 if (defaultHandler)
4760 reportDefault(parser, enc, s, next);
4761 handleDefault = XML_FALSE;
Fred Drake31d485c2004-08-03 07:06:22 +00004762 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
4763 if (result != XML_ERROR_NONE)
4764 return result;
4765 else if (!next) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004766 processor = ignoreSectionProcessor;
4767 return result;
4768 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004769 }
4770 break;
4771#endif /* XML_DTD */
4772 case XML_ROLE_GROUP_OPEN:
4773 if (prologState.level >= groupSize) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004774 if (groupSize) {
4775 char *temp = (char *)REALLOC(groupConnector, groupSize *= 2);
4776 if (temp == NULL)
4777 return XML_ERROR_NO_MEMORY;
4778 groupConnector = temp;
4779 if (dtd->scaffIndex) {
4780 int *temp = (int *)REALLOC(dtd->scaffIndex,
4781 groupSize * sizeof(int));
4782 if (temp == NULL)
4783 return XML_ERROR_NO_MEMORY;
4784 dtd->scaffIndex = temp;
4785 }
4786 }
4787 else {
4788 groupConnector = (char *)MALLOC(groupSize = 32);
4789 if (!groupConnector)
4790 return XML_ERROR_NO_MEMORY;
4791 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004792 }
4793 groupConnector[prologState.level] = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004794 if (dtd->in_eldecl) {
4795 int myindex = nextScaffoldPart(parser);
4796 if (myindex < 0)
4797 return XML_ERROR_NO_MEMORY;
4798 dtd->scaffIndex[dtd->scaffLevel] = myindex;
4799 dtd->scaffLevel++;
4800 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
4801 if (elementDeclHandler)
4802 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004803 }
4804 break;
4805 case XML_ROLE_GROUP_SEQUENCE:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004806 if (groupConnector[prologState.level] == ASCII_PIPE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004807 return XML_ERROR_SYNTAX;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004808 groupConnector[prologState.level] = ASCII_COMMA;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004809 if (dtd->in_eldecl && elementDeclHandler)
4810 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004811 break;
4812 case XML_ROLE_GROUP_CHOICE:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004813 if (groupConnector[prologState.level] == ASCII_COMMA)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004814 return XML_ERROR_SYNTAX;
4815 if (dtd->in_eldecl
4816 && !groupConnector[prologState.level]
4817 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
4818 != XML_CTYPE_MIXED)
4819 ) {
4820 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
4821 = XML_CTYPE_CHOICE;
4822 if (elementDeclHandler)
4823 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004824 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004825 groupConnector[prologState.level] = ASCII_PIPE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004826 break;
4827 case XML_ROLE_PARAM_ENTITY_REF:
4828#ifdef XML_DTD
4829 case XML_ROLE_INNER_PARAM_ENTITY_REF:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004830 dtd->hasParamEntityRefs = XML_TRUE;
4831 if (!paramEntityParsing)
4832 dtd->keepProcessing = dtd->standalone;
4833 else {
4834 const XML_Char *name;
4835 ENTITY *entity;
4836 name = poolStoreString(&dtd->pool, enc,
4837 s + enc->minBytesPerChar,
4838 next - enc->minBytesPerChar);
4839 if (!name)
4840 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004841 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004842 poolDiscard(&dtd->pool);
4843 /* first, determine if a check for an existing declaration is needed;
4844 if yes, check that the entity exists, and that it is internal,
4845 otherwise call the skipped entity handler
4846 */
4847 if (prologState.documentEntity &&
4848 (dtd->standalone
4849 ? !openInternalEntities
4850 : !dtd->hasParamEntityRefs)) {
4851 if (!entity)
4852 return XML_ERROR_UNDEFINED_ENTITY;
4853 else if (!entity->is_internal)
4854 return XML_ERROR_ENTITY_DECLARED_IN_PE;
4855 }
4856 else if (!entity) {
4857 dtd->keepProcessing = dtd->standalone;
4858 /* cannot report skipped entities in declarations */
4859 if ((role == XML_ROLE_PARAM_ENTITY_REF) && skippedEntityHandler) {
4860 skippedEntityHandler(handlerArg, name, 1);
4861 handleDefault = XML_FALSE;
4862 }
4863 break;
4864 }
4865 if (entity->open)
4866 return XML_ERROR_RECURSIVE_ENTITY_REF;
4867 if (entity->textPtr) {
4868 enum XML_Error result;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004869 XML_Bool betweenDecl =
Fred Drake31d485c2004-08-03 07:06:22 +00004870 (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
4871 result = processInternalEntity(parser, entity, betweenDecl);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004872 if (result != XML_ERROR_NONE)
4873 return result;
4874 handleDefault = XML_FALSE;
4875 break;
4876 }
4877 if (externalEntityRefHandler) {
4878 dtd->paramEntityRead = XML_FALSE;
4879 entity->open = XML_TRUE;
4880 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
4881 0,
4882 entity->base,
4883 entity->systemId,
4884 entity->publicId)) {
4885 entity->open = XML_FALSE;
4886 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4887 }
4888 entity->open = XML_FALSE;
4889 handleDefault = XML_FALSE;
4890 if (!dtd->paramEntityRead) {
4891 dtd->keepProcessing = dtd->standalone;
4892 break;
4893 }
4894 }
4895 else {
4896 dtd->keepProcessing = dtd->standalone;
4897 break;
4898 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004899 }
4900#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004901 if (!dtd->standalone &&
4902 notStandaloneHandler &&
4903 !notStandaloneHandler(handlerArg))
4904 return XML_ERROR_NOT_STANDALONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004905 break;
4906
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004907 /* Element declaration stuff */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004908
4909 case XML_ROLE_ELEMENT_NAME:
4910 if (elementDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004911 declElementType = getElementType(parser, enc, s, next);
4912 if (!declElementType)
4913 return XML_ERROR_NO_MEMORY;
4914 dtd->scaffLevel = 0;
4915 dtd->scaffCount = 0;
4916 dtd->in_eldecl = XML_TRUE;
4917 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004918 }
4919 break;
4920
4921 case XML_ROLE_CONTENT_ANY:
4922 case XML_ROLE_CONTENT_EMPTY:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004923 if (dtd->in_eldecl) {
4924 if (elementDeclHandler) {
4925 XML_Content * content = (XML_Content *) MALLOC(sizeof(XML_Content));
4926 if (!content)
4927 return XML_ERROR_NO_MEMORY;
4928 content->quant = XML_CQUANT_NONE;
4929 content->name = NULL;
4930 content->numchildren = 0;
4931 content->children = NULL;
4932 content->type = ((role == XML_ROLE_CONTENT_ANY) ?
4933 XML_CTYPE_ANY :
4934 XML_CTYPE_EMPTY);
4935 *eventEndPP = s;
4936 elementDeclHandler(handlerArg, declElementType->name, content);
4937 handleDefault = XML_FALSE;
4938 }
4939 dtd->in_eldecl = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004940 }
4941 break;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004942
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004943 case XML_ROLE_CONTENT_PCDATA:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004944 if (dtd->in_eldecl) {
4945 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
4946 = XML_CTYPE_MIXED;
4947 if (elementDeclHandler)
4948 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004949 }
4950 break;
4951
4952 case XML_ROLE_CONTENT_ELEMENT:
4953 quant = XML_CQUANT_NONE;
4954 goto elementContent;
4955 case XML_ROLE_CONTENT_ELEMENT_OPT:
4956 quant = XML_CQUANT_OPT;
4957 goto elementContent;
4958 case XML_ROLE_CONTENT_ELEMENT_REP:
4959 quant = XML_CQUANT_REP;
4960 goto elementContent;
4961 case XML_ROLE_CONTENT_ELEMENT_PLUS:
4962 quant = XML_CQUANT_PLUS;
4963 elementContent:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004964 if (dtd->in_eldecl) {
4965 ELEMENT_TYPE *el;
4966 const XML_Char *name;
4967 int nameLen;
4968 const char *nxt = (quant == XML_CQUANT_NONE
4969 ? next
4970 : next - enc->minBytesPerChar);
4971 int myindex = nextScaffoldPart(parser);
4972 if (myindex < 0)
4973 return XML_ERROR_NO_MEMORY;
4974 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
4975 dtd->scaffold[myindex].quant = quant;
4976 el = getElementType(parser, enc, s, nxt);
4977 if (!el)
4978 return XML_ERROR_NO_MEMORY;
4979 name = el->name;
4980 dtd->scaffold[myindex].name = name;
4981 nameLen = 0;
4982 for (; name[nameLen++]; );
4983 dtd->contentStringLen += nameLen;
4984 if (elementDeclHandler)
4985 handleDefault = XML_FALSE;
4986 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004987 break;
4988
4989 case XML_ROLE_GROUP_CLOSE:
4990 quant = XML_CQUANT_NONE;
4991 goto closeGroup;
4992 case XML_ROLE_GROUP_CLOSE_OPT:
4993 quant = XML_CQUANT_OPT;
4994 goto closeGroup;
4995 case XML_ROLE_GROUP_CLOSE_REP:
4996 quant = XML_CQUANT_REP;
4997 goto closeGroup;
4998 case XML_ROLE_GROUP_CLOSE_PLUS:
4999 quant = XML_CQUANT_PLUS;
5000 closeGroup:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005001 if (dtd->in_eldecl) {
5002 if (elementDeclHandler)
5003 handleDefault = XML_FALSE;
5004 dtd->scaffLevel--;
5005 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5006 if (dtd->scaffLevel == 0) {
5007 if (!handleDefault) {
5008 XML_Content *model = build_model(parser);
5009 if (!model)
5010 return XML_ERROR_NO_MEMORY;
5011 *eventEndPP = s;
5012 elementDeclHandler(handlerArg, declElementType->name, model);
5013 }
5014 dtd->in_eldecl = XML_FALSE;
5015 dtd->contentStringLen = 0;
5016 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005017 }
5018 break;
5019 /* End element declaration stuff */
5020
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005021 case XML_ROLE_PI:
5022 if (!reportProcessingInstruction(parser, enc, s, next))
5023 return XML_ERROR_NO_MEMORY;
5024 handleDefault = XML_FALSE;
5025 break;
5026 case XML_ROLE_COMMENT:
5027 if (!reportComment(parser, enc, s, next))
5028 return XML_ERROR_NO_MEMORY;
5029 handleDefault = XML_FALSE;
5030 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005031 case XML_ROLE_NONE:
5032 switch (tok) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005033 case XML_TOK_BOM:
5034 handleDefault = XML_FALSE;
5035 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005036 }
5037 break;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005038 case XML_ROLE_DOCTYPE_NONE:
5039 if (startDoctypeDeclHandler)
5040 handleDefault = XML_FALSE;
5041 break;
5042 case XML_ROLE_ENTITY_NONE:
5043 if (dtd->keepProcessing && entityDeclHandler)
5044 handleDefault = XML_FALSE;
5045 break;
5046 case XML_ROLE_NOTATION_NONE:
5047 if (notationDeclHandler)
5048 handleDefault = XML_FALSE;
5049 break;
5050 case XML_ROLE_ATTLIST_NONE:
5051 if (dtd->keepProcessing && attlistDeclHandler)
5052 handleDefault = XML_FALSE;
5053 break;
5054 case XML_ROLE_ELEMENT_NONE:
5055 if (elementDeclHandler)
5056 handleDefault = XML_FALSE;
5057 break;
5058 } /* end of big switch */
5059
5060 if (handleDefault && defaultHandler)
5061 reportDefault(parser, enc, s, next);
5062
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005063 switch (ps_parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005064 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00005065 *nextPtr = next;
5066 return XML_ERROR_NONE;
5067 case XML_FINISHED:
5068 return XML_ERROR_ABORTED;
5069 default:
5070 s = next;
5071 tok = XmlPrologTok(enc, s, end, &next);
5072 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005073 }
5074 /* not reached */
5075}
5076
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005077static enum XML_Error PTRCALL
5078epilogProcessor(XML_Parser parser,
5079 const char *s,
5080 const char *end,
5081 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005082{
5083 processor = epilogProcessor;
5084 eventPtr = s;
5085 for (;;) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005086 const char *next = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005087 int tok = XmlPrologTok(encoding, s, end, &next);
5088 eventEndPtr = next;
5089 switch (tok) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005090 /* report partial linebreak - it might be the last token */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005091 case -XML_TOK_PROLOG_S:
5092 if (defaultHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005093 reportDefault(parser, encoding, s, next);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005094 if (ps_parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00005095 return XML_ERROR_ABORTED;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005096 }
Fred Drake31d485c2004-08-03 07:06:22 +00005097 *nextPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005098 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005099 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00005100 *nextPtr = s;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005101 return XML_ERROR_NONE;
5102 case XML_TOK_PROLOG_S:
5103 if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005104 reportDefault(parser, encoding, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005105 break;
5106 case XML_TOK_PI:
5107 if (!reportProcessingInstruction(parser, encoding, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005108 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005109 break;
5110 case XML_TOK_COMMENT:
5111 if (!reportComment(parser, encoding, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005112 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005113 break;
5114 case XML_TOK_INVALID:
5115 eventPtr = next;
5116 return XML_ERROR_INVALID_TOKEN;
5117 case XML_TOK_PARTIAL:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005118 if (!ps_finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005119 *nextPtr = s;
5120 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005121 }
5122 return XML_ERROR_UNCLOSED_TOKEN;
5123 case XML_TOK_PARTIAL_CHAR:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005124 if (!ps_finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005125 *nextPtr = s;
5126 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005127 }
5128 return XML_ERROR_PARTIAL_CHAR;
5129 default:
5130 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5131 }
5132 eventPtr = s = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005133 switch (ps_parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005134 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00005135 *nextPtr = next;
5136 return XML_ERROR_NONE;
5137 case XML_FINISHED:
5138 return XML_ERROR_ABORTED;
5139 default: ;
5140 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005141 }
5142}
5143
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005144static enum XML_Error
Fred Drake31d485c2004-08-03 07:06:22 +00005145processInternalEntity(XML_Parser parser, ENTITY *entity,
5146 XML_Bool betweenDecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005147{
Fred Drake31d485c2004-08-03 07:06:22 +00005148 const char *textStart, *textEnd;
5149 const char *next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005150 enum XML_Error result;
Fred Drake31d485c2004-08-03 07:06:22 +00005151 OPEN_INTERNAL_ENTITY *openEntity;
5152
5153 if (freeInternalEntities) {
5154 openEntity = freeInternalEntities;
5155 freeInternalEntities = openEntity->next;
5156 }
5157 else {
5158 openEntity = (OPEN_INTERNAL_ENTITY *)MALLOC(sizeof(OPEN_INTERNAL_ENTITY));
5159 if (!openEntity)
5160 return XML_ERROR_NO_MEMORY;
5161 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005162 entity->open = XML_TRUE;
Fred Drake31d485c2004-08-03 07:06:22 +00005163 entity->processed = 0;
5164 openEntity->next = openInternalEntities;
5165 openInternalEntities = openEntity;
5166 openEntity->entity = entity;
5167 openEntity->startTagLevel = tagLevel;
5168 openEntity->betweenDecl = betweenDecl;
5169 openEntity->internalEventPtr = NULL;
5170 openEntity->internalEventEndPtr = NULL;
5171 textStart = (char *)entity->textPtr;
5172 textEnd = (char *)(entity->textPtr + entity->textLen);
Victor Stinner5ff71322017-06-21 14:39:22 +02005173 /* Set a safe default value in case 'next' does not get set */
5174 next = textStart;
Fred Drake31d485c2004-08-03 07:06:22 +00005175
5176#ifdef XML_DTD
5177 if (entity->is_param) {
5178 int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005179 result = doProlog(parser, internalEncoding, textStart, textEnd, tok,
Fred Drake31d485c2004-08-03 07:06:22 +00005180 next, &next, XML_FALSE);
5181 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005182 else
Fred Drake31d485c2004-08-03 07:06:22 +00005183#endif /* XML_DTD */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005184 result = doContent(parser, tagLevel, internalEncoding, textStart,
Fred Drake31d485c2004-08-03 07:06:22 +00005185 textEnd, &next, XML_FALSE);
5186
5187 if (result == XML_ERROR_NONE) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005188 if (textEnd != next && ps_parsing == XML_SUSPENDED) {
5189 entity->processed = (int)(next - textStart);
Fred Drake31d485c2004-08-03 07:06:22 +00005190 processor = internalEntityProcessor;
5191 }
5192 else {
5193 entity->open = XML_FALSE;
5194 openInternalEntities = openEntity->next;
5195 /* put openEntity back in list of free instances */
5196 openEntity->next = freeInternalEntities;
5197 freeInternalEntities = openEntity;
5198 }
5199 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005200 return result;
5201}
5202
Fred Drake31d485c2004-08-03 07:06:22 +00005203static enum XML_Error PTRCALL
5204internalEntityProcessor(XML_Parser parser,
5205 const char *s,
5206 const char *end,
5207 const char **nextPtr)
5208{
5209 ENTITY *entity;
5210 const char *textStart, *textEnd;
5211 const char *next;
5212 enum XML_Error result;
5213 OPEN_INTERNAL_ENTITY *openEntity = openInternalEntities;
5214 if (!openEntity)
5215 return XML_ERROR_UNEXPECTED_STATE;
5216
5217 entity = openEntity->entity;
5218 textStart = ((char *)entity->textPtr) + entity->processed;
5219 textEnd = (char *)(entity->textPtr + entity->textLen);
Victor Stinner5ff71322017-06-21 14:39:22 +02005220 /* Set a safe default value in case 'next' does not get set */
5221 next = textStart;
Fred Drake31d485c2004-08-03 07:06:22 +00005222
5223#ifdef XML_DTD
5224 if (entity->is_param) {
5225 int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005226 result = doProlog(parser, internalEncoding, textStart, textEnd, tok,
Fred Drake31d485c2004-08-03 07:06:22 +00005227 next, &next, XML_FALSE);
5228 }
5229 else
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005230#endif /* XML_DTD */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005231 result = doContent(parser, openEntity->startTagLevel, internalEncoding,
5232 textStart, textEnd, &next, XML_FALSE);
Fred Drake31d485c2004-08-03 07:06:22 +00005233
5234 if (result != XML_ERROR_NONE)
5235 return result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005236 else if (textEnd != next && ps_parsing == XML_SUSPENDED) {
5237 entity->processed = (int)(next - (char *)entity->textPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00005238 return result;
5239 }
5240 else {
5241 entity->open = XML_FALSE;
5242 openInternalEntities = openEntity->next;
5243 /* put openEntity back in list of free instances */
5244 openEntity->next = freeInternalEntities;
5245 freeInternalEntities = openEntity;
5246 }
5247
5248#ifdef XML_DTD
5249 if (entity->is_param) {
5250 int tok;
5251 processor = prologProcessor;
5252 tok = XmlPrologTok(encoding, s, end, &next);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005253 return doProlog(parser, encoding, s, end, tok, next, nextPtr,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005254 (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00005255 }
5256 else
5257#endif /* XML_DTD */
5258 {
5259 processor = contentProcessor;
5260 /* see externalEntityContentProcessor vs contentProcessor */
5261 return doContent(parser, parentParser ? 1 : 0, encoding, s, end,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005262 nextPtr, (XML_Bool)!ps_finalBuffer);
5263 }
Fred Drake31d485c2004-08-03 07:06:22 +00005264}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005265
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005266static enum XML_Error PTRCALL
5267errorProcessor(XML_Parser parser,
Victor Stinner23ec4b52017-06-15 00:54:36 +02005268 const char *UNUSED_P(s),
5269 const char *UNUSED_P(end),
5270 const char **UNUSED_P(nextPtr))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005271{
5272 return errorCode;
5273}
5274
5275static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005276storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5277 const char *ptr, const char *end,
5278 STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005279{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005280 enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr,
5281 end, pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005282 if (result)
5283 return result;
5284 if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5285 poolChop(pool);
5286 if (!poolAppendChar(pool, XML_T('\0')))
5287 return XML_ERROR_NO_MEMORY;
5288 return XML_ERROR_NONE;
5289}
5290
5291static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005292appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5293 const char *ptr, const char *end,
5294 STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005295{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005296 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005297 for (;;) {
5298 const char *next;
5299 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
5300 switch (tok) {
5301 case XML_TOK_NONE:
5302 return XML_ERROR_NONE;
5303 case XML_TOK_INVALID:
5304 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005305 eventPtr = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005306 return XML_ERROR_INVALID_TOKEN;
5307 case XML_TOK_PARTIAL:
5308 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005309 eventPtr = ptr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005310 return XML_ERROR_INVALID_TOKEN;
5311 case XML_TOK_CHAR_REF:
5312 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005313 XML_Char buf[XML_ENCODE_MAX];
5314 int i;
5315 int n = XmlCharRefNumber(enc, ptr);
5316 if (n < 0) {
5317 if (enc == encoding)
5318 eventPtr = ptr;
5319 return XML_ERROR_BAD_CHAR_REF;
5320 }
5321 if (!isCdata
5322 && n == 0x20 /* space */
5323 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5324 break;
5325 n = XmlEncode(n, (ICHAR *)buf);
5326 if (!n) {
5327 if (enc == encoding)
5328 eventPtr = ptr;
5329 return XML_ERROR_BAD_CHAR_REF;
5330 }
5331 for (i = 0; i < n; i++) {
5332 if (!poolAppendChar(pool, buf[i]))
5333 return XML_ERROR_NO_MEMORY;
5334 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005335 }
5336 break;
5337 case XML_TOK_DATA_CHARS:
5338 if (!poolAppend(pool, enc, ptr, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005339 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005340 break;
5341 case XML_TOK_TRAILING_CR:
5342 next = ptr + enc->minBytesPerChar;
5343 /* fall through */
5344 case XML_TOK_ATTRIBUTE_VALUE_S:
5345 case XML_TOK_DATA_NEWLINE:
5346 if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005347 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005348 if (!poolAppendChar(pool, 0x20))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005349 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005350 break;
5351 case XML_TOK_ENTITY_REF:
5352 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005353 const XML_Char *name;
5354 ENTITY *entity;
5355 char checkEntityDecl;
5356 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
5357 ptr + enc->minBytesPerChar,
5358 next - enc->minBytesPerChar);
5359 if (ch) {
5360 if (!poolAppendChar(pool, ch))
5361 return XML_ERROR_NO_MEMORY;
5362 break;
5363 }
5364 name = poolStoreString(&temp2Pool, enc,
5365 ptr + enc->minBytesPerChar,
5366 next - enc->minBytesPerChar);
5367 if (!name)
5368 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005369 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005370 poolDiscard(&temp2Pool);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005371 /* First, determine if a check for an existing declaration is needed;
5372 if yes, check that the entity exists, and that it is internal.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005373 */
5374 if (pool == &dtd->pool) /* are we called from prolog? */
5375 checkEntityDecl =
5376#ifdef XML_DTD
5377 prologState.documentEntity &&
5378#endif /* XML_DTD */
5379 (dtd->standalone
5380 ? !openInternalEntities
5381 : !dtd->hasParamEntityRefs);
5382 else /* if (pool == &tempPool): we are called from content */
5383 checkEntityDecl = !dtd->hasParamEntityRefs || dtd->standalone;
5384 if (checkEntityDecl) {
5385 if (!entity)
5386 return XML_ERROR_UNDEFINED_ENTITY;
5387 else if (!entity->is_internal)
5388 return XML_ERROR_ENTITY_DECLARED_IN_PE;
5389 }
5390 else if (!entity) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005391 /* Cannot report skipped entity here - see comments on
5392 skippedEntityHandler.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005393 if (skippedEntityHandler)
5394 skippedEntityHandler(handlerArg, name, 0);
5395 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005396 /* Cannot call the default handler because this would be
5397 out of sync with the call to the startElementHandler.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005398 if ((pool == &tempPool) && defaultHandler)
5399 reportDefault(parser, enc, ptr, next);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005400 */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005401 break;
5402 }
5403 if (entity->open) {
5404 if (enc == encoding)
5405 eventPtr = ptr;
5406 return XML_ERROR_RECURSIVE_ENTITY_REF;
5407 }
5408 if (entity->notation) {
5409 if (enc == encoding)
5410 eventPtr = ptr;
5411 return XML_ERROR_BINARY_ENTITY_REF;
5412 }
5413 if (!entity->textPtr) {
5414 if (enc == encoding)
5415 eventPtr = ptr;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005416 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005417 }
5418 else {
5419 enum XML_Error result;
5420 const XML_Char *textEnd = entity->textPtr + entity->textLen;
5421 entity->open = XML_TRUE;
5422 result = appendAttributeValue(parser, internalEncoding, isCdata,
5423 (char *)entity->textPtr,
5424 (char *)textEnd, pool);
5425 entity->open = XML_FALSE;
5426 if (result)
5427 return result;
5428 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005429 }
5430 break;
5431 default:
5432 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005433 eventPtr = ptr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005434 return XML_ERROR_UNEXPECTED_STATE;
5435 }
5436 ptr = next;
5437 }
5438 /* not reached */
5439}
5440
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005441static enum XML_Error
5442storeEntityValue(XML_Parser parser,
5443 const ENCODING *enc,
5444 const char *entityTextPtr,
5445 const char *entityTextEnd)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005446{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005447 DTD * const dtd = _dtd; /* save one level of indirection */
5448 STRING_POOL *pool = &(dtd->entityValuePool);
5449 enum XML_Error result = XML_ERROR_NONE;
5450#ifdef XML_DTD
5451 int oldInEntityValue = prologState.inEntityValue;
5452 prologState.inEntityValue = 1;
5453#endif /* XML_DTD */
5454 /* never return Null for the value argument in EntityDeclHandler,
5455 since this would indicate an external entity; therefore we
5456 have to make sure that entityValuePool.start is not null */
5457 if (!pool->blocks) {
5458 if (!poolGrow(pool))
5459 return XML_ERROR_NO_MEMORY;
5460 }
5461
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005462 for (;;) {
5463 const char *next;
5464 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
5465 switch (tok) {
5466 case XML_TOK_PARAM_ENTITY_REF:
5467#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005468 if (isParamEntity || enc != encoding) {
5469 const XML_Char *name;
5470 ENTITY *entity;
5471 name = poolStoreString(&tempPool, enc,
5472 entityTextPtr + enc->minBytesPerChar,
5473 next - enc->minBytesPerChar);
5474 if (!name) {
5475 result = XML_ERROR_NO_MEMORY;
5476 goto endEntityValue;
5477 }
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005478 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005479 poolDiscard(&tempPool);
5480 if (!entity) {
5481 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
5482 /* cannot report skipped entity here - see comments on
5483 skippedEntityHandler
5484 if (skippedEntityHandler)
5485 skippedEntityHandler(handlerArg, name, 0);
5486 */
5487 dtd->keepProcessing = dtd->standalone;
5488 goto endEntityValue;
5489 }
5490 if (entity->open) {
5491 if (enc == encoding)
5492 eventPtr = entityTextPtr;
5493 result = XML_ERROR_RECURSIVE_ENTITY_REF;
5494 goto endEntityValue;
5495 }
5496 if (entity->systemId) {
5497 if (externalEntityRefHandler) {
5498 dtd->paramEntityRead = XML_FALSE;
5499 entity->open = XML_TRUE;
5500 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
5501 0,
5502 entity->base,
5503 entity->systemId,
5504 entity->publicId)) {
5505 entity->open = XML_FALSE;
5506 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5507 goto endEntityValue;
5508 }
5509 entity->open = XML_FALSE;
5510 if (!dtd->paramEntityRead)
5511 dtd->keepProcessing = dtd->standalone;
5512 }
5513 else
5514 dtd->keepProcessing = dtd->standalone;
5515 }
5516 else {
5517 entity->open = XML_TRUE;
5518 result = storeEntityValue(parser,
5519 internalEncoding,
5520 (char *)entity->textPtr,
5521 (char *)(entity->textPtr
5522 + entity->textLen));
5523 entity->open = XML_FALSE;
5524 if (result)
5525 goto endEntityValue;
5526 }
5527 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005528 }
5529#endif /* XML_DTD */
Fred Drake31d485c2004-08-03 07:06:22 +00005530 /* In the internal subset, PE references are not legal
5531 within markup declarations, e.g entity values in this case. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005532 eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005533 result = XML_ERROR_PARAM_ENTITY_REF;
5534 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005535 case XML_TOK_NONE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005536 result = XML_ERROR_NONE;
5537 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005538 case XML_TOK_ENTITY_REF:
5539 case XML_TOK_DATA_CHARS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005540 if (!poolAppend(pool, enc, entityTextPtr, next)) {
5541 result = XML_ERROR_NO_MEMORY;
5542 goto endEntityValue;
5543 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005544 break;
5545 case XML_TOK_TRAILING_CR:
5546 next = entityTextPtr + enc->minBytesPerChar;
5547 /* fall through */
5548 case XML_TOK_DATA_NEWLINE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005549 if (pool->end == pool->ptr && !poolGrow(pool)) {
5550 result = XML_ERROR_NO_MEMORY;
5551 goto endEntityValue;
5552 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005553 *(pool->ptr)++ = 0xA;
5554 break;
5555 case XML_TOK_CHAR_REF:
5556 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005557 XML_Char buf[XML_ENCODE_MAX];
5558 int i;
5559 int n = XmlCharRefNumber(enc, entityTextPtr);
5560 if (n < 0) {
5561 if (enc == encoding)
5562 eventPtr = entityTextPtr;
5563 result = XML_ERROR_BAD_CHAR_REF;
5564 goto endEntityValue;
5565 }
5566 n = XmlEncode(n, (ICHAR *)buf);
5567 if (!n) {
5568 if (enc == encoding)
5569 eventPtr = entityTextPtr;
5570 result = XML_ERROR_BAD_CHAR_REF;
5571 goto endEntityValue;
5572 }
5573 for (i = 0; i < n; i++) {
5574 if (pool->end == pool->ptr && !poolGrow(pool)) {
5575 result = XML_ERROR_NO_MEMORY;
5576 goto endEntityValue;
5577 }
5578 *(pool->ptr)++ = buf[i];
5579 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005580 }
5581 break;
5582 case XML_TOK_PARTIAL:
5583 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005584 eventPtr = entityTextPtr;
5585 result = XML_ERROR_INVALID_TOKEN;
5586 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005587 case XML_TOK_INVALID:
5588 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005589 eventPtr = next;
5590 result = XML_ERROR_INVALID_TOKEN;
5591 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005592 default:
5593 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005594 eventPtr = entityTextPtr;
5595 result = XML_ERROR_UNEXPECTED_STATE;
5596 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005597 }
5598 entityTextPtr = next;
5599 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005600endEntityValue:
5601#ifdef XML_DTD
5602 prologState.inEntityValue = oldInEntityValue;
5603#endif /* XML_DTD */
5604 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005605}
5606
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005607static void FASTCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005608normalizeLines(XML_Char *s)
5609{
5610 XML_Char *p;
5611 for (;; s++) {
5612 if (*s == XML_T('\0'))
5613 return;
5614 if (*s == 0xD)
5615 break;
5616 }
5617 p = s;
5618 do {
5619 if (*s == 0xD) {
5620 *p++ = 0xA;
5621 if (*++s == 0xA)
5622 s++;
5623 }
5624 else
5625 *p++ = *s++;
5626 } while (*s);
5627 *p = XML_T('\0');
5628}
5629
5630static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005631reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
5632 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005633{
5634 const XML_Char *target;
5635 XML_Char *data;
5636 const char *tem;
5637 if (!processingInstructionHandler) {
5638 if (defaultHandler)
5639 reportDefault(parser, enc, start, end);
5640 return 1;
5641 }
5642 start += enc->minBytesPerChar * 2;
5643 tem = start + XmlNameLength(enc, start);
5644 target = poolStoreString(&tempPool, enc, start, tem);
5645 if (!target)
5646 return 0;
5647 poolFinish(&tempPool);
5648 data = poolStoreString(&tempPool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005649 XmlSkipS(enc, tem),
5650 end - enc->minBytesPerChar*2);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005651 if (!data)
5652 return 0;
5653 normalizeLines(data);
5654 processingInstructionHandler(handlerArg, target, data);
5655 poolClear(&tempPool);
5656 return 1;
5657}
5658
5659static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005660reportComment(XML_Parser parser, const ENCODING *enc,
5661 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005662{
5663 XML_Char *data;
5664 if (!commentHandler) {
5665 if (defaultHandler)
5666 reportDefault(parser, enc, start, end);
5667 return 1;
5668 }
5669 data = poolStoreString(&tempPool,
5670 enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005671 start + enc->minBytesPerChar * 4,
5672 end - enc->minBytesPerChar * 3);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005673 if (!data)
5674 return 0;
5675 normalizeLines(data);
5676 commentHandler(handlerArg, data);
5677 poolClear(&tempPool);
5678 return 1;
5679}
5680
5681static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005682reportDefault(XML_Parser parser, const ENCODING *enc,
5683 const char *s, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005684{
5685 if (MUST_CONVERT(enc, s)) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02005686 enum XML_Convert_Result convert_res;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005687 const char **eventPP;
5688 const char **eventEndPP;
5689 if (enc == encoding) {
5690 eventPP = &eventPtr;
5691 eventEndPP = &eventEndPtr;
5692 }
5693 else {
5694 eventPP = &(openInternalEntities->internalEventPtr);
5695 eventEndPP = &(openInternalEntities->internalEventEndPtr);
5696 }
5697 do {
5698 ICHAR *dataPtr = (ICHAR *)dataBuf;
Victor Stinner23ec4b52017-06-15 00:54:36 +02005699 convert_res = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005700 *eventEndPP = s;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005701 defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005702 *eventPP = s;
Victor Stinner23ec4b52017-06-15 00:54:36 +02005703 } while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005704 }
5705 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005706 defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005707}
5708
5709
5710static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005711defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
5712 XML_Bool isId, const XML_Char *value, XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005713{
5714 DEFAULT_ATTRIBUTE *att;
5715 if (value || isId) {
5716 /* The handling of default attributes gets messed up if we have
5717 a default which duplicates a non-default. */
5718 int i;
5719 for (i = 0; i < type->nDefaultAtts; i++)
5720 if (attId == type->defaultAtts[i].id)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005721 return 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005722 if (isId && !type->idAtt && !attId->xmlns)
5723 type->idAtt = attId;
5724 }
5725 if (type->nDefaultAtts == type->allocDefaultAtts) {
5726 if (type->allocDefaultAtts == 0) {
5727 type->allocDefaultAtts = 8;
Fred Drake08317ae2003-10-21 15:38:55 +00005728 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(type->allocDefaultAtts
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005729 * sizeof(DEFAULT_ATTRIBUTE));
5730 if (!type->defaultAtts)
5731 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005732 }
5733 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005734 DEFAULT_ATTRIBUTE *temp;
5735 int count = type->allocDefaultAtts * 2;
5736 temp = (DEFAULT_ATTRIBUTE *)
5737 REALLOC(type->defaultAtts, (count * sizeof(DEFAULT_ATTRIBUTE)));
5738 if (temp == NULL)
5739 return 0;
5740 type->allocDefaultAtts = count;
5741 type->defaultAtts = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005742 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005743 }
5744 att = type->defaultAtts + type->nDefaultAtts;
5745 att->id = attId;
5746 att->value = value;
5747 att->isCdata = isCdata;
5748 if (!isCdata)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005749 attId->maybeTokenized = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005750 type->nDefaultAtts += 1;
5751 return 1;
5752}
5753
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005754static int
5755setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005756{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005757 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005758 const XML_Char *name;
5759 for (name = elementType->name; *name; name++) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005760 if (*name == XML_T(ASCII_COLON)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005761 PREFIX *prefix;
5762 const XML_Char *s;
5763 for (s = elementType->name; s != name; s++) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005764 if (!poolAppendChar(&dtd->pool, *s))
5765 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005766 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005767 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
5768 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005769 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005770 sizeof(PREFIX));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005771 if (!prefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005772 return 0;
5773 if (prefix->name == poolStart(&dtd->pool))
5774 poolFinish(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005775 else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005776 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005777 elementType->prefix = prefix;
5778
5779 }
5780 }
5781 return 1;
5782}
5783
5784static ATTRIBUTE_ID *
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005785getAttributeId(XML_Parser parser, const ENCODING *enc,
5786 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005787{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005788 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005789 ATTRIBUTE_ID *id;
5790 const XML_Char *name;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005791 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
5792 return NULL;
5793 name = poolStoreString(&dtd->pool, enc, start, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005794 if (!name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005795 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00005796 /* skip quotation mark - its storage will be re-used (like in name[-1]) */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005797 ++name;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005798 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005799 if (!id)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005800 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005801 if (id->name != name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005802 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005803 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005804 poolFinish(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005805 if (!ns)
5806 ;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005807 else if (name[0] == XML_T(ASCII_x)
5808 && name[1] == XML_T(ASCII_m)
5809 && name[2] == XML_T(ASCII_l)
5810 && name[3] == XML_T(ASCII_n)
5811 && name[4] == XML_T(ASCII_s)
5812 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005813 if (name[5] == XML_T('\0'))
5814 id->prefix = &dtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005815 else
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005816 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, sizeof(PREFIX));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005817 id->xmlns = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005818 }
5819 else {
5820 int i;
5821 for (i = 0; name[i]; i++) {
Fred Drake08317ae2003-10-21 15:38:55 +00005822 /* attributes without prefix are *not* in the default namespace */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005823 if (name[i] == XML_T(ASCII_COLON)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005824 int j;
5825 for (j = 0; j < i; j++) {
5826 if (!poolAppendChar(&dtd->pool, name[j]))
5827 return NULL;
5828 }
5829 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
5830 return NULL;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005831 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005832 sizeof(PREFIX));
Benjamin Peterson196d7db2016-06-11 13:28:56 -07005833 if (!id->prefix)
5834 return NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005835 if (id->prefix->name == poolStart(&dtd->pool))
5836 poolFinish(&dtd->pool);
5837 else
5838 poolDiscard(&dtd->pool);
5839 break;
5840 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005841 }
5842 }
5843 }
5844 return id;
5845}
5846
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005847#define CONTEXT_SEP XML_T(ASCII_FF)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005848
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005849static const XML_Char *
5850getContext(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005851{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005852 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005853 HASH_TABLE_ITER iter;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005854 XML_Bool needSep = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005855
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005856 if (dtd->defaultPrefix.binding) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005857 int i;
5858 int len;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005859 if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS)))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005860 return NULL;
5861 len = dtd->defaultPrefix.binding->uriLen;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005862 if (namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005863 len--;
5864 for (i = 0; i < len; i++)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005865 if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i]))
5866 return NULL;
5867 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005868 }
5869
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005870 hashTableIterInit(&iter, &(dtd->prefixes));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005871 for (;;) {
5872 int i;
5873 int len;
5874 const XML_Char *s;
5875 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
5876 if (!prefix)
5877 break;
5878 if (!prefix->binding)
5879 continue;
5880 if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005881 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005882 for (s = prefix->name; *s; s++)
5883 if (!poolAppendChar(&tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005884 return NULL;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005885 if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS)))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005886 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005887 len = prefix->binding->uriLen;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005888 if (namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005889 len--;
5890 for (i = 0; i < len; i++)
5891 if (!poolAppendChar(&tempPool, prefix->binding->uri[i]))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005892 return NULL;
5893 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005894 }
5895
5896
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005897 hashTableIterInit(&iter, &(dtd->generalEntities));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005898 for (;;) {
5899 const XML_Char *s;
5900 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
5901 if (!e)
5902 break;
5903 if (!e->open)
5904 continue;
5905 if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005906 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005907 for (s = e->name; *s; s++)
5908 if (!poolAppendChar(&tempPool, *s))
5909 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005910 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005911 }
5912
5913 if (!poolAppendChar(&tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005914 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005915 return tempPool.start;
5916}
5917
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005918static XML_Bool
5919setContext(XML_Parser parser, const XML_Char *context)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005920{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005921 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005922 const XML_Char *s = context;
5923
5924 while (*context != XML_T('\0')) {
5925 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
5926 ENTITY *e;
5927 if (!poolAppendChar(&tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005928 return XML_FALSE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005929 e = (ENTITY *)lookup(parser, &dtd->generalEntities, poolStart(&tempPool), 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005930 if (e)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005931 e->open = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005932 if (*s != XML_T('\0'))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005933 s++;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005934 context = s;
5935 poolDiscard(&tempPool);
5936 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005937 else if (*s == XML_T(ASCII_EQUALS)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005938 PREFIX *prefix;
5939 if (poolLength(&tempPool) == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005940 prefix = &dtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005941 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005942 if (!poolAppendChar(&tempPool, XML_T('\0')))
5943 return XML_FALSE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005944 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&tempPool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005945 sizeof(PREFIX));
5946 if (!prefix)
5947 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005948 if (prefix->name == poolStart(&tempPool)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005949 prefix->name = poolCopyString(&dtd->pool, prefix->name);
5950 if (!prefix->name)
5951 return XML_FALSE;
5952 }
5953 poolDiscard(&tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005954 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005955 for (context = s + 1;
5956 *context != CONTEXT_SEP && *context != XML_T('\0');
5957 context++)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005958 if (!poolAppendChar(&tempPool, *context))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005959 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005960 if (!poolAppendChar(&tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005961 return XML_FALSE;
Fred Drake31d485c2004-08-03 07:06:22 +00005962 if (addBinding(parser, prefix, NULL, poolStart(&tempPool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005963 &inheritedBindings) != XML_ERROR_NONE)
5964 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005965 poolDiscard(&tempPool);
5966 if (*context != XML_T('\0'))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005967 ++context;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005968 s = context;
5969 }
5970 else {
5971 if (!poolAppendChar(&tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005972 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005973 s++;
5974 }
5975 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005976 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005977}
5978
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005979static void FASTCALL
5980normalizePublicId(XML_Char *publicId)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005981{
5982 XML_Char *p = publicId;
5983 XML_Char *s;
5984 for (s = publicId; *s; s++) {
5985 switch (*s) {
5986 case 0x20:
5987 case 0xD:
5988 case 0xA:
5989 if (p != publicId && p[-1] != 0x20)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005990 *p++ = 0x20;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005991 break;
5992 default:
5993 *p++ = *s;
5994 }
5995 }
5996 if (p != publicId && p[-1] == 0x20)
5997 --p;
5998 *p = XML_T('\0');
5999}
6000
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006001static DTD *
6002dtdCreate(const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006003{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006004 DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD));
6005 if (p == NULL)
6006 return p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006007 poolInit(&(p->pool), ms);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006008 poolInit(&(p->entityValuePool), ms);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006009 hashTableInit(&(p->generalEntities), ms);
6010 hashTableInit(&(p->elementTypes), ms);
6011 hashTableInit(&(p->attributeIds), ms);
6012 hashTableInit(&(p->prefixes), ms);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006013#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006014 p->paramEntityRead = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006015 hashTableInit(&(p->paramEntities), ms);
6016#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006017 p->defaultPrefix.name = NULL;
6018 p->defaultPrefix.binding = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006019
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006020 p->in_eldecl = XML_FALSE;
6021 p->scaffIndex = NULL;
6022 p->scaffold = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006023 p->scaffLevel = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006024 p->scaffSize = 0;
6025 p->scaffCount = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006026 p->contentStringLen = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006027
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006028 p->keepProcessing = XML_TRUE;
6029 p->hasParamEntityRefs = XML_FALSE;
6030 p->standalone = XML_FALSE;
6031 return p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006032}
6033
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006034static void
6035dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006036{
6037 HASH_TABLE_ITER iter;
6038 hashTableIterInit(&iter, &(p->elementTypes));
6039 for (;;) {
6040 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6041 if (!e)
6042 break;
6043 if (e->allocDefaultAtts != 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006044 ms->free_fcn(e->defaultAtts);
6045 }
6046 hashTableClear(&(p->generalEntities));
6047#ifdef XML_DTD
6048 p->paramEntityRead = XML_FALSE;
6049 hashTableClear(&(p->paramEntities));
6050#endif /* XML_DTD */
6051 hashTableClear(&(p->elementTypes));
6052 hashTableClear(&(p->attributeIds));
6053 hashTableClear(&(p->prefixes));
6054 poolClear(&(p->pool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006055 poolClear(&(p->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006056 p->defaultPrefix.name = NULL;
6057 p->defaultPrefix.binding = NULL;
6058
6059 p->in_eldecl = XML_FALSE;
Fred Drake08317ae2003-10-21 15:38:55 +00006060
6061 ms->free_fcn(p->scaffIndex);
6062 p->scaffIndex = NULL;
6063 ms->free_fcn(p->scaffold);
6064 p->scaffold = NULL;
6065
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006066 p->scaffLevel = 0;
6067 p->scaffSize = 0;
6068 p->scaffCount = 0;
6069 p->contentStringLen = 0;
6070
6071 p->keepProcessing = XML_TRUE;
6072 p->hasParamEntityRefs = XML_FALSE;
6073 p->standalone = XML_FALSE;
6074}
6075
6076static void
6077dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms)
6078{
6079 HASH_TABLE_ITER iter;
6080 hashTableIterInit(&iter, &(p->elementTypes));
6081 for (;;) {
6082 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6083 if (!e)
6084 break;
6085 if (e->allocDefaultAtts != 0)
6086 ms->free_fcn(e->defaultAtts);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006087 }
6088 hashTableDestroy(&(p->generalEntities));
6089#ifdef XML_DTD
6090 hashTableDestroy(&(p->paramEntities));
6091#endif /* XML_DTD */
6092 hashTableDestroy(&(p->elementTypes));
6093 hashTableDestroy(&(p->attributeIds));
6094 hashTableDestroy(&(p->prefixes));
6095 poolDestroy(&(p->pool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006096 poolDestroy(&(p->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006097 if (isDocEntity) {
Fred Drake08317ae2003-10-21 15:38:55 +00006098 ms->free_fcn(p->scaffIndex);
6099 ms->free_fcn(p->scaffold);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006100 }
6101 ms->free_fcn(p);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006102}
6103
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006104/* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6105 The new DTD has already been initialized.
6106*/
6107static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006108dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006109{
6110 HASH_TABLE_ITER iter;
6111
6112 /* Copy the prefix table. */
6113
6114 hashTableIterInit(&iter, &(oldDtd->prefixes));
6115 for (;;) {
6116 const XML_Char *name;
6117 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
6118 if (!oldP)
6119 break;
6120 name = poolCopyString(&(newDtd->pool), oldP->name);
6121 if (!name)
6122 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006123 if (!lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006124 return 0;
6125 }
6126
6127 hashTableIterInit(&iter, &(oldDtd->attributeIds));
6128
6129 /* Copy the attribute id table. */
6130
6131 for (;;) {
6132 ATTRIBUTE_ID *newA;
6133 const XML_Char *name;
6134 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
6135
6136 if (!oldA)
6137 break;
6138 /* Remember to allocate the scratch byte before the name. */
6139 if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
6140 return 0;
6141 name = poolCopyString(&(newDtd->pool), oldA->name);
6142 if (!name)
6143 return 0;
6144 ++name;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006145 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006146 sizeof(ATTRIBUTE_ID));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006147 if (!newA)
6148 return 0;
6149 newA->maybeTokenized = oldA->maybeTokenized;
6150 if (oldA->prefix) {
6151 newA->xmlns = oldA->xmlns;
6152 if (oldA->prefix == &oldDtd->defaultPrefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006153 newA->prefix = &newDtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006154 else
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006155 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006156 oldA->prefix->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006157 }
6158 }
6159
6160 /* Copy the element type table. */
6161
6162 hashTableIterInit(&iter, &(oldDtd->elementTypes));
6163
6164 for (;;) {
6165 int i;
6166 ELEMENT_TYPE *newE;
6167 const XML_Char *name;
6168 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6169 if (!oldE)
6170 break;
6171 name = poolCopyString(&(newDtd->pool), oldE->name);
6172 if (!name)
6173 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006174 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006175 sizeof(ELEMENT_TYPE));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006176 if (!newE)
6177 return 0;
6178 if (oldE->nDefaultAtts) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006179 newE->defaultAtts = (DEFAULT_ATTRIBUTE *)
6180 ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6181 if (!newE->defaultAtts) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006182 return 0;
6183 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006184 }
6185 if (oldE->idAtt)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006186 newE->idAtt = (ATTRIBUTE_ID *)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006187 lookup(oldParser, &(newDtd->attributeIds), oldE->idAtt->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006188 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
6189 if (oldE->prefix)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006190 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006191 oldE->prefix->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006192 for (i = 0; i < newE->nDefaultAtts; i++) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006193 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006194 lookup(oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006195 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
6196 if (oldE->defaultAtts[i].value) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006197 newE->defaultAtts[i].value
6198 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
6199 if (!newE->defaultAtts[i].value)
6200 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006201 }
6202 else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006203 newE->defaultAtts[i].value = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006204 }
6205 }
6206
6207 /* Copy the entity tables. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006208 if (!copyEntityTable(oldParser,
6209 &(newDtd->generalEntities),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006210 &(newDtd->pool),
6211 &(oldDtd->generalEntities)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006212 return 0;
6213
6214#ifdef XML_DTD
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006215 if (!copyEntityTable(oldParser,
6216 &(newDtd->paramEntities),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006217 &(newDtd->pool),
6218 &(oldDtd->paramEntities)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006219 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006220 newDtd->paramEntityRead = oldDtd->paramEntityRead;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006221#endif /* XML_DTD */
6222
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006223 newDtd->keepProcessing = oldDtd->keepProcessing;
6224 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006225 newDtd->standalone = oldDtd->standalone;
6226
6227 /* Don't want deep copying for scaffolding */
6228 newDtd->in_eldecl = oldDtd->in_eldecl;
6229 newDtd->scaffold = oldDtd->scaffold;
6230 newDtd->contentStringLen = oldDtd->contentStringLen;
6231 newDtd->scaffSize = oldDtd->scaffSize;
6232 newDtd->scaffLevel = oldDtd->scaffLevel;
6233 newDtd->scaffIndex = oldDtd->scaffIndex;
6234
6235 return 1;
6236} /* End dtdCopy */
6237
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006238static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006239copyEntityTable(XML_Parser oldParser,
6240 HASH_TABLE *newTable,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006241 STRING_POOL *newPool,
6242 const HASH_TABLE *oldTable)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006243{
6244 HASH_TABLE_ITER iter;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006245 const XML_Char *cachedOldBase = NULL;
6246 const XML_Char *cachedNewBase = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006247
6248 hashTableIterInit(&iter, oldTable);
6249
6250 for (;;) {
6251 ENTITY *newE;
6252 const XML_Char *name;
6253 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
6254 if (!oldE)
6255 break;
6256 name = poolCopyString(newPool, oldE->name);
6257 if (!name)
6258 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006259 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006260 if (!newE)
6261 return 0;
6262 if (oldE->systemId) {
6263 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
6264 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006265 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006266 newE->systemId = tem;
6267 if (oldE->base) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006268 if (oldE->base == cachedOldBase)
6269 newE->base = cachedNewBase;
6270 else {
6271 cachedOldBase = oldE->base;
6272 tem = poolCopyString(newPool, cachedOldBase);
6273 if (!tem)
6274 return 0;
6275 cachedNewBase = newE->base = tem;
6276 }
6277 }
6278 if (oldE->publicId) {
6279 tem = poolCopyString(newPool, oldE->publicId);
6280 if (!tem)
6281 return 0;
6282 newE->publicId = tem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006283 }
6284 }
6285 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006286 const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr,
6287 oldE->textLen);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006288 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006289 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006290 newE->textPtr = tem;
6291 newE->textLen = oldE->textLen;
6292 }
6293 if (oldE->notation) {
6294 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
6295 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006296 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006297 newE->notation = tem;
6298 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006299 newE->is_param = oldE->is_param;
6300 newE->is_internal = oldE->is_internal;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006301 }
6302 return 1;
6303}
6304
Fred Drake08317ae2003-10-21 15:38:55 +00006305#define INIT_POWER 6
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006306
Fred Drake08317ae2003-10-21 15:38:55 +00006307static XML_Bool FASTCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006308keyeq(KEY s1, KEY s2)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006309{
6310 for (; *s1 == *s2; s1++, s2++)
6311 if (*s1 == 0)
Fred Drake08317ae2003-10-21 15:38:55 +00006312 return XML_TRUE;
6313 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006314}
6315
Victor Stinner5ff71322017-06-21 14:39:22 +02006316static size_t
6317keylen(KEY s)
6318{
6319 size_t len = 0;
6320 for (; *s; s++, len++);
6321 return len;
6322}
6323
6324static void
6325copy_salt_to_sipkey(XML_Parser parser, struct sipkey * key)
6326{
6327 key->k[0] = 0;
6328 key->k[1] = get_hash_secret_salt(parser);
6329}
6330
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006331static unsigned long FASTCALL
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006332hash(XML_Parser parser, KEY s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006333{
Victor Stinner5ff71322017-06-21 14:39:22 +02006334 struct siphash state;
6335 struct sipkey key;
6336 (void)sip_tobin;
6337 (void)sip24_valid;
6338 copy_salt_to_sipkey(parser, &key);
6339 sip24_init(&state, &key);
6340 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
6341 return (unsigned long)sip24_final(&state);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006342}
6343
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006344static NAMED *
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006345lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006346{
6347 size_t i;
6348 if (table->size == 0) {
6349 size_t tsize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006350 if (!createSize)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006351 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006352 table->power = INIT_POWER;
6353 /* table->size is a power of 2 */
6354 table->size = (size_t)1 << INIT_POWER;
6355 tsize = table->size * sizeof(NAMED *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006356 table->v = (NAMED **)table->mem->malloc_fcn(tsize);
Fred Drake31d485c2004-08-03 07:06:22 +00006357 if (!table->v) {
6358 table->size = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006359 return NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00006360 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006361 memset(table->v, 0, tsize);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006362 i = hash(parser, name) & ((unsigned long)table->size - 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006363 }
6364 else {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006365 unsigned long h = hash(parser, name);
Fred Drake08317ae2003-10-21 15:38:55 +00006366 unsigned long mask = (unsigned long)table->size - 1;
6367 unsigned char step = 0;
6368 i = h & mask;
6369 while (table->v[i]) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006370 if (keyeq(name, table->v[i]->name))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006371 return table->v[i];
Fred Drake08317ae2003-10-21 15:38:55 +00006372 if (!step)
6373 step = PROBE_STEP(h, mask, table->power);
6374 i < step ? (i += table->size - step) : (i -= step);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006375 }
6376 if (!createSize)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006377 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006378
6379 /* check for overflow (table is half full) */
6380 if (table->used >> (table->power - 1)) {
6381 unsigned char newPower = table->power + 1;
6382 size_t newSize = (size_t)1 << newPower;
6383 unsigned long newMask = (unsigned long)newSize - 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006384 size_t tsize = newSize * sizeof(NAMED *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006385 NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006386 if (!newV)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006387 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006388 memset(newV, 0, tsize);
6389 for (i = 0; i < table->size; i++)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006390 if (table->v[i]) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006391 unsigned long newHash = hash(parser, table->v[i]->name);
Fred Drake08317ae2003-10-21 15:38:55 +00006392 size_t j = newHash & newMask;
6393 step = 0;
6394 while (newV[j]) {
6395 if (!step)
6396 step = PROBE_STEP(newHash, newMask, newPower);
6397 j < step ? (j += newSize - step) : (j -= step);
6398 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006399 newV[j] = table->v[i];
6400 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006401 table->mem->free_fcn(table->v);
6402 table->v = newV;
Fred Drake08317ae2003-10-21 15:38:55 +00006403 table->power = newPower;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006404 table->size = newSize;
Fred Drake08317ae2003-10-21 15:38:55 +00006405 i = h & newMask;
6406 step = 0;
6407 while (table->v[i]) {
6408 if (!step)
6409 step = PROBE_STEP(h, newMask, newPower);
6410 i < step ? (i += newSize - step) : (i -= step);
6411 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006412 }
6413 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006414 table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006415 if (!table->v[i])
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006416 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006417 memset(table->v[i], 0, createSize);
6418 table->v[i]->name = name;
6419 (table->used)++;
6420 return table->v[i];
6421}
6422
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006423static void FASTCALL
6424hashTableClear(HASH_TABLE *table)
6425{
6426 size_t i;
6427 for (i = 0; i < table->size; i++) {
Fred Drake08317ae2003-10-21 15:38:55 +00006428 table->mem->free_fcn(table->v[i]);
6429 table->v[i] = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006430 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006431 table->used = 0;
6432}
6433
6434static void FASTCALL
6435hashTableDestroy(HASH_TABLE *table)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006436{
6437 size_t i;
Fred Drake08317ae2003-10-21 15:38:55 +00006438 for (i = 0; i < table->size; i++)
6439 table->mem->free_fcn(table->v[i]);
6440 table->mem->free_fcn(table->v);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006441}
6442
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006443static void FASTCALL
6444hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006445{
Fred Drake08317ae2003-10-21 15:38:55 +00006446 p->power = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006447 p->size = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006448 p->used = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006449 p->v = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006450 p->mem = ms;
6451}
6452
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006453static void FASTCALL
6454hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006455{
6456 iter->p = table->v;
6457 iter->end = iter->p + table->size;
6458}
6459
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006460static NAMED * FASTCALL
6461hashTableIterNext(HASH_TABLE_ITER *iter)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006462{
6463 while (iter->p != iter->end) {
6464 NAMED *tem = *(iter->p)++;
6465 if (tem)
6466 return tem;
6467 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006468 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006469}
6470
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006471static void FASTCALL
6472poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006473{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006474 pool->blocks = NULL;
6475 pool->freeBlocks = NULL;
6476 pool->start = NULL;
6477 pool->ptr = NULL;
6478 pool->end = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006479 pool->mem = ms;
6480}
6481
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006482static void FASTCALL
6483poolClear(STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006484{
6485 if (!pool->freeBlocks)
6486 pool->freeBlocks = pool->blocks;
6487 else {
6488 BLOCK *p = pool->blocks;
6489 while (p) {
6490 BLOCK *tem = p->next;
6491 p->next = pool->freeBlocks;
6492 pool->freeBlocks = p;
6493 p = tem;
6494 }
6495 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006496 pool->blocks = NULL;
6497 pool->start = NULL;
6498 pool->ptr = NULL;
6499 pool->end = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006500}
6501
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006502static void FASTCALL
6503poolDestroy(STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006504{
6505 BLOCK *p = pool->blocks;
6506 while (p) {
6507 BLOCK *tem = p->next;
6508 pool->mem->free_fcn(p);
6509 p = tem;
6510 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006511 p = pool->freeBlocks;
6512 while (p) {
6513 BLOCK *tem = p->next;
6514 pool->mem->free_fcn(p);
6515 p = tem;
6516 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006517}
6518
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006519static XML_Char *
6520poolAppend(STRING_POOL *pool, const ENCODING *enc,
6521 const char *ptr, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006522{
6523 if (!pool->ptr && !poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006524 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006525 for (;;) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02006526 const enum XML_Convert_Result convert_res = XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
6527 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006528 break;
6529 if (!poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006530 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006531 }
6532 return pool->start;
6533}
6534
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006535static const XML_Char * FASTCALL
6536poolCopyString(STRING_POOL *pool, const XML_Char *s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006537{
6538 do {
6539 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006540 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006541 } while (*s++);
6542 s = pool->start;
6543 poolFinish(pool);
6544 return s;
6545}
6546
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006547static const XML_Char *
6548poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006549{
6550 if (!pool->ptr && !poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006551 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006552 for (; n > 0; --n, s++) {
6553 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006554 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006555 }
6556 s = pool->start;
6557 poolFinish(pool);
6558 return s;
6559}
6560
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006561static const XML_Char * FASTCALL
6562poolAppendString(STRING_POOL *pool, const XML_Char *s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006563{
6564 while (*s) {
6565 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006566 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006567 s++;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006568 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006569 return pool->start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006570}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006571
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006572static XML_Char *
6573poolStoreString(STRING_POOL *pool, const ENCODING *enc,
6574 const char *ptr, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006575{
6576 if (!poolAppend(pool, enc, ptr, end))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006577 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006578 if (pool->ptr == pool->end && !poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006579 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006580 *(pool->ptr)++ = 0;
6581 return pool->start;
6582}
6583
Victor Stinner5ff71322017-06-21 14:39:22 +02006584static size_t
6585poolBytesToAllocateFor(int blockSize)
6586{
6587 /* Unprotected math would be:
6588 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
6589 **
6590 ** Detect overflow, avoiding _signed_ overflow undefined behavior
6591 ** For a + b * c we check b * c in isolation first, so that addition of a
6592 ** on top has no chance of making us accept a small non-negative number
6593 */
6594 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
6595
6596 if (blockSize <= 0)
6597 return 0;
6598
6599 if (blockSize > (int)(INT_MAX / stretch))
6600 return 0;
6601
6602 {
6603 const int stretchedBlockSize = blockSize * (int)stretch;
6604 const int bytesToAllocate = (int)(
6605 offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
6606 if (bytesToAllocate < 0)
6607 return 0;
6608
6609 return (size_t)bytesToAllocate;
6610 }
6611}
6612
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006613static XML_Bool FASTCALL
6614poolGrow(STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006615{
6616 if (pool->freeBlocks) {
6617 if (pool->start == 0) {
6618 pool->blocks = pool->freeBlocks;
6619 pool->freeBlocks = pool->freeBlocks->next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006620 pool->blocks->next = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006621 pool->start = pool->blocks->s;
6622 pool->end = pool->start + pool->blocks->size;
6623 pool->ptr = pool->start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006624 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006625 }
6626 if (pool->end - pool->start < pool->freeBlocks->size) {
6627 BLOCK *tem = pool->freeBlocks->next;
6628 pool->freeBlocks->next = pool->blocks;
6629 pool->blocks = pool->freeBlocks;
6630 pool->freeBlocks = tem;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006631 memcpy(pool->blocks->s, pool->start,
6632 (pool->end - pool->start) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006633 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
6634 pool->start = pool->blocks->s;
6635 pool->end = pool->start + pool->blocks->size;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006636 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006637 }
6638 }
6639 if (pool->blocks && pool->start == pool->blocks->s) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02006640 BLOCK *temp;
6641 int blockSize = (int)((unsigned)(pool->end - pool->start)*2U);
Victor Stinner5ff71322017-06-21 14:39:22 +02006642 size_t bytesToAllocate;
Victor Stinner23ec4b52017-06-15 00:54:36 +02006643
6644 if (blockSize < 0)
6645 return XML_FALSE;
6646
Victor Stinner5ff71322017-06-21 14:39:22 +02006647 bytesToAllocate = poolBytesToAllocateFor(blockSize);
6648 if (bytesToAllocate == 0)
6649 return XML_FALSE;
6650
Victor Stinner23ec4b52017-06-15 00:54:36 +02006651 temp = (BLOCK *)
Victor Stinner5ff71322017-06-21 14:39:22 +02006652 pool->mem->realloc_fcn(pool->blocks, (unsigned)bytesToAllocate);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006653 if (temp == NULL)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006654 return XML_FALSE;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006655 pool->blocks = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006656 pool->blocks->size = blockSize;
6657 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
6658 pool->start = pool->blocks->s;
6659 pool->end = pool->start + blockSize;
6660 }
6661 else {
6662 BLOCK *tem;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00006663 int blockSize = (int)(pool->end - pool->start);
Victor Stinner5ff71322017-06-21 14:39:22 +02006664 size_t bytesToAllocate;
Victor Stinner23ec4b52017-06-15 00:54:36 +02006665
6666 if (blockSize < 0)
6667 return XML_FALSE;
6668
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006669 if (blockSize < INIT_BLOCK_SIZE)
6670 blockSize = INIT_BLOCK_SIZE;
Victor Stinner5ff71322017-06-21 14:39:22 +02006671 else {
6672 /* Detect overflow, avoiding _signed_ overflow undefined behavior */
6673 if ((int)((unsigned)blockSize * 2U) < 0) {
6674 return XML_FALSE;
6675 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006676 blockSize *= 2;
Victor Stinner5ff71322017-06-21 14:39:22 +02006677 }
6678
6679 bytesToAllocate = poolBytesToAllocateFor(blockSize);
6680 if (bytesToAllocate == 0)
6681 return XML_FALSE;
6682
6683 tem = (BLOCK *)pool->mem->malloc_fcn(bytesToAllocate);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006684 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006685 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006686 tem->size = blockSize;
6687 tem->next = pool->blocks;
6688 pool->blocks = tem;
6689 if (pool->ptr != pool->start)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006690 memcpy(tem->s, pool->start,
6691 (pool->ptr - pool->start) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006692 pool->ptr = tem->s + (pool->ptr - pool->start);
6693 pool->start = tem->s;
6694 pool->end = tem->s + blockSize;
6695 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006696 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006697}
6698
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006699static int FASTCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006700nextScaffoldPart(XML_Parser parser)
6701{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006702 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006703 CONTENT_SCAFFOLD * me;
6704 int next;
6705
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006706 if (!dtd->scaffIndex) {
6707 dtd->scaffIndex = (int *)MALLOC(groupSize * sizeof(int));
6708 if (!dtd->scaffIndex)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006709 return -1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006710 dtd->scaffIndex[0] = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006711 }
6712
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006713 if (dtd->scaffCount >= dtd->scaffSize) {
6714 CONTENT_SCAFFOLD *temp;
6715 if (dtd->scaffold) {
6716 temp = (CONTENT_SCAFFOLD *)
6717 REALLOC(dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
6718 if (temp == NULL)
6719 return -1;
6720 dtd->scaffSize *= 2;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006721 }
6722 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006723 temp = (CONTENT_SCAFFOLD *)MALLOC(INIT_SCAFFOLD_ELEMENTS
6724 * sizeof(CONTENT_SCAFFOLD));
6725 if (temp == NULL)
6726 return -1;
6727 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006728 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006729 dtd->scaffold = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006730 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006731 next = dtd->scaffCount++;
6732 me = &dtd->scaffold[next];
6733 if (dtd->scaffLevel) {
6734 CONTENT_SCAFFOLD *parent = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel-1]];
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006735 if (parent->lastchild) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006736 dtd->scaffold[parent->lastchild].nextsib = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006737 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006738 if (!parent->childcnt)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006739 parent->firstchild = next;
6740 parent->lastchild = next;
6741 parent->childcnt++;
6742 }
6743 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
6744 return next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006745}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006746
6747static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006748build_node(XML_Parser parser,
6749 int src_node,
6750 XML_Content *dest,
6751 XML_Content **contpos,
6752 XML_Char **strpos)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006753{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006754 DTD * const dtd = _dtd; /* save one level of indirection */
6755 dest->type = dtd->scaffold[src_node].type;
6756 dest->quant = dtd->scaffold[src_node].quant;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006757 if (dest->type == XML_CTYPE_NAME) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006758 const XML_Char *src;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006759 dest->name = *strpos;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006760 src = dtd->scaffold[src_node].name;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006761 for (;;) {
6762 *(*strpos)++ = *src;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006763 if (!*src)
6764 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006765 src++;
6766 }
6767 dest->numchildren = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006768 dest->children = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006769 }
6770 else {
6771 unsigned int i;
6772 int cn;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006773 dest->numchildren = dtd->scaffold[src_node].childcnt;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006774 dest->children = *contpos;
6775 *contpos += dest->numchildren;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006776 for (i = 0, cn = dtd->scaffold[src_node].firstchild;
6777 i < dest->numchildren;
6778 i++, cn = dtd->scaffold[cn].nextsib) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006779 build_node(parser, cn, &(dest->children[i]), contpos, strpos);
6780 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006781 dest->name = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006782 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006783}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006784
6785static XML_Content *
6786build_model (XML_Parser parser)
6787{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006788 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006789 XML_Content *ret;
6790 XML_Content *cpos;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006791 XML_Char * str;
6792 int allocsize = (dtd->scaffCount * sizeof(XML_Content)
6793 + (dtd->contentStringLen * sizeof(XML_Char)));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006794
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006795 ret = (XML_Content *)MALLOC(allocsize);
6796 if (!ret)
6797 return NULL;
6798
6799 str = (XML_Char *) (&ret[dtd->scaffCount]);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006800 cpos = &ret[1];
6801
6802 build_node(parser, 0, ret, &cpos, &str);
6803 return ret;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006804}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006805
6806static ELEMENT_TYPE *
6807getElementType(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006808 const ENCODING *enc,
6809 const char *ptr,
6810 const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006811{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006812 DTD * const dtd = _dtd; /* save one level of indirection */
6813 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006814 ELEMENT_TYPE *ret;
6815
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006816 if (!name)
6817 return NULL;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006818 ret = (ELEMENT_TYPE *) lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006819 if (!ret)
6820 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006821 if (ret->name != name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006822 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006823 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006824 poolFinish(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006825 if (!setElementTypePrefix(parser, ret))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006826 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006827 }
6828 return ret;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006829}