blob: c4f3ffc215c9ef9b3f385a9fc6e3199236e99b5a [file] [log] [blame]
Benjamin Peterson5033aa72018-09-10 21:04:00 -07001/* 19ac4776051591216f1874e34ee99b6a43a3784c8bd7d70efeb9258dd22b906a (2.2.6+)
Victor Stinner759e30e2017-09-05 01:58:08 +02002 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
Victor Stinner5ff71322017-06-21 14:39:22 +02008
Victor Stinner759e30e2017-09-05 01:58:08 +02009 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000-2017 Expat development team
11 Licensed under the MIT license:
12
13 Permission is hereby granted, free of charge, to any person obtaining
14 a copy of this software and associated documentation files (the
15 "Software"), to deal in the Software without restriction, including
16 without limitation the rights to use, copy, modify, merge, publish,
17 distribute, sublicense, and/or sell copies of the Software, and to permit
18 persons to whom the Software is furnished to do so, subject to the
19 following conditions:
20
21 The above copyright notice and this permission notice shall be included
22 in all copies or substantial portions of the Software.
23
24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
29 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30 USE OR OTHER DEALINGS IN THE SOFTWARE.
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +000031*/
32
Victor Stinner93d0cb52017-08-18 23:43:54 +020033#if !defined(_GNU_SOURCE)
34# define _GNU_SOURCE 1 /* syscall prototype */
35#endif
Victor Stinner5ff71322017-06-21 14:39:22 +020036
Victor Stinner23ec4b52017-06-15 00:54:36 +020037#include <stddef.h>
38#include <string.h> /* memset(), memcpy() */
39#include <assert.h>
40#include <limits.h> /* UINT_MAX */
Victor Stinner5ff71322017-06-21 14:39:22 +020041#include <stdio.h> /* fprintf */
42#include <stdlib.h> /* getenv */
Victor Stinner23ec4b52017-06-15 00:54:36 +020043
Victor Stinner5ff71322017-06-21 14:39:22 +020044#ifdef _WIN32
Victor Stinner23ec4b52017-06-15 00:54:36 +020045#define getpid GetCurrentProcessId
46#else
47#include <sys/time.h> /* gettimeofday() */
48#include <sys/types.h> /* getpid() */
49#include <unistd.h> /* getpid() */
Victor Stinner93d0cb52017-08-18 23:43:54 +020050#include <fcntl.h> /* O_RDONLY */
51#include <errno.h>
Victor Stinner23ec4b52017-06-15 00:54:36 +020052#endif
53
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070054#define XML_BUILDING_EXPAT 1
55
Victor Stinner5ff71322017-06-21 14:39:22 +020056#ifdef _WIN32
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070057#include "winconfig.h"
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070058#elif defined(HAVE_EXPAT_CONFIG_H)
59#include <expat_config.h>
Victor Stinner5ff71322017-06-21 14:39:22 +020060#endif /* ndef _WIN32 */
Christian Heimesaa152762013-12-06 23:43:50 +010061
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070062#include "ascii.h"
Fred Drake08317ae2003-10-21 15:38:55 +000063#include "expat.h"
Victor Stinner5ff71322017-06-21 14:39:22 +020064#include "siphash.h"
Fred Drake08317ae2003-10-21 15:38:55 +000065
Victor Stinner93d0cb52017-08-18 23:43:54 +020066#if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
67# if defined(HAVE_GETRANDOM)
68# include <sys/random.h> /* getrandom */
69# else
70# include <unistd.h> /* syscall */
71# include <sys/syscall.h> /* SYS_getrandom */
72# endif
73# if ! defined(GRND_NONBLOCK)
74# define GRND_NONBLOCK 0x0001
75# endif /* defined(GRND_NONBLOCK) */
76#endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
77
78#if defined(HAVE_LIBBSD) \
79 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
80# include <bsd/stdlib.h>
81#endif
82
83#if defined(_WIN32) && !defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
84# define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
85#endif
86
87#if !defined(HAVE_GETRANDOM) && !defined(HAVE_SYSCALL_GETRANDOM) \
88 && !defined(HAVE_ARC4RANDOM_BUF) && !defined(HAVE_ARC4RANDOM) \
89 && !defined(XML_DEV_URANDOM) \
90 && !defined(_WIN32) \
91 && !defined(XML_POOR_ENTROPY)
92# error \
93 You do not have support for any sources of high quality entropy \
94 enabled. For end user security, that is probably not what you want. \
95 \
96 Your options include: \
97 * Linux + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
98 * Linux + glibc <2.25 (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
99 * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
100 * BSD / macOS <10.7 (arc4random): HAVE_ARC4RANDOM, \
101 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
102 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
103 * Linux / BSD / macOS (/dev/urandom): XML_DEV_URANDOM \
104 * Windows (RtlGenRandom): _WIN32. \
105 \
106 If insist on not using any of these, bypass this error by defining \
107 XML_POOR_ENTROPY; you have been warned. \
108 \
Victor Stinner93d0cb52017-08-18 23:43:54 +0200109 If you have reasons to patch this detection code away or need changes \
110 to the build system, please open a bug. Thank you!
111#endif
112
113
/* Choose the internal (API-side) encoding helpers.  With XML_UNICODE the
   generic names alias the UTF-16 variants and ICHAR is a 16-bit unit;
   otherwise they alias the UTF-8 variants and ICHAR is a plain char. */
#if defined(XML_UNICODE)
typedef unsigned short ICHAR;
#  define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#  define XmlConvert XmlUtf16Convert
#  define XmlEncode XmlUtf16Encode
#  define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#  define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
/* True unless the input is already UTF-16 and s sits on an even address.
   The address is turned into an integer by subtracting a null pointer. */
#  define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((char *)(s) - (char *)NULL) & 1))
#else
typedef char ICHAR;
#  define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#  define XmlConvert XmlUtf8Convert
#  define XmlEncode XmlUtf8Encode
#  define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#  define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
/* True unless the input encoding is already UTF-8. */
#  define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
#endif
132
133
/* Without namespace support (XML_NS undefined) the *NS entry points are
   simply aliases for their non-namespace counterparts. */
#if !defined(XML_NS)
#  define XmlInitEncodingNS XmlInitEncoding
#  define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
#  define XmlParseXmlDeclNS XmlParseXmlDecl
/* This alias was already defined by the encoding selection; redirect it. */
#  undef XmlGetInternalEncodingNS
#  define XmlGetInternalEncodingNS XmlGetInternalEncoding
#endif /* !defined(XML_NS) */
143
/* XML_T(x) casts a character constant to the API character type and
   XML_L(x) adapts a literal to it.  Three configurations exist:
   wchar_t-based UTF-16, unsigned-short-based UTF-16, and plain char. */
#if defined(XML_UNICODE) && defined(XML_UNICODE_WCHAR_T)
#  define XML_T(x) (const wchar_t)x
#  define XML_L(x) L ## x
#elif defined(XML_UNICODE)
#  define XML_T(x) (const unsigned short)x
#  define XML_L(x) x
#else
#  define XML_T(x) x
#  define XML_L(x) x
#endif
160
/* ROUND_UP(n, sz): smallest multiple of sz that is >= n.
   Only valid when sz is a power of 2 (the mask trick relies on it). */
#define ROUND_UP(n, sz) \
  (((n) + ((sz) - 1)) & ~((sz) - 1))
163
/* NULL-aware pointer difference: yields p - q when both pointers are
   non-null, and 0 as soon as either of them is null. */
#define EXPAT_SAFE_PTR_DIFF(p, q) \
  ((!(p) || !(q)) ? 0 : ((p) - (q)))
166
Fred Drake08317ae2003-10-21 15:38:55 +0000167/* Handle the case where memmove() doesn't exist. */
168#ifndef HAVE_MEMMOVE
169#ifdef HAVE_BCOPY
170#define memmove(d,s,l) bcopy((s),(d),(l))
171#else
172#error memmove does not exist on this platform, nor is a substitute available
173#endif /* HAVE_BCOPY */
174#endif /* HAVE_MEMMOVE */
175
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000176#include "internal.h"
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000177#include "xmltok.h"
178#include "xmlrole.h"
179
180typedef const XML_Char *KEY;
181
182typedef struct {
183 KEY name;
184} NAMED;
185
186typedef struct {
187 NAMED **v;
Fred Drake08317ae2003-10-21 15:38:55 +0000188 unsigned char power;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000189 size_t size;
190 size_t used;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000191 const XML_Memory_Handling_Suite *mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000192} HASH_TABLE;
193
Victor Stinner5ff71322017-06-21 14:39:22 +0200194static size_t
195keylen(KEY s);
Fred Drake08317ae2003-10-21 15:38:55 +0000196
Victor Stinner5ff71322017-06-21 14:39:22 +0200197static void
198copy_salt_to_sipkey(XML_Parser parser, struct sipkey * key);
Fred Drake08317ae2003-10-21 15:38:55 +0000199
/* Double hashing for collision probing.

   The table size is a power of 2 and index = hash & mask, with
   mask = table->size - 1.  The bits of the hash that the mask threw away
   are recycled as a cheap second hash value: SECOND_HASH shifts them down
   and limits the result to table->size / 4 (mask >> 2).  PROBE_STEP then
   forces the value to be odd, because any odd number is relatively prime
   to a power of 2, which is what a probe step size needs to be.
*/
#define SECOND_HASH(hash, mask, power) \
  ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power) \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
212
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000213typedef struct {
214 NAMED **p;
215 NAMED **end;
216} HASH_TABLE_ITER;
217
/* Initial sizes and sentinel values used by the parser. */
#define INIT_TAG_BUF_SIZE   32  /* must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE  1024
#define INIT_ATTS_SIZE      16
#define INIT_ATTS_VERSION   0xFFFFFFFF
#define INIT_BLOCK_SIZE     1024
#define INIT_BUFFER_SIZE    1024

#define EXPAND_SPARE        24
226
227typedef struct binding {
228 struct prefix *prefix;
229 struct binding *nextTagBinding;
230 struct binding *prevPrefixBinding;
231 const struct attribute_id *attId;
232 XML_Char *uri;
233 int uriLen;
234 int uriAlloc;
235} BINDING;
236
237typedef struct prefix {
238 const XML_Char *name;
239 BINDING *binding;
240} PREFIX;
241
242typedef struct {
243 const XML_Char *str;
244 const XML_Char *localPart;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000245 const XML_Char *prefix;
246 int strLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000247 int uriLen;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000248 int prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000249} TAG_NAME;
250
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000251/* TAG represents an open element.
252 The name of the element is stored in both the document and API
253 encodings. The memory buffer 'buf' is a separately-allocated
254 memory area which stores the name. During the XML_Parse()/
255 XMLParseBuffer() when the element is open, the memory for the 'raw'
256 version of the name (in the document encoding) is shared with the
257 document buffer. If the element is open across calls to
258 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
259 contain the 'raw' name as well.
260
261 A parser re-uses these structures, maintaining a list of allocated
262 TAG objects in a free list.
263*/
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000264typedef struct tag {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000265 struct tag *parent; /* parent of this element */
266 const char *rawName; /* tagName in the original encoding */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000267 int rawNameLength;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000268 TAG_NAME name; /* tagName in the API encoding */
269 char *buf; /* buffer for name components */
270 char *bufEnd; /* end of the buffer */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000271 BINDING *bindings;
272} TAG;
273
274typedef struct {
275 const XML_Char *name;
276 const XML_Char *textPtr;
Fred Drake31d485c2004-08-03 07:06:22 +0000277 int textLen; /* length in XML_Chars */
278 int processed; /* # of processed bytes - when suspended */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000279 const XML_Char *systemId;
280 const XML_Char *base;
281 const XML_Char *publicId;
282 const XML_Char *notation;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000283 XML_Bool open;
284 XML_Bool is_param;
285 XML_Bool is_internal; /* true if declared in internal subset outside PE */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000286} ENTITY;
287
288typedef struct {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000289 enum XML_Content_Type type;
290 enum XML_Content_Quant quant;
291 const XML_Char * name;
292 int firstchild;
293 int lastchild;
294 int childcnt;
295 int nextsib;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000296} CONTENT_SCAFFOLD;
297
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000298#define INIT_SCAFFOLD_ELEMENTS 32
299
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000300typedef struct block {
301 struct block *next;
302 int size;
303 XML_Char s[1];
304} BLOCK;
305
306typedef struct {
307 BLOCK *blocks;
308 BLOCK *freeBlocks;
309 const XML_Char *end;
310 XML_Char *ptr;
311 XML_Char *start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000312 const XML_Memory_Handling_Suite *mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000313} STRING_POOL;
314
315/* The XML_Char before the name is used to determine whether
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000316 an attribute has been specified. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000317typedef struct attribute_id {
318 XML_Char *name;
319 PREFIX *prefix;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000320 XML_Bool maybeTokenized;
321 XML_Bool xmlns;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000322} ATTRIBUTE_ID;
323
324typedef struct {
325 const ATTRIBUTE_ID *id;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000326 XML_Bool isCdata;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000327 const XML_Char *value;
328} DEFAULT_ATTRIBUTE;
329
330typedef struct {
Fred Drake08317ae2003-10-21 15:38:55 +0000331 unsigned long version;
332 unsigned long hash;
333 const XML_Char *uriName;
334} NS_ATT;
335
336typedef struct {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000337 const XML_Char *name;
338 PREFIX *prefix;
339 const ATTRIBUTE_ID *idAtt;
340 int nDefaultAtts;
341 int allocDefaultAtts;
342 DEFAULT_ATTRIBUTE *defaultAtts;
343} ELEMENT_TYPE;
344
345typedef struct {
346 HASH_TABLE generalEntities;
347 HASH_TABLE elementTypes;
348 HASH_TABLE attributeIds;
349 HASH_TABLE prefixes;
350 STRING_POOL pool;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000351 STRING_POOL entityValuePool;
352 /* false once a parameter entity reference has been skipped */
353 XML_Bool keepProcessing;
354 /* true once an internal or external PE reference has been encountered;
355 this includes the reference to an external subset */
356 XML_Bool hasParamEntityRefs;
357 XML_Bool standalone;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000358#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000359 /* indicates if external PE has been read */
360 XML_Bool paramEntityRead;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000361 HASH_TABLE paramEntities;
362#endif /* XML_DTD */
363 PREFIX defaultPrefix;
364 /* === scaffolding for building content model === */
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000365 XML_Bool in_eldecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000366 CONTENT_SCAFFOLD *scaffold;
367 unsigned contentStringLen;
368 unsigned scaffSize;
369 unsigned scaffCount;
370 int scaffLevel;
371 int *scaffIndex;
372} DTD;
373
374typedef struct open_internal_entity {
375 const char *internalEventPtr;
376 const char *internalEventEndPtr;
377 struct open_internal_entity *next;
378 ENTITY *entity;
Fred Drake31d485c2004-08-03 07:06:22 +0000379 int startTagLevel;
380 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000381} OPEN_INTERNAL_ENTITY;
382
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000383typedef enum XML_Error PTRCALL Processor(XML_Parser parser,
384 const char *start,
385 const char *end,
386 const char **endPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000387
388static Processor prologProcessor;
389static Processor prologInitProcessor;
390static Processor contentProcessor;
391static Processor cdataSectionProcessor;
392#ifdef XML_DTD
393static Processor ignoreSectionProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000394static Processor externalParEntProcessor;
395static Processor externalParEntInitProcessor;
396static Processor entityValueProcessor;
397static Processor entityValueInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000398#endif /* XML_DTD */
399static Processor epilogProcessor;
400static Processor errorProcessor;
401static Processor externalEntityInitProcessor;
402static Processor externalEntityInitProcessor2;
403static Processor externalEntityInitProcessor3;
404static Processor externalEntityContentProcessor;
Fred Drake31d485c2004-08-03 07:06:22 +0000405static Processor internalEntityProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000406
407static enum XML_Error
408handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
409static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000410processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
Fred Drake31d485c2004-08-03 07:06:22 +0000411 const char *s, const char *next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000412static enum XML_Error
413initializeEncoding(XML_Parser parser);
414static enum XML_Error
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700415doProlog(XML_Parser parser, const ENCODING *enc, const char *s,
416 const char *end, int tok, const char *next, const char **nextPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000417 XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000418static enum XML_Error
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700419processInternalEntity(XML_Parser parser, ENTITY *entity,
Fred Drake31d485c2004-08-03 07:06:22 +0000420 XML_Bool betweenDecl);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000421static enum XML_Error
422doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700423 const char *start, const char *end, const char **endPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000424 XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000425static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000426doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000427 const char *end, const char **nextPtr, XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000428#ifdef XML_DTD
429static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000430doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000431 const char *end, const char **nextPtr, XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000432#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000433
Victor Stinner5ff71322017-06-21 14:39:22 +0200434static void
435freeBindings(XML_Parser parser, BINDING *bindings);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000436static enum XML_Error
Fred Drake4faea012003-01-28 06:42:40 +0000437storeAtts(XML_Parser parser, const ENCODING *, const char *s,
438 TAG_NAME *tagNamePtr, BINDING **bindingsPtr);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000439static enum XML_Error
440addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
441 const XML_Char *uri, BINDING **bindingsPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000442static int
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700443defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
Fred Drake31d485c2004-08-03 07:06:22 +0000444 XML_Bool isId, const XML_Char *dfltValue, XML_Parser parser);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000445static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000446storeAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
447 const char *, const char *, STRING_POOL *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000448static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000449appendAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
450 const char *, const char *, STRING_POOL *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000451static ATTRIBUTE_ID *
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000452getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
453 const char *end);
454static int
455setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000456static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000457storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start,
458 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000459static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000460reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
461 const char *start, const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000462static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000463reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
464 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000465static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000466reportDefault(XML_Parser parser, const ENCODING *enc, const char *start,
467 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000468
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000469static const XML_Char * getContext(XML_Parser parser);
470static XML_Bool
471setContext(XML_Parser parser, const XML_Char *context);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000472
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000473static void FASTCALL normalizePublicId(XML_Char *s);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000474
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000475static DTD * dtdCreate(const XML_Memory_Handling_Suite *ms);
Benjamin Peterson4e211002018-06-26 19:25:45 -0700476/* do not call if m_parentParser != NULL */
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000477static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
478static void
479dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms);
480static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700481dtdCopy(XML_Parser oldParser,
482 DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000483static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700484copyEntityTable(XML_Parser oldParser,
485 HASH_TABLE *, STRING_POOL *, const HASH_TABLE *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000486static NAMED *
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700487lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000488static void FASTCALL
489hashTableInit(HASH_TABLE *, const XML_Memory_Handling_Suite *ms);
490static void FASTCALL hashTableClear(HASH_TABLE *);
491static void FASTCALL hashTableDestroy(HASH_TABLE *);
492static void FASTCALL
493hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
494static NAMED * FASTCALL hashTableIterNext(HASH_TABLE_ITER *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000495
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000496static void FASTCALL
497poolInit(STRING_POOL *, const XML_Memory_Handling_Suite *ms);
498static void FASTCALL poolClear(STRING_POOL *);
499static void FASTCALL poolDestroy(STRING_POOL *);
500static XML_Char *
501poolAppend(STRING_POOL *pool, const ENCODING *enc,
502 const char *ptr, const char *end);
503static XML_Char *
504poolStoreString(STRING_POOL *pool, const ENCODING *enc,
505 const char *ptr, const char *end);
506static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
507static const XML_Char * FASTCALL
508poolCopyString(STRING_POOL *pool, const XML_Char *s);
509static const XML_Char *
510poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
511static const XML_Char * FASTCALL
512poolAppendString(STRING_POOL *pool, const XML_Char *s);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000513
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000514static int FASTCALL nextScaffoldPart(XML_Parser parser);
515static XML_Content * build_model(XML_Parser parser);
516static ELEMENT_TYPE *
517getElementType(XML_Parser parser, const ENCODING *enc,
518 const char *ptr, const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000519
Victor Stinner93d0cb52017-08-18 23:43:54 +0200520static XML_Char *copyString(const XML_Char *s,
521 const XML_Memory_Handling_Suite *memsuite);
522
Victor Stinner23ec4b52017-06-15 00:54:36 +0200523static unsigned long generate_hash_secret_salt(XML_Parser parser);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700524static XML_Bool startParsing(XML_Parser parser);
525
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000526static XML_Parser
527parserCreate(const XML_Char *encodingName,
528 const XML_Memory_Handling_Suite *memsuite,
529 const XML_Char *nameSep,
530 DTD *dtd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700531
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000532static void
533parserInit(XML_Parser parser, const XML_Char *encodingName);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000534
/* Accessors for a STRING_POOL.  'pool' is a pointer to a structure with
   'start', 'ptr' and 'end' members; the string under construction spans
   [start, ptr).  Every macro parenthesises its arguments, so any pointer
   expression (e.g. `pools + 1`) may be passed. */
#define poolStart(pool) ((pool)->start)      /* first char of current string */
#define poolEnd(pool) ((pool)->ptr)          /* one past the last char */
#define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Drop the last character.  The argument used to be expanded as
   `pool->ptr` without parentheses, which mis-parsed for anything but a
   plain identifier. */
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
/* Abandon the current string: reset ptr back to start. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Commit the current string: the next one begins where this one ended. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Append c, growing the pool first when it is full.  Evaluates to 1 on
   success and to 0 when poolGrow() fails.  Note that 'pool' is evaluated
   more than once. */
#define poolAppendChar(pool, c) \
  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
   ? 0 \
   : ((*((pool)->ptr)++ = (c)), 1))
546
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000547struct XML_ParserStruct {
Benjamin Peterson4e211002018-06-26 19:25:45 -0700548 /* The first member must be m_userData so that the XML_GetUserData
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000549 macro works. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000550 void *m_userData;
551 void *m_handlerArg;
552 char *m_buffer;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000553 const XML_Memory_Handling_Suite m_mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000554 /* first character to be parsed */
555 const char *m_bufferPtr;
556 /* past last character to be parsed */
557 char *m_bufferEnd;
Benjamin Peterson4e211002018-06-26 19:25:45 -0700558 /* allocated end of m_buffer */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000559 const char *m_bufferLim;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000560 XML_Index m_parseEndByteIndex;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000561 const char *m_parseEndPtr;
562 XML_Char *m_dataBuf;
563 XML_Char *m_dataBufEnd;
564 XML_StartElementHandler m_startElementHandler;
565 XML_EndElementHandler m_endElementHandler;
566 XML_CharacterDataHandler m_characterDataHandler;
567 XML_ProcessingInstructionHandler m_processingInstructionHandler;
568 XML_CommentHandler m_commentHandler;
569 XML_StartCdataSectionHandler m_startCdataSectionHandler;
570 XML_EndCdataSectionHandler m_endCdataSectionHandler;
571 XML_DefaultHandler m_defaultHandler;
572 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
573 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
574 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
575 XML_NotationDeclHandler m_notationDeclHandler;
576 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
577 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
578 XML_NotStandaloneHandler m_notStandaloneHandler;
579 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000580 XML_Parser m_externalEntityRefHandlerArg;
581 XML_SkippedEntityHandler m_skippedEntityHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000582 XML_UnknownEncodingHandler m_unknownEncodingHandler;
583 XML_ElementDeclHandler m_elementDeclHandler;
584 XML_AttlistDeclHandler m_attlistDeclHandler;
585 XML_EntityDeclHandler m_entityDeclHandler;
586 XML_XmlDeclHandler m_xmlDeclHandler;
587 const ENCODING *m_encoding;
588 INIT_ENCODING m_initEncoding;
589 const ENCODING *m_internalEncoding;
590 const XML_Char *m_protocolEncodingName;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000591 XML_Bool m_ns;
592 XML_Bool m_ns_triplets;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000593 void *m_unknownEncodingMem;
594 void *m_unknownEncodingData;
595 void *m_unknownEncodingHandlerData;
Fred Drake31d485c2004-08-03 07:06:22 +0000596 void (XMLCALL *m_unknownEncodingRelease)(void *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000597 PROLOG_STATE m_prologState;
598 Processor *m_processor;
599 enum XML_Error m_errorCode;
600 const char *m_eventPtr;
601 const char *m_eventEndPtr;
602 const char *m_positionPtr;
603 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
Fred Drake31d485c2004-08-03 07:06:22 +0000604 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000605 XML_Bool m_defaultExpandInternalEntities;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000606 int m_tagLevel;
607 ENTITY *m_declEntity;
608 const XML_Char *m_doctypeName;
609 const XML_Char *m_doctypeSysid;
610 const XML_Char *m_doctypePubid;
611 const XML_Char *m_declAttributeType;
612 const XML_Char *m_declNotationName;
613 const XML_Char *m_declNotationPublicId;
614 ELEMENT_TYPE *m_declElementType;
615 ATTRIBUTE_ID *m_declAttributeId;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000616 XML_Bool m_declAttributeIsCdata;
617 XML_Bool m_declAttributeIsId;
618 DTD *m_dtd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000619 const XML_Char *m_curBase;
620 TAG *m_tagStack;
621 TAG *m_freeTagList;
622 BINDING *m_inheritedBindings;
623 BINDING *m_freeBindingList;
624 int m_attsSize;
625 int m_nSpecifiedAtts;
626 int m_idAttIndex;
627 ATTRIBUTE *m_atts;
Fred Drake08317ae2003-10-21 15:38:55 +0000628 NS_ATT *m_nsAtts;
629 unsigned long m_nsAttsVersion;
630 unsigned char m_nsAttsPower;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700631#ifdef XML_ATTR_INFO
632 XML_AttrInfo *m_attInfo;
633#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000634 POSITION m_position;
635 STRING_POOL m_tempPool;
636 STRING_POOL m_temp2Pool;
637 char *m_groupConnector;
Fred Drake08317ae2003-10-21 15:38:55 +0000638 unsigned int m_groupSize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000639 XML_Char m_namespaceSeparator;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000640 XML_Parser m_parentParser;
Fred Drake31d485c2004-08-03 07:06:22 +0000641 XML_ParsingStatus m_parsingStatus;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000642#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000643 XML_Bool m_isParamEntity;
644 XML_Bool m_useForeignDTD;
645 enum XML_ParamEntityParsing m_paramEntityParsing;
646#endif
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700647 unsigned long m_hash_secret_salt;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000648};
649
/* Allocation through the parser's memory-handling suite (m_mem).
   'parser' is a pointer to a structure with an m_mem member.  It is now
   parenthesised in every expansion, so that pointer expressions such as
   `parsers + 1` or `cond ? a : b` expand correctly; previously only a
   plain identifier was safe.

   REALLOC follows realloc() conventions when m_mem.realloc_fcn does:
   store the result in a temporary first so the original pointer is not
   lost on failure. */
#define MALLOC(parser, s)      ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s)  ((parser)->m_mem.realloc_fcn((p),(s)))
#define FREE(parser, p)        ((parser)->m_mem.free_fcn((p)))
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000653
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000654
Fred Drake08317ae2003-10-21 15:38:55 +0000655XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000656XML_ParserCreate(const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000657{
658 return XML_ParserCreate_MM(encodingName, NULL, NULL);
659}
660
Fred Drake08317ae2003-10-21 15:38:55 +0000661XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000662XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000663{
664 XML_Char tmp[2];
665 *tmp = nsSep;
666 return XML_ParserCreate_MM(encodingName, NULL, tmp);
667}
668
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000669static const XML_Char implicitContext[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700670 ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, ASCII_t, ASCII_t, ASCII_p,
671 ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
672 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g,
673 ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9,
674 ASCII_9, ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
675 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0'
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000676};
677
Victor Stinner5ff71322017-06-21 14:39:22 +0200678
Benjamin Peterson4e211002018-06-26 19:25:45 -0700679/* To avoid warnings about unused functions: */
680#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
681
Victor Stinner5ff71322017-06-21 14:39:22 +0200682#if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
Victor Stinner5ff71322017-06-21 14:39:22 +0200683
/* Obtain entropy on Linux 3.17+.

   Fills `count' bytes at `target' using getrandom() (or the raw
   SYS_getrandom syscall when HAVE_GETRANDOM is not defined) with
   GRND_NONBLOCK set.

   Returns 1 when all `count' bytes were written and 0 otherwise.

   errno is only meaningful right after a failing call, so it is
   consulted only when the call itself returned a negative value:
   - a positive return accumulates and the loop carries on until the
     request is satisfied, regardless of any stale errno value;
   - a negative return with errno == EINTR is retried;
   - anything else (including a return of 0) ends the attempt.
*/
static int
writeRandomBytes_getrandom_nonblock(void * target, size_t count) {
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = GRND_NONBLOCK;

  do {
    void * const currentTarget = (void*)((char*)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    /* long is wide enough for both ssize_t (getrandom) and the
       return type of syscall() */
    const long bytesWrittenMore =
#if defined(HAVE_GETRANDOM)
        getrandom(currentTarget, bytesToWrite, getrandomFlags);
#else
        syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
#endif

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
    } else if (! ((bytesWrittenMore < 0) && (errno == EINTR))) {
      return 0;  /* hard failure, or no progress possible */
    }
  } while (bytesWrittenTotal < count);

  return 1;
}
711
712#endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
713
714
Victor Stinner93d0cb52017-08-18 23:43:54 +0200715#if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
716
/* Extract entropy from /dev/urandom.

   Fills `count' bytes at `target' by reading from /dev/urandom.
   Returns 1 when all `count' bytes were read and 0 otherwise (the
   device could not be opened, read() failed, or read() reported end
   of file).

   errno is only meaningful right after a failing call, so it is
   consulted only when read() returned a negative value.  A short
   read simply continues; a return of 0 ends the loop instead of
   being retried on the strength of a stale errno == EINTR.
*/
static int
writeRandomBytes_dev_urandom(void * target, size_t count) {
  int success = 0;  /* full count bytes written? */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  for (;;) {
    void * const currentTarget = (void*)((char*)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count) {
        success = 1;
        break;
      }
    } else if (! ((bytesWrittenMore < 0) && (errno == EINTR))) {
      break;  /* hard failure or end of file */
    }
  }

  close(fd);
  return success;
}
744
745#endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
746
Benjamin Peterson4e211002018-06-26 19:25:45 -0700747#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
748
Victor Stinner93d0cb52017-08-18 23:43:54 +0200749
750#if defined(HAVE_ARC4RANDOM)
751
/* Fill `count' bytes at `target' with output of arc4random().

   Each 32-bit value drawn is stored least significant byte first;
   bytes of the last value that are not needed are dropped.
*/
static void
writeRandomBytes_arc4random(void * target, size_t count) {
  uint8_t * const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    uint32_t word = arc4random();
    size_t bytesLeftInWord = sizeof(word);

    while ((bytesLeftInWord > 0) && (filled < count)) {
      out[filled] = (uint8_t)word;
      word >>= 8;
      filled++;
      bytesLeftInWord--;
    }
  }
}
767
768#endif /* defined(HAVE_ARC4RANDOM) */
769
770
Victor Stinner5ff71322017-06-21 14:39:22 +0200771#ifdef _WIN32
772
773typedef BOOLEAN (APIENTRY *RTLGENRANDOM_FUNC)(PVOID, ULONG);
Victor Stinner93d0cb52017-08-18 23:43:54 +0200774HMODULE _Expat_LoadLibrary(LPCTSTR filename); /* see loadlibrary.c */
Victor Stinner5ff71322017-06-21 14:39:22 +0200775
776/* Obtain entropy on Windows XP / Windows Server 2003 and later.
Victor Stinner93d0cb52017-08-18 23:43:54 +0200777 * Hint on RtlGenRandom and the following article from libsodium.
Victor Stinner5ff71322017-06-21 14:39:22 +0200778 *
779 * Michael Howard: Cryptographically Secure Random number on Windows without using CryptoAPI
780 * https://blogs.msdn.microsoft.com/michael_howard/2005/01/14/cryptographically-secure-random-number-on-windows-without-using-cryptoapi/
781 */
782static int
783writeRandomBytes_RtlGenRandom(void * target, size_t count) {
784 int success = 0; /* full count bytes written? */
Victor Stinner93d0cb52017-08-18 23:43:54 +0200785 const HMODULE advapi32 = _Expat_LoadLibrary(TEXT("ADVAPI32.DLL"));
Victor Stinner5ff71322017-06-21 14:39:22 +0200786
787 if (advapi32) {
788 const RTLGENRANDOM_FUNC RtlGenRandom
789 = (RTLGENRANDOM_FUNC)GetProcAddress(advapi32, "SystemFunction036");
790 if (RtlGenRandom) {
791 if (RtlGenRandom((PVOID)target, (ULONG)count) == TRUE) {
792 success = 1;
793 }
794 }
795 FreeLibrary(advapi32);
796 }
797
798 return success;
799}
800
801#endif /* _WIN32 */
802
803
Victor Stinner93d0cb52017-08-18 23:43:54 +0200804#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
805
/* Derive a small amount of entropy from the current time.

   On Windows the two halves of the current FILETIME are XORed
   together; elsewhere the microsecond field of gettimeofday() is
   returned.
*/
static unsigned long
gather_time_entropy(void)
{
#ifdef _WIN32
  FILETIME now;
  GetSystemTimeAsFileTime(&now); /* never fails */
  return now.dwHighDateTime ^ now.dwLowDateTime;
#else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  /* The result is only checked in debug builds; the cast keeps
     release builds free of unused-variable warnings. */
#if ! defined(NDEBUG)
  assert (rc == 0);
#else
  (void)rc;
#endif /* ! defined(NDEBUG) */

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#endif
}
829
Victor Stinner93d0cb52017-08-18 23:43:54 +0200830#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
831
Victor Stinner5ff71322017-06-21 14:39:22 +0200832
/* Pass-through helper: returns `entropy' unchanged.

   When the environment variable EXPAT_ENTROPY_DEBUG is set to exactly
   "1", the value is additionally reported on stderr together with
   `label' and the size of the value in bytes.
*/
static unsigned long
ENTROPY_DEBUG(const char * label, unsigned long entropy) {
  const char * const debugSetting = getenv("EXPAT_ENTROPY_DEBUG");
  const int hexDigits = (int)sizeof(entropy) * 2;

  if ((debugSetting == NULL) || (strcmp(debugSetting, "1") != 0))
    return entropy;

  fprintf(stderr, "Entropy: %s --> 0x%0*lx (%lu bytes)\n",
      label,
      hexDigits, entropy,
      (unsigned long)sizeof(entropy));
  return entropy;
}
844
Victor Stinner23ec4b52017-06-15 00:54:36 +0200845static unsigned long
846generate_hash_secret_salt(XML_Parser parser)
847{
Victor Stinner5ff71322017-06-21 14:39:22 +0200848 unsigned long entropy;
849 (void)parser;
Benjamin Peterson4e211002018-06-26 19:25:45 -0700850
851 /* "Failproof" high quality providers: */
Victor Stinner93d0cb52017-08-18 23:43:54 +0200852#if defined(HAVE_ARC4RANDOM_BUF)
Victor Stinner5ff71322017-06-21 14:39:22 +0200853 arc4random_buf(&entropy, sizeof(entropy));
854 return ENTROPY_DEBUG("arc4random_buf", entropy);
Victor Stinner93d0cb52017-08-18 23:43:54 +0200855#elif defined(HAVE_ARC4RANDOM)
856 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
857 return ENTROPY_DEBUG("arc4random", entropy);
Victor Stinner5ff71322017-06-21 14:39:22 +0200858#else
859 /* Try high quality providers first .. */
860#ifdef _WIN32
861 if (writeRandomBytes_RtlGenRandom((void *)&entropy, sizeof(entropy))) {
862 return ENTROPY_DEBUG("RtlGenRandom", entropy);
863 }
864#elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
Victor Stinner93d0cb52017-08-18 23:43:54 +0200865 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
Victor Stinner5ff71322017-06-21 14:39:22 +0200866 return ENTROPY_DEBUG("getrandom", entropy);
867 }
868#endif
Victor Stinner93d0cb52017-08-18 23:43:54 +0200869#if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
870 if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
871 return ENTROPY_DEBUG("/dev/urandom", entropy);
872 }
873#endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
Victor Stinner5ff71322017-06-21 14:39:22 +0200874 /* .. and self-made low quality for backup: */
875
876 /* Process ID is 0 bits entropy if attacker has local access */
877 entropy = gather_time_entropy() ^ getpid();
Victor Stinner23ec4b52017-06-15 00:54:36 +0200878
879 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
880 if (sizeof(unsigned long) == 4) {
Victor Stinner5ff71322017-06-21 14:39:22 +0200881 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200882 } else {
Victor Stinner5ff71322017-06-21 14:39:22 +0200883 return ENTROPY_DEBUG("fallback(8)",
Victor Stinner93d0cb52017-08-18 23:43:54 +0200884 entropy * (unsigned long)2305843009213693951ULL);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200885 }
Victor Stinner5ff71322017-06-21 14:39:22 +0200886#endif
887}
888
889static unsigned long
890get_hash_secret_salt(XML_Parser parser) {
891 if (parser->m_parentParser != NULL)
892 return get_hash_secret_salt(parser->m_parentParser);
893 return parser->m_hash_secret_salt;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700894}
895
896static XML_Bool /* only valid for root parser */
897startParsing(XML_Parser parser)
898{
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700899 /* hash functions must be initialized before setContext() is called */
Benjamin Peterson4e211002018-06-26 19:25:45 -0700900 if (parser->m_hash_secret_salt == 0)
901 parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
902 if (parser->m_ns) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700903 /* implicit context only set for root parser, since child
904 parsers (i.e. external entity parsers) will inherit it
905 */
906 return setContext(parser, implicitContext);
907 }
908 return XML_TRUE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700909}
910
911XML_Parser XMLCALL
912XML_ParserCreate_MM(const XML_Char *encodingName,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700913 const XML_Memory_Handling_Suite *memsuite,
914 const XML_Char *nameSep)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700915{
916 return parserCreate(encodingName, memsuite, nameSep, NULL);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000917}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000918
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000919static XML_Parser
920parserCreate(const XML_Char *encodingName,
921 const XML_Memory_Handling_Suite *memsuite,
922 const XML_Char *nameSep,
923 DTD *dtd)
924{
925 XML_Parser parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000926
927 if (memsuite) {
928 XML_Memory_Handling_Suite *mtemp;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000929 parser = (XML_Parser)
930 memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
931 if (parser != NULL) {
932 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
933 mtemp->malloc_fcn = memsuite->malloc_fcn;
934 mtemp->realloc_fcn = memsuite->realloc_fcn;
935 mtemp->free_fcn = memsuite->free_fcn;
936 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000937 }
938 else {
939 XML_Memory_Handling_Suite *mtemp;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000940 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
941 if (parser != NULL) {
942 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
943 mtemp->malloc_fcn = malloc;
944 mtemp->realloc_fcn = realloc;
945 mtemp->free_fcn = free;
946 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000947 }
948
949 if (!parser)
950 return parser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000951
Benjamin Peterson4e211002018-06-26 19:25:45 -0700952 parser->m_buffer = NULL;
953 parser->m_bufferLim = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000954
Benjamin Peterson4e211002018-06-26 19:25:45 -0700955 parser->m_attsSize = INIT_ATTS_SIZE;
956 parser->m_atts = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
957 if (parser->m_atts == NULL) {
958 FREE(parser, parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000959 return NULL;
960 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700961#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -0700962 parser->m_attInfo = (XML_AttrInfo*)MALLOC(parser, parser->m_attsSize * sizeof(XML_AttrInfo));
963 if (parser->m_attInfo == NULL) {
964 FREE(parser, parser->m_atts);
965 FREE(parser, parser);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700966 return NULL;
967 }
968#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -0700969 parser->m_dataBuf = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
970 if (parser->m_dataBuf == NULL) {
971 FREE(parser, parser->m_atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700972#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -0700973 FREE(parser, parser->m_attInfo);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700974#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -0700975 FREE(parser, parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000976 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000977 }
Benjamin Peterson4e211002018-06-26 19:25:45 -0700978 parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000979
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000980 if (dtd)
Benjamin Peterson4e211002018-06-26 19:25:45 -0700981 parser->m_dtd = dtd;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000982 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -0700983 parser->m_dtd = dtdCreate(&parser->m_mem);
984 if (parser->m_dtd == NULL) {
985 FREE(parser, parser->m_dataBuf);
986 FREE(parser, parser->m_atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700987#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -0700988 FREE(parser, parser->m_attInfo);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700989#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -0700990 FREE(parser, parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000991 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000992 }
993 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000994
Benjamin Peterson4e211002018-06-26 19:25:45 -0700995 parser->m_freeBindingList = NULL;
996 parser->m_freeTagList = NULL;
997 parser->m_freeInternalEntities = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000998
Benjamin Peterson4e211002018-06-26 19:25:45 -0700999 parser->m_groupSize = 0;
1000 parser->m_groupConnector = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001001
Benjamin Peterson4e211002018-06-26 19:25:45 -07001002 parser->m_unknownEncodingHandler = NULL;
1003 parser->m_unknownEncodingHandlerData = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001004
Benjamin Peterson4e211002018-06-26 19:25:45 -07001005 parser->m_namespaceSeparator = ASCII_EXCL;
1006 parser->m_ns = XML_FALSE;
1007 parser->m_ns_triplets = XML_FALSE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001008
Benjamin Peterson4e211002018-06-26 19:25:45 -07001009 parser->m_nsAtts = NULL;
1010 parser->m_nsAttsVersion = 0;
1011 parser->m_nsAttsPower = 0;
Fred Drake08317ae2003-10-21 15:38:55 +00001012
Benjamin Peterson4e211002018-06-26 19:25:45 -07001013 parser->m_protocolEncodingName = NULL;
Victor Stinner93d0cb52017-08-18 23:43:54 +02001014
Benjamin Peterson4e211002018-06-26 19:25:45 -07001015 poolInit(&parser->m_tempPool, &(parser->m_mem));
1016 poolInit(&parser->m_temp2Pool, &(parser->m_mem));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001017 parserInit(parser, encodingName);
1018
Benjamin Peterson4e211002018-06-26 19:25:45 -07001019 if (encodingName && !parser->m_protocolEncodingName) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001020 XML_ParserFree(parser);
1021 return NULL;
1022 }
1023
1024 if (nameSep) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001025 parser->m_ns = XML_TRUE;
1026 parser->m_internalEncoding = XmlGetInternalEncodingNS();
1027 parser->m_namespaceSeparator = *nameSep;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001028 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001029 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001030 parser->m_internalEncoding = XmlGetInternalEncoding();
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001031 }
1032
1033 return parser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001034}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001035
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001036static void
1037parserInit(XML_Parser parser, const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001038{
Benjamin Peterson4e211002018-06-26 19:25:45 -07001039 parser->m_processor = prologInitProcessor;
1040 XmlPrologStateInit(&parser->m_prologState);
Victor Stinner93d0cb52017-08-18 23:43:54 +02001041 if (encodingName != NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001042 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
Victor Stinner93d0cb52017-08-18 23:43:54 +02001043 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001044 parser->m_curBase = NULL;
1045 XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1046 parser->m_userData = NULL;
1047 parser->m_handlerArg = NULL;
1048 parser->m_startElementHandler = NULL;
1049 parser->m_endElementHandler = NULL;
1050 parser->m_characterDataHandler = NULL;
1051 parser->m_processingInstructionHandler = NULL;
1052 parser->m_commentHandler = NULL;
1053 parser->m_startCdataSectionHandler = NULL;
1054 parser->m_endCdataSectionHandler = NULL;
1055 parser->m_defaultHandler = NULL;
1056 parser->m_startDoctypeDeclHandler = NULL;
1057 parser->m_endDoctypeDeclHandler = NULL;
1058 parser->m_unparsedEntityDeclHandler = NULL;
1059 parser->m_notationDeclHandler = NULL;
1060 parser->m_startNamespaceDeclHandler = NULL;
1061 parser->m_endNamespaceDeclHandler = NULL;
1062 parser->m_notStandaloneHandler = NULL;
1063 parser->m_externalEntityRefHandler = NULL;
1064 parser->m_externalEntityRefHandlerArg = parser;
1065 parser->m_skippedEntityHandler = NULL;
1066 parser->m_elementDeclHandler = NULL;
1067 parser->m_attlistDeclHandler = NULL;
1068 parser->m_entityDeclHandler = NULL;
1069 parser->m_xmlDeclHandler = NULL;
1070 parser->m_bufferPtr = parser->m_buffer;
1071 parser->m_bufferEnd = parser->m_buffer;
1072 parser->m_parseEndByteIndex = 0;
1073 parser->m_parseEndPtr = NULL;
1074 parser->m_declElementType = NULL;
1075 parser->m_declAttributeId = NULL;
1076 parser->m_declEntity = NULL;
1077 parser->m_doctypeName = NULL;
1078 parser->m_doctypeSysid = NULL;
1079 parser->m_doctypePubid = NULL;
1080 parser->m_declAttributeType = NULL;
1081 parser->m_declNotationName = NULL;
1082 parser->m_declNotationPublicId = NULL;
1083 parser->m_declAttributeIsCdata = XML_FALSE;
1084 parser->m_declAttributeIsId = XML_FALSE;
1085 memset(&parser->m_position, 0, sizeof(POSITION));
1086 parser->m_errorCode = XML_ERROR_NONE;
1087 parser->m_eventPtr = NULL;
1088 parser->m_eventEndPtr = NULL;
1089 parser->m_positionPtr = NULL;
1090 parser->m_openInternalEntities = NULL;
1091 parser->m_defaultExpandInternalEntities = XML_TRUE;
1092 parser->m_tagLevel = 0;
1093 parser->m_tagStack = NULL;
1094 parser->m_inheritedBindings = NULL;
1095 parser->m_nSpecifiedAtts = 0;
1096 parser->m_unknownEncodingMem = NULL;
1097 parser->m_unknownEncodingRelease = NULL;
1098 parser->m_unknownEncodingData = NULL;
1099 parser->m_parentParser = NULL;
1100 parser->m_parsingStatus.parsing = XML_INITIALIZED;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001101#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07001102 parser->m_isParamEntity = XML_FALSE;
1103 parser->m_useForeignDTD = XML_FALSE;
1104 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001105#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07001106 parser->m_hash_secret_salt = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001107}
1108
Benjamin Peterson4e211002018-06-26 19:25:45 -07001109/* moves list of bindings to m_freeBindingList */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001110static void FASTCALL
1111moveToFreeBindingList(XML_Parser parser, BINDING *bindings)
1112{
1113 while (bindings) {
1114 BINDING *b = bindings;
1115 bindings = bindings->nextTagBinding;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001116 b->nextTagBinding = parser->m_freeBindingList;
1117 parser->m_freeBindingList = b;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001118 }
1119}
1120
Fred Drake08317ae2003-10-21 15:38:55 +00001121XML_Bool XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001122XML_ParserReset(XML_Parser parser, const XML_Char *encodingName)
1123{
1124 TAG *tStk;
Fred Drake31d485c2004-08-03 07:06:22 +00001125 OPEN_INTERNAL_ENTITY *openEntityList;
Victor Stinner5ff71322017-06-21 14:39:22 +02001126
1127 if (parser == NULL)
1128 return XML_FALSE;
1129
Benjamin Peterson4e211002018-06-26 19:25:45 -07001130 if (parser->m_parentParser)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001131 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001132 /* move m_tagStack to m_freeTagList */
1133 tStk = parser->m_tagStack;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001134 while (tStk) {
1135 TAG *tag = tStk;
1136 tStk = tStk->parent;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001137 tag->parent = parser->m_freeTagList;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001138 moveToFreeBindingList(parser, tag->bindings);
1139 tag->bindings = NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001140 parser->m_freeTagList = tag;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001141 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001142 /* move m_openInternalEntities to m_freeInternalEntities */
1143 openEntityList = parser->m_openInternalEntities;
Fred Drake31d485c2004-08-03 07:06:22 +00001144 while (openEntityList) {
1145 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1146 openEntityList = openEntity->next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001147 openEntity->next = parser->m_freeInternalEntities;
1148 parser->m_freeInternalEntities = openEntity;
Fred Drake31d485c2004-08-03 07:06:22 +00001149 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001150 moveToFreeBindingList(parser, parser->m_inheritedBindings);
1151 FREE(parser, parser->m_unknownEncodingMem);
1152 if (parser->m_unknownEncodingRelease)
1153 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1154 poolClear(&parser->m_tempPool);
1155 poolClear(&parser->m_temp2Pool);
1156 FREE(parser, (void *)parser->m_protocolEncodingName);
1157 parser->m_protocolEncodingName = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001158 parserInit(parser, encodingName);
Benjamin Peterson4e211002018-06-26 19:25:45 -07001159 dtdReset(parser->m_dtd, &parser->m_mem);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001160 return XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001161}
1162
Fred Drake08317ae2003-10-21 15:38:55 +00001163enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001164XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
1165{
Victor Stinner5ff71322017-06-21 14:39:22 +02001166 if (parser == NULL)
1167 return XML_STATUS_ERROR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001168 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1169 XXX There's no way for the caller to determine which of the
1170 XXX possible error cases caused the XML_STATUS_ERROR return.
1171 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001172 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001173 return XML_STATUS_ERROR;
Victor Stinner93d0cb52017-08-18 23:43:54 +02001174
1175 /* Get rid of any previous encoding name */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001176 FREE(parser, (void *)parser->m_protocolEncodingName);
Victor Stinner93d0cb52017-08-18 23:43:54 +02001177
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001178 if (encodingName == NULL)
Victor Stinner93d0cb52017-08-18 23:43:54 +02001179 /* No new encoding name */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001180 parser->m_protocolEncodingName = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001181 else {
Victor Stinner93d0cb52017-08-18 23:43:54 +02001182 /* Copy the new encoding name into allocated memory */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001183 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1184 if (!parser->m_protocolEncodingName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001185 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001186 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001187 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001188}
1189
Fred Drake08317ae2003-10-21 15:38:55 +00001190XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001191XML_ExternalEntityParserCreate(XML_Parser oldParser,
1192 const XML_Char *context,
1193 const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001194{
1195 XML_Parser parser = oldParser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001196 DTD *newDtd = NULL;
Victor Stinner5ff71322017-06-21 14:39:22 +02001197 DTD *oldDtd;
1198 XML_StartElementHandler oldStartElementHandler;
1199 XML_EndElementHandler oldEndElementHandler;
1200 XML_CharacterDataHandler oldCharacterDataHandler;
1201 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1202 XML_CommentHandler oldCommentHandler;
1203 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1204 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1205 XML_DefaultHandler oldDefaultHandler;
1206 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1207 XML_NotationDeclHandler oldNotationDeclHandler;
1208 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1209 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1210 XML_NotStandaloneHandler oldNotStandaloneHandler;
1211 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1212 XML_SkippedEntityHandler oldSkippedEntityHandler;
1213 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1214 XML_ElementDeclHandler oldElementDeclHandler;
1215 XML_AttlistDeclHandler oldAttlistDeclHandler;
1216 XML_EntityDeclHandler oldEntityDeclHandler;
1217 XML_XmlDeclHandler oldXmlDeclHandler;
1218 ELEMENT_TYPE * oldDeclElementType;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001219
Victor Stinner5ff71322017-06-21 14:39:22 +02001220 void *oldUserData;
1221 void *oldHandlerArg;
1222 XML_Bool oldDefaultExpandInternalEntities;
1223 XML_Parser oldExternalEntityRefHandlerArg;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001224#ifdef XML_DTD
Victor Stinner5ff71322017-06-21 14:39:22 +02001225 enum XML_ParamEntityParsing oldParamEntityParsing;
1226 int oldInEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001227#endif
Victor Stinner5ff71322017-06-21 14:39:22 +02001228 XML_Bool oldns_triplets;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001229 /* Note that the new parser shares the same hash secret as the old
1230 parser, so that dtdCopy and copyEntityTable can lookup values
1231 from hash tables associated with either parser without us having
1232 to worry which hash secrets each table has.
1233 */
Victor Stinner5ff71322017-06-21 14:39:22 +02001234 unsigned long oldhash_secret_salt;
1235
1236 /* Validate the oldParser parameter before we pull everything out of it */
1237 if (oldParser == NULL)
1238 return NULL;
1239
1240 /* Stash the original parser contents on the stack */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001241 oldDtd = parser->m_dtd;
1242 oldStartElementHandler = parser->m_startElementHandler;
1243 oldEndElementHandler = parser->m_endElementHandler;
1244 oldCharacterDataHandler = parser->m_characterDataHandler;
1245 oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1246 oldCommentHandler = parser->m_commentHandler;
1247 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1248 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1249 oldDefaultHandler = parser->m_defaultHandler;
1250 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1251 oldNotationDeclHandler = parser->m_notationDeclHandler;
1252 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1253 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1254 oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1255 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1256 oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1257 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1258 oldElementDeclHandler = parser->m_elementDeclHandler;
1259 oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1260 oldEntityDeclHandler = parser->m_entityDeclHandler;
1261 oldXmlDeclHandler = parser->m_xmlDeclHandler;
1262 oldDeclElementType = parser->m_declElementType;
Victor Stinner5ff71322017-06-21 14:39:22 +02001263
Benjamin Peterson4e211002018-06-26 19:25:45 -07001264 oldUserData = parser->m_userData;
1265 oldHandlerArg = parser->m_handlerArg;
1266 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1267 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
Victor Stinner5ff71322017-06-21 14:39:22 +02001268#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07001269 oldParamEntityParsing = parser->m_paramEntityParsing;
1270 oldInEntityValue = parser->m_prologState.inEntityValue;
Victor Stinner5ff71322017-06-21 14:39:22 +02001271#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07001272 oldns_triplets = parser->m_ns_triplets;
Victor Stinner5ff71322017-06-21 14:39:22 +02001273 /* Note that the new parser shares the same hash secret as the old
1274 parser, so that dtdCopy and copyEntityTable can lookup values
1275 from hash tables associated with either parser without us having
1276 to worry which hash secrets each table has.
1277 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001278 oldhash_secret_salt = parser->m_hash_secret_salt;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001279
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001280#ifdef XML_DTD
1281 if (!context)
1282 newDtd = oldDtd;
1283#endif /* XML_DTD */
1284
1285 /* Note that the magical uses of the pre-processor to make field
1286 access look more like C++ require that `parser' be overwritten
1287 here. This makes this function more painful to follow than it
1288 would be otherwise.
1289 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001290 if (parser->m_ns) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001291 XML_Char tmp[2];
Benjamin Peterson4e211002018-06-26 19:25:45 -07001292 *tmp = parser->m_namespaceSeparator;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001293 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001294 }
1295 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001296 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001297 }
1298
1299 if (!parser)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001300 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001301
Benjamin Peterson4e211002018-06-26 19:25:45 -07001302 parser->m_startElementHandler = oldStartElementHandler;
1303 parser->m_endElementHandler = oldEndElementHandler;
1304 parser->m_characterDataHandler = oldCharacterDataHandler;
1305 parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1306 parser->m_commentHandler = oldCommentHandler;
1307 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1308 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1309 parser->m_defaultHandler = oldDefaultHandler;
1310 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1311 parser->m_notationDeclHandler = oldNotationDeclHandler;
1312 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1313 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1314 parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1315 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1316 parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1317 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1318 parser->m_elementDeclHandler = oldElementDeclHandler;
1319 parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1320 parser->m_entityDeclHandler = oldEntityDeclHandler;
1321 parser->m_xmlDeclHandler = oldXmlDeclHandler;
1322 parser->m_declElementType = oldDeclElementType;
1323 parser->m_userData = oldUserData;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001324 if (oldUserData == oldHandlerArg)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001325 parser->m_handlerArg = parser->m_userData;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001326 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001327 parser->m_handlerArg = parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001328 if (oldExternalEntityRefHandlerArg != oldParser)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001329 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1330 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1331 parser->m_ns_triplets = oldns_triplets;
1332 parser->m_hash_secret_salt = oldhash_secret_salt;
1333 parser->m_parentParser = oldParser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001334#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07001335 parser->m_paramEntityParsing = oldParamEntityParsing;
1336 parser->m_prologState.inEntityValue = oldInEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001337 if (context) {
1338#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001339 if (!dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001340 || !setContext(parser, context)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001341 XML_ParserFree(parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001342 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001343 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001344 parser->m_processor = externalEntityInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001345#ifdef XML_DTD
1346 }
1347 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001348 /* The DTD instance referenced by parser->m_dtd is shared between the document's
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001349 root parser and external PE parsers, therefore one does not need to
1350 call setContext. In addition, one also *must* not call setContext,
1351 because this would overwrite existing prefix->binding pointers in
Benjamin Peterson4e211002018-06-26 19:25:45 -07001352 parser->m_dtd with ones that get destroyed with the external PE parser.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001353 This would leave those prefixes with dangling pointers.
1354 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001355 parser->m_isParamEntity = XML_TRUE;
1356 XmlPrologStateInitExternalEntity(&parser->m_prologState);
1357 parser->m_processor = externalParEntInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001358 }
1359#endif /* XML_DTD */
1360 return parser;
1361}
1362
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001363static void FASTCALL
1364destroyBindings(BINDING *bindings, XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001365{
1366 for (;;) {
1367 BINDING *b = bindings;
1368 if (!b)
1369 break;
1370 bindings = b->nextTagBinding;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001371 FREE(parser, b->uri);
1372 FREE(parser, b);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001373 }
1374}
1375
Fred Drake08317ae2003-10-21 15:38:55 +00001376void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001377XML_ParserFree(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001378{
Fred Drake31d485c2004-08-03 07:06:22 +00001379 TAG *tagList;
1380 OPEN_INTERNAL_ENTITY *entityList;
1381 if (parser == NULL)
1382 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001383 /* free m_tagStack and m_freeTagList */
1384 tagList = parser->m_tagStack;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001385 for (;;) {
1386 TAG *p;
Fred Drake31d485c2004-08-03 07:06:22 +00001387 if (tagList == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001388 if (parser->m_freeTagList == NULL)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001389 break;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001390 tagList = parser->m_freeTagList;
1391 parser->m_freeTagList = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001392 }
Fred Drake31d485c2004-08-03 07:06:22 +00001393 p = tagList;
1394 tagList = tagList->parent;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001395 FREE(parser, p->buf);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001396 destroyBindings(p->bindings, parser);
Benjamin Peterson4e211002018-06-26 19:25:45 -07001397 FREE(parser, p);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001398 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001399 /* free m_openInternalEntities and m_freeInternalEntities */
1400 entityList = parser->m_openInternalEntities;
Fred Drake31d485c2004-08-03 07:06:22 +00001401 for (;;) {
1402 OPEN_INTERNAL_ENTITY *openEntity;
1403 if (entityList == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001404 if (parser->m_freeInternalEntities == NULL)
Fred Drake31d485c2004-08-03 07:06:22 +00001405 break;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001406 entityList = parser->m_freeInternalEntities;
1407 parser->m_freeInternalEntities = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00001408 }
1409 openEntity = entityList;
1410 entityList = entityList->next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001411 FREE(parser, openEntity);
Fred Drake31d485c2004-08-03 07:06:22 +00001412 }
1413
Benjamin Peterson4e211002018-06-26 19:25:45 -07001414 destroyBindings(parser->m_freeBindingList, parser);
1415 destroyBindings(parser->m_inheritedBindings, parser);
1416 poolDestroy(&parser->m_tempPool);
1417 poolDestroy(&parser->m_temp2Pool);
1418 FREE(parser, (void *)parser->m_protocolEncodingName);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001419#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001420 /* external parameter entity parsers share the DTD structure
1421 parser->m_dtd with the root parser, so we must not destroy it
1422 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001423 if (!parser->m_isParamEntity && parser->m_dtd)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001424#else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001425 if (parser->m_dtd)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001426#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001427 dtdDestroy(parser->m_dtd, (XML_Bool)!parser->m_parentParser, &parser->m_mem);
1428 FREE(parser, (void *)parser->m_atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001429#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -07001430 FREE(parser, (void *)parser->m_attInfo);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001431#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07001432 FREE(parser, parser->m_groupConnector);
1433 FREE(parser, parser->m_buffer);
1434 FREE(parser, parser->m_dataBuf);
1435 FREE(parser, parser->m_nsAtts);
1436 FREE(parser, parser->m_unknownEncodingMem);
1437 if (parser->m_unknownEncodingRelease)
1438 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1439 FREE(parser, parser);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001440}
1441
Fred Drake08317ae2003-10-21 15:38:55 +00001442void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001443XML_UseParserAsHandlerArg(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001444{
Victor Stinner5ff71322017-06-21 14:39:22 +02001445 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001446 parser->m_handlerArg = parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001447}
1448
Fred Drake08317ae2003-10-21 15:38:55 +00001449enum XML_Error XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001450XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD)
1451{
Victor Stinner5ff71322017-06-21 14:39:22 +02001452 if (parser == NULL)
1453 return XML_ERROR_INVALID_ARGUMENT;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001454#ifdef XML_DTD
1455 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001456 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001457 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001458 parser->m_useForeignDTD = useDTD;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001459 return XML_ERROR_NONE;
1460#else
1461 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1462#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001463}
1464
Fred Drake08317ae2003-10-21 15:38:55 +00001465void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001466XML_SetReturnNSTriplet(XML_Parser parser, int do_nst)
1467{
Victor Stinner5ff71322017-06-21 14:39:22 +02001468 if (parser == NULL)
1469 return;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001470 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001471 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001472 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001473 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001474}
1475
Fred Drake08317ae2003-10-21 15:38:55 +00001476void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001477XML_SetUserData(XML_Parser parser, void *p)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001478{
Victor Stinner5ff71322017-06-21 14:39:22 +02001479 if (parser == NULL)
1480 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001481 if (parser->m_handlerArg == parser->m_userData)
1482 parser->m_handlerArg = parser->m_userData = p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001483 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001484 parser->m_userData = p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001485}
1486
Fred Drake08317ae2003-10-21 15:38:55 +00001487enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001488XML_SetBase(XML_Parser parser, const XML_Char *p)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001489{
Victor Stinner5ff71322017-06-21 14:39:22 +02001490 if (parser == NULL)
1491 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001492 if (p) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001493 p = poolCopyString(&parser->m_dtd->pool, p);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001494 if (!p)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001495 return XML_STATUS_ERROR;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001496 parser->m_curBase = p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001497 }
1498 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001499 parser->m_curBase = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001500 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001501}
1502
Fred Drake08317ae2003-10-21 15:38:55 +00001503const XML_Char * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001504XML_GetBase(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001505{
Victor Stinner5ff71322017-06-21 14:39:22 +02001506 if (parser == NULL)
1507 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001508 return parser->m_curBase;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001509}
1510
Fred Drake08317ae2003-10-21 15:38:55 +00001511int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001512XML_GetSpecifiedAttributeCount(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001513{
Victor Stinner5ff71322017-06-21 14:39:22 +02001514 if (parser == NULL)
1515 return -1;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001516 return parser->m_nSpecifiedAtts;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001517}
1518
Fred Drake08317ae2003-10-21 15:38:55 +00001519int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001520XML_GetIdAttributeIndex(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001521{
Victor Stinner5ff71322017-06-21 14:39:22 +02001522 if (parser == NULL)
1523 return -1;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001524 return parser->m_idAttIndex;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001525}
1526
#ifdef XML_ATTR_INFO
/* Return m_attInfo, or NULL when `parser` is NULL. Only compiled when
   XML_ATTR_INFO is defined.
*/
const XML_AttrInfo * XMLCALL
XML_GetAttributeInfo(XML_Parser parser)
{
  return (parser != NULL) ? parser->m_attInfo : NULL;
}
#endif
1536
Fred Drake08317ae2003-10-21 15:38:55 +00001537void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001538XML_SetElementHandler(XML_Parser parser,
1539 XML_StartElementHandler start,
1540 XML_EndElementHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001541{
Victor Stinner5ff71322017-06-21 14:39:22 +02001542 if (parser == NULL)
1543 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001544 parser->m_startElementHandler = start;
1545 parser->m_endElementHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001546}
1547
Fred Drake08317ae2003-10-21 15:38:55 +00001548void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001549XML_SetStartElementHandler(XML_Parser parser,
1550 XML_StartElementHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001551 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001552 parser->m_startElementHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001553}
1554
Fred Drake08317ae2003-10-21 15:38:55 +00001555void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001556XML_SetEndElementHandler(XML_Parser parser,
1557 XML_EndElementHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001558 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001559 parser->m_endElementHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001560}
1561
Fred Drake08317ae2003-10-21 15:38:55 +00001562void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001563XML_SetCharacterDataHandler(XML_Parser parser,
1564 XML_CharacterDataHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001565{
Victor Stinner5ff71322017-06-21 14:39:22 +02001566 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001567 parser->m_characterDataHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001568}
1569
Fred Drake08317ae2003-10-21 15:38:55 +00001570void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001571XML_SetProcessingInstructionHandler(XML_Parser parser,
1572 XML_ProcessingInstructionHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001573{
Victor Stinner5ff71322017-06-21 14:39:22 +02001574 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001575 parser->m_processingInstructionHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001576}
1577
Fred Drake08317ae2003-10-21 15:38:55 +00001578void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001579XML_SetCommentHandler(XML_Parser parser,
1580 XML_CommentHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001581{
Victor Stinner5ff71322017-06-21 14:39:22 +02001582 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001583 parser->m_commentHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001584}
1585
Fred Drake08317ae2003-10-21 15:38:55 +00001586void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001587XML_SetCdataSectionHandler(XML_Parser parser,
1588 XML_StartCdataSectionHandler start,
1589 XML_EndCdataSectionHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001590{
Victor Stinner5ff71322017-06-21 14:39:22 +02001591 if (parser == NULL)
1592 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001593 parser->m_startCdataSectionHandler = start;
1594 parser->m_endCdataSectionHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001595}
1596
Fred Drake08317ae2003-10-21 15:38:55 +00001597void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001598XML_SetStartCdataSectionHandler(XML_Parser parser,
1599 XML_StartCdataSectionHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001600 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001601 parser->m_startCdataSectionHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001602}
1603
Fred Drake08317ae2003-10-21 15:38:55 +00001604void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001605XML_SetEndCdataSectionHandler(XML_Parser parser,
1606 XML_EndCdataSectionHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001607 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001608 parser->m_endCdataSectionHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001609}
1610
Fred Drake08317ae2003-10-21 15:38:55 +00001611void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001612XML_SetDefaultHandler(XML_Parser parser,
1613 XML_DefaultHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001614{
Victor Stinner5ff71322017-06-21 14:39:22 +02001615 if (parser == NULL)
1616 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001617 parser->m_defaultHandler = handler;
1618 parser->m_defaultExpandInternalEntities = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001619}
1620
Fred Drake08317ae2003-10-21 15:38:55 +00001621void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001622XML_SetDefaultHandlerExpand(XML_Parser parser,
1623 XML_DefaultHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001624{
Victor Stinner5ff71322017-06-21 14:39:22 +02001625 if (parser == NULL)
1626 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001627 parser->m_defaultHandler = handler;
1628 parser->m_defaultExpandInternalEntities = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001629}
1630
Fred Drake08317ae2003-10-21 15:38:55 +00001631void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001632XML_SetDoctypeDeclHandler(XML_Parser parser,
1633 XML_StartDoctypeDeclHandler start,
1634 XML_EndDoctypeDeclHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001635{
Victor Stinner5ff71322017-06-21 14:39:22 +02001636 if (parser == NULL)
1637 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001638 parser->m_startDoctypeDeclHandler = start;
1639 parser->m_endDoctypeDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001640}
1641
Fred Drake08317ae2003-10-21 15:38:55 +00001642void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001643XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1644 XML_StartDoctypeDeclHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001645 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001646 parser->m_startDoctypeDeclHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001647}
1648
Fred Drake08317ae2003-10-21 15:38:55 +00001649void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001650XML_SetEndDoctypeDeclHandler(XML_Parser parser,
1651 XML_EndDoctypeDeclHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001652 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001653 parser->m_endDoctypeDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001654}
1655
Fred Drake08317ae2003-10-21 15:38:55 +00001656void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001657XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1658 XML_UnparsedEntityDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001659{
Victor Stinner5ff71322017-06-21 14:39:22 +02001660 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001661 parser->m_unparsedEntityDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001662}
1663
Fred Drake08317ae2003-10-21 15:38:55 +00001664void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001665XML_SetNotationDeclHandler(XML_Parser parser,
1666 XML_NotationDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001667{
Victor Stinner5ff71322017-06-21 14:39:22 +02001668 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001669 parser->m_notationDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001670}
1671
Fred Drake08317ae2003-10-21 15:38:55 +00001672void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001673XML_SetNamespaceDeclHandler(XML_Parser parser,
1674 XML_StartNamespaceDeclHandler start,
1675 XML_EndNamespaceDeclHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001676{
Victor Stinner5ff71322017-06-21 14:39:22 +02001677 if (parser == NULL)
1678 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001679 parser->m_startNamespaceDeclHandler = start;
1680 parser->m_endNamespaceDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001681}
1682
Fred Drake08317ae2003-10-21 15:38:55 +00001683void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001684XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1685 XML_StartNamespaceDeclHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001686 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001687 parser->m_startNamespaceDeclHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001688}
1689
Fred Drake08317ae2003-10-21 15:38:55 +00001690void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001691XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1692 XML_EndNamespaceDeclHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001693 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001694 parser->m_endNamespaceDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001695}
1696
Fred Drake08317ae2003-10-21 15:38:55 +00001697void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001698XML_SetNotStandaloneHandler(XML_Parser parser,
1699 XML_NotStandaloneHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001700{
Victor Stinner5ff71322017-06-21 14:39:22 +02001701 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001702 parser->m_notStandaloneHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001703}
1704
Fred Drake08317ae2003-10-21 15:38:55 +00001705void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001706XML_SetExternalEntityRefHandler(XML_Parser parser,
1707 XML_ExternalEntityRefHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001708{
Victor Stinner5ff71322017-06-21 14:39:22 +02001709 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001710 parser->m_externalEntityRefHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001711}
1712
Fred Drake08317ae2003-10-21 15:38:55 +00001713void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001714XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001715{
Victor Stinner5ff71322017-06-21 14:39:22 +02001716 if (parser == NULL)
1717 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001718 if (arg)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001719 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001720 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001721 parser->m_externalEntityRefHandlerArg = parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001722}
1723
Fred Drake08317ae2003-10-21 15:38:55 +00001724void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001725XML_SetSkippedEntityHandler(XML_Parser parser,
1726 XML_SkippedEntityHandler handler)
1727{
Victor Stinner5ff71322017-06-21 14:39:22 +02001728 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001729 parser->m_skippedEntityHandler = handler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001730}
1731
Fred Drake08317ae2003-10-21 15:38:55 +00001732void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001733XML_SetUnknownEncodingHandler(XML_Parser parser,
1734 XML_UnknownEncodingHandler handler,
1735 void *data)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001736{
Victor Stinner5ff71322017-06-21 14:39:22 +02001737 if (parser == NULL)
1738 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001739 parser->m_unknownEncodingHandler = handler;
1740 parser->m_unknownEncodingHandlerData = data;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001741}
1742
Fred Drake08317ae2003-10-21 15:38:55 +00001743void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001744XML_SetElementDeclHandler(XML_Parser parser,
1745 XML_ElementDeclHandler eldecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001746{
Victor Stinner5ff71322017-06-21 14:39:22 +02001747 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001748 parser->m_elementDeclHandler = eldecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001749}
1750
Fred Drake08317ae2003-10-21 15:38:55 +00001751void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001752XML_SetAttlistDeclHandler(XML_Parser parser,
1753 XML_AttlistDeclHandler attdecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001754{
Victor Stinner5ff71322017-06-21 14:39:22 +02001755 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001756 parser->m_attlistDeclHandler = attdecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001757}
1758
Fred Drake08317ae2003-10-21 15:38:55 +00001759void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001760XML_SetEntityDeclHandler(XML_Parser parser,
1761 XML_EntityDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001762{
Victor Stinner5ff71322017-06-21 14:39:22 +02001763 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001764 parser->m_entityDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001765}
1766
Fred Drake08317ae2003-10-21 15:38:55 +00001767void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001768XML_SetXmlDeclHandler(XML_Parser parser,
1769 XML_XmlDeclHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001770 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001771 parser->m_xmlDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001772}
1773
Fred Drake08317ae2003-10-21 15:38:55 +00001774int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001775XML_SetParamEntityParsing(XML_Parser parser,
1776 enum XML_ParamEntityParsing peParsing)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001777{
Victor Stinner5ff71322017-06-21 14:39:22 +02001778 if (parser == NULL)
1779 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001780 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001781 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001782 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001783#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07001784 parser->m_paramEntityParsing = peParsing;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001785 return 1;
1786#else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001787 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001788#endif
1789}
1790
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001791int XMLCALL
1792XML_SetHashSalt(XML_Parser parser,
1793 unsigned long hash_salt)
1794{
Victor Stinner5ff71322017-06-21 14:39:22 +02001795 if (parser == NULL)
1796 return 0;
1797 if (parser->m_parentParser)
1798 return XML_SetHashSalt(parser->m_parentParser, hash_salt);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001799 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001800 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001801 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001802 parser->m_hash_secret_salt = hash_salt;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001803 return 1;
1804}
1805
Fred Drake08317ae2003-10-21 15:38:55 +00001806enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001807XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001808{
Victor Stinner5ff71322017-06-21 14:39:22 +02001809 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02001810 if (parser != NULL)
1811 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
Victor Stinner5ff71322017-06-21 14:39:22 +02001812 return XML_STATUS_ERROR;
1813 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001814 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001815 case XML_SUSPENDED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001816 parser->m_errorCode = XML_ERROR_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00001817 return XML_STATUS_ERROR;
1818 case XML_FINISHED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001819 parser->m_errorCode = XML_ERROR_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001820 return XML_STATUS_ERROR;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001821 case XML_INITIALIZED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001822 if (parser->m_parentParser == NULL && !startParsing(parser)) {
1823 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001824 return XML_STATUS_ERROR;
1825 }
Benjamin Peterson5033aa72018-09-10 21:04:00 -07001826 /* fall through */
Fred Drake31d485c2004-08-03 07:06:22 +00001827 default:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001828 parser->m_parsingStatus.parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00001829 }
1830
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001831 if (len == 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001832 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001833 if (!isFinal)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001834 return XML_STATUS_OK;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001835 parser->m_positionPtr = parser->m_bufferPtr;
1836 parser->m_parseEndPtr = parser->m_bufferEnd;
Fred Drake31d485c2004-08-03 07:06:22 +00001837
1838 /* If data are left over from last buffer, and we now know that these
1839 data are the final chunk of input, then we have to check them again
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001840 to detect errors based on that fact.
Fred Drake31d485c2004-08-03 07:06:22 +00001841 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001842 parser->m_errorCode = parser->m_processor(parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00001843
Benjamin Peterson4e211002018-06-26 19:25:45 -07001844 if (parser->m_errorCode == XML_ERROR_NONE) {
1845 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001846 case XML_SUSPENDED:
Victor Stinner93d0cb52017-08-18 23:43:54 +02001847 /* It is hard to be certain, but it seems that this case
1848 * cannot occur. This code is cleaning up a previous parse
1849 * with no new data (since len == 0). Changing the parsing
1850 * state requires getting to execute a handler function, and
1851 * there doesn't seem to be an opportunity for that while in
1852 * this circumstance.
1853 *
1854 * Given the uncertainty, we retain the code but exclude it
1855 * from coverage tests.
1856 *
1857 * LCOV_EXCL_START
1858 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001859 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position);
1860 parser->m_positionPtr = parser->m_bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00001861 return XML_STATUS_SUSPENDED;
Victor Stinner93d0cb52017-08-18 23:43:54 +02001862 /* LCOV_EXCL_STOP */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001863 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00001864 case XML_PARSING:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001865 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001866 /* fall through */
1867 default:
1868 return XML_STATUS_OK;
1869 }
1870 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001871 parser->m_eventEndPtr = parser->m_eventPtr;
1872 parser->m_processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001873 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001874 }
1875#ifndef XML_CONTEXT_BYTES
Benjamin Peterson4e211002018-06-26 19:25:45 -07001876 else if (parser->m_bufferPtr == parser->m_bufferEnd) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001877 const char *end;
1878 int nLeftOver;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001879 enum XML_Status result;
Victor Stinner5ff71322017-06-21 14:39:22 +02001880 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001881 if (len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
1882 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1883 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1884 parser->m_processor = errorProcessor;
Victor Stinner5ff71322017-06-21 14:39:22 +02001885 return XML_STATUS_ERROR;
1886 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001887 parser->m_parseEndByteIndex += len;
1888 parser->m_positionPtr = s;
1889 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
Fred Drake31d485c2004-08-03 07:06:22 +00001890
Benjamin Peterson4e211002018-06-26 19:25:45 -07001891 parser->m_errorCode = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end);
Fred Drake31d485c2004-08-03 07:06:22 +00001892
Benjamin Peterson4e211002018-06-26 19:25:45 -07001893 if (parser->m_errorCode != XML_ERROR_NONE) {
1894 parser->m_eventEndPtr = parser->m_eventPtr;
1895 parser->m_processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001896 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001897 }
Fred Drake31d485c2004-08-03 07:06:22 +00001898 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001899 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001900 case XML_SUSPENDED:
1901 result = XML_STATUS_SUSPENDED;
1902 break;
1903 case XML_INITIALIZED:
1904 case XML_PARSING:
Fred Drake31d485c2004-08-03 07:06:22 +00001905 if (isFinal) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001906 parser->m_parsingStatus.parsing = XML_FINISHED;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001907 return XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001908 }
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001909 /* fall through */
1910 default:
1911 result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001912 }
1913 }
1914
Benjamin Peterson4e211002018-06-26 19:25:45 -07001915 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end, &parser->m_position);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001916 nLeftOver = s + len - end;
1917 if (nLeftOver) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001918 if (parser->m_buffer == NULL || nLeftOver > parser->m_bufferLim - parser->m_buffer) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001919 /* avoid _signed_ integer overflow */
1920 char *temp = NULL;
1921 const int bytesToAllocate = (int)((unsigned)len * 2U);
1922 if (bytesToAllocate > 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001923 temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate);
Victor Stinner5ff71322017-06-21 14:39:22 +02001924 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001925 if (temp == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001926 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1927 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1928 parser->m_processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001929 return XML_STATUS_ERROR;
1930 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001931 parser->m_buffer = temp;
1932 parser->m_bufferLim = parser->m_buffer + bytesToAllocate;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001933 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001934 memcpy(parser->m_buffer, end, nLeftOver);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001935 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001936 parser->m_bufferPtr = parser->m_buffer;
1937 parser->m_bufferEnd = parser->m_buffer + nLeftOver;
1938 parser->m_positionPtr = parser->m_bufferPtr;
1939 parser->m_parseEndPtr = parser->m_bufferEnd;
1940 parser->m_eventPtr = parser->m_bufferPtr;
1941 parser->m_eventEndPtr = parser->m_bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00001942 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001943 }
1944#endif /* not defined XML_CONTEXT_BYTES */
1945 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001946 void *buff = XML_GetBuffer(parser, len);
1947 if (buff == NULL)
1948 return XML_STATUS_ERROR;
1949 else {
1950 memcpy(buff, s, len);
1951 return XML_ParseBuffer(parser, len, isFinal);
1952 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001953 }
1954}
1955
Fred Drake08317ae2003-10-21 15:38:55 +00001956enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001957XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001958{
Fred Drake31d485c2004-08-03 07:06:22 +00001959 const char *start;
Neal Norwitz52ca0dd2006-01-07 21:21:16 +00001960 enum XML_Status result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001961
Victor Stinner5ff71322017-06-21 14:39:22 +02001962 if (parser == NULL)
1963 return XML_STATUS_ERROR;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001964 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001965 case XML_SUSPENDED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001966 parser->m_errorCode = XML_ERROR_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00001967 return XML_STATUS_ERROR;
1968 case XML_FINISHED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001969 parser->m_errorCode = XML_ERROR_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001970 return XML_STATUS_ERROR;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001971 case XML_INITIALIZED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001972 if (parser->m_parentParser == NULL && !startParsing(parser)) {
1973 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001974 return XML_STATUS_ERROR;
1975 }
Benjamin Peterson5033aa72018-09-10 21:04:00 -07001976 /* fall through */
Fred Drake31d485c2004-08-03 07:06:22 +00001977 default:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001978 parser->m_parsingStatus.parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00001979 }
1980
Benjamin Peterson4e211002018-06-26 19:25:45 -07001981 start = parser->m_bufferPtr;
1982 parser->m_positionPtr = start;
1983 parser->m_bufferEnd += len;
1984 parser->m_parseEndPtr = parser->m_bufferEnd;
1985 parser->m_parseEndByteIndex += len;
1986 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
Fred Drake31d485c2004-08-03 07:06:22 +00001987
Benjamin Peterson4e211002018-06-26 19:25:45 -07001988 parser->m_errorCode = parser->m_processor(parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00001989
Benjamin Peterson4e211002018-06-26 19:25:45 -07001990 if (parser->m_errorCode != XML_ERROR_NONE) {
1991 parser->m_eventEndPtr = parser->m_eventPtr;
1992 parser->m_processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001993 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001994 }
Fred Drake31d485c2004-08-03 07:06:22 +00001995 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001996 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001997 case XML_SUSPENDED:
1998 result = XML_STATUS_SUSPENDED;
1999 break;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002000 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00002001 case XML_PARSING:
2002 if (isFinal) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002003 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002004 return result;
2005 }
2006 default: ; /* should not happen */
2007 }
2008 }
2009
Benjamin Peterson4e211002018-06-26 19:25:45 -07002010 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position);
2011 parser->m_positionPtr = parser->m_bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00002012 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002013}
2014
Fred Drake08317ae2003-10-21 15:38:55 +00002015void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002016XML_GetBuffer(XML_Parser parser, int len)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002017{
Victor Stinner5ff71322017-06-21 14:39:22 +02002018 if (parser == NULL)
2019 return NULL;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002020 if (len < 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002021 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002022 return NULL;
2023 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002024 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002025 case XML_SUSPENDED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002026 parser->m_errorCode = XML_ERROR_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00002027 return NULL;
2028 case XML_FINISHED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002029 parser->m_errorCode = XML_ERROR_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002030 return NULL;
2031 default: ;
2032 }
2033
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002034 if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002035#ifdef XML_CONTEXT_BYTES
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002036 int keep;
Victor Stinner23ec4b52017-06-15 00:54:36 +02002037#endif /* defined XML_CONTEXT_BYTES */
2038 /* Do not invoke signed arithmetic overflow: */
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002039 int neededSize = (int) ((unsigned)len +
2040 (unsigned)EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd,
2041 parser->m_bufferPtr));
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002042 if (neededSize < 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002043 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002044 return NULL;
2045 }
2046#ifdef XML_CONTEXT_BYTES
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002047 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002048 if (keep > XML_CONTEXT_BYTES)
2049 keep = XML_CONTEXT_BYTES;
2050 neededSize += keep;
2051#endif /* defined XML_CONTEXT_BYTES */
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002052 if (neededSize <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002053#ifdef XML_CONTEXT_BYTES
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002054 if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
2055 int offset = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer) - keep;
2056 /* The buffer pointers cannot be NULL here; we have at least some bytes in the buffer */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002057 memmove(parser->m_buffer, &parser->m_buffer[offset], parser->m_bufferEnd - parser->m_bufferPtr + keep);
2058 parser->m_bufferEnd -= offset;
2059 parser->m_bufferPtr -= offset;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002060 }
2061#else
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002062 if (parser->m_buffer && parser->m_bufferPtr) {
2063 memmove(parser->m_buffer, parser->m_bufferPtr,
2064 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2065 parser->m_bufferEnd = parser->m_buffer +
2066 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2067 parser->m_bufferPtr = parser->m_buffer;
2068 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002069#endif /* not defined XML_CONTEXT_BYTES */
2070 }
2071 else {
2072 char *newBuf;
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002073 int bufferSize = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002074 if (bufferSize == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002075 bufferSize = INIT_BUFFER_SIZE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002076 do {
Victor Stinner23ec4b52017-06-15 00:54:36 +02002077 /* Do not invoke signed arithmetic overflow: */
2078 bufferSize = (int) (2U * (unsigned) bufferSize);
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002079 } while (bufferSize < neededSize && bufferSize > 0);
2080 if (bufferSize <= 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002081 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002082 return NULL;
2083 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002084 newBuf = (char *)MALLOC(parser, bufferSize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002085 if (newBuf == 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002086 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002087 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002088 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002089 parser->m_bufferLim = newBuf + bufferSize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002090#ifdef XML_CONTEXT_BYTES
Benjamin Peterson4e211002018-06-26 19:25:45 -07002091 if (parser->m_bufferPtr) {
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002092 int keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002093 if (keep > XML_CONTEXT_BYTES)
2094 keep = XML_CONTEXT_BYTES;
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002095 memcpy(newBuf, &parser->m_bufferPtr[-keep],
2096 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) + keep);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002097 FREE(parser, parser->m_buffer);
2098 parser->m_buffer = newBuf;
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002099 parser->m_bufferEnd = parser->m_buffer +
2100 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) + keep;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002101 parser->m_bufferPtr = parser->m_buffer + keep;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002102 }
2103 else {
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002104 /* This must be a brand new buffer with no data in it yet */
2105 parser->m_bufferEnd = newBuf;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002106 parser->m_bufferPtr = parser->m_buffer = newBuf;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002107 }
2108#else
Benjamin Peterson4e211002018-06-26 19:25:45 -07002109 if (parser->m_bufferPtr) {
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002110 memcpy(newBuf, parser->m_bufferPtr,
2111 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
Benjamin Peterson4e211002018-06-26 19:25:45 -07002112 FREE(parser, parser->m_buffer);
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002113 parser->m_bufferEnd = newBuf +
2114 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002115 }
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002116 else {
2117 /* This must be a brand new buffer with no data in it yet */
2118 parser->m_bufferEnd = newBuf;
2119 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002120 parser->m_bufferPtr = parser->m_buffer = newBuf;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002121#endif /* not defined XML_CONTEXT_BYTES */
2122 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002123 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2124 parser->m_positionPtr = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002125 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002126 return parser->m_bufferEnd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002127}
2128
Fred Drake31d485c2004-08-03 07:06:22 +00002129enum XML_Status XMLCALL
2130XML_StopParser(XML_Parser parser, XML_Bool resumable)
2131{
Victor Stinner5ff71322017-06-21 14:39:22 +02002132 if (parser == NULL)
2133 return XML_STATUS_ERROR;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002134 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002135 case XML_SUSPENDED:
2136 if (resumable) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002137 parser->m_errorCode = XML_ERROR_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00002138 return XML_STATUS_ERROR;
2139 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002140 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002141 break;
2142 case XML_FINISHED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002143 parser->m_errorCode = XML_ERROR_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002144 return XML_STATUS_ERROR;
2145 default:
2146 if (resumable) {
2147#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07002148 if (parser->m_isParamEntity) {
2149 parser->m_errorCode = XML_ERROR_SUSPEND_PE;
Fred Drake31d485c2004-08-03 07:06:22 +00002150 return XML_STATUS_ERROR;
2151 }
2152#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07002153 parser->m_parsingStatus.parsing = XML_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00002154 }
2155 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07002156 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002157 }
2158 return XML_STATUS_OK;
2159}
2160
2161enum XML_Status XMLCALL
2162XML_ResumeParser(XML_Parser parser)
2163{
Neal Norwitz52ca0dd2006-01-07 21:21:16 +00002164 enum XML_Status result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00002165
Victor Stinner5ff71322017-06-21 14:39:22 +02002166 if (parser == NULL)
2167 return XML_STATUS_ERROR;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002168 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2169 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00002170 return XML_STATUS_ERROR;
2171 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002172 parser->m_parsingStatus.parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00002173
Benjamin Peterson4e211002018-06-26 19:25:45 -07002174 parser->m_errorCode = parser->m_processor(parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00002175
Benjamin Peterson4e211002018-06-26 19:25:45 -07002176 if (parser->m_errorCode != XML_ERROR_NONE) {
2177 parser->m_eventEndPtr = parser->m_eventPtr;
2178 parser->m_processor = errorProcessor;
Fred Drake31d485c2004-08-03 07:06:22 +00002179 return XML_STATUS_ERROR;
2180 }
2181 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002182 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002183 case XML_SUSPENDED:
2184 result = XML_STATUS_SUSPENDED;
2185 break;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002186 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00002187 case XML_PARSING:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002188 if (parser->m_parsingStatus.finalBuffer) {
2189 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002190 return result;
2191 }
2192 default: ;
2193 }
2194 }
2195
Benjamin Peterson4e211002018-06-26 19:25:45 -07002196 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position);
2197 parser->m_positionPtr = parser->m_bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00002198 return result;
2199}
2200
2201void XMLCALL
2202XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status)
2203{
Victor Stinner5ff71322017-06-21 14:39:22 +02002204 if (parser == NULL)
2205 return;
Fred Drake31d485c2004-08-03 07:06:22 +00002206 assert(status != NULL);
2207 *status = parser->m_parsingStatus;
2208}
2209
Fred Drake08317ae2003-10-21 15:38:55 +00002210enum XML_Error XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002211XML_GetErrorCode(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002212{
Victor Stinner5ff71322017-06-21 14:39:22 +02002213 if (parser == NULL)
2214 return XML_ERROR_INVALID_ARGUMENT;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002215 return parser->m_errorCode;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002216}
2217
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002218XML_Index XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002219XML_GetCurrentByteIndex(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002220{
Victor Stinner5ff71322017-06-21 14:39:22 +02002221 if (parser == NULL)
2222 return -1;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002223 if (parser->m_eventPtr)
2224 return (XML_Index)(parser->m_parseEndByteIndex - (parser->m_parseEndPtr - parser->m_eventPtr));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002225 return -1;
2226}
2227
Fred Drake08317ae2003-10-21 15:38:55 +00002228int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002229XML_GetCurrentByteCount(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002230{
Victor Stinner5ff71322017-06-21 14:39:22 +02002231 if (parser == NULL)
2232 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002233 if (parser->m_eventEndPtr && parser->m_eventPtr)
2234 return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002235 return 0;
2236}
2237
Fred Drake08317ae2003-10-21 15:38:55 +00002238const char * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002239XML_GetInputContext(XML_Parser parser, int *offset, int *size)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002240{
2241#ifdef XML_CONTEXT_BYTES
Victor Stinner5ff71322017-06-21 14:39:22 +02002242 if (parser == NULL)
2243 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002244 if (parser->m_eventPtr && parser->m_buffer) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002245 if (offset != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07002246 *offset = (int)(parser->m_eventPtr - parser->m_buffer);
Victor Stinner5ff71322017-06-21 14:39:22 +02002247 if (size != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07002248 *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2249 return parser->m_buffer;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002250 }
Victor Stinner5ff71322017-06-21 14:39:22 +02002251#else
2252 (void)parser;
2253 (void)offset;
2254 (void)size;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002255#endif /* defined XML_CONTEXT_BYTES */
2256 return (char *) 0;
2257}
2258
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002259XML_Size XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002260XML_GetCurrentLineNumber(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002261{
Victor Stinner5ff71322017-06-21 14:39:22 +02002262 if (parser == NULL)
2263 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002264 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2265 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_eventPtr, &parser->m_position);
2266 parser->m_positionPtr = parser->m_eventPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002267 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002268 return parser->m_position.lineNumber + 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002269}
2270
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002271XML_Size XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002272XML_GetCurrentColumnNumber(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002273{
Victor Stinner5ff71322017-06-21 14:39:22 +02002274 if (parser == NULL)
2275 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002276 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2277 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_eventPtr, &parser->m_position);
2278 parser->m_positionPtr = parser->m_eventPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002279 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002280 return parser->m_position.columnNumber;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002281}
2282
Fred Drake08317ae2003-10-21 15:38:55 +00002283void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002284XML_FreeContentModel(XML_Parser parser, XML_Content *model)
2285{
Victor Stinner5ff71322017-06-21 14:39:22 +02002286 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07002287 FREE(parser, model);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002288}
2289
Fred Drake08317ae2003-10-21 15:38:55 +00002290void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002291XML_MemMalloc(XML_Parser parser, size_t size)
2292{
Victor Stinner5ff71322017-06-21 14:39:22 +02002293 if (parser == NULL)
2294 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002295 return MALLOC(parser, size);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002296}
2297
Fred Drake08317ae2003-10-21 15:38:55 +00002298void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002299XML_MemRealloc(XML_Parser parser, void *ptr, size_t size)
2300{
Victor Stinner5ff71322017-06-21 14:39:22 +02002301 if (parser == NULL)
2302 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002303 return REALLOC(parser, ptr, size);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002304}
2305
Fred Drake08317ae2003-10-21 15:38:55 +00002306void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002307XML_MemFree(XML_Parser parser, void *ptr)
2308{
Victor Stinner5ff71322017-06-21 14:39:22 +02002309 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07002310 FREE(parser, ptr);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002311}
2312
Fred Drake08317ae2003-10-21 15:38:55 +00002313void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002314XML_DefaultCurrent(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002315{
Victor Stinner5ff71322017-06-21 14:39:22 +02002316 if (parser == NULL)
2317 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002318 if (parser->m_defaultHandler) {
2319 if (parser->m_openInternalEntities)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002320 reportDefault(parser,
Benjamin Peterson4e211002018-06-26 19:25:45 -07002321 parser->m_internalEncoding,
2322 parser->m_openInternalEntities->internalEventPtr,
2323 parser->m_openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002324 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07002325 reportDefault(parser, parser->m_encoding, parser->m_eventPtr, parser->m_eventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002326 }
2327}
2328
Fred Drake08317ae2003-10-21 15:38:55 +00002329const XML_LChar * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002330XML_ErrorString(enum XML_Error code)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002331{
Benjamin Peterson4e211002018-06-26 19:25:45 -07002332 switch (code) {
2333 case XML_ERROR_NONE:
2334 return NULL;
2335 case XML_ERROR_NO_MEMORY:
2336 return XML_L("out of memory");
2337 case XML_ERROR_SYNTAX:
2338 return XML_L("syntax error");
2339 case XML_ERROR_NO_ELEMENTS:
2340 return XML_L("no element found");
2341 case XML_ERROR_INVALID_TOKEN:
2342 return XML_L("not well-formed (invalid token)");
2343 case XML_ERROR_UNCLOSED_TOKEN:
2344 return XML_L("unclosed token");
2345 case XML_ERROR_PARTIAL_CHAR:
2346 return XML_L("partial character");
2347 case XML_ERROR_TAG_MISMATCH:
2348 return XML_L("mismatched tag");
2349 case XML_ERROR_DUPLICATE_ATTRIBUTE:
2350 return XML_L("duplicate attribute");
2351 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2352 return XML_L("junk after document element");
2353 case XML_ERROR_PARAM_ENTITY_REF:
2354 return XML_L("illegal parameter entity reference");
2355 case XML_ERROR_UNDEFINED_ENTITY:
2356 return XML_L("undefined entity");
2357 case XML_ERROR_RECURSIVE_ENTITY_REF:
2358 return XML_L("recursive entity reference");
2359 case XML_ERROR_ASYNC_ENTITY:
2360 return XML_L("asynchronous entity");
2361 case XML_ERROR_BAD_CHAR_REF:
2362 return XML_L("reference to invalid character number");
2363 case XML_ERROR_BINARY_ENTITY_REF:
2364 return XML_L("reference to binary entity");
2365 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2366 return XML_L("reference to external entity in attribute");
2367 case XML_ERROR_MISPLACED_XML_PI:
2368 return XML_L("XML or text declaration not at start of entity");
2369 case XML_ERROR_UNKNOWN_ENCODING:
2370 return XML_L("unknown encoding");
2371 case XML_ERROR_INCORRECT_ENCODING:
2372 return XML_L("encoding specified in XML declaration is incorrect");
2373 case XML_ERROR_UNCLOSED_CDATA_SECTION:
2374 return XML_L("unclosed CDATA section");
2375 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2376 return XML_L("error in processing external entity reference");
2377 case XML_ERROR_NOT_STANDALONE:
2378 return XML_L("document is not standalone");
2379 case XML_ERROR_UNEXPECTED_STATE:
2380 return XML_L("unexpected parser state - please send a bug report");
2381 case XML_ERROR_ENTITY_DECLARED_IN_PE:
2382 return XML_L("entity declared in parameter entity");
2383 case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2384 return XML_L("requested feature requires XML_DTD support in Expat");
2385 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2386 return XML_L("cannot change setting once parsing has begun");
2387 /* Added in 1.95.7. */
2388 case XML_ERROR_UNBOUND_PREFIX:
2389 return XML_L("unbound prefix");
2390 /* Added in 1.95.8. */
2391 case XML_ERROR_UNDECLARING_PREFIX:
2392 return XML_L("must not undeclare prefix");
2393 case XML_ERROR_INCOMPLETE_PE:
2394 return XML_L("incomplete markup in parameter entity");
2395 case XML_ERROR_XML_DECL:
2396 return XML_L("XML declaration not well-formed");
2397 case XML_ERROR_TEXT_DECL:
2398 return XML_L("text declaration not well-formed");
2399 case XML_ERROR_PUBLICID:
2400 return XML_L("illegal character(s) in public id");
2401 case XML_ERROR_SUSPENDED:
2402 return XML_L("parser suspended");
2403 case XML_ERROR_NOT_SUSPENDED:
2404 return XML_L("parser not suspended");
2405 case XML_ERROR_ABORTED:
2406 return XML_L("parsing aborted");
2407 case XML_ERROR_FINISHED:
2408 return XML_L("parsing finished");
2409 case XML_ERROR_SUSPEND_PE:
2410 return XML_L("cannot suspend in external parameter entity");
2411 /* Added in 2.0.0. */
2412 case XML_ERROR_RESERVED_PREFIX_XML:
2413 return XML_L("reserved prefix (xml) must not be undeclared or bound to another namespace name");
2414 case XML_ERROR_RESERVED_PREFIX_XMLNS:
2415 return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2416 case XML_ERROR_RESERVED_NAMESPACE_URI:
2417 return XML_L("prefix must not be bound to one of the reserved namespace names");
2418 /* Added in 2.2.5. */
2419 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2420 return XML_L("invalid argument");
2421 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002422 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002423}
2424
Fred Drake08317ae2003-10-21 15:38:55 +00002425const XML_LChar * XMLCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002426XML_ExpatVersion(void) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002427
2428 /* V1 is used to string-ize the version number. However, it would
2429 string-ize the actual version macro *names* unless we get them
2430 substituted before being passed to V1. CPP is defined to expand
2431 a macro, then rescan for more expansions. Thus, we use V2 to expand
2432 the version macros, then CPP will expand the resulting V1() macro
2433 with the correct numerals. */
2434 /* ### I'm assuming cpp is portable in this respect... */
2435
2436#define V1(a,b,c) XML_L(#a)XML_L(".")XML_L(#b)XML_L(".")XML_L(#c)
2437#define V2(a,b,c) XML_L("expat_")V1(a,b,c)
2438
2439 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2440
2441#undef V1
2442#undef V2
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002443}
2444
Fred Drake08317ae2003-10-21 15:38:55 +00002445XML_Expat_Version XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002446XML_ExpatVersionInfo(void)
2447{
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002448 XML_Expat_Version version;
2449
2450 version.major = XML_MAJOR_VERSION;
2451 version.minor = XML_MINOR_VERSION;
2452 version.micro = XML_MICRO_VERSION;
2453
2454 return version;
2455}
2456
Fred Drake08317ae2003-10-21 15:38:55 +00002457const XML_Feature * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002458XML_GetFeatureList(void)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002459{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002460 static const XML_Feature features[] = {
2461 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2462 sizeof(XML_Char)},
2463 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2464 sizeof(XML_LChar)},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002465#ifdef XML_UNICODE
Fred Drake08317ae2003-10-21 15:38:55 +00002466 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002467#endif
2468#ifdef XML_UNICODE_WCHAR_T
Fred Drake08317ae2003-10-21 15:38:55 +00002469 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002470#endif
2471#ifdef XML_DTD
Fred Drake08317ae2003-10-21 15:38:55 +00002472 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002473#endif
2474#ifdef XML_CONTEXT_BYTES
2475 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2476 XML_CONTEXT_BYTES},
2477#endif
2478#ifdef XML_MIN_SIZE
Fred Drake08317ae2003-10-21 15:38:55 +00002479 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002480#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002481#ifdef XML_NS
2482 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2483#endif
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002484#ifdef XML_LARGE_SIZE
2485 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2486#endif
2487#ifdef XML_ATTR_INFO
2488 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2489#endif
Fred Drake08317ae2003-10-21 15:38:55 +00002490 {XML_FEATURE_END, NULL, 0}
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002491 };
2492
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002493 return features;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002494}
2495
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002496/* Initially tag->rawName always points into the parse buffer;
2497 for those TAG instances opened while the current parse buffer was
2498 processed, and not yet closed, we need to store tag->rawName in a more
2499 permanent location, since the parse buffer is about to be discarded.
2500*/
2501static XML_Bool
2502storeRawNames(XML_Parser parser)
2503{
Benjamin Peterson4e211002018-06-26 19:25:45 -07002504 TAG *tag = parser->m_tagStack;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002505 while (tag) {
2506 int bufSize;
2507 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2508 char *rawNameBuf = tag->buf + nameLen;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002509 /* Stop if already stored. Since m_tagStack is a stack, we can stop
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002510 at the first entry that has already been copied; everything
2511 below it in the stack is already been accounted for in a
2512 previous call to this function.
2513 */
2514 if (tag->rawName == rawNameBuf)
2515 break;
2516 /* For re-use purposes we need to ensure that the
2517 size of tag->buf is a multiple of sizeof(XML_Char).
2518 */
2519 bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2520 if (bufSize > tag->bufEnd - tag->buf) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002521 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002522 if (temp == NULL)
2523 return XML_FALSE;
2524 /* if tag->name.str points to tag->buf (only when namespace
2525 processing is off) then we have to update it
2526 */
2527 if (tag->name.str == (XML_Char *)tag->buf)
2528 tag->name.str = (XML_Char *)temp;
2529 /* if tag->name.localPart is set (when namespace processing is on)
2530 then update it as well, since it will always point into tag->buf
2531 */
2532 if (tag->name.localPart)
2533 tag->name.localPart = (XML_Char *)temp + (tag->name.localPart -
2534 (XML_Char *)tag->buf);
2535 tag->buf = temp;
2536 tag->bufEnd = temp + bufSize;
2537 rawNameBuf = temp + nameLen;
2538 }
2539 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2540 tag->rawName = rawNameBuf;
2541 tag = tag->parent;
2542 }
2543 return XML_TRUE;
2544}
2545
2546static enum XML_Error PTRCALL
2547contentProcessor(XML_Parser parser,
2548 const char *start,
2549 const char *end,
2550 const char **endPtr)
2551{
Benjamin Peterson4e211002018-06-26 19:25:45 -07002552 enum XML_Error result = doContent(parser, 0, parser->m_encoding, start, end,
2553 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00002554 if (result == XML_ERROR_NONE) {
2555 if (!storeRawNames(parser))
2556 return XML_ERROR_NO_MEMORY;
2557 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002558 return result;
2559}
2560
2561static enum XML_Error PTRCALL
2562externalEntityInitProcessor(XML_Parser parser,
2563 const char *start,
2564 const char *end,
2565 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002566{
2567 enum XML_Error result = initializeEncoding(parser);
2568 if (result != XML_ERROR_NONE)
2569 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002570 parser->m_processor = externalEntityInitProcessor2;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002571 return externalEntityInitProcessor2(parser, start, end, endPtr);
2572}
2573
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002574static enum XML_Error PTRCALL
2575externalEntityInitProcessor2(XML_Parser parser,
2576 const char *start,
2577 const char *end,
2578 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002579{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002580 const char *next = start; /* XmlContentTok doesn't always set the last arg */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002581 int tok = XmlContentTok(parser->m_encoding, start, end, &next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002582 switch (tok) {
2583 case XML_TOK_BOM:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002584 /* If we are at the end of the buffer, this would cause the next stage,
2585 i.e. externalEntityInitProcessor3, to pass control directly to
2586 doContent (by detecting XML_TOK_NONE) without processing any xml text
2587 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2588 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002589 if (next == end && !parser->m_parsingStatus.finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002590 *endPtr = next;
2591 return XML_ERROR_NONE;
2592 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002593 start = next;
2594 break;
2595 case XML_TOK_PARTIAL:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002596 if (!parser->m_parsingStatus.finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002597 *endPtr = start;
2598 return XML_ERROR_NONE;
2599 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002600 parser->m_eventPtr = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002601 return XML_ERROR_UNCLOSED_TOKEN;
2602 case XML_TOK_PARTIAL_CHAR:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002603 if (!parser->m_parsingStatus.finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002604 *endPtr = start;
2605 return XML_ERROR_NONE;
2606 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002607 parser->m_eventPtr = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002608 return XML_ERROR_PARTIAL_CHAR;
2609 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002610 parser->m_processor = externalEntityInitProcessor3;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002611 return externalEntityInitProcessor3(parser, start, end, endPtr);
2612}
2613
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002614static enum XML_Error PTRCALL
2615externalEntityInitProcessor3(XML_Parser parser,
2616 const char *start,
2617 const char *end,
2618 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002619{
Fred Drake31d485c2004-08-03 07:06:22 +00002620 int tok;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002621 const char *next = start; /* XmlContentTok doesn't always set the last arg */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002622 parser->m_eventPtr = start;
2623 tok = XmlContentTok(parser->m_encoding, start, end, &next);
2624 parser->m_eventEndPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00002625
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002626 switch (tok) {
2627 case XML_TOK_XML_DECL:
2628 {
Fred Drake31d485c2004-08-03 07:06:22 +00002629 enum XML_Error result;
2630 result = processXmlDecl(parser, 1, start, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002631 if (result != XML_ERROR_NONE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002632 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002633 switch (parser->m_parsingStatus.parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002634 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00002635 *endPtr = next;
2636 return XML_ERROR_NONE;
2637 case XML_FINISHED:
2638 return XML_ERROR_ABORTED;
2639 default:
2640 start = next;
2641 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002642 }
2643 break;
2644 case XML_TOK_PARTIAL:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002645 if (!parser->m_parsingStatus.finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002646 *endPtr = start;
2647 return XML_ERROR_NONE;
2648 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002649 return XML_ERROR_UNCLOSED_TOKEN;
2650 case XML_TOK_PARTIAL_CHAR:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002651 if (!parser->m_parsingStatus.finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002652 *endPtr = start;
2653 return XML_ERROR_NONE;
2654 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002655 return XML_ERROR_PARTIAL_CHAR;
2656 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002657 parser->m_processor = externalEntityContentProcessor;
2658 parser->m_tagLevel = 1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002659 return externalEntityContentProcessor(parser, start, end, endPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002660}
2661
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002662static enum XML_Error PTRCALL
2663externalEntityContentProcessor(XML_Parser parser,
2664 const char *start,
2665 const char *end,
2666 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002667{
Benjamin Peterson4e211002018-06-26 19:25:45 -07002668 enum XML_Error result = doContent(parser, 1, parser->m_encoding, start, end,
2669 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00002670 if (result == XML_ERROR_NONE) {
2671 if (!storeRawNames(parser))
2672 return XML_ERROR_NO_MEMORY;
2673 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002674 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002675}
2676
2677static enum XML_Error
2678doContent(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002679 int startTagLevel,
2680 const ENCODING *enc,
2681 const char *s,
2682 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00002683 const char **nextPtr,
2684 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002685{
Fred Drake31d485c2004-08-03 07:06:22 +00002686 /* save one level of indirection */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002687 DTD * const dtd = parser->m_dtd;
Fred Drake31d485c2004-08-03 07:06:22 +00002688
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002689 const char **eventPP;
2690 const char **eventEndPP;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002691 if (enc == parser->m_encoding) {
2692 eventPP = &parser->m_eventPtr;
2693 eventEndPP = &parser->m_eventEndPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002694 }
2695 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002696 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
2697 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002698 }
2699 *eventPP = s;
Fred Drake31d485c2004-08-03 07:06:22 +00002700
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002701 for (;;) {
2702 const char *next = s; /* XmlContentTok doesn't always set the last arg */
2703 int tok = XmlContentTok(enc, s, end, &next);
2704 *eventEndPP = next;
2705 switch (tok) {
2706 case XML_TOK_TRAILING_CR:
Fred Drake31d485c2004-08-03 07:06:22 +00002707 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002708 *nextPtr = s;
2709 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002710 }
2711 *eventEndPP = end;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002712 if (parser->m_characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002713 XML_Char c = 0xA;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002714 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002715 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002716 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002717 reportDefault(parser, enc, s, end);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002718 /* We are at the end of the final buffer, should we check for
2719 XML_SUSPENDED, XML_FINISHED?
Fred Drake31d485c2004-08-03 07:06:22 +00002720 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002721 if (startTagLevel == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002722 return XML_ERROR_NO_ELEMENTS;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002723 if (parser->m_tagLevel != startTagLevel)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002724 return XML_ERROR_ASYNC_ENTITY;
Fred Drake31d485c2004-08-03 07:06:22 +00002725 *nextPtr = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002726 return XML_ERROR_NONE;
2727 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00002728 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002729 *nextPtr = s;
2730 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002731 }
2732 if (startTagLevel > 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002733 if (parser->m_tagLevel != startTagLevel)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002734 return XML_ERROR_ASYNC_ENTITY;
Fred Drake31d485c2004-08-03 07:06:22 +00002735 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002736 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002737 }
2738 return XML_ERROR_NO_ELEMENTS;
2739 case XML_TOK_INVALID:
2740 *eventPP = next;
2741 return XML_ERROR_INVALID_TOKEN;
2742 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00002743 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002744 *nextPtr = s;
2745 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002746 }
2747 return XML_ERROR_UNCLOSED_TOKEN;
2748 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00002749 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002750 *nextPtr = s;
2751 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002752 }
2753 return XML_ERROR_PARTIAL_CHAR;
2754 case XML_TOK_ENTITY_REF:
2755 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002756 const XML_Char *name;
2757 ENTITY *entity;
2758 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
2759 s + enc->minBytesPerChar,
2760 next - enc->minBytesPerChar);
2761 if (ch) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002762 if (parser->m_characterDataHandler)
2763 parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
2764 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002765 reportDefault(parser, enc, s, next);
2766 break;
2767 }
2768 name = poolStoreString(&dtd->pool, enc,
2769 s + enc->minBytesPerChar,
2770 next - enc->minBytesPerChar);
2771 if (!name)
2772 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07002773 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002774 poolDiscard(&dtd->pool);
2775 /* First, determine if a check for an existing declaration is needed;
2776 if yes, check that the entity exists, and that it is internal,
2777 otherwise call the skipped entity or default handler.
2778 */
2779 if (!dtd->hasParamEntityRefs || dtd->standalone) {
2780 if (!entity)
2781 return XML_ERROR_UNDEFINED_ENTITY;
2782 else if (!entity->is_internal)
2783 return XML_ERROR_ENTITY_DECLARED_IN_PE;
2784 }
2785 else if (!entity) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002786 if (parser->m_skippedEntityHandler)
2787 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
2788 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002789 reportDefault(parser, enc, s, next);
2790 break;
2791 }
2792 if (entity->open)
2793 return XML_ERROR_RECURSIVE_ENTITY_REF;
2794 if (entity->notation)
2795 return XML_ERROR_BINARY_ENTITY_REF;
2796 if (entity->textPtr) {
2797 enum XML_Error result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002798 if (!parser->m_defaultExpandInternalEntities) {
2799 if (parser->m_skippedEntityHandler)
2800 parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name, 0);
2801 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002802 reportDefault(parser, enc, s, next);
2803 break;
2804 }
Fred Drake31d485c2004-08-03 07:06:22 +00002805 result = processInternalEntity(parser, entity, XML_FALSE);
2806 if (result != XML_ERROR_NONE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002807 return result;
2808 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002809 else if (parser->m_externalEntityRefHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002810 const XML_Char *context;
2811 entity->open = XML_TRUE;
2812 context = getContext(parser);
2813 entity->open = XML_FALSE;
2814 if (!context)
2815 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002816 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002817 context,
2818 entity->base,
2819 entity->systemId,
2820 entity->publicId))
2821 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002822 poolDiscard(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002823 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002824 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002825 reportDefault(parser, enc, s, next);
2826 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002827 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002828 case XML_TOK_START_TAG_NO_ATTS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002829 /* fall through */
2830 case XML_TOK_START_TAG_WITH_ATTS:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002831 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002832 TAG *tag;
2833 enum XML_Error result;
2834 XML_Char *toPtr;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002835 if (parser->m_freeTagList) {
2836 tag = parser->m_freeTagList;
2837 parser->m_freeTagList = parser->m_freeTagList->parent;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002838 }
2839 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002840 tag = (TAG *)MALLOC(parser, sizeof(TAG));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002841 if (!tag)
2842 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002843 tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002844 if (!tag->buf) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002845 FREE(parser, tag);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002846 return XML_ERROR_NO_MEMORY;
2847 }
2848 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
2849 }
2850 tag->bindings = NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002851 tag->parent = parser->m_tagStack;
2852 parser->m_tagStack = tag;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002853 tag->name.localPart = NULL;
2854 tag->name.prefix = NULL;
2855 tag->rawName = s + enc->minBytesPerChar;
2856 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002857 ++parser->m_tagLevel;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002858 {
2859 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
2860 const char *fromPtr = tag->rawName;
2861 toPtr = (XML_Char *)tag->buf;
2862 for (;;) {
2863 int bufSize;
2864 int convLen;
Victor Stinner23ec4b52017-06-15 00:54:36 +02002865 const enum XML_Convert_Result convert_res = XmlConvert(enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002866 &fromPtr, rawNameEnd,
2867 (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002868 convLen = (int)(toPtr - (XML_Char *)tag->buf);
Victor Stinner5ff71322017-06-21 14:39:22 +02002869 if ((fromPtr >= rawNameEnd) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002870 tag->name.strLen = convLen;
2871 break;
2872 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002873 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002874 {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002875 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002876 if (temp == NULL)
2877 return XML_ERROR_NO_MEMORY;
2878 tag->buf = temp;
2879 tag->bufEnd = temp + bufSize;
2880 toPtr = (XML_Char *)temp + convLen;
2881 }
2882 }
2883 }
2884 tag->name.str = (XML_Char *)tag->buf;
2885 *toPtr = XML_T('\0');
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002886 result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
2887 if (result)
2888 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002889 if (parser->m_startElementHandler)
2890 parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
2891 (const XML_Char **)parser->m_atts);
2892 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002893 reportDefault(parser, enc, s, next);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002894 poolClear(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002895 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002896 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002897 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002898 /* fall through */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002899 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
2900 {
2901 const char *rawName = s + enc->minBytesPerChar;
2902 enum XML_Error result;
2903 BINDING *bindings = NULL;
2904 XML_Bool noElmHandlers = XML_TRUE;
2905 TAG_NAME name;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002906 name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002907 rawName + XmlNameLength(enc, rawName));
2908 if (!name.str)
2909 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002910 poolFinish(&parser->m_tempPool);
Fred Drake4faea012003-01-28 06:42:40 +00002911 result = storeAtts(parser, enc, s, &name, &bindings);
Victor Stinner5ff71322017-06-21 14:39:22 +02002912 if (result != XML_ERROR_NONE) {
2913 freeBindings(parser, bindings);
Fred Drake4faea012003-01-28 06:42:40 +00002914 return result;
Victor Stinner5ff71322017-06-21 14:39:22 +02002915 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002916 poolFinish(&parser->m_tempPool);
2917 if (parser->m_startElementHandler) {
2918 parser->m_startElementHandler(parser->m_handlerArg, name.str, (const XML_Char **)parser->m_atts);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002919 noElmHandlers = XML_FALSE;
2920 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002921 if (parser->m_endElementHandler) {
2922 if (parser->m_startElementHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002923 *eventPP = *eventEndPP;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002924 parser->m_endElementHandler(parser->m_handlerArg, name.str);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002925 noElmHandlers = XML_FALSE;
2926 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002927 if (noElmHandlers && parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002928 reportDefault(parser, enc, s, next);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002929 poolClear(&parser->m_tempPool);
Victor Stinner5ff71322017-06-21 14:39:22 +02002930 freeBindings(parser, bindings);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002931 }
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002932 if ((parser->m_tagLevel == 0) && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
2933 if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
2934 parser->m_processor = epilogProcessor;
2935 else
2936 return epilogProcessor(parser, next, end, nextPtr);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002937 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002938 break;
2939 case XML_TOK_END_TAG:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002940 if (parser->m_tagLevel == startTagLevel)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002941 return XML_ERROR_ASYNC_ENTITY;
2942 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002943 int len;
2944 const char *rawName;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002945 TAG *tag = parser->m_tagStack;
2946 parser->m_tagStack = tag->parent;
2947 tag->parent = parser->m_freeTagList;
2948 parser->m_freeTagList = tag;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002949 rawName = s + enc->minBytesPerChar*2;
2950 len = XmlNameLength(enc, rawName);
2951 if (len != tag->rawNameLength
2952 || memcmp(tag->rawName, rawName, len) != 0) {
2953 *eventPP = rawName;
2954 return XML_ERROR_TAG_MISMATCH;
2955 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002956 --parser->m_tagLevel;
2957 if (parser->m_endElementHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002958 const XML_Char *localPart;
2959 const XML_Char *prefix;
2960 XML_Char *uri;
2961 localPart = tag->name.localPart;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002962 if (parser->m_ns && localPart) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002963 /* localPart and prefix may have been overwritten in
2964 tag->name.str, since this points to the binding->uri
2965 buffer which gets re-used; so we have to add them again
2966 */
2967 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
2968 /* don't need to check for space - already done in storeAtts() */
2969 while (*localPart) *uri++ = *localPart++;
2970 prefix = (XML_Char *)tag->name.prefix;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002971 if (parser->m_ns_triplets && prefix) {
2972 *uri++ = parser->m_namespaceSeparator;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002973 while (*prefix) *uri++ = *prefix++;
2974 }
2975 *uri = XML_T('\0');
2976 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002977 parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002978 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002979 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002980 reportDefault(parser, enc, s, next);
2981 while (tag->bindings) {
2982 BINDING *b = tag->bindings;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002983 if (parser->m_endNamespaceDeclHandler)
2984 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002985 tag->bindings = tag->bindings->nextTagBinding;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002986 b->nextTagBinding = parser->m_freeBindingList;
2987 parser->m_freeBindingList = b;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002988 b->prefix->binding = b->prevPrefixBinding;
2989 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002990 if (parser->m_tagLevel == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002991 return epilogProcessor(parser, next, end, nextPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002992 }
2993 break;
2994 case XML_TOK_CHAR_REF:
2995 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002996 int n = XmlCharRefNumber(enc, s);
2997 if (n < 0)
2998 return XML_ERROR_BAD_CHAR_REF;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002999 if (parser->m_characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003000 XML_Char buf[XML_ENCODE_MAX];
Benjamin Peterson4e211002018-06-26 19:25:45 -07003001 parser->m_characterDataHandler(parser->m_handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003002 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003003 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003004 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003005 }
3006 break;
3007 case XML_TOK_XML_DECL:
3008 return XML_ERROR_MISPLACED_XML_PI;
3009 case XML_TOK_DATA_NEWLINE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07003010 if (parser->m_characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003011 XML_Char c = 0xA;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003012 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003013 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003014 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003015 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003016 break;
3017 case XML_TOK_CDATA_SECT_OPEN:
3018 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003019 enum XML_Error result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003020 if (parser->m_startCdataSectionHandler)
3021 parser->m_startCdataSectionHandler(parser->m_handlerArg);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003022#if 0
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003023 /* Suppose you doing a transformation on a document that involves
3024 changing only the character data. You set up a defaultHandler
3025 and a characterDataHandler. The defaultHandler simply copies
3026 characters through. The characterDataHandler does the
3027 transformation and writes the characters out escaping them as
3028 necessary. This case will fail to work if we leave out the
3029 following two lines (because & and < inside CDATA sections will
3030 be incorrectly escaped).
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003031
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003032 However, now we have a start/endCdataSectionHandler, so it seems
3033 easier to let the user deal with this.
3034 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003035 else if (parser->m_characterDataHandler)
3036 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003037#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07003038 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003039 reportDefault(parser, enc, s, next);
Fred Drake31d485c2004-08-03 07:06:22 +00003040 result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore);
3041 if (result != XML_ERROR_NONE)
3042 return result;
3043 else if (!next) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003044 parser->m_processor = cdataSectionProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003045 return result;
3046 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003047 }
3048 break;
3049 case XML_TOK_TRAILING_RSQB:
Fred Drake31d485c2004-08-03 07:06:22 +00003050 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003051 *nextPtr = s;
3052 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003053 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003054 if (parser->m_characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003055 if (MUST_CONVERT(enc, s)) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003056 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3057 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3058 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3059 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003060 }
3061 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07003062 parser->m_characterDataHandler(parser->m_handlerArg,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003063 (XML_Char *)s,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003064 (int)((XML_Char *)end - (XML_Char *)s));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003065 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003066 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003067 reportDefault(parser, enc, s, end);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003068 /* We are at the end of the final buffer, should we check for
3069 XML_SUSPENDED, XML_FINISHED?
Fred Drake31d485c2004-08-03 07:06:22 +00003070 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003071 if (startTagLevel == 0) {
3072 *eventPP = end;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003073 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003074 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003075 if (parser->m_tagLevel != startTagLevel) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003076 *eventPP = end;
3077 return XML_ERROR_ASYNC_ENTITY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003078 }
Fred Drake31d485c2004-08-03 07:06:22 +00003079 *nextPtr = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003080 return XML_ERROR_NONE;
3081 case XML_TOK_DATA_CHARS:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003082 {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003083 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003084 if (charDataHandler) {
3085 if (MUST_CONVERT(enc, s)) {
3086 for (;;) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003087 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3088 const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003089 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003090 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3091 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
Victor Stinner23ec4b52017-06-15 00:54:36 +02003092 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003093 break;
3094 *eventPP = s;
3095 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003096 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003097 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07003098 charDataHandler(parser->m_handlerArg,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003099 (XML_Char *)s,
3100 (int)((XML_Char *)next - (XML_Char *)s));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003101 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003102 else if (parser->m_defaultHandler)
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003103 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003104 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003105 break;
3106 case XML_TOK_PI:
3107 if (!reportProcessingInstruction(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003108 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003109 break;
3110 case XML_TOK_COMMENT:
3111 if (!reportComment(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003112 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003113 break;
3114 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02003115 /* All of the tokens produced by XmlContentTok() have their own
3116 * explicit cases, so this default is not strictly necessary.
3117 * However it is a useful safety net, so we retain the code and
3118 * simply exclude it from the coverage tests.
3119 *
3120 * LCOV_EXCL_START
3121 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003122 if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003123 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003124 break;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003125 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003126 }
3127 *eventPP = s = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003128 switch (parser->m_parsingStatus.parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003129 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00003130 *nextPtr = next;
3131 return XML_ERROR_NONE;
3132 case XML_FINISHED:
3133 return XML_ERROR_ABORTED;
3134 default: ;
3135 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003136 }
3137 /* not reached */
3138}
3139
Victor Stinner5ff71322017-06-21 14:39:22 +02003140/* This function does not call free() on the allocated memory, merely
Benjamin Peterson4e211002018-06-26 19:25:45 -07003141 * moving it to the parser's m_freeBindingList where it can be freed or
Victor Stinner5ff71322017-06-21 14:39:22 +02003142 * reused as appropriate.
3143 */
3144static void
3145freeBindings(XML_Parser parser, BINDING *bindings)
3146{
3147 while (bindings) {
3148 BINDING *b = bindings;
3149
Benjamin Peterson4e211002018-06-26 19:25:45 -07003150 /* m_startNamespaceDeclHandler will have been called for this
Victor Stinner5ff71322017-06-21 14:39:22 +02003151 * binding in addBindings(), so call the end handler now.
3152 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003153 if (parser->m_endNamespaceDeclHandler)
3154 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
Victor Stinner5ff71322017-06-21 14:39:22 +02003155
3156 bindings = bindings->nextTagBinding;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003157 b->nextTagBinding = parser->m_freeBindingList;
3158 parser->m_freeBindingList = b;
Victor Stinner5ff71322017-06-21 14:39:22 +02003159 b->prefix->binding = b->prevPrefixBinding;
3160 }
3161}
3162
Fred Drake4faea012003-01-28 06:42:40 +00003163/* Precondition: all arguments must be non-NULL;
3164 Purpose:
3165 - normalize attributes
3166 - check attributes for well-formedness
3167 - generate namespace aware attribute names (URI, prefix)
3168 - build list of attributes for startElementHandler
3169 - default attributes
3170 - process namespace declarations (check and report them)
3171 - generate namespace aware element name (URI, prefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003172*/
3173static enum XML_Error
3174storeAtts(XML_Parser parser, const ENCODING *enc,
3175 const char *attStr, TAG_NAME *tagNamePtr,
3176 BINDING **bindingsPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003177{
Benjamin Peterson4e211002018-06-26 19:25:45 -07003178 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Fred Drake08317ae2003-10-21 15:38:55 +00003179 ELEMENT_TYPE *elementType;
3180 int nDefaultAtts;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003181 const XML_Char **appAtts; /* the attribute list for the application */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003182 int attIndex = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003183 int prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003184 int i;
3185 int n;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003186 XML_Char *uri;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003187 int nPrefixes = 0;
3188 BINDING *binding;
3189 const XML_Char *localPart;
3190
3191 /* lookup the element type name */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003192 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str,0);
Fred Drake4faea012003-01-28 06:42:40 +00003193 if (!elementType) {
3194 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3195 if (!name)
3196 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003197 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
Fred Drake4faea012003-01-28 06:42:40 +00003198 sizeof(ELEMENT_TYPE));
3199 if (!elementType)
3200 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003201 if (parser->m_ns && !setElementTypePrefix(parser, elementType))
Fred Drake4faea012003-01-28 06:42:40 +00003202 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003203 }
Fred Drake4faea012003-01-28 06:42:40 +00003204 nDefaultAtts = elementType->nDefaultAtts;
3205
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003206 /* get the attributes from the tokenizer */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003207 n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3208 if (n + nDefaultAtts > parser->m_attsSize) {
3209 int oldAttsSize = parser->m_attsSize;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003210 ATTRIBUTE *temp;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003211#ifdef XML_ATTR_INFO
3212 XML_AttrInfo *temp2;
3213#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07003214 parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3215 temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts, parser->m_attsSize * sizeof(ATTRIBUTE));
Victor Stinner93d0cb52017-08-18 23:43:54 +02003216 if (temp == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003217 parser->m_attsSize = oldAttsSize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003218 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003219 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003220 parser->m_atts = temp;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003221#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -07003222 temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo, parser->m_attsSize * sizeof(XML_AttrInfo));
Victor Stinner93d0cb52017-08-18 23:43:54 +02003223 if (temp2 == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003224 parser->m_attsSize = oldAttsSize;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003225 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003226 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003227 parser->m_attInfo = temp2;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003228#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003229 if (n > oldAttsSize)
Benjamin Peterson4e211002018-06-26 19:25:45 -07003230 XmlGetAttributes(enc, attStr, n, parser->m_atts);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003231 }
Fred Drake4faea012003-01-28 06:42:40 +00003232
Benjamin Peterson4e211002018-06-26 19:25:45 -07003233 appAtts = (const XML_Char **)parser->m_atts;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003234 for (i = 0; i < n; i++) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003235 ATTRIBUTE *currAtt = &parser->m_atts[i];
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003236#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -07003237 XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003238#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003239 /* add the name and value to the attribute list */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003240 ATTRIBUTE_ID *attId = getAttributeId(parser, enc, currAtt->name,
3241 currAtt->name
3242 + XmlNameLength(enc, currAtt->name));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003243 if (!attId)
3244 return XML_ERROR_NO_MEMORY;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003245#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -07003246 currAttInfo->nameStart = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003247 currAttInfo->nameEnd = currAttInfo->nameStart +
3248 XmlNameLength(enc, currAtt->name);
Benjamin Peterson4e211002018-06-26 19:25:45 -07003249 currAttInfo->valueStart = parser->m_parseEndByteIndex -
3250 (parser->m_parseEndPtr - currAtt->valuePtr);
3251 currAttInfo->valueEnd = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->valueEnd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003252#endif
Fred Drake08317ae2003-10-21 15:38:55 +00003253 /* Detect duplicate attributes by their QNames. This does not work when
3254 namespace processing is turned on and different prefixes for the same
3255 namespace are used. For this case we have a check further down.
3256 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003257 if ((attId->name)[-1]) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003258 if (enc == parser->m_encoding)
3259 parser->m_eventPtr = parser->m_atts[i].name;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003260 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3261 }
3262 (attId->name)[-1] = 1;
3263 appAtts[attIndex++] = attId->name;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003264 if (!parser->m_atts[i].normalized) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003265 enum XML_Error result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003266 XML_Bool isCdata = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003267
3268 /* figure out whether declared as other than CDATA */
3269 if (attId->maybeTokenized) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003270 int j;
3271 for (j = 0; j < nDefaultAtts; j++) {
3272 if (attId == elementType->defaultAtts[j].id) {
3273 isCdata = elementType->defaultAtts[j].isCdata;
3274 break;
3275 }
3276 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003277 }
3278
3279 /* normalize the attribute value */
3280 result = storeAttributeValue(parser, enc, isCdata,
Benjamin Peterson4e211002018-06-26 19:25:45 -07003281 parser->m_atts[i].valuePtr, parser->m_atts[i].valueEnd,
3282 &parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003283 if (result)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003284 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003285 appAtts[attIndex] = poolStart(&parser->m_tempPool);
3286 poolFinish(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003287 }
Fred Drake4faea012003-01-28 06:42:40 +00003288 else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003289 /* the value did not need normalizing */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003290 appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc, parser->m_atts[i].valuePtr,
3291 parser->m_atts[i].valueEnd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003292 if (appAtts[attIndex] == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003293 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003294 poolFinish(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003295 }
3296 /* handle prefixed attribute names */
Fred Drake4faea012003-01-28 06:42:40 +00003297 if (attId->prefix) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003298 if (attId->xmlns) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003299 /* deal with namespace declarations here */
3300 enum XML_Error result = addBinding(parser, attId->prefix, attId,
3301 appAtts[attIndex], bindingsPtr);
3302 if (result)
3303 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003304 --attIndex;
3305 }
3306 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003307 /* deal with other prefixed names later */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003308 attIndex++;
3309 nPrefixes++;
3310 (attId->name)[-1] = 2;
3311 }
3312 }
3313 else
3314 attIndex++;
3315 }
Fred Drake4faea012003-01-28 06:42:40 +00003316
3317 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003318 parser->m_nSpecifiedAtts = attIndex;
Fred Drake4faea012003-01-28 06:42:40 +00003319 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3320 for (i = 0; i < attIndex; i += 2)
3321 if (appAtts[i] == elementType->idAtt->name) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003322 parser->m_idAttIndex = i;
Fred Drake4faea012003-01-28 06:42:40 +00003323 break;
3324 }
3325 }
3326 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07003327 parser->m_idAttIndex = -1;
Fred Drake4faea012003-01-28 06:42:40 +00003328
3329 /* do attribute defaulting */
3330 for (i = 0; i < nDefaultAtts; i++) {
3331 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3332 if (!(da->id->name)[-1] && da->value) {
3333 if (da->id->prefix) {
3334 if (da->id->xmlns) {
3335 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3336 da->value, bindingsPtr);
3337 if (result)
3338 return result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003339 }
3340 else {
Fred Drake4faea012003-01-28 06:42:40 +00003341 (da->id->name)[-1] = 2;
3342 nPrefixes++;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003343 appAtts[attIndex++] = da->id->name;
3344 appAtts[attIndex++] = da->value;
3345 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003346 }
Fred Drake4faea012003-01-28 06:42:40 +00003347 else {
3348 (da->id->name)[-1] = 1;
3349 appAtts[attIndex++] = da->id->name;
3350 appAtts[attIndex++] = da->value;
3351 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003352 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003353 }
Fred Drake4faea012003-01-28 06:42:40 +00003354 appAtts[attIndex] = 0;
3355
Fred Drake08317ae2003-10-21 15:38:55 +00003356 /* expand prefixed attribute names, check for duplicates,
3357 and clear flags that say whether attributes were specified */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003358 i = 0;
3359 if (nPrefixes) {
Fred Drake08317ae2003-10-21 15:38:55 +00003360 int j; /* hash table index */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003361 unsigned long version = parser->m_nsAttsVersion;
3362 int nsAttsSize = (int)1 << parser->m_nsAttsPower;
3363 unsigned char oldNsAttsPower = parser->m_nsAttsPower;
Fred Drake08317ae2003-10-21 15:38:55 +00003364 /* size of hash table must be at least 2 * (# of prefixed attributes) */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003365 if ((nPrefixes << 1) >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
Fred Drake08317ae2003-10-21 15:38:55 +00003366 NS_ATT *temp;
3367 /* hash table size must also be a power of 2 and >= 8 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003368 while (nPrefixes >> parser->m_nsAttsPower++);
3369 if (parser->m_nsAttsPower < 3)
3370 parser->m_nsAttsPower = 3;
3371 nsAttsSize = (int)1 << parser->m_nsAttsPower;
3372 temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts, nsAttsSize * sizeof(NS_ATT));
Victor Stinner93d0cb52017-08-18 23:43:54 +02003373 if (!temp) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003374 /* Restore actual size of memory in m_nsAtts */
3375 parser->m_nsAttsPower = oldNsAttsPower;
Fred Drake08317ae2003-10-21 15:38:55 +00003376 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003377 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003378 parser->m_nsAtts = temp;
3379 version = 0; /* force re-initialization of m_nsAtts hash table */
Fred Drake08317ae2003-10-21 15:38:55 +00003380 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003381 /* using a version flag saves us from initializing m_nsAtts every time */
Fred Drake08317ae2003-10-21 15:38:55 +00003382 if (!version) { /* initialize version flags when version wraps around */
3383 version = INIT_ATTS_VERSION;
3384 for (j = nsAttsSize; j != 0; )
Benjamin Peterson4e211002018-06-26 19:25:45 -07003385 parser->m_nsAtts[--j].version = version;
Fred Drake08317ae2003-10-21 15:38:55 +00003386 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003387 parser->m_nsAttsVersion = --version;
Fred Drake08317ae2003-10-21 15:38:55 +00003388
3389 /* expand prefixed names and check for duplicates */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003390 for (; i < attIndex; i += 2) {
Fred Drake08317ae2003-10-21 15:38:55 +00003391 const XML_Char *s = appAtts[i];
3392 if (s[-1] == 2) { /* prefixed */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003393 ATTRIBUTE_ID *id;
Fred Drake08317ae2003-10-21 15:38:55 +00003394 const BINDING *b;
Victor Stinner5ff71322017-06-21 14:39:22 +02003395 unsigned long uriHash;
3396 struct siphash sip_state;
3397 struct sipkey sip_key;
3398
3399 copy_salt_to_sipkey(parser, &sip_key);
3400 sip24_init(&sip_state, &sip_key);
3401
Fred Drake08317ae2003-10-21 15:38:55 +00003402 ((XML_Char *)s)[-1] = 0; /* clear flag */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003403 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
Victor Stinner93d0cb52017-08-18 23:43:54 +02003404 if (!id || !id->prefix) {
3405 /* This code is walking through the appAtts array, dealing
3406 * with (in this case) a prefixed attribute name. To be in
3407 * the array, the attribute must have already been bound, so
3408 * has to have passed through the hash table lookup once
3409 * already. That implies that an entry for it already
3410 * exists, so the lookup above will return a pointer to
3411 * already allocated memory. There is no opportunaity for
3412 * the allocator to fail, so the condition above cannot be
3413 * fulfilled.
3414 *
3415 * Since it is difficult to be certain that the above
3416 * analysis is complete, we retain the test and merely
3417 * remove the code from coverage tests.
3418 */
3419 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3420 }
Fred Drake08317ae2003-10-21 15:38:55 +00003421 b = id->prefix->binding;
3422 if (!b)
3423 return XML_ERROR_UNBOUND_PREFIX;
3424
Fred Drake08317ae2003-10-21 15:38:55 +00003425 for (j = 0; j < b->uriLen; j++) {
3426 const XML_Char c = b->uri[j];
Benjamin Peterson4e211002018-06-26 19:25:45 -07003427 if (!poolAppendChar(&parser->m_tempPool, c))
Fred Drake08317ae2003-10-21 15:38:55 +00003428 return XML_ERROR_NO_MEMORY;
Fred Drake08317ae2003-10-21 15:38:55 +00003429 }
Victor Stinner5ff71322017-06-21 14:39:22 +02003430
3431 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3432
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003433 while (*s++ != XML_T(ASCII_COLON))
Fred Drake08317ae2003-10-21 15:38:55 +00003434 ;
Victor Stinner5ff71322017-06-21 14:39:22 +02003435
3436 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3437
Fred Drake08317ae2003-10-21 15:38:55 +00003438 do { /* copies null terminator */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003439 if (!poolAppendChar(&parser->m_tempPool, *s))
Fred Drake08317ae2003-10-21 15:38:55 +00003440 return XML_ERROR_NO_MEMORY;
Fred Drake08317ae2003-10-21 15:38:55 +00003441 } while (*s++);
3442
Victor Stinner5ff71322017-06-21 14:39:22 +02003443 uriHash = (unsigned long)sip24_final(&sip_state);
3444
Fred Drake08317ae2003-10-21 15:38:55 +00003445 { /* Check hash table for duplicate of expanded name (uriName).
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003446 Derived from code in lookup(parser, HASH_TABLE *table, ...).
Fred Drake08317ae2003-10-21 15:38:55 +00003447 */
3448 unsigned char step = 0;
3449 unsigned long mask = nsAttsSize - 1;
3450 j = uriHash & mask; /* index into hash table */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003451 while (parser->m_nsAtts[j].version == version) {
Fred Drake08317ae2003-10-21 15:38:55 +00003452 /* for speed we compare stored hash values first */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003453 if (uriHash == parser->m_nsAtts[j].hash) {
3454 const XML_Char *s1 = poolStart(&parser->m_tempPool);
3455 const XML_Char *s2 = parser->m_nsAtts[j].uriName;
Fred Drake08317ae2003-10-21 15:38:55 +00003456 /* s1 is null terminated, but not s2 */
3457 for (; *s1 == *s2 && *s1 != 0; s1++, s2++);
3458 if (*s1 == 0)
3459 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3460 }
3461 if (!step)
Benjamin Peterson4e211002018-06-26 19:25:45 -07003462 step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003463 j < step ? (j += nsAttsSize - step) : (j -= step);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003464 }
Fred Drake08317ae2003-10-21 15:38:55 +00003465 }
3466
Benjamin Peterson4e211002018-06-26 19:25:45 -07003467 if (parser->m_ns_triplets) { /* append namespace separator and prefix */
3468 parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
Fred Drake08317ae2003-10-21 15:38:55 +00003469 s = b->prefix->name;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003470 do {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003471 if (!poolAppendChar(&parser->m_tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003472 return XML_ERROR_NO_MEMORY;
3473 } while (*s++);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003474 }
Fred Drake08317ae2003-10-21 15:38:55 +00003475
3476 /* store expanded name in attribute list */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003477 s = poolStart(&parser->m_tempPool);
3478 poolFinish(&parser->m_tempPool);
Fred Drake08317ae2003-10-21 15:38:55 +00003479 appAtts[i] = s;
3480
3481 /* fill empty slot with new version, uriName and hash value */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003482 parser->m_nsAtts[j].version = version;
3483 parser->m_nsAtts[j].hash = uriHash;
3484 parser->m_nsAtts[j].uriName = s;
Fred Drake08317ae2003-10-21 15:38:55 +00003485
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003486 if (!--nPrefixes) {
3487 i += 2;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003488 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003489 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003490 }
Fred Drake08317ae2003-10-21 15:38:55 +00003491 else /* not prefixed */
3492 ((XML_Char *)s)[-1] = 0; /* clear flag */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003493 }
3494 }
Fred Drake08317ae2003-10-21 15:38:55 +00003495 /* clear flags for the remaining attributes */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003496 for (; i < attIndex; i += 2)
3497 ((XML_Char *)(appAtts[i]))[-1] = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003498 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3499 binding->attId->name[-1] = 0;
Fred Drake4faea012003-01-28 06:42:40 +00003500
Benjamin Peterson4e211002018-06-26 19:25:45 -07003501 if (!parser->m_ns)
Fred Drake08317ae2003-10-21 15:38:55 +00003502 return XML_ERROR_NONE;
3503
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003504 /* expand the element type name */
3505 if (elementType->prefix) {
3506 binding = elementType->prefix->binding;
3507 if (!binding)
Fred Drake08317ae2003-10-21 15:38:55 +00003508 return XML_ERROR_UNBOUND_PREFIX;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003509 localPart = tagNamePtr->str;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003510 while (*localPart++ != XML_T(ASCII_COLON))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003511 ;
3512 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003513 else if (dtd->defaultPrefix.binding) {
3514 binding = dtd->defaultPrefix.binding;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003515 localPart = tagNamePtr->str;
3516 }
3517 else
3518 return XML_ERROR_NONE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003519 prefixLen = 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003520 if (parser->m_ns_triplets && binding->prefix->name) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003521 for (; binding->prefix->name[prefixLen++];)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003522 ; /* prefixLen includes null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003523 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003524 tagNamePtr->localPart = localPart;
3525 tagNamePtr->uriLen = binding->uriLen;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003526 tagNamePtr->prefix = binding->prefix->name;
3527 tagNamePtr->prefixLen = prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003528 for (i = 0; localPart[i++];)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003529 ; /* i includes null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003530 n = i + binding->uriLen + prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003531 if (n > binding->uriAlloc) {
3532 TAG *p;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003533 uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003534 if (!uri)
3535 return XML_ERROR_NO_MEMORY;
3536 binding->uriAlloc = n + EXPAND_SPARE;
3537 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
Benjamin Peterson4e211002018-06-26 19:25:45 -07003538 for (p = parser->m_tagStack; p; p = p->parent)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003539 if (p->name.str == binding->uri)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003540 p->name.str = uri;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003541 FREE(parser, binding->uri);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003542 binding->uri = uri;
3543 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003544 /* if m_namespaceSeparator != '\0' then uri includes it already */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003545 uri = binding->uri + binding->uriLen;
3546 memcpy(uri, localPart, i * sizeof(XML_Char));
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003547 /* we always have a namespace separator between localPart and prefix */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003548 if (prefixLen) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003549 uri += i - 1;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003550 *uri = parser->m_namespaceSeparator; /* replace null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003551 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3552 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003553 tagNamePtr->str = binding->uri;
3554 return XML_ERROR_NONE;
3555}
3556
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003557/* addBinding() overwrites the value of prefix->binding without checking.
3558 Therefore one must keep track of the old value outside of addBinding().
3559*/
3560static enum XML_Error
3561addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3562 const XML_Char *uri, BINDING **bindingsPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003563{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003564 static const XML_Char xmlNamespace[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003565 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3566 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3567 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
3568 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, ASCII_SLASH,
3569 ASCII_n, ASCII_a, ASCII_m, ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3570 ASCII_e, '\0'
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003571 };
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003572 static const int xmlLen =
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003573 (int)sizeof(xmlNamespace)/sizeof(XML_Char) - 1;
3574 static const XML_Char xmlnsNamespace[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003575 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3576 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3577 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_2, ASCII_0, ASCII_0,
3578 ASCII_0, ASCII_SLASH, ASCII_x, ASCII_m, ASCII_l, ASCII_n, ASCII_s,
3579 ASCII_SLASH, '\0'
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003580 };
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003581 static const int xmlnsLen =
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003582 (int)sizeof(xmlnsNamespace)/sizeof(XML_Char) - 1;
3583
3584 XML_Bool mustBeXML = XML_FALSE;
3585 XML_Bool isXML = XML_TRUE;
3586 XML_Bool isXMLNS = XML_TRUE;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003587
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003588 BINDING *b;
3589 int len;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003590
Fred Drake31d485c2004-08-03 07:06:22 +00003591 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003592 if (*uri == XML_T('\0') && prefix->name)
Fred Drake31d485c2004-08-03 07:06:22 +00003593 return XML_ERROR_UNDECLARING_PREFIX;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003594
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003595 if (prefix->name
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003596 && prefix->name[0] == XML_T(ASCII_x)
3597 && prefix->name[1] == XML_T(ASCII_m)
3598 && prefix->name[2] == XML_T(ASCII_l)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003599
3600 /* Not allowed to bind xmlns */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003601 if (prefix->name[3] == XML_T(ASCII_n)
3602 && prefix->name[4] == XML_T(ASCII_s)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003603 && prefix->name[5] == XML_T('\0'))
3604 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3605
3606 if (prefix->name[3] == XML_T('\0'))
3607 mustBeXML = XML_TRUE;
3608 }
3609
3610 for (len = 0; uri[len]; len++) {
3611 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3612 isXML = XML_FALSE;
3613
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003614 if (!mustBeXML && isXMLNS
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003615 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3616 isXMLNS = XML_FALSE;
3617 }
3618 isXML = isXML && len == xmlLen;
3619 isXMLNS = isXMLNS && len == xmlnsLen;
3620
3621 if (mustBeXML != isXML)
3622 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3623 : XML_ERROR_RESERVED_NAMESPACE_URI;
3624
3625 if (isXMLNS)
3626 return XML_ERROR_RESERVED_NAMESPACE_URI;
3627
Benjamin Peterson4e211002018-06-26 19:25:45 -07003628 if (parser->m_namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003629 len++;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003630 if (parser->m_freeBindingList) {
3631 b = parser->m_freeBindingList;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003632 if (len > b->uriAlloc) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003633 XML_Char *temp = (XML_Char *)REALLOC(parser, b->uri,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003634 sizeof(XML_Char) * (len + EXPAND_SPARE));
3635 if (temp == NULL)
3636 return XML_ERROR_NO_MEMORY;
3637 b->uri = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003638 b->uriAlloc = len + EXPAND_SPARE;
3639 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003640 parser->m_freeBindingList = b->nextTagBinding;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003641 }
3642 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003643 b = (BINDING *)MALLOC(parser, sizeof(BINDING));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003644 if (!b)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003645 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003646 b->uri = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003647 if (!b->uri) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003648 FREE(parser, b);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003649 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003650 }
3651 b->uriAlloc = len + EXPAND_SPARE;
3652 }
3653 b->uriLen = len;
3654 memcpy(b->uri, uri, len * sizeof(XML_Char));
Benjamin Peterson4e211002018-06-26 19:25:45 -07003655 if (parser->m_namespaceSeparator)
3656 b->uri[len - 1] = parser->m_namespaceSeparator;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003657 b->prefix = prefix;
3658 b->attId = attId;
3659 b->prevPrefixBinding = prefix->binding;
Fred Drake08317ae2003-10-21 15:38:55 +00003660 /* NULL binding when default namespace undeclared */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003661 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003662 prefix->binding = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003663 else
3664 prefix->binding = b;
3665 b->nextTagBinding = *bindingsPtr;
3666 *bindingsPtr = b;
Fred Drake31d485c2004-08-03 07:06:22 +00003667 /* if attId == NULL then we are not starting a namespace scope */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003668 if (attId && parser->m_startNamespaceDeclHandler)
3669 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003670 prefix->binding ? uri : 0);
3671 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003672}
3673
3674/* The idea here is to avoid using stack for each CDATA section when
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003675 the whole file is parsed with one call.
3676*/
3677static enum XML_Error PTRCALL
3678cdataSectionProcessor(XML_Parser parser,
3679 const char *start,
3680 const char *end,
3681 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003682{
Benjamin Peterson4e211002018-06-26 19:25:45 -07003683 enum XML_Error result = doCdataSection(parser, parser->m_encoding, &start, end,
3684 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00003685 if (result != XML_ERROR_NONE)
3686 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003687 if (start) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003688 if (parser->m_parentParser) { /* we are parsing an external entity */
3689 parser->m_processor = externalEntityContentProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003690 return externalEntityContentProcessor(parser, start, end, endPtr);
3691 }
3692 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003693 parser->m_processor = contentProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003694 return contentProcessor(parser, start, end, endPtr);
3695 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003696 }
3697 return result;
3698}
3699
Fred Drake31d485c2004-08-03 07:06:22 +00003700/* startPtr gets set to non-null if the section is closed, and to null if
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003701 the section is not yet closed.
3702*/
3703static enum XML_Error
3704doCdataSection(XML_Parser parser,
3705 const ENCODING *enc,
3706 const char **startPtr,
3707 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00003708 const char **nextPtr,
3709 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003710{
3711 const char *s = *startPtr;
3712 const char **eventPP;
3713 const char **eventEndPP;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003714 if (enc == parser->m_encoding) {
3715 eventPP = &parser->m_eventPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003716 *eventPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003717 eventEndPP = &parser->m_eventEndPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003718 }
3719 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003720 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3721 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003722 }
3723 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003724 *startPtr = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00003725
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003726 for (;;) {
3727 const char *next;
3728 int tok = XmlCdataSectionTok(enc, s, end, &next);
3729 *eventEndPP = next;
3730 switch (tok) {
3731 case XML_TOK_CDATA_SECT_CLOSE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07003732 if (parser->m_endCdataSectionHandler)
3733 parser->m_endCdataSectionHandler(parser->m_handlerArg);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003734#if 0
3735 /* see comment under XML_TOK_CDATA_SECT_OPEN */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003736 else if (parser->m_characterDataHandler)
3737 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003738#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07003739 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003740 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003741 *startPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00003742 *nextPtr = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003743 if (parser->m_parsingStatus.parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00003744 return XML_ERROR_ABORTED;
3745 else
3746 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003747 case XML_TOK_DATA_NEWLINE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07003748 if (parser->m_characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003749 XML_Char c = 0xA;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003750 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003751 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003752 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003753 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003754 break;
3755 case XML_TOK_DATA_CHARS:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003756 {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003757 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003758 if (charDataHandler) {
3759 if (MUST_CONVERT(enc, s)) {
3760 for (;;) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003761 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3762 const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003763 *eventEndPP = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003764 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3765 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
Victor Stinner23ec4b52017-06-15 00:54:36 +02003766 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003767 break;
3768 *eventPP = s;
3769 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003770 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003771 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07003772 charDataHandler(parser->m_handlerArg,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003773 (XML_Char *)s,
3774 (int)((XML_Char *)next - (XML_Char *)s));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003775 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003776 else if (parser->m_defaultHandler)
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003777 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003778 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003779 break;
3780 case XML_TOK_INVALID:
3781 *eventPP = next;
3782 return XML_ERROR_INVALID_TOKEN;
3783 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003784 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003785 *nextPtr = s;
3786 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003787 }
3788 return XML_ERROR_PARTIAL_CHAR;
3789 case XML_TOK_PARTIAL:
3790 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00003791 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003792 *nextPtr = s;
3793 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003794 }
3795 return XML_ERROR_UNCLOSED_CDATA_SECTION;
3796 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02003797 /* Every token returned by XmlCdataSectionTok() has its own
3798 * explicit case, so this default case will never be executed.
3799 * We retain it as a safety net and exclude it from the coverage
3800 * statistics.
3801 *
3802 * LCOV_EXCL_START
3803 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003804 *eventPP = next;
3805 return XML_ERROR_UNEXPECTED_STATE;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003806 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003807 }
Fred Drake31d485c2004-08-03 07:06:22 +00003808
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003809 *eventPP = s = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003810 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00003811 case XML_SUSPENDED:
3812 *nextPtr = next;
3813 return XML_ERROR_NONE;
3814 case XML_FINISHED:
3815 return XML_ERROR_ABORTED;
3816 default: ;
3817 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003818 }
3819 /* not reached */
3820}
3821
3822#ifdef XML_DTD
3823
3824/* The idea here is to avoid using stack for each IGNORE section when
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003825 the whole file is parsed with one call.
3826*/
3827static enum XML_Error PTRCALL
3828ignoreSectionProcessor(XML_Parser parser,
3829 const char *start,
3830 const char *end,
3831 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003832{
Benjamin Peterson4e211002018-06-26 19:25:45 -07003833 enum XML_Error result = doIgnoreSection(parser, parser->m_encoding, &start, end,
3834 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00003835 if (result != XML_ERROR_NONE)
3836 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003837 if (start) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003838 parser->m_processor = prologProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003839 return prologProcessor(parser, start, end, endPtr);
3840 }
3841 return result;
3842}
3843
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003844/* startPtr gets set to non-null is the section is closed, and to null
3845 if the section is not yet closed.
3846*/
3847static enum XML_Error
3848doIgnoreSection(XML_Parser parser,
3849 const ENCODING *enc,
3850 const char **startPtr,
3851 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00003852 const char **nextPtr,
3853 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003854{
3855 const char *next;
3856 int tok;
3857 const char *s = *startPtr;
3858 const char **eventPP;
3859 const char **eventEndPP;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003860 if (enc == parser->m_encoding) {
3861 eventPP = &parser->m_eventPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003862 *eventPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003863 eventEndPP = &parser->m_eventEndPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003864 }
3865 else {
Victor Stinner93d0cb52017-08-18 23:43:54 +02003866 /* It's not entirely clear, but it seems the following two lines
3867 * of code cannot be executed. The only occasions on which 'enc'
Benjamin Peterson4e211002018-06-26 19:25:45 -07003868 * is not 'encoding' are when this function is called
Victor Stinner93d0cb52017-08-18 23:43:54 +02003869 * from the internal entity processing, and IGNORE sections are an
3870 * error in internal entities.
3871 *
3872 * Since it really isn't clear that this is true, we keep the code
3873 * and just remove it from our coverage tests.
3874 *
3875 * LCOV_EXCL_START
3876 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003877 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3878 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Victor Stinner93d0cb52017-08-18 23:43:54 +02003879 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003880 }
3881 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003882 *startPtr = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003883 tok = XmlIgnoreSectionTok(enc, s, end, &next);
3884 *eventEndPP = next;
3885 switch (tok) {
3886 case XML_TOK_IGNORE_SECT:
Benjamin Peterson4e211002018-06-26 19:25:45 -07003887 if (parser->m_defaultHandler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003888 reportDefault(parser, enc, s, next);
3889 *startPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00003890 *nextPtr = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003891 if (parser->m_parsingStatus.parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00003892 return XML_ERROR_ABORTED;
3893 else
3894 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003895 case XML_TOK_INVALID:
3896 *eventPP = next;
3897 return XML_ERROR_INVALID_TOKEN;
3898 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003899 if (haveMore) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003900 *nextPtr = s;
3901 return XML_ERROR_NONE;
3902 }
3903 return XML_ERROR_PARTIAL_CHAR;
3904 case XML_TOK_PARTIAL:
3905 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00003906 if (haveMore) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003907 *nextPtr = s;
3908 return XML_ERROR_NONE;
3909 }
3910 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
3911 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02003912 /* All of the tokens that XmlIgnoreSectionTok() returns have
3913 * explicit cases to handle them, so this default case is never
3914 * executed. We keep it as a safety net anyway, and remove it
3915 * from our test coverage statistics.
3916 *
3917 * LCOV_EXCL_START
3918 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003919 *eventPP = next;
3920 return XML_ERROR_UNEXPECTED_STATE;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003921 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003922 }
3923 /* not reached */
3924}
3925
3926#endif /* XML_DTD */
3927
3928static enum XML_Error
3929initializeEncoding(XML_Parser parser)
3930{
3931 const char *s;
3932#ifdef XML_UNICODE
3933 char encodingBuf[128];
Victor Stinner93d0cb52017-08-18 23:43:54 +02003934 /* See comments abount `protoclEncodingName` in parserInit() */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003935 if (!parser->m_protocolEncodingName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003936 s = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003937 else {
3938 int i;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003939 for (i = 0; parser->m_protocolEncodingName[i]; i++) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003940 if (i == sizeof(encodingBuf) - 1
Benjamin Peterson4e211002018-06-26 19:25:45 -07003941 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003942 encodingBuf[0] = '\0';
3943 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003944 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003945 encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003946 }
3947 encodingBuf[i] = '\0';
3948 s = encodingBuf;
3949 }
3950#else
Benjamin Peterson4e211002018-06-26 19:25:45 -07003951 s = parser->m_protocolEncodingName;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003952#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07003953 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(&parser->m_initEncoding, &parser->m_encoding, s))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003954 return XML_ERROR_NONE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003955 return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003956}
3957
3958static enum XML_Error
3959processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003960 const char *s, const char *next)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003961{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003962 const char *encodingName = NULL;
3963 const XML_Char *storedEncName = NULL;
3964 const ENCODING *newEncoding = NULL;
3965 const char *version = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003966 const char *versionend;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003967 const XML_Char *storedversion = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003968 int standalone = -1;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003969 if (!(parser->m_ns
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003970 ? XmlParseXmlDeclNS
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003971 : XmlParseXmlDecl)(isGeneralTextEntity,
Benjamin Peterson4e211002018-06-26 19:25:45 -07003972 parser->m_encoding,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003973 s,
3974 next,
Benjamin Peterson4e211002018-06-26 19:25:45 -07003975 &parser->m_eventPtr,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003976 &version,
3977 &versionend,
3978 &encodingName,
3979 &newEncoding,
Fred Drake31d485c2004-08-03 07:06:22 +00003980 &standalone)) {
3981 if (isGeneralTextEntity)
3982 return XML_ERROR_TEXT_DECL;
3983 else
3984 return XML_ERROR_XML_DECL;
3985 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003986 if (!isGeneralTextEntity && standalone == 1) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003987 parser->m_dtd->standalone = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003988#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07003989 if (parser->m_paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
3990 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003991#endif /* XML_DTD */
3992 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003993 if (parser->m_xmlDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003994 if (encodingName != NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003995 storedEncName = poolStoreString(&parser->m_temp2Pool,
3996 parser->m_encoding,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003997 encodingName,
3998 encodingName
Benjamin Peterson4e211002018-06-26 19:25:45 -07003999 + XmlNameLength(parser->m_encoding, encodingName));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004000 if (!storedEncName)
4001 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004002 poolFinish(&parser->m_temp2Pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004003 }
4004 if (version) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004005 storedversion = poolStoreString(&parser->m_temp2Pool,
4006 parser->m_encoding,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004007 version,
Benjamin Peterson4e211002018-06-26 19:25:45 -07004008 versionend - parser->m_encoding->minBytesPerChar);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004009 if (!storedversion)
4010 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004011 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004012 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName, standalone);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004013 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004014 else if (parser->m_defaultHandler)
4015 reportDefault(parser, parser->m_encoding, s, next);
4016 if (parser->m_protocolEncodingName == NULL) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004017 if (newEncoding) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02004018 /* Check that the specified encoding does not conflict with what
4019 * the parser has already deduced. Do we have the same number
4020 * of bytes in the smallest representation of a character? If
4021 * this is UTF-16, is it the same endianness?
4022 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004023 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
Victor Stinner93d0cb52017-08-18 23:43:54 +02004024 || (newEncoding->minBytesPerChar == 2 &&
Benjamin Peterson4e211002018-06-26 19:25:45 -07004025 newEncoding != parser->m_encoding)) {
4026 parser->m_eventPtr = encodingName;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004027 return XML_ERROR_INCORRECT_ENCODING;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004028 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004029 parser->m_encoding = newEncoding;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004030 }
4031 else if (encodingName) {
4032 enum XML_Error result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004033 if (!storedEncName) {
4034 storedEncName = poolStoreString(
Benjamin Peterson4e211002018-06-26 19:25:45 -07004035 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4036 encodingName + XmlNameLength(parser->m_encoding, encodingName));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004037 if (!storedEncName)
4038 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004039 }
4040 result = handleUnknownEncoding(parser, storedEncName);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004041 poolClear(&parser->m_temp2Pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004042 if (result == XML_ERROR_UNKNOWN_ENCODING)
Benjamin Peterson4e211002018-06-26 19:25:45 -07004043 parser->m_eventPtr = encodingName;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004044 return result;
4045 }
4046 }
4047
4048 if (storedEncName || storedversion)
Benjamin Peterson4e211002018-06-26 19:25:45 -07004049 poolClear(&parser->m_temp2Pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004050
4051 return XML_ERROR_NONE;
4052}
4053
4054static enum XML_Error
4055handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
4056{
Benjamin Peterson4e211002018-06-26 19:25:45 -07004057 if (parser->m_unknownEncodingHandler) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004058 XML_Encoding info;
4059 int i;
4060 for (i = 0; i < 256; i++)
4061 info.map[i] = -1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004062 info.convert = NULL;
4063 info.data = NULL;
4064 info.release = NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004065 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData, encodingName,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004066 &info)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004067 ENCODING *enc;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004068 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4069 if (!parser->m_unknownEncodingMem) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004070 if (info.release)
4071 info.release(info.data);
4072 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004073 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004074 enc = (parser->m_ns
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004075 ? XmlInitUnknownEncodingNS
Benjamin Peterson4e211002018-06-26 19:25:45 -07004076 : XmlInitUnknownEncoding)(parser->m_unknownEncodingMem,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004077 info.map,
4078 info.convert,
4079 info.data);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004080 if (enc) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004081 parser->m_unknownEncodingData = info.data;
4082 parser->m_unknownEncodingRelease = info.release;
4083 parser->m_encoding = enc;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004084 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004085 }
4086 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004087 if (info.release != NULL)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004088 info.release(info.data);
4089 }
4090 return XML_ERROR_UNKNOWN_ENCODING;
4091}
4092
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004093static enum XML_Error PTRCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004094prologInitProcessor(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004095 const char *s,
4096 const char *end,
4097 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004098{
4099 enum XML_Error result = initializeEncoding(parser);
4100 if (result != XML_ERROR_NONE)
4101 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004102 parser->m_processor = prologProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004103 return prologProcessor(parser, s, end, nextPtr);
4104}
4105
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004106#ifdef XML_DTD
4107
4108static enum XML_Error PTRCALL
4109externalParEntInitProcessor(XML_Parser parser,
4110 const char *s,
4111 const char *end,
4112 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004113{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004114 enum XML_Error result = initializeEncoding(parser);
4115 if (result != XML_ERROR_NONE)
4116 return result;
4117
4118 /* we know now that XML_Parse(Buffer) has been called,
4119 so we consider the external parameter entity read */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004120 parser->m_dtd->paramEntityRead = XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004121
Benjamin Peterson4e211002018-06-26 19:25:45 -07004122 if (parser->m_prologState.inEntityValue) {
4123 parser->m_processor = entityValueInitProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004124 return entityValueInitProcessor(parser, s, end, nextPtr);
4125 }
4126 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004127 parser->m_processor = externalParEntProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004128 return externalParEntProcessor(parser, s, end, nextPtr);
4129 }
4130}
4131
4132static enum XML_Error PTRCALL
4133entityValueInitProcessor(XML_Parser parser,
4134 const char *s,
4135 const char *end,
4136 const char **nextPtr)
4137{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004138 int tok;
Fred Drake31d485c2004-08-03 07:06:22 +00004139 const char *start = s;
4140 const char *next = start;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004141 parser->m_eventPtr = start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004142
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004143 for (;;) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004144 tok = XmlPrologTok(parser->m_encoding, start, end, &next);
4145 parser->m_eventEndPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004146 if (tok <= 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004147 if (!parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
Fred Drake31d485c2004-08-03 07:06:22 +00004148 *nextPtr = s;
4149 return XML_ERROR_NONE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004150 }
4151 switch (tok) {
4152 case XML_TOK_INVALID:
Fred Drake31d485c2004-08-03 07:06:22 +00004153 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004154 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00004155 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004156 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00004157 return XML_ERROR_PARTIAL_CHAR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004158 case XML_TOK_NONE: /* start == end */
4159 default:
4160 break;
4161 }
Fred Drake31d485c2004-08-03 07:06:22 +00004162 /* found end of entity value - can store it now */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004163 return storeEntityValue(parser, parser->m_encoding, s, end);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004164 }
4165 else if (tok == XML_TOK_XML_DECL) {
Fred Drake31d485c2004-08-03 07:06:22 +00004166 enum XML_Error result;
4167 result = processXmlDecl(parser, 0, start, next);
4168 if (result != XML_ERROR_NONE)
4169 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004170 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For that
Victor Stinner93d0cb52017-08-18 23:43:54 +02004171 * to happen, a parameter entity parsing handler must have
4172 * attempted to suspend the parser, which fails and raises an
4173 * error. The parser can be aborted, but can't be suspended.
4174 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004175 if (parser->m_parsingStatus.parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00004176 return XML_ERROR_ABORTED;
Victor Stinner93d0cb52017-08-18 23:43:54 +02004177 *nextPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004178 /* stop scanning for text declaration - we found one */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004179 parser->m_processor = entityValueProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004180 return entityValueProcessor(parser, next, end, nextPtr);
4181 }
4182 /* If we are at the end of the buffer, this would cause XmlPrologTok to
4183 return XML_TOK_NONE on the next call, which would then cause the
4184 function to exit with *nextPtr set to s - that is what we want for other
4185 tokens, but not for the BOM - we would rather like to skip it;
4186 then, when this routine is entered the next time, XmlPrologTok will
4187 return XML_TOK_INVALID, since the BOM is still in the buffer
4188 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004189 else if (tok == XML_TOK_BOM && next == end && !parser->m_parsingStatus.finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004190 *nextPtr = next;
4191 return XML_ERROR_NONE;
4192 }
Victor Stinner5ff71322017-06-21 14:39:22 +02004193 /* If we get this token, we have the start of what might be a
4194 normal tag, but not a declaration (i.e. it doesn't begin with
4195 "<!"). In a DTD context, that isn't legal.
4196 */
4197 else if (tok == XML_TOK_INSTANCE_START) {
4198 *nextPtr = next;
4199 return XML_ERROR_SYNTAX;
4200 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004201 start = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004202 parser->m_eventPtr = start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004203 }
4204}
4205
4206static enum XML_Error PTRCALL
4207externalParEntProcessor(XML_Parser parser,
4208 const char *s,
4209 const char *end,
4210 const char **nextPtr)
4211{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004212 const char *next = s;
4213 int tok;
4214
Benjamin Peterson4e211002018-06-26 19:25:45 -07004215 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004216 if (tok <= 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004217 if (!parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004218 *nextPtr = s;
4219 return XML_ERROR_NONE;
4220 }
4221 switch (tok) {
4222 case XML_TOK_INVALID:
4223 return XML_ERROR_INVALID_TOKEN;
4224 case XML_TOK_PARTIAL:
4225 return XML_ERROR_UNCLOSED_TOKEN;
4226 case XML_TOK_PARTIAL_CHAR:
4227 return XML_ERROR_PARTIAL_CHAR;
4228 case XML_TOK_NONE: /* start == end */
4229 default:
4230 break;
4231 }
4232 }
4233 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4234 However, when parsing an external subset, doProlog will not accept a BOM
4235 as valid, and report a syntax error, so we have to skip the BOM
4236 */
4237 else if (tok == XML_TOK_BOM) {
4238 s = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004239 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004240 }
4241
Benjamin Peterson4e211002018-06-26 19:25:45 -07004242 parser->m_processor = prologProcessor;
4243 return doProlog(parser, parser->m_encoding, s, end, tok, next,
4244 nextPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004245}
4246
4247static enum XML_Error PTRCALL
4248entityValueProcessor(XML_Parser parser,
4249 const char *s,
4250 const char *end,
4251 const char **nextPtr)
4252{
4253 const char *start = s;
4254 const char *next = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004255 const ENCODING *enc = parser->m_encoding;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004256 int tok;
4257
4258 for (;;) {
4259 tok = XmlPrologTok(enc, start, end, &next);
4260 if (tok <= 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004261 if (!parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004262 *nextPtr = s;
4263 return XML_ERROR_NONE;
4264 }
4265 switch (tok) {
4266 case XML_TOK_INVALID:
Fred Drake31d485c2004-08-03 07:06:22 +00004267 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004268 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00004269 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004270 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00004271 return XML_ERROR_PARTIAL_CHAR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004272 case XML_TOK_NONE: /* start == end */
4273 default:
4274 break;
4275 }
Fred Drake31d485c2004-08-03 07:06:22 +00004276 /* found end of entity value - can store it now */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004277 return storeEntityValue(parser, enc, s, end);
4278 }
4279 start = next;
4280 }
4281}
4282
4283#endif /* XML_DTD */
4284
4285static enum XML_Error PTRCALL
4286prologProcessor(XML_Parser parser,
4287 const char *s,
4288 const char *end,
4289 const char **nextPtr)
4290{
4291 const char *next = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004292 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4293 return doProlog(parser, parser->m_encoding, s, end, tok, next,
4294 nextPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004295}
4296
4297static enum XML_Error
4298doProlog(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004299 const ENCODING *enc,
4300 const char *s,
4301 const char *end,
4302 int tok,
4303 const char *next,
Fred Drake31d485c2004-08-03 07:06:22 +00004304 const char **nextPtr,
4305 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004306{
4307#ifdef XML_DTD
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004308 static const XML_Char externalSubsetName[] = { ASCII_HASH , '\0' };
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004309#endif /* XML_DTD */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004310 static const XML_Char atypeCDATA[] =
4311 { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
4312 static const XML_Char atypeID[] = { ASCII_I, ASCII_D, '\0' };
4313 static const XML_Char atypeIDREF[] =
4314 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
4315 static const XML_Char atypeIDREFS[] =
4316 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
4317 static const XML_Char atypeENTITY[] =
4318 { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
4319 static const XML_Char atypeENTITIES[] = { ASCII_E, ASCII_N,
4320 ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' };
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004321 static const XML_Char atypeNMTOKEN[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004322 ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
4323 static const XML_Char atypeNMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T,
4324 ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' };
4325 static const XML_Char notationPrefix[] = { ASCII_N, ASCII_O, ASCII_T,
4326 ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0' };
4327 static const XML_Char enumValueSep[] = { ASCII_PIPE, '\0' };
4328 static const XML_Char enumValueStart[] = { ASCII_LPAREN, '\0' };
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004329
Fred Drake31d485c2004-08-03 07:06:22 +00004330 /* save one level of indirection */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004331 DTD * const dtd = parser->m_dtd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004332
4333 const char **eventPP;
4334 const char **eventEndPP;
4335 enum XML_Content_Quant quant;
4336
Benjamin Peterson4e211002018-06-26 19:25:45 -07004337 if (enc == parser->m_encoding) {
4338 eventPP = &parser->m_eventPtr;
4339 eventEndPP = &parser->m_eventEndPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004340 }
4341 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004342 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4343 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004344 }
Fred Drake31d485c2004-08-03 07:06:22 +00004345
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004346 for (;;) {
4347 int role;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004348 XML_Bool handleDefault = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004349 *eventPP = s;
4350 *eventEndPP = next;
4351 if (tok <= 0) {
Fred Drake31d485c2004-08-03 07:06:22 +00004352 if (haveMore && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004353 *nextPtr = s;
4354 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004355 }
4356 switch (tok) {
4357 case XML_TOK_INVALID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004358 *eventPP = next;
4359 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004360 case XML_TOK_PARTIAL:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004361 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004362 case XML_TOK_PARTIAL_CHAR:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004363 return XML_ERROR_PARTIAL_CHAR;
Matthias Klose865e33b2010-01-22 01:13:15 +00004364 case -XML_TOK_PROLOG_S:
4365 tok = -tok;
4366 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004367 case XML_TOK_NONE:
4368#ifdef XML_DTD
Fred Drake31d485c2004-08-03 07:06:22 +00004369 /* for internal PE NOT referenced between declarations */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004370 if (enc != parser->m_encoding && !parser->m_openInternalEntities->betweenDecl) {
Fred Drake31d485c2004-08-03 07:06:22 +00004371 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004372 return XML_ERROR_NONE;
Fred Drake31d485c2004-08-03 07:06:22 +00004373 }
4374 /* WFC: PE Between Declarations - must check that PE contains
4375 complete markup, not only for external PEs, but also for
4376 internal PEs if the reference occurs between declarations.
4377 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004378 if (parser->m_isParamEntity || enc != parser->m_encoding) {
4379 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004380 == XML_ROLE_ERROR)
Fred Drake31d485c2004-08-03 07:06:22 +00004381 return XML_ERROR_INCOMPLETE_PE;
4382 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004383 return XML_ERROR_NONE;
4384 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004385#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004386 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004387 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004388 tok = -tok;
4389 next = end;
4390 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004391 }
4392 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004393 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004394 switch (role) {
4395 case XML_ROLE_XML_DECL:
4396 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004397 enum XML_Error result = processXmlDecl(parser, 0, s, next);
4398 if (result != XML_ERROR_NONE)
4399 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004400 enc = parser->m_encoding;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004401 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004402 }
4403 break;
4404 case XML_ROLE_DOCTYPE_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004405 if (parser->m_startDoctypeDeclHandler) {
4406 parser->m_doctypeName = poolStoreString(&parser->m_tempPool, enc, s, next);
4407 if (!parser->m_doctypeName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004408 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004409 poolFinish(&parser->m_tempPool);
4410 parser->m_doctypePubid = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004411 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004412 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004413 parser->m_doctypeSysid = NULL; /* always initialize to NULL */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004414 break;
4415 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004416 if (parser->m_startDoctypeDeclHandler) {
4417 parser->m_startDoctypeDeclHandler(parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4418 parser->m_doctypePubid, 1);
4419 parser->m_doctypeName = NULL;
4420 poolClear(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004421 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004422 }
4423 break;
4424#ifdef XML_DTD
4425 case XML_ROLE_TEXT_DECL:
4426 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004427 enum XML_Error result = processXmlDecl(parser, 1, s, next);
4428 if (result != XML_ERROR_NONE)
4429 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004430 enc = parser->m_encoding;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004431 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004432 }
4433 break;
4434#endif /* XML_DTD */
4435 case XML_ROLE_DOCTYPE_PUBLIC_ID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004436#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07004437 parser->m_useForeignDTD = XML_FALSE;
4438 parser->m_declEntity = (ENTITY *)lookup(parser,
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004439 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004440 externalSubsetName,
4441 sizeof(ENTITY));
Benjamin Peterson4e211002018-06-26 19:25:45 -07004442 if (!parser->m_declEntity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004443 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004444#endif /* XML_DTD */
Fred Drake31d485c2004-08-03 07:06:22 +00004445 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004446 if (parser->m_startDoctypeDeclHandler) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004447 XML_Char *pubId;
Fred Drake31d485c2004-08-03 07:06:22 +00004448 if (!XmlIsPublicId(enc, s, next, eventPP))
4449 return XML_ERROR_PUBLICID;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004450 pubId = poolStoreString(&parser->m_tempPool, enc,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004451 s + enc->minBytesPerChar,
4452 next - enc->minBytesPerChar);
4453 if (!pubId)
Fred Drake31d485c2004-08-03 07:06:22 +00004454 return XML_ERROR_NO_MEMORY;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004455 normalizePublicId(pubId);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004456 poolFinish(&parser->m_tempPool);
4457 parser->m_doctypePubid = pubId;
Fred Drake31d485c2004-08-03 07:06:22 +00004458 handleDefault = XML_FALSE;
4459 goto alreadyChecked;
4460 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004461 /* fall through */
4462 case XML_ROLE_ENTITY_PUBLIC_ID:
4463 if (!XmlIsPublicId(enc, s, next, eventPP))
Fred Drake31d485c2004-08-03 07:06:22 +00004464 return XML_ERROR_PUBLICID;
4465 alreadyChecked:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004466 if (dtd->keepProcessing && parser->m_declEntity) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004467 XML_Char *tem = poolStoreString(&dtd->pool,
4468 enc,
4469 s + enc->minBytesPerChar,
4470 next - enc->minBytesPerChar);
4471 if (!tem)
4472 return XML_ERROR_NO_MEMORY;
4473 normalizePublicId(tem);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004474 parser->m_declEntity->publicId = tem;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004475 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004476 /* Don't suppress the default handler if we fell through from
4477 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4478 */
4479 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004480 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004481 }
4482 break;
4483 case XML_ROLE_DOCTYPE_CLOSE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004484 if (parser->m_doctypeName) {
4485 parser->m_startDoctypeDeclHandler(parser->m_handlerArg, parser->m_doctypeName,
4486 parser->m_doctypeSysid, parser->m_doctypePubid, 0);
4487 poolClear(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004488 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004489 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004490 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
4491 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004492 was not set, indicating an external subset
4493 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004494#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07004495 if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004496 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4497 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004498 if (parser->m_paramEntityParsing && parser->m_externalEntityRefHandler) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004499 ENTITY *entity = (ENTITY *)lookup(parser,
4500 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004501 externalSubsetName,
4502 sizeof(ENTITY));
Victor Stinner93d0cb52017-08-18 23:43:54 +02004503 if (!entity) {
4504 /* The external subset name "#" will have already been
4505 * inserted into the hash table at the start of the
4506 * external entity parsing, so no allocation will happen
4507 * and lookup() cannot fail.
4508 */
4509 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4510 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004511 if (parser->m_useForeignDTD)
4512 entity->base = parser->m_curBase;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004513 dtd->paramEntityRead = XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004514 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004515 0,
4516 entity->base,
4517 entity->systemId,
4518 entity->publicId))
4519 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004520 if (dtd->paramEntityRead) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004521 if (!dtd->standalone &&
Benjamin Peterson4e211002018-06-26 19:25:45 -07004522 parser->m_notStandaloneHandler &&
4523 !parser->m_notStandaloneHandler(parser->m_handlerArg))
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004524 return XML_ERROR_NOT_STANDALONE;
4525 }
4526 /* if we didn't read the foreign DTD then this means that there
4527 is no external subset and we must reset dtd->hasParamEntityRefs
4528 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004529 else if (!parser->m_doctypeSysid)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004530 dtd->hasParamEntityRefs = hadParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004531 /* end of DTD - no need to update dtd->keepProcessing */
4532 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004533 parser->m_useForeignDTD = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004534 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004535#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004536 if (parser->m_endDoctypeDeclHandler) {
4537 parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004538 handleDefault = XML_FALSE;
4539 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004540 break;
4541 case XML_ROLE_INSTANCE_START:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004542#ifdef XML_DTD
4543 /* if there is no DOCTYPE declaration then now is the
4544 last chance to read the foreign DTD
4545 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004546 if (parser->m_useForeignDTD) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004547 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004548 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004549 if (parser->m_paramEntityParsing && parser->m_externalEntityRefHandler) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004550 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004551 externalSubsetName,
4552 sizeof(ENTITY));
4553 if (!entity)
4554 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004555 entity->base = parser->m_curBase;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004556 dtd->paramEntityRead = XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004557 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004558 0,
4559 entity->base,
4560 entity->systemId,
4561 entity->publicId))
4562 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004563 if (dtd->paramEntityRead) {
4564 if (!dtd->standalone &&
Benjamin Peterson4e211002018-06-26 19:25:45 -07004565 parser->m_notStandaloneHandler &&
4566 !parser->m_notStandaloneHandler(parser->m_handlerArg))
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004567 return XML_ERROR_NOT_STANDALONE;
4568 }
4569 /* if we didn't read the foreign DTD then this means that there
4570 is no external subset and we must reset dtd->hasParamEntityRefs
4571 */
4572 else
4573 dtd->hasParamEntityRefs = hadParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004574 /* end of DTD - no need to update dtd->keepProcessing */
4575 }
4576 }
4577#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004578 parser->m_processor = contentProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004579 return contentProcessor(parser, s, end, nextPtr);
4580 case XML_ROLE_ATTLIST_ELEMENT_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004581 parser->m_declElementType = getElementType(parser, enc, s, next);
4582 if (!parser->m_declElementType)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004583 return XML_ERROR_NO_MEMORY;
4584 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004585 case XML_ROLE_ATTRIBUTE_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004586 parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
4587 if (!parser->m_declAttributeId)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004588 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004589 parser->m_declAttributeIsCdata = XML_FALSE;
4590 parser->m_declAttributeType = NULL;
4591 parser->m_declAttributeIsId = XML_FALSE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004592 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004593 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004594 parser->m_declAttributeIsCdata = XML_TRUE;
4595 parser->m_declAttributeType = atypeCDATA;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004596 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004597 case XML_ROLE_ATTRIBUTE_TYPE_ID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004598 parser->m_declAttributeIsId = XML_TRUE;
4599 parser->m_declAttributeType = atypeID;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004600 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004601 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004602 parser->m_declAttributeType = atypeIDREF;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004603 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004604 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004605 parser->m_declAttributeType = atypeIDREFS;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004606 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004607 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004608 parser->m_declAttributeType = atypeENTITY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004609 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004610 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004611 parser->m_declAttributeType = atypeENTITIES;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004612 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004613 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004614 parser->m_declAttributeType = atypeNMTOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004615 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004616 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004617 parser->m_declAttributeType = atypeNMTOKENS;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004618 checkAttListDeclHandler:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004619 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004620 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004621 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004622 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
4623 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004624 if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004625 const XML_Char *prefix;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004626 if (parser->m_declAttributeType) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004627 prefix = enumValueSep;
4628 }
4629 else {
4630 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE
4631 ? notationPrefix
4632 : enumValueStart);
4633 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004634 if (!poolAppendString(&parser->m_tempPool, prefix))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004635 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004636 if (!poolAppend(&parser->m_tempPool, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004637 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004638 parser->m_declAttributeType = parser->m_tempPool.start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004639 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004640 }
4641 break;
4642 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
4643 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004644 if (dtd->keepProcessing) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004645 if (!defineAttribute(parser->m_declElementType, parser->m_declAttributeId,
4646 parser->m_declAttributeIsCdata, parser->m_declAttributeIsId,
Fred Drake08317ae2003-10-21 15:38:55 +00004647 0, parser))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004648 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004649 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4650 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4651 || (*parser->m_declAttributeType == XML_T(ASCII_N)
4652 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004653 /* Enumerated or Notation type */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004654 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4655 || !poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004656 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004657 parser->m_declAttributeType = parser->m_tempPool.start;
4658 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004659 }
4660 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004661 parser->m_attlistDeclHandler(parser->m_handlerArg, parser->m_declElementType->name,
4662 parser->m_declAttributeId->name, parser->m_declAttributeType,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004663 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004664 poolClear(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004665 handleDefault = XML_FALSE;
4666 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004667 }
4668 break;
4669 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
4670 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004671 if (dtd->keepProcessing) {
4672 const XML_Char *attVal;
Fred Drake08317ae2003-10-21 15:38:55 +00004673 enum XML_Error result =
Benjamin Peterson4e211002018-06-26 19:25:45 -07004674 storeAttributeValue(parser, enc, parser->m_declAttributeIsCdata,
Fred Drake08317ae2003-10-21 15:38:55 +00004675 s + enc->minBytesPerChar,
4676 next - enc->minBytesPerChar,
4677 &dtd->pool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004678 if (result)
4679 return result;
4680 attVal = poolStart(&dtd->pool);
4681 poolFinish(&dtd->pool);
4682 /* ID attributes aren't allowed to have a default */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004683 if (!defineAttribute(parser->m_declElementType, parser->m_declAttributeId,
4684 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004685 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004686 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4687 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4688 || (*parser->m_declAttributeType == XML_T(ASCII_N)
4689 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004690 /* Enumerated or Notation type */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004691 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4692 || !poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004693 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004694 parser->m_declAttributeType = parser->m_tempPool.start;
4695 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004696 }
4697 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004698 parser->m_attlistDeclHandler(parser->m_handlerArg, parser->m_declElementType->name,
4699 parser->m_declAttributeId->name, parser->m_declAttributeType,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004700 attVal,
4701 role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004702 poolClear(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004703 handleDefault = XML_FALSE;
4704 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004705 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004706 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004707 case XML_ROLE_ENTITY_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004708 if (dtd->keepProcessing) {
4709 enum XML_Error result = storeEntityValue(parser, enc,
4710 s + enc->minBytesPerChar,
4711 next - enc->minBytesPerChar);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004712 if (parser->m_declEntity) {
4713 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
4714 parser->m_declEntity->textLen = (int)(poolLength(&dtd->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004715 poolFinish(&dtd->entityValuePool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004716 if (parser->m_entityDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004717 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004718 parser->m_entityDeclHandler(parser->m_handlerArg,
4719 parser->m_declEntity->name,
4720 parser->m_declEntity->is_param,
4721 parser->m_declEntity->textPtr,
4722 parser->m_declEntity->textLen,
4723 parser->m_curBase, 0, 0, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004724 handleDefault = XML_FALSE;
4725 }
4726 }
4727 else
4728 poolDiscard(&dtd->entityValuePool);
4729 if (result != XML_ERROR_NONE)
4730 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004731 }
4732 break;
4733 case XML_ROLE_DOCTYPE_SYSTEM_ID:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004734#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07004735 parser->m_useForeignDTD = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004736#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004737 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004738 if (parser->m_startDoctypeDeclHandler) {
4739 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004740 s + enc->minBytesPerChar,
4741 next - enc->minBytesPerChar);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004742 if (parser->m_doctypeSysid == NULL)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004743 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004744 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004745 handleDefault = XML_FALSE;
4746 }
4747#ifdef XML_DTD
4748 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07004749 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
4750 for the case where no parser->m_startDoctypeDeclHandler is set */
4751 parser->m_doctypeSysid = externalSubsetName;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004752#endif /* XML_DTD */
4753 if (!dtd->standalone
4754#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07004755 && !parser->m_paramEntityParsing
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004756#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004757 && parser->m_notStandaloneHandler
4758 && !parser->m_notStandaloneHandler(parser->m_handlerArg))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004759 return XML_ERROR_NOT_STANDALONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004760#ifndef XML_DTD
4761 break;
4762#else /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004763 if (!parser->m_declEntity) {
4764 parser->m_declEntity = (ENTITY *)lookup(parser,
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004765 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004766 externalSubsetName,
4767 sizeof(ENTITY));
Benjamin Peterson4e211002018-06-26 19:25:45 -07004768 if (!parser->m_declEntity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004769 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004770 parser->m_declEntity->publicId = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004771 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004772#endif /* XML_DTD */
Benjamin Peterson5033aa72018-09-10 21:04:00 -07004773 /* fall through */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004774 case XML_ROLE_ENTITY_SYSTEM_ID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004775 if (dtd->keepProcessing && parser->m_declEntity) {
4776 parser->m_declEntity->systemId = poolStoreString(&dtd->pool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004777 s + enc->minBytesPerChar,
4778 next - enc->minBytesPerChar);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004779 if (!parser->m_declEntity->systemId)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004780 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004781 parser->m_declEntity->base = parser->m_curBase;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004782 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004783 /* Don't suppress the default handler if we fell through from
4784 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
4785 */
4786 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004787 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004788 }
4789 break;
4790 case XML_ROLE_ENTITY_COMPLETE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004791 if (dtd->keepProcessing && parser->m_declEntity && parser->m_entityDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004792 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004793 parser->m_entityDeclHandler(parser->m_handlerArg,
4794 parser->m_declEntity->name,
4795 parser->m_declEntity->is_param,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004796 0,0,
Benjamin Peterson4e211002018-06-26 19:25:45 -07004797 parser->m_declEntity->base,
4798 parser->m_declEntity->systemId,
4799 parser->m_declEntity->publicId,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004800 0);
4801 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004802 }
4803 break;
4804 case XML_ROLE_ENTITY_NOTATION_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004805 if (dtd->keepProcessing && parser->m_declEntity) {
4806 parser->m_declEntity->notation = poolStoreString(&dtd->pool, enc, s, next);
4807 if (!parser->m_declEntity->notation)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004808 return XML_ERROR_NO_MEMORY;
4809 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004810 if (parser->m_unparsedEntityDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004811 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004812 parser->m_unparsedEntityDeclHandler(parser->m_handlerArg,
4813 parser->m_declEntity->name,
4814 parser->m_declEntity->base,
4815 parser->m_declEntity->systemId,
4816 parser->m_declEntity->publicId,
4817 parser->m_declEntity->notation);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004818 handleDefault = XML_FALSE;
4819 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004820 else if (parser->m_entityDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004821 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004822 parser->m_entityDeclHandler(parser->m_handlerArg,
4823 parser->m_declEntity->name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004824 0,0,0,
Benjamin Peterson4e211002018-06-26 19:25:45 -07004825 parser->m_declEntity->base,
4826 parser->m_declEntity->systemId,
4827 parser->m_declEntity->publicId,
4828 parser->m_declEntity->notation);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004829 handleDefault = XML_FALSE;
4830 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004831 }
4832 break;
4833 case XML_ROLE_GENERAL_ENTITY_NAME:
4834 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004835 if (XmlPredefinedEntityName(enc, s, next)) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004836 parser->m_declEntity = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004837 break;
4838 }
4839 if (dtd->keepProcessing) {
4840 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4841 if (!name)
4842 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004843 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004844 sizeof(ENTITY));
Benjamin Peterson4e211002018-06-26 19:25:45 -07004845 if (!parser->m_declEntity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004846 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004847 if (parser->m_declEntity->name != name) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004848 poolDiscard(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004849 parser->m_declEntity = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004850 }
4851 else {
4852 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004853 parser->m_declEntity->publicId = NULL;
4854 parser->m_declEntity->is_param = XML_FALSE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004855 /* if we have a parent parser or are reading an internal parameter
4856 entity, then the entity declaration is not considered "internal"
4857 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004858 parser->m_declEntity->is_internal = !(parser->m_parentParser || parser->m_openInternalEntities);
4859 if (parser->m_entityDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004860 handleDefault = XML_FALSE;
4861 }
4862 }
4863 else {
4864 poolDiscard(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004865 parser->m_declEntity = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004866 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004867 }
4868 break;
4869 case XML_ROLE_PARAM_ENTITY_NAME:
4870#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004871 if (dtd->keepProcessing) {
4872 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4873 if (!name)
4874 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004875 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004876 name, sizeof(ENTITY));
Benjamin Peterson4e211002018-06-26 19:25:45 -07004877 if (!parser->m_declEntity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004878 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004879 if (parser->m_declEntity->name != name) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004880 poolDiscard(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004881 parser->m_declEntity = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004882 }
4883 else {
4884 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004885 parser->m_declEntity->publicId = NULL;
4886 parser->m_declEntity->is_param = XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004887 /* if we have a parent parser or are reading an internal parameter
4888 entity, then the entity declaration is not considered "internal"
4889 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004890 parser->m_declEntity->is_internal = !(parser->m_parentParser || parser->m_openInternalEntities);
4891 if (parser->m_entityDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004892 handleDefault = XML_FALSE;
4893 }
4894 }
4895 else {
4896 poolDiscard(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004897 parser->m_declEntity = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004898 }
4899#else /* not XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004900 parser->m_declEntity = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004901#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004902 break;
4903 case XML_ROLE_NOTATION_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004904 parser->m_declNotationPublicId = NULL;
4905 parser->m_declNotationName = NULL;
4906 if (parser->m_notationDeclHandler) {
4907 parser->m_declNotationName = poolStoreString(&parser->m_tempPool, enc, s, next);
4908 if (!parser->m_declNotationName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004909 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004910 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004911 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004912 }
4913 break;
4914 case XML_ROLE_NOTATION_PUBLIC_ID:
4915 if (!XmlIsPublicId(enc, s, next, eventPP))
Fred Drake31d485c2004-08-03 07:06:22 +00004916 return XML_ERROR_PUBLICID;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004917 if (parser->m_declNotationName) { /* means m_notationDeclHandler != NULL */
4918 XML_Char *tem = poolStoreString(&parser->m_tempPool,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004919 enc,
4920 s + enc->minBytesPerChar,
4921 next - enc->minBytesPerChar);
4922 if (!tem)
4923 return XML_ERROR_NO_MEMORY;
4924 normalizePublicId(tem);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004925 parser->m_declNotationPublicId = tem;
4926 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004927 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004928 }
4929 break;
4930 case XML_ROLE_NOTATION_SYSTEM_ID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004931 if (parser->m_declNotationName && parser->m_notationDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004932 const XML_Char *systemId
Benjamin Peterson4e211002018-06-26 19:25:45 -07004933 = poolStoreString(&parser->m_tempPool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004934 s + enc->minBytesPerChar,
4935 next - enc->minBytesPerChar);
4936 if (!systemId)
4937 return XML_ERROR_NO_MEMORY;
4938 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004939 parser->m_notationDeclHandler(parser->m_handlerArg,
4940 parser->m_declNotationName,
4941 parser->m_curBase,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004942 systemId,
Benjamin Peterson4e211002018-06-26 19:25:45 -07004943 parser->m_declNotationPublicId);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004944 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004945 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004946 poolClear(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004947 break;
4948 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004949 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004950 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004951 parser->m_notationDeclHandler(parser->m_handlerArg,
4952 parser->m_declNotationName,
4953 parser->m_curBase,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004954 0,
Benjamin Peterson4e211002018-06-26 19:25:45 -07004955 parser->m_declNotationPublicId);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004956 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004957 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004958 poolClear(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004959 break;
4960 case XML_ROLE_ERROR:
4961 switch (tok) {
4962 case XML_TOK_PARAM_ENTITY_REF:
Fred Drake31d485c2004-08-03 07:06:22 +00004963 /* PE references in internal subset are
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004964 not allowed within declarations. */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004965 return XML_ERROR_PARAM_ENTITY_REF;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004966 case XML_TOK_XML_DECL:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004967 return XML_ERROR_MISPLACED_XML_PI;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004968 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004969 return XML_ERROR_SYNTAX;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004970 }
4971#ifdef XML_DTD
4972 case XML_ROLE_IGNORE_SECT:
4973 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004974 enum XML_Error result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004975 if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004976 reportDefault(parser, enc, s, next);
4977 handleDefault = XML_FALSE;
Fred Drake31d485c2004-08-03 07:06:22 +00004978 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
4979 if (result != XML_ERROR_NONE)
4980 return result;
4981 else if (!next) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004982 parser->m_processor = ignoreSectionProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004983 return result;
4984 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004985 }
4986 break;
4987#endif /* XML_DTD */
4988 case XML_ROLE_GROUP_OPEN:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004989 if (parser->m_prologState.level >= parser->m_groupSize) {
4990 if (parser->m_groupSize) {
4991 char *temp = (char *)REALLOC(parser, parser->m_groupConnector, parser->m_groupSize *= 2);
Victor Stinner93d0cb52017-08-18 23:43:54 +02004992 if (temp == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004993 parser->m_groupSize /= 2;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004994 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02004995 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004996 parser->m_groupConnector = temp;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004997 if (dtd->scaffIndex) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004998 int *temp = (int *)REALLOC(parser, dtd->scaffIndex,
4999 parser->m_groupSize * sizeof(int));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005000 if (temp == NULL)
5001 return XML_ERROR_NO_MEMORY;
5002 dtd->scaffIndex = temp;
5003 }
5004 }
5005 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005006 parser->m_groupConnector = (char *)MALLOC(parser, parser->m_groupSize = 32);
5007 if (!parser->m_groupConnector) {
5008 parser->m_groupSize = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005009 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005010 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005011 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005012 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07005013 parser->m_groupConnector[parser->m_prologState.level] = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005014 if (dtd->in_eldecl) {
5015 int myindex = nextScaffoldPart(parser);
5016 if (myindex < 0)
5017 return XML_ERROR_NO_MEMORY;
5018 dtd->scaffIndex[dtd->scaffLevel] = myindex;
5019 dtd->scaffLevel++;
5020 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005021 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005022 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005023 }
5024 break;
5025 case XML_ROLE_GROUP_SEQUENCE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005026 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005027 return XML_ERROR_SYNTAX;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005028 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5029 if (dtd->in_eldecl && parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005030 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005031 break;
5032 case XML_ROLE_GROUP_CHOICE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005033 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005034 return XML_ERROR_SYNTAX;
5035 if (dtd->in_eldecl
Benjamin Peterson4e211002018-06-26 19:25:45 -07005036 && !parser->m_groupConnector[parser->m_prologState.level]
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005037 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5038 != XML_CTYPE_MIXED)
5039 ) {
5040 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5041 = XML_CTYPE_CHOICE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005042 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005043 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005044 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07005045 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005046 break;
5047 case XML_ROLE_PARAM_ENTITY_REF:
5048#ifdef XML_DTD
5049 case XML_ROLE_INNER_PARAM_ENTITY_REF:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005050 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005051 if (!parser->m_paramEntityParsing)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005052 dtd->keepProcessing = dtd->standalone;
5053 else {
5054 const XML_Char *name;
5055 ENTITY *entity;
5056 name = poolStoreString(&dtd->pool, enc,
5057 s + enc->minBytesPerChar,
5058 next - enc->minBytesPerChar);
5059 if (!name)
5060 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005061 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005062 poolDiscard(&dtd->pool);
5063 /* first, determine if a check for an existing declaration is needed;
5064 if yes, check that the entity exists, and that it is internal,
5065 otherwise call the skipped entity handler
5066 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005067 if (parser->m_prologState.documentEntity &&
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005068 (dtd->standalone
Benjamin Peterson4e211002018-06-26 19:25:45 -07005069 ? !parser->m_openInternalEntities
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005070 : !dtd->hasParamEntityRefs)) {
5071 if (!entity)
5072 return XML_ERROR_UNDEFINED_ENTITY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005073 else if (!entity->is_internal) {
5074 /* It's hard to exhaustively search the code to be sure,
5075 * but there doesn't seem to be a way of executing the
5076 * following line. There are two cases:
5077 *
5078 * If 'standalone' is false, the DTD must have no
5079 * parameter entities or we wouldn't have passed the outer
5080 * 'if' statement. That means the only entity in the hash
5081 * table is the external subset name "#" which cannot be
5082 * given as a parameter entity name in XML syntax, so the
5083 * lookup must have returned NULL and we don't even reach
5084 * the test for an internal entity.
5085 *
5086 * If 'standalone' is true, it does not seem to be
5087 * possible to create entities taking this code path that
5088 * are not internal entities, so fail the test above.
5089 *
5090 * Because this analysis is very uncertain, the code is
5091 * being left in place and merely removed from the
5092 * coverage test statistics.
5093 */
5094 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5095 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005096 }
5097 else if (!entity) {
5098 dtd->keepProcessing = dtd->standalone;
5099 /* cannot report skipped entities in declarations */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005100 if ((role == XML_ROLE_PARAM_ENTITY_REF) && parser->m_skippedEntityHandler) {
5101 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005102 handleDefault = XML_FALSE;
5103 }
5104 break;
5105 }
5106 if (entity->open)
5107 return XML_ERROR_RECURSIVE_ENTITY_REF;
5108 if (entity->textPtr) {
5109 enum XML_Error result;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005110 XML_Bool betweenDecl =
Fred Drake31d485c2004-08-03 07:06:22 +00005111 (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5112 result = processInternalEntity(parser, entity, betweenDecl);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005113 if (result != XML_ERROR_NONE)
5114 return result;
5115 handleDefault = XML_FALSE;
5116 break;
5117 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07005118 if (parser->m_externalEntityRefHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005119 dtd->paramEntityRead = XML_FALSE;
5120 entity->open = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005121 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005122 0,
5123 entity->base,
5124 entity->systemId,
5125 entity->publicId)) {
5126 entity->open = XML_FALSE;
5127 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5128 }
5129 entity->open = XML_FALSE;
5130 handleDefault = XML_FALSE;
5131 if (!dtd->paramEntityRead) {
5132 dtd->keepProcessing = dtd->standalone;
5133 break;
5134 }
5135 }
5136 else {
5137 dtd->keepProcessing = dtd->standalone;
5138 break;
5139 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005140 }
5141#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005142 if (!dtd->standalone &&
Benjamin Peterson4e211002018-06-26 19:25:45 -07005143 parser->m_notStandaloneHandler &&
5144 !parser->m_notStandaloneHandler(parser->m_handlerArg))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005145 return XML_ERROR_NOT_STANDALONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005146 break;
5147
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005148 /* Element declaration stuff */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005149
5150 case XML_ROLE_ELEMENT_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005151 if (parser->m_elementDeclHandler) {
5152 parser->m_declElementType = getElementType(parser, enc, s, next);
5153 if (!parser->m_declElementType)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005154 return XML_ERROR_NO_MEMORY;
5155 dtd->scaffLevel = 0;
5156 dtd->scaffCount = 0;
5157 dtd->in_eldecl = XML_TRUE;
5158 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005159 }
5160 break;
5161
5162 case XML_ROLE_CONTENT_ANY:
5163 case XML_ROLE_CONTENT_EMPTY:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005164 if (dtd->in_eldecl) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005165 if (parser->m_elementDeclHandler) {
5166 XML_Content * content = (XML_Content *) MALLOC(parser, sizeof(XML_Content));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005167 if (!content)
5168 return XML_ERROR_NO_MEMORY;
5169 content->quant = XML_CQUANT_NONE;
5170 content->name = NULL;
5171 content->numchildren = 0;
5172 content->children = NULL;
5173 content->type = ((role == XML_ROLE_CONTENT_ANY) ?
5174 XML_CTYPE_ANY :
5175 XML_CTYPE_EMPTY);
5176 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005177 parser->m_elementDeclHandler(parser->m_handlerArg, parser->m_declElementType->name, content);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005178 handleDefault = XML_FALSE;
5179 }
5180 dtd->in_eldecl = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005181 }
5182 break;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005183
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005184 case XML_ROLE_CONTENT_PCDATA:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005185 if (dtd->in_eldecl) {
5186 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5187 = XML_CTYPE_MIXED;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005188 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005189 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005190 }
5191 break;
5192
5193 case XML_ROLE_CONTENT_ELEMENT:
5194 quant = XML_CQUANT_NONE;
5195 goto elementContent;
5196 case XML_ROLE_CONTENT_ELEMENT_OPT:
5197 quant = XML_CQUANT_OPT;
5198 goto elementContent;
5199 case XML_ROLE_CONTENT_ELEMENT_REP:
5200 quant = XML_CQUANT_REP;
5201 goto elementContent;
5202 case XML_ROLE_CONTENT_ELEMENT_PLUS:
5203 quant = XML_CQUANT_PLUS;
5204 elementContent:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005205 if (dtd->in_eldecl) {
5206 ELEMENT_TYPE *el;
5207 const XML_Char *name;
5208 int nameLen;
5209 const char *nxt = (quant == XML_CQUANT_NONE
5210 ? next
5211 : next - enc->minBytesPerChar);
5212 int myindex = nextScaffoldPart(parser);
5213 if (myindex < 0)
5214 return XML_ERROR_NO_MEMORY;
5215 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5216 dtd->scaffold[myindex].quant = quant;
5217 el = getElementType(parser, enc, s, nxt);
5218 if (!el)
5219 return XML_ERROR_NO_MEMORY;
5220 name = el->name;
5221 dtd->scaffold[myindex].name = name;
5222 nameLen = 0;
5223 for (; name[nameLen++]; );
5224 dtd->contentStringLen += nameLen;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005225 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005226 handleDefault = XML_FALSE;
5227 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005228 break;
5229
5230 case XML_ROLE_GROUP_CLOSE:
5231 quant = XML_CQUANT_NONE;
5232 goto closeGroup;
5233 case XML_ROLE_GROUP_CLOSE_OPT:
5234 quant = XML_CQUANT_OPT;
5235 goto closeGroup;
5236 case XML_ROLE_GROUP_CLOSE_REP:
5237 quant = XML_CQUANT_REP;
5238 goto closeGroup;
5239 case XML_ROLE_GROUP_CLOSE_PLUS:
5240 quant = XML_CQUANT_PLUS;
5241 closeGroup:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005242 if (dtd->in_eldecl) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005243 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005244 handleDefault = XML_FALSE;
5245 dtd->scaffLevel--;
5246 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5247 if (dtd->scaffLevel == 0) {
5248 if (!handleDefault) {
5249 XML_Content *model = build_model(parser);
5250 if (!model)
5251 return XML_ERROR_NO_MEMORY;
5252 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005253 parser->m_elementDeclHandler(parser->m_handlerArg, parser->m_declElementType->name, model);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005254 }
5255 dtd->in_eldecl = XML_FALSE;
5256 dtd->contentStringLen = 0;
5257 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005258 }
5259 break;
5260 /* End element declaration stuff */
5261
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005262 case XML_ROLE_PI:
5263 if (!reportProcessingInstruction(parser, enc, s, next))
5264 return XML_ERROR_NO_MEMORY;
5265 handleDefault = XML_FALSE;
5266 break;
5267 case XML_ROLE_COMMENT:
5268 if (!reportComment(parser, enc, s, next))
5269 return XML_ERROR_NO_MEMORY;
5270 handleDefault = XML_FALSE;
5271 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005272 case XML_ROLE_NONE:
5273 switch (tok) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005274 case XML_TOK_BOM:
5275 handleDefault = XML_FALSE;
5276 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005277 }
5278 break;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005279 case XML_ROLE_DOCTYPE_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005280 if (parser->m_startDoctypeDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005281 handleDefault = XML_FALSE;
5282 break;
5283 case XML_ROLE_ENTITY_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005284 if (dtd->keepProcessing && parser->m_entityDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005285 handleDefault = XML_FALSE;
5286 break;
5287 case XML_ROLE_NOTATION_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005288 if (parser->m_notationDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005289 handleDefault = XML_FALSE;
5290 break;
5291 case XML_ROLE_ATTLIST_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005292 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005293 handleDefault = XML_FALSE;
5294 break;
5295 case XML_ROLE_ELEMENT_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005296 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005297 handleDefault = XML_FALSE;
5298 break;
5299 } /* end of big switch */
5300
Benjamin Peterson4e211002018-06-26 19:25:45 -07005301 if (handleDefault && parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005302 reportDefault(parser, enc, s, next);
5303
Benjamin Peterson4e211002018-06-26 19:25:45 -07005304 switch (parser->m_parsingStatus.parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005305 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00005306 *nextPtr = next;
5307 return XML_ERROR_NONE;
5308 case XML_FINISHED:
5309 return XML_ERROR_ABORTED;
5310 default:
5311 s = next;
5312 tok = XmlPrologTok(enc, s, end, &next);
5313 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005314 }
5315 /* not reached */
5316}
5317
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005318static enum XML_Error PTRCALL
5319epilogProcessor(XML_Parser parser,
5320 const char *s,
5321 const char *end,
5322 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005323{
Benjamin Peterson4e211002018-06-26 19:25:45 -07005324 parser->m_processor = epilogProcessor;
5325 parser->m_eventPtr = s;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005326 for (;;) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005327 const char *next = NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005328 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5329 parser->m_eventEndPtr = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005330 switch (tok) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005331 /* report partial linebreak - it might be the last token */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005332 case -XML_TOK_PROLOG_S:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005333 if (parser->m_defaultHandler) {
5334 reportDefault(parser, parser->m_encoding, s, next);
5335 if (parser->m_parsingStatus.parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00005336 return XML_ERROR_ABORTED;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005337 }
Fred Drake31d485c2004-08-03 07:06:22 +00005338 *nextPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005339 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005340 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00005341 *nextPtr = s;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005342 return XML_ERROR_NONE;
5343 case XML_TOK_PROLOG_S:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005344 if (parser->m_defaultHandler)
5345 reportDefault(parser, parser->m_encoding, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005346 break;
5347 case XML_TOK_PI:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005348 if (!reportProcessingInstruction(parser, parser->m_encoding, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005349 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005350 break;
5351 case XML_TOK_COMMENT:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005352 if (!reportComment(parser, parser->m_encoding, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005353 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005354 break;
5355 case XML_TOK_INVALID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005356 parser->m_eventPtr = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005357 return XML_ERROR_INVALID_TOKEN;
5358 case XML_TOK_PARTIAL:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005359 if (!parser->m_parsingStatus.finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005360 *nextPtr = s;
5361 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005362 }
5363 return XML_ERROR_UNCLOSED_TOKEN;
5364 case XML_TOK_PARTIAL_CHAR:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005365 if (!parser->m_parsingStatus.finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005366 *nextPtr = s;
5367 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005368 }
5369 return XML_ERROR_PARTIAL_CHAR;
5370 default:
5371 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5372 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07005373 parser->m_eventPtr = s = next;
5374 switch (parser->m_parsingStatus.parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005375 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00005376 *nextPtr = next;
5377 return XML_ERROR_NONE;
5378 case XML_FINISHED:
5379 return XML_ERROR_ABORTED;
5380 default: ;
5381 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005382 }
5383}
5384
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005385static enum XML_Error
Fred Drake31d485c2004-08-03 07:06:22 +00005386processInternalEntity(XML_Parser parser, ENTITY *entity,
5387 XML_Bool betweenDecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005388{
Fred Drake31d485c2004-08-03 07:06:22 +00005389 const char *textStart, *textEnd;
5390 const char *next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005391 enum XML_Error result;
Fred Drake31d485c2004-08-03 07:06:22 +00005392 OPEN_INTERNAL_ENTITY *openEntity;
5393
Benjamin Peterson4e211002018-06-26 19:25:45 -07005394 if (parser->m_freeInternalEntities) {
5395 openEntity = parser->m_freeInternalEntities;
5396 parser->m_freeInternalEntities = openEntity->next;
Fred Drake31d485c2004-08-03 07:06:22 +00005397 }
5398 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005399 openEntity = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
Fred Drake31d485c2004-08-03 07:06:22 +00005400 if (!openEntity)
5401 return XML_ERROR_NO_MEMORY;
5402 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005403 entity->open = XML_TRUE;
Fred Drake31d485c2004-08-03 07:06:22 +00005404 entity->processed = 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005405 openEntity->next = parser->m_openInternalEntities;
5406 parser->m_openInternalEntities = openEntity;
Fred Drake31d485c2004-08-03 07:06:22 +00005407 openEntity->entity = entity;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005408 openEntity->startTagLevel = parser->m_tagLevel;
Fred Drake31d485c2004-08-03 07:06:22 +00005409 openEntity->betweenDecl = betweenDecl;
5410 openEntity->internalEventPtr = NULL;
5411 openEntity->internalEventEndPtr = NULL;
5412 textStart = (char *)entity->textPtr;
5413 textEnd = (char *)(entity->textPtr + entity->textLen);
Victor Stinner5ff71322017-06-21 14:39:22 +02005414 /* Set a safe default value in case 'next' does not get set */
5415 next = textStart;
Fred Drake31d485c2004-08-03 07:06:22 +00005416
5417#ifdef XML_DTD
5418 if (entity->is_param) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005419 int tok = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5420 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, tok,
Fred Drake31d485c2004-08-03 07:06:22 +00005421 next, &next, XML_FALSE);
5422 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005423 else
Fred Drake31d485c2004-08-03 07:06:22 +00005424#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005425 result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding, textStart,
Fred Drake31d485c2004-08-03 07:06:22 +00005426 textEnd, &next, XML_FALSE);
5427
5428 if (result == XML_ERROR_NONE) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005429 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005430 entity->processed = (int)(next - textStart);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005431 parser->m_processor = internalEntityProcessor;
Fred Drake31d485c2004-08-03 07:06:22 +00005432 }
5433 else {
5434 entity->open = XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005435 parser->m_openInternalEntities = openEntity->next;
Fred Drake31d485c2004-08-03 07:06:22 +00005436 /* put openEntity back in list of free instances */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005437 openEntity->next = parser->m_freeInternalEntities;
5438 parser->m_freeInternalEntities = openEntity;
Fred Drake31d485c2004-08-03 07:06:22 +00005439 }
5440 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005441 return result;
5442}
5443
Fred Drake31d485c2004-08-03 07:06:22 +00005444static enum XML_Error PTRCALL
5445internalEntityProcessor(XML_Parser parser,
5446 const char *s,
5447 const char *end,
5448 const char **nextPtr)
5449{
5450 ENTITY *entity;
5451 const char *textStart, *textEnd;
5452 const char *next;
5453 enum XML_Error result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005454 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
Fred Drake31d485c2004-08-03 07:06:22 +00005455 if (!openEntity)
5456 return XML_ERROR_UNEXPECTED_STATE;
5457
5458 entity = openEntity->entity;
5459 textStart = ((char *)entity->textPtr) + entity->processed;
5460 textEnd = (char *)(entity->textPtr + entity->textLen);
Victor Stinner5ff71322017-06-21 14:39:22 +02005461 /* Set a safe default value in case 'next' does not get set */
5462 next = textStart;
Fred Drake31d485c2004-08-03 07:06:22 +00005463
5464#ifdef XML_DTD
5465 if (entity->is_param) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005466 int tok = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5467 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, tok,
Fred Drake31d485c2004-08-03 07:06:22 +00005468 next, &next, XML_FALSE);
5469 }
5470 else
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005471#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005472 result = doContent(parser, openEntity->startTagLevel, parser->m_internalEncoding,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005473 textStart, textEnd, &next, XML_FALSE);
Fred Drake31d485c2004-08-03 07:06:22 +00005474
5475 if (result != XML_ERROR_NONE)
5476 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005477 else if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005478 entity->processed = (int)(next - (char *)entity->textPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00005479 return result;
5480 }
5481 else {
5482 entity->open = XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005483 parser->m_openInternalEntities = openEntity->next;
Fred Drake31d485c2004-08-03 07:06:22 +00005484 /* put openEntity back in list of free instances */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005485 openEntity->next = parser->m_freeInternalEntities;
5486 parser->m_freeInternalEntities = openEntity;
Fred Drake31d485c2004-08-03 07:06:22 +00005487 }
5488
5489#ifdef XML_DTD
5490 if (entity->is_param) {
5491 int tok;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005492 parser->m_processor = prologProcessor;
5493 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5494 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5495 (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00005496 }
5497 else
5498#endif /* XML_DTD */
5499 {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005500 parser->m_processor = contentProcessor;
Fred Drake31d485c2004-08-03 07:06:22 +00005501 /* see externalEntityContentProcessor vs contentProcessor */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005502 return doContent(parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, s, end,
5503 nextPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005504 }
Fred Drake31d485c2004-08-03 07:06:22 +00005505}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005506
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005507static enum XML_Error PTRCALL
5508errorProcessor(XML_Parser parser,
Victor Stinner23ec4b52017-06-15 00:54:36 +02005509 const char *UNUSED_P(s),
5510 const char *UNUSED_P(end),
5511 const char **UNUSED_P(nextPtr))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005512{
Benjamin Peterson4e211002018-06-26 19:25:45 -07005513 return parser->m_errorCode;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005514}
5515
5516static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005517storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5518 const char *ptr, const char *end,
5519 STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005520{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005521 enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr,
5522 end, pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005523 if (result)
5524 return result;
5525 if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5526 poolChop(pool);
5527 if (!poolAppendChar(pool, XML_T('\0')))
5528 return XML_ERROR_NO_MEMORY;
5529 return XML_ERROR_NONE;
5530}
5531
5532static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005533appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5534 const char *ptr, const char *end,
5535 STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005536{
Benjamin Peterson4e211002018-06-26 19:25:45 -07005537 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005538 for (;;) {
5539 const char *next;
5540 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
5541 switch (tok) {
5542 case XML_TOK_NONE:
5543 return XML_ERROR_NONE;
5544 case XML_TOK_INVALID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005545 if (enc == parser->m_encoding)
5546 parser->m_eventPtr = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005547 return XML_ERROR_INVALID_TOKEN;
5548 case XML_TOK_PARTIAL:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005549 if (enc == parser->m_encoding)
5550 parser->m_eventPtr = ptr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005551 return XML_ERROR_INVALID_TOKEN;
5552 case XML_TOK_CHAR_REF:
5553 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005554 XML_Char buf[XML_ENCODE_MAX];
5555 int i;
5556 int n = XmlCharRefNumber(enc, ptr);
5557 if (n < 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005558 if (enc == parser->m_encoding)
5559 parser->m_eventPtr = ptr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005560 return XML_ERROR_BAD_CHAR_REF;
5561 }
5562 if (!isCdata
5563 && n == 0x20 /* space */
5564 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5565 break;
5566 n = XmlEncode(n, (ICHAR *)buf);
Victor Stinner93d0cb52017-08-18 23:43:54 +02005567 /* The XmlEncode() functions can never return 0 here. That
5568 * error return happens if the code point passed in is either
5569 * negative or greater than or equal to 0x110000. The
5570 * XmlCharRefNumber() functions will all return a number
5571 * strictly less than 0x110000 or a negative value if an error
5572 * occurred. The negative value is intercepted above, so
5573 * XmlEncode() is never passed a value it might return an
5574 * error for.
5575 */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005576 for (i = 0; i < n; i++) {
5577 if (!poolAppendChar(pool, buf[i]))
5578 return XML_ERROR_NO_MEMORY;
5579 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005580 }
5581 break;
5582 case XML_TOK_DATA_CHARS:
5583 if (!poolAppend(pool, enc, ptr, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005584 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005585 break;
5586 case XML_TOK_TRAILING_CR:
5587 next = ptr + enc->minBytesPerChar;
5588 /* fall through */
5589 case XML_TOK_ATTRIBUTE_VALUE_S:
5590 case XML_TOK_DATA_NEWLINE:
5591 if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005592 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005593 if (!poolAppendChar(pool, 0x20))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005594 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005595 break;
5596 case XML_TOK_ENTITY_REF:
5597 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005598 const XML_Char *name;
5599 ENTITY *entity;
5600 char checkEntityDecl;
5601 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
5602 ptr + enc->minBytesPerChar,
5603 next - enc->minBytesPerChar);
5604 if (ch) {
5605 if (!poolAppendChar(pool, ch))
5606 return XML_ERROR_NO_MEMORY;
5607 break;
5608 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07005609 name = poolStoreString(&parser->m_temp2Pool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005610 ptr + enc->minBytesPerChar,
5611 next - enc->minBytesPerChar);
5612 if (!name)
5613 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005614 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005615 poolDiscard(&parser->m_temp2Pool);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005616 /* First, determine if a check for an existing declaration is needed;
5617 if yes, check that the entity exists, and that it is internal.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005618 */
5619 if (pool == &dtd->pool) /* are we called from prolog? */
5620 checkEntityDecl =
5621#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07005622 parser->m_prologState.documentEntity &&
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005623#endif /* XML_DTD */
5624 (dtd->standalone
Benjamin Peterson4e211002018-06-26 19:25:45 -07005625 ? !parser->m_openInternalEntities
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005626 : !dtd->hasParamEntityRefs);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005627 else /* if (pool == &parser->m_tempPool): we are called from content */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005628 checkEntityDecl = !dtd->hasParamEntityRefs || dtd->standalone;
5629 if (checkEntityDecl) {
5630 if (!entity)
5631 return XML_ERROR_UNDEFINED_ENTITY;
5632 else if (!entity->is_internal)
5633 return XML_ERROR_ENTITY_DECLARED_IN_PE;
5634 }
5635 else if (!entity) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005636 /* Cannot report skipped entity here - see comments on
Benjamin Peterson4e211002018-06-26 19:25:45 -07005637 parser->m_skippedEntityHandler.
5638 if (parser->m_skippedEntityHandler)
5639 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005640 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005641 /* Cannot call the default handler because this would be
5642 out of sync with the call to the startElementHandler.
Benjamin Peterson4e211002018-06-26 19:25:45 -07005643 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005644 reportDefault(parser, enc, ptr, next);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005645 */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005646 break;
5647 }
5648 if (entity->open) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005649 if (enc == parser->m_encoding) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02005650 /* It does not appear that this line can be executed.
5651 *
5652 * The "if (entity->open)" check catches recursive entity
5653 * definitions. In order to be called with an open
5654 * entity, it must have gone through this code before and
5655 * been through the recursive call to
5656 * appendAttributeValue() some lines below. That call
5657 * sets the local encoding ("enc") to the parser's
5658 * internal encoding (internal_utf8 or internal_utf16),
5659 * which can never be the same as the principle encoding.
5660 * It doesn't appear there is another code path that gets
5661 * here with entity->open being TRUE.
5662 *
5663 * Since it is not certain that this logic is watertight,
5664 * we keep the line and merely exclude it from coverage
5665 * tests.
5666 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005667 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
Victor Stinner93d0cb52017-08-18 23:43:54 +02005668 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005669 return XML_ERROR_RECURSIVE_ENTITY_REF;
5670 }
5671 if (entity->notation) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005672 if (enc == parser->m_encoding)
5673 parser->m_eventPtr = ptr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005674 return XML_ERROR_BINARY_ENTITY_REF;
5675 }
5676 if (!entity->textPtr) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005677 if (enc == parser->m_encoding)
5678 parser->m_eventPtr = ptr;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005679 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005680 }
5681 else {
5682 enum XML_Error result;
5683 const XML_Char *textEnd = entity->textPtr + entity->textLen;
5684 entity->open = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005685 result = appendAttributeValue(parser, parser->m_internalEncoding, isCdata,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005686 (char *)entity->textPtr,
5687 (char *)textEnd, pool);
5688 entity->open = XML_FALSE;
5689 if (result)
5690 return result;
5691 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005692 }
5693 break;
5694 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02005695 /* The only token returned by XmlAttributeValueTok() that does
5696 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
5697 * Getting that would require an entity name to contain an
5698 * incomplete XML character (e.g. \xE2\x82); however previous
5699 * tokenisers will have already recognised and rejected such
5700 * names before XmlAttributeValueTok() gets a look-in. This
5701 * default case should be retained as a safety net, but the code
5702 * excluded from coverage tests.
5703 *
5704 * LCOV_EXCL_START
5705 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005706 if (enc == parser->m_encoding)
5707 parser->m_eventPtr = ptr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005708 return XML_ERROR_UNEXPECTED_STATE;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005709 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005710 }
5711 ptr = next;
5712 }
5713 /* not reached */
5714}
5715
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005716static enum XML_Error
5717storeEntityValue(XML_Parser parser,
5718 const ENCODING *enc,
5719 const char *entityTextPtr,
5720 const char *entityTextEnd)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005721{
Benjamin Peterson4e211002018-06-26 19:25:45 -07005722 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005723 STRING_POOL *pool = &(dtd->entityValuePool);
5724 enum XML_Error result = XML_ERROR_NONE;
5725#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07005726 int oldInEntityValue = parser->m_prologState.inEntityValue;
5727 parser->m_prologState.inEntityValue = 1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005728#endif /* XML_DTD */
5729 /* never return Null for the value argument in EntityDeclHandler,
5730 since this would indicate an external entity; therefore we
5731 have to make sure that entityValuePool.start is not null */
5732 if (!pool->blocks) {
5733 if (!poolGrow(pool))
5734 return XML_ERROR_NO_MEMORY;
5735 }
5736
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005737 for (;;) {
5738 const char *next;
5739 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
5740 switch (tok) {
5741 case XML_TOK_PARAM_ENTITY_REF:
5742#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07005743 if (parser->m_isParamEntity || enc != parser->m_encoding) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005744 const XML_Char *name;
5745 ENTITY *entity;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005746 name = poolStoreString(&parser->m_tempPool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005747 entityTextPtr + enc->minBytesPerChar,
5748 next - enc->minBytesPerChar);
5749 if (!name) {
5750 result = XML_ERROR_NO_MEMORY;
5751 goto endEntityValue;
5752 }
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005753 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005754 poolDiscard(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005755 if (!entity) {
5756 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
5757 /* cannot report skipped entity here - see comments on
Benjamin Peterson4e211002018-06-26 19:25:45 -07005758 parser->m_skippedEntityHandler
5759 if (parser->m_skippedEntityHandler)
5760 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005761 */
5762 dtd->keepProcessing = dtd->standalone;
5763 goto endEntityValue;
5764 }
5765 if (entity->open) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005766 if (enc == parser->m_encoding)
5767 parser->m_eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005768 result = XML_ERROR_RECURSIVE_ENTITY_REF;
5769 goto endEntityValue;
5770 }
5771 if (entity->systemId) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005772 if (parser->m_externalEntityRefHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005773 dtd->paramEntityRead = XML_FALSE;
5774 entity->open = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005775 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005776 0,
5777 entity->base,
5778 entity->systemId,
5779 entity->publicId)) {
5780 entity->open = XML_FALSE;
5781 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5782 goto endEntityValue;
5783 }
5784 entity->open = XML_FALSE;
5785 if (!dtd->paramEntityRead)
5786 dtd->keepProcessing = dtd->standalone;
5787 }
5788 else
5789 dtd->keepProcessing = dtd->standalone;
5790 }
5791 else {
5792 entity->open = XML_TRUE;
5793 result = storeEntityValue(parser,
Benjamin Peterson4e211002018-06-26 19:25:45 -07005794 parser->m_internalEncoding,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005795 (char *)entity->textPtr,
5796 (char *)(entity->textPtr
5797 + entity->textLen));
5798 entity->open = XML_FALSE;
5799 if (result)
5800 goto endEntityValue;
5801 }
5802 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005803 }
5804#endif /* XML_DTD */
Fred Drake31d485c2004-08-03 07:06:22 +00005805 /* In the internal subset, PE references are not legal
5806 within markup declarations, e.g entity values in this case. */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005807 parser->m_eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005808 result = XML_ERROR_PARAM_ENTITY_REF;
5809 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005810 case XML_TOK_NONE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005811 result = XML_ERROR_NONE;
5812 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005813 case XML_TOK_ENTITY_REF:
5814 case XML_TOK_DATA_CHARS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005815 if (!poolAppend(pool, enc, entityTextPtr, next)) {
5816 result = XML_ERROR_NO_MEMORY;
5817 goto endEntityValue;
5818 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005819 break;
5820 case XML_TOK_TRAILING_CR:
5821 next = entityTextPtr + enc->minBytesPerChar;
5822 /* fall through */
5823 case XML_TOK_DATA_NEWLINE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005824 if (pool->end == pool->ptr && !poolGrow(pool)) {
5825 result = XML_ERROR_NO_MEMORY;
5826 goto endEntityValue;
5827 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005828 *(pool->ptr)++ = 0xA;
5829 break;
5830 case XML_TOK_CHAR_REF:
5831 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005832 XML_Char buf[XML_ENCODE_MAX];
5833 int i;
5834 int n = XmlCharRefNumber(enc, entityTextPtr);
5835 if (n < 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005836 if (enc == parser->m_encoding)
5837 parser->m_eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005838 result = XML_ERROR_BAD_CHAR_REF;
5839 goto endEntityValue;
5840 }
5841 n = XmlEncode(n, (ICHAR *)buf);
Victor Stinner93d0cb52017-08-18 23:43:54 +02005842 /* The XmlEncode() functions can never return 0 here. That
5843 * error return happens if the code point passed in is either
5844 * negative or greater than or equal to 0x110000. The
5845 * XmlCharRefNumber() functions will all return a number
5846 * strictly less than 0x110000 or a negative value if an error
5847 * occurred. The negative value is intercepted above, so
5848 * XmlEncode() is never passed a value it might return an
5849 * error for.
5850 */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005851 for (i = 0; i < n; i++) {
5852 if (pool->end == pool->ptr && !poolGrow(pool)) {
5853 result = XML_ERROR_NO_MEMORY;
5854 goto endEntityValue;
5855 }
5856 *(pool->ptr)++ = buf[i];
5857 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005858 }
5859 break;
5860 case XML_TOK_PARTIAL:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005861 if (enc == parser->m_encoding)
5862 parser->m_eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005863 result = XML_ERROR_INVALID_TOKEN;
5864 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005865 case XML_TOK_INVALID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005866 if (enc == parser->m_encoding)
5867 parser->m_eventPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005868 result = XML_ERROR_INVALID_TOKEN;
5869 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005870 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02005871 /* This default case should be unnecessary -- all the tokens
5872 * that XmlEntityValueTok() can return have their own explicit
5873 * cases -- but should be retained for safety. We do however
5874 * exclude it from the coverage statistics.
5875 *
5876 * LCOV_EXCL_START
5877 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005878 if (enc == parser->m_encoding)
5879 parser->m_eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005880 result = XML_ERROR_UNEXPECTED_STATE;
5881 goto endEntityValue;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005882 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005883 }
5884 entityTextPtr = next;
5885 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005886endEntityValue:
5887#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07005888 parser->m_prologState.inEntityValue = oldInEntityValue;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005889#endif /* XML_DTD */
5890 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005891}
5892
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005893static void FASTCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005894normalizeLines(XML_Char *s)
5895{
5896 XML_Char *p;
5897 for (;; s++) {
5898 if (*s == XML_T('\0'))
5899 return;
5900 if (*s == 0xD)
5901 break;
5902 }
5903 p = s;
5904 do {
5905 if (*s == 0xD) {
5906 *p++ = 0xA;
5907 if (*++s == 0xA)
5908 s++;
5909 }
5910 else
5911 *p++ = *s++;
5912 } while (*s);
5913 *p = XML_T('\0');
5914}
5915
5916static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005917reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
5918 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005919{
5920 const XML_Char *target;
5921 XML_Char *data;
5922 const char *tem;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005923 if (!parser->m_processingInstructionHandler) {
5924 if (parser->m_defaultHandler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005925 reportDefault(parser, enc, start, end);
5926 return 1;
5927 }
5928 start += enc->minBytesPerChar * 2;
5929 tem = start + XmlNameLength(enc, start);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005930 target = poolStoreString(&parser->m_tempPool, enc, start, tem);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005931 if (!target)
5932 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005933 poolFinish(&parser->m_tempPool);
5934 data = poolStoreString(&parser->m_tempPool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005935 XmlSkipS(enc, tem),
5936 end - enc->minBytesPerChar*2);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005937 if (!data)
5938 return 0;
5939 normalizeLines(data);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005940 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
5941 poolClear(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005942 return 1;
5943}
5944
5945static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005946reportComment(XML_Parser parser, const ENCODING *enc,
5947 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005948{
5949 XML_Char *data;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005950 if (!parser->m_commentHandler) {
5951 if (parser->m_defaultHandler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005952 reportDefault(parser, enc, start, end);
5953 return 1;
5954 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07005955 data = poolStoreString(&parser->m_tempPool,
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005956 enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005957 start + enc->minBytesPerChar * 4,
5958 end - enc->minBytesPerChar * 3);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005959 if (!data)
5960 return 0;
5961 normalizeLines(data);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005962 parser->m_commentHandler(parser->m_handlerArg, data);
5963 poolClear(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005964 return 1;
5965}
5966
5967static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005968reportDefault(XML_Parser parser, const ENCODING *enc,
5969 const char *s, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005970{
5971 if (MUST_CONVERT(enc, s)) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02005972 enum XML_Convert_Result convert_res;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005973 const char **eventPP;
5974 const char **eventEndPP;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005975 if (enc == parser->m_encoding) {
5976 eventPP = &parser->m_eventPtr;
5977 eventEndPP = &parser->m_eventEndPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005978 }
5979 else {
Victor Stinner93d0cb52017-08-18 23:43:54 +02005980 /* To get here, two things must be true; the parser must be
5981 * using a character encoding that is not the same as the
5982 * encoding passed in, and the encoding passed in must need
5983 * conversion to the internal format (UTF-8 unless XML_UNICODE
5984 * is defined). The only occasions on which the encoding passed
5985 * in is not the same as the parser's encoding are when it is
5986 * the internal encoding (e.g. a previously defined parameter
5987 * entity, already converted to internal format). This by
5988 * definition doesn't need conversion, so the whole branch never
5989 * gets executed.
5990 *
5991 * For safety's sake we don't delete these lines and merely
5992 * exclude them from coverage statistics.
5993 *
5994 * LCOV_EXCL_START
5995 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005996 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
5997 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Victor Stinner93d0cb52017-08-18 23:43:54 +02005998 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005999 }
6000 do {
Benjamin Peterson4e211002018-06-26 19:25:45 -07006001 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
6002 convert_res = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006003 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006004 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf, (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006005 *eventPP = s;
Victor Stinner23ec4b52017-06-15 00:54:36 +02006006 } while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006007 }
6008 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07006009 parser->m_defaultHandler(parser->m_handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006010}
6011
6012
6013static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006014defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
6015 XML_Bool isId, const XML_Char *value, XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006016{
6017 DEFAULT_ATTRIBUTE *att;
6018 if (value || isId) {
6019 /* The handling of default attributes gets messed up if we have
6020 a default which duplicates a non-default. */
6021 int i;
6022 for (i = 0; i < type->nDefaultAtts; i++)
6023 if (attId == type->defaultAtts[i].id)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006024 return 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006025 if (isId && !type->idAtt && !attId->xmlns)
6026 type->idAtt = attId;
6027 }
6028 if (type->nDefaultAtts == type->allocDefaultAtts) {
6029 if (type->allocDefaultAtts == 0) {
6030 type->allocDefaultAtts = 8;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006031 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(parser, type->allocDefaultAtts
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006032 * sizeof(DEFAULT_ATTRIBUTE));
Benjamin Peterson4e211002018-06-26 19:25:45 -07006033 if (!type->defaultAtts) {
6034 type->allocDefaultAtts = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006035 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006036 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006037 }
6038 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006039 DEFAULT_ATTRIBUTE *temp;
6040 int count = type->allocDefaultAtts * 2;
6041 temp = (DEFAULT_ATTRIBUTE *)
Benjamin Peterson4e211002018-06-26 19:25:45 -07006042 REALLOC(parser, type->defaultAtts, (count * sizeof(DEFAULT_ATTRIBUTE)));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006043 if (temp == NULL)
6044 return 0;
6045 type->allocDefaultAtts = count;
6046 type->defaultAtts = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006047 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006048 }
6049 att = type->defaultAtts + type->nDefaultAtts;
6050 att->id = attId;
6051 att->value = value;
6052 att->isCdata = isCdata;
6053 if (!isCdata)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006054 attId->maybeTokenized = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006055 type->nDefaultAtts += 1;
6056 return 1;
6057}
6058
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006059static int
6060setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006061{
Benjamin Peterson4e211002018-06-26 19:25:45 -07006062 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006063 const XML_Char *name;
6064 for (name = elementType->name; *name; name++) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006065 if (*name == XML_T(ASCII_COLON)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006066 PREFIX *prefix;
6067 const XML_Char *s;
6068 for (s = elementType->name; s != name; s++) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006069 if (!poolAppendChar(&dtd->pool, *s))
6070 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006071 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006072 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6073 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006074 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006075 sizeof(PREFIX));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006076 if (!prefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006077 return 0;
6078 if (prefix->name == poolStart(&dtd->pool))
6079 poolFinish(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006080 else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006081 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006082 elementType->prefix = prefix;
6083
6084 }
6085 }
6086 return 1;
6087}
6088
6089static ATTRIBUTE_ID *
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006090getAttributeId(XML_Parser parser, const ENCODING *enc,
6091 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006092{
Benjamin Peterson4e211002018-06-26 19:25:45 -07006093 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006094 ATTRIBUTE_ID *id;
6095 const XML_Char *name;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006096 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6097 return NULL;
6098 name = poolStoreString(&dtd->pool, enc, start, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006099 if (!name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006100 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006101 /* skip quotation mark - its storage will be re-used (like in name[-1]) */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006102 ++name;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006103 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006104 if (!id)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006105 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006106 if (id->name != name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006107 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006108 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006109 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07006110 if (!parser->m_ns)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006111 ;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006112 else if (name[0] == XML_T(ASCII_x)
6113 && name[1] == XML_T(ASCII_m)
6114 && name[2] == XML_T(ASCII_l)
6115 && name[3] == XML_T(ASCII_n)
6116 && name[4] == XML_T(ASCII_s)
6117 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006118 if (name[5] == XML_T('\0'))
6119 id->prefix = &dtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006120 else
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006121 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, sizeof(PREFIX));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006122 id->xmlns = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006123 }
6124 else {
6125 int i;
6126 for (i = 0; name[i]; i++) {
Fred Drake08317ae2003-10-21 15:38:55 +00006127 /* attributes without prefix are *not* in the default namespace */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006128 if (name[i] == XML_T(ASCII_COLON)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006129 int j;
6130 for (j = 0; j < i; j++) {
6131 if (!poolAppendChar(&dtd->pool, name[j]))
6132 return NULL;
6133 }
6134 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6135 return NULL;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006136 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006137 sizeof(PREFIX));
Benjamin Peterson196d7db2016-06-11 13:28:56 -07006138 if (!id->prefix)
6139 return NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006140 if (id->prefix->name == poolStart(&dtd->pool))
6141 poolFinish(&dtd->pool);
6142 else
6143 poolDiscard(&dtd->pool);
6144 break;
6145 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006146 }
6147 }
6148 }
6149 return id;
6150}
6151
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006152#define CONTEXT_SEP XML_T(ASCII_FF)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006153
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006154static const XML_Char *
6155getContext(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006156{
Benjamin Peterson4e211002018-06-26 19:25:45 -07006157 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006158 HASH_TABLE_ITER iter;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006159 XML_Bool needSep = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006160
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006161 if (dtd->defaultPrefix.binding) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006162 int i;
6163 int len;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006164 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006165 return NULL;
6166 len = dtd->defaultPrefix.binding->uriLen;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006167 if (parser->m_namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006168 len--;
Victor Stinner93d0cb52017-08-18 23:43:54 +02006169 for (i = 0; i < len; i++) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07006170 if (!poolAppendChar(&parser->m_tempPool, dtd->defaultPrefix.binding->uri[i])) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02006171 /* Because of memory caching, I don't believe this line can be
6172 * executed.
6173 *
6174 * This is part of a loop copying the default prefix binding
6175 * URI into the parser's temporary string pool. Previously,
6176 * that URI was copied into the same string pool, with a
6177 * terminating NUL character, as part of setContext(). When
6178 * the pool was cleared, that leaves a block definitely big
6179 * enough to hold the URI on the free block list of the pool.
6180 * The URI copy in getContext() therefore cannot run out of
6181 * memory.
6182 *
6183 * If the pool is used between the setContext() and
6184 * getContext() calls, the worst it can do is leave a bigger
6185 * block on the front of the free list. Given that this is
6186 * all somewhat inobvious and program logic can be changed, we
6187 * don't delete the line but we do exclude it from the test
6188 * coverage statistics.
6189 */
6190 return NULL; /* LCOV_EXCL_LINE */
6191 }
6192 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006193 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006194 }
6195
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006196 hashTableIterInit(&iter, &(dtd->prefixes));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006197 for (;;) {
6198 int i;
6199 int len;
6200 const XML_Char *s;
6201 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6202 if (!prefix)
6203 break;
Victor Stinner93d0cb52017-08-18 23:43:54 +02006204 if (!prefix->binding) {
6205 /* This test appears to be (justifiable) paranoia. There does
6206 * not seem to be a way of injecting a prefix without a binding
6207 * that doesn't get errored long before this function is called.
6208 * The test should remain for safety's sake, so we instead
6209 * exclude the following line from the coverage statistics.
6210 */
6211 continue; /* LCOV_EXCL_LINE */
6212 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07006213 if (needSep && !poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006214 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006215 for (s = prefix->name; *s; s++)
Benjamin Peterson4e211002018-06-26 19:25:45 -07006216 if (!poolAppendChar(&parser->m_tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006217 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006218 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006219 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006220 len = prefix->binding->uriLen;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006221 if (parser->m_namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006222 len--;
6223 for (i = 0; i < len; i++)
Benjamin Peterson4e211002018-06-26 19:25:45 -07006224 if (!poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006225 return NULL;
6226 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006227 }
6228
6229
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006230 hashTableIterInit(&iter, &(dtd->generalEntities));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006231 for (;;) {
6232 const XML_Char *s;
6233 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
6234 if (!e)
6235 break;
6236 if (!e->open)
6237 continue;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006238 if (needSep && !poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006239 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006240 for (s = e->name; *s; s++)
Benjamin Peterson4e211002018-06-26 19:25:45 -07006241 if (!poolAppendChar(&parser->m_tempPool, *s))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006242 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006243 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006244 }
6245
Benjamin Peterson4e211002018-06-26 19:25:45 -07006246 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006247 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006248 return parser->m_tempPool.start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006249}
6250
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006251static XML_Bool
6252setContext(XML_Parser parser, const XML_Char *context)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006253{
Benjamin Peterson4e211002018-06-26 19:25:45 -07006254 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006255 const XML_Char *s = context;
6256
6257 while (*context != XML_T('\0')) {
6258 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
6259 ENTITY *e;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006260 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006261 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006262 e = (ENTITY *)lookup(parser, &dtd->generalEntities, poolStart(&parser->m_tempPool), 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006263 if (e)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006264 e->open = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006265 if (*s != XML_T('\0'))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006266 s++;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006267 context = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006268 poolDiscard(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006269 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006270 else if (*s == XML_T(ASCII_EQUALS)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006271 PREFIX *prefix;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006272 if (poolLength(&parser->m_tempPool) == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006273 prefix = &dtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006274 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07006275 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006276 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006277 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&parser->m_tempPool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006278 sizeof(PREFIX));
6279 if (!prefix)
6280 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006281 if (prefix->name == poolStart(&parser->m_tempPool)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006282 prefix->name = poolCopyString(&dtd->pool, prefix->name);
6283 if (!prefix->name)
6284 return XML_FALSE;
6285 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07006286 poolDiscard(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006287 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006288 for (context = s + 1;
6289 *context != CONTEXT_SEP && *context != XML_T('\0');
6290 context++)
Benjamin Peterson4e211002018-06-26 19:25:45 -07006291 if (!poolAppendChar(&parser->m_tempPool, *context))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006292 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006293 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006294 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006295 if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
6296 &parser->m_inheritedBindings) != XML_ERROR_NONE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006297 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006298 poolDiscard(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006299 if (*context != XML_T('\0'))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006300 ++context;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006301 s = context;
6302 }
6303 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07006304 if (!poolAppendChar(&parser->m_tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006305 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006306 s++;
6307 }
6308 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006309 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006310}
6311
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006312static void FASTCALL
6313normalizePublicId(XML_Char *publicId)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006314{
6315 XML_Char *p = publicId;
6316 XML_Char *s;
6317 for (s = publicId; *s; s++) {
6318 switch (*s) {
6319 case 0x20:
6320 case 0xD:
6321 case 0xA:
6322 if (p != publicId && p[-1] != 0x20)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006323 *p++ = 0x20;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006324 break;
6325 default:
6326 *p++ = *s;
6327 }
6328 }
6329 if (p != publicId && p[-1] == 0x20)
6330 --p;
6331 *p = XML_T('\0');
6332}
6333
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006334static DTD *
6335dtdCreate(const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006336{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006337 DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD));
6338 if (p == NULL)
6339 return p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006340 poolInit(&(p->pool), ms);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006341 poolInit(&(p->entityValuePool), ms);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006342 hashTableInit(&(p->generalEntities), ms);
6343 hashTableInit(&(p->elementTypes), ms);
6344 hashTableInit(&(p->attributeIds), ms);
6345 hashTableInit(&(p->prefixes), ms);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006346#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006347 p->paramEntityRead = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006348 hashTableInit(&(p->paramEntities), ms);
6349#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006350 p->defaultPrefix.name = NULL;
6351 p->defaultPrefix.binding = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006352
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006353 p->in_eldecl = XML_FALSE;
6354 p->scaffIndex = NULL;
6355 p->scaffold = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006356 p->scaffLevel = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006357 p->scaffSize = 0;
6358 p->scaffCount = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006359 p->contentStringLen = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006360
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006361 p->keepProcessing = XML_TRUE;
6362 p->hasParamEntityRefs = XML_FALSE;
6363 p->standalone = XML_FALSE;
6364 return p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006365}
6366
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006367static void
6368dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006369{
6370 HASH_TABLE_ITER iter;
6371 hashTableIterInit(&iter, &(p->elementTypes));
6372 for (;;) {
6373 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6374 if (!e)
6375 break;
6376 if (e->allocDefaultAtts != 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006377 ms->free_fcn(e->defaultAtts);
6378 }
6379 hashTableClear(&(p->generalEntities));
6380#ifdef XML_DTD
6381 p->paramEntityRead = XML_FALSE;
6382 hashTableClear(&(p->paramEntities));
6383#endif /* XML_DTD */
6384 hashTableClear(&(p->elementTypes));
6385 hashTableClear(&(p->attributeIds));
6386 hashTableClear(&(p->prefixes));
6387 poolClear(&(p->pool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006388 poolClear(&(p->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006389 p->defaultPrefix.name = NULL;
6390 p->defaultPrefix.binding = NULL;
6391
6392 p->in_eldecl = XML_FALSE;
Fred Drake08317ae2003-10-21 15:38:55 +00006393
6394 ms->free_fcn(p->scaffIndex);
6395 p->scaffIndex = NULL;
6396 ms->free_fcn(p->scaffold);
6397 p->scaffold = NULL;
6398
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006399 p->scaffLevel = 0;
6400 p->scaffSize = 0;
6401 p->scaffCount = 0;
6402 p->contentStringLen = 0;
6403
6404 p->keepProcessing = XML_TRUE;
6405 p->hasParamEntityRefs = XML_FALSE;
6406 p->standalone = XML_FALSE;
6407}
6408
6409static void
6410dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms)
6411{
6412 HASH_TABLE_ITER iter;
6413 hashTableIterInit(&iter, &(p->elementTypes));
6414 for (;;) {
6415 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6416 if (!e)
6417 break;
6418 if (e->allocDefaultAtts != 0)
6419 ms->free_fcn(e->defaultAtts);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006420 }
6421 hashTableDestroy(&(p->generalEntities));
6422#ifdef XML_DTD
6423 hashTableDestroy(&(p->paramEntities));
6424#endif /* XML_DTD */
6425 hashTableDestroy(&(p->elementTypes));
6426 hashTableDestroy(&(p->attributeIds));
6427 hashTableDestroy(&(p->prefixes));
6428 poolDestroy(&(p->pool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006429 poolDestroy(&(p->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006430 if (isDocEntity) {
Fred Drake08317ae2003-10-21 15:38:55 +00006431 ms->free_fcn(p->scaffIndex);
6432 ms->free_fcn(p->scaffold);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006433 }
6434 ms->free_fcn(p);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006435}
6436
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006437/* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6438 The new DTD has already been initialized.
6439*/
6440static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006441dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006442{
6443 HASH_TABLE_ITER iter;
6444
6445 /* Copy the prefix table. */
6446
6447 hashTableIterInit(&iter, &(oldDtd->prefixes));
6448 for (;;) {
6449 const XML_Char *name;
6450 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
6451 if (!oldP)
6452 break;
6453 name = poolCopyString(&(newDtd->pool), oldP->name);
6454 if (!name)
6455 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006456 if (!lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006457 return 0;
6458 }
6459
6460 hashTableIterInit(&iter, &(oldDtd->attributeIds));
6461
6462 /* Copy the attribute id table. */
6463
6464 for (;;) {
6465 ATTRIBUTE_ID *newA;
6466 const XML_Char *name;
6467 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
6468
6469 if (!oldA)
6470 break;
6471 /* Remember to allocate the scratch byte before the name. */
6472 if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
6473 return 0;
6474 name = poolCopyString(&(newDtd->pool), oldA->name);
6475 if (!name)
6476 return 0;
6477 ++name;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006478 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006479 sizeof(ATTRIBUTE_ID));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006480 if (!newA)
6481 return 0;
6482 newA->maybeTokenized = oldA->maybeTokenized;
6483 if (oldA->prefix) {
6484 newA->xmlns = oldA->xmlns;
6485 if (oldA->prefix == &oldDtd->defaultPrefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006486 newA->prefix = &newDtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006487 else
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006488 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006489 oldA->prefix->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006490 }
6491 }
6492
6493 /* Copy the element type table. */
6494
6495 hashTableIterInit(&iter, &(oldDtd->elementTypes));
6496
6497 for (;;) {
6498 int i;
6499 ELEMENT_TYPE *newE;
6500 const XML_Char *name;
6501 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6502 if (!oldE)
6503 break;
6504 name = poolCopyString(&(newDtd->pool), oldE->name);
6505 if (!name)
6506 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006507 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006508 sizeof(ELEMENT_TYPE));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006509 if (!newE)
6510 return 0;
6511 if (oldE->nDefaultAtts) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006512 newE->defaultAtts = (DEFAULT_ATTRIBUTE *)
6513 ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6514 if (!newE->defaultAtts) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006515 return 0;
6516 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006517 }
6518 if (oldE->idAtt)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006519 newE->idAtt = (ATTRIBUTE_ID *)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006520 lookup(oldParser, &(newDtd->attributeIds), oldE->idAtt->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006521 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
6522 if (oldE->prefix)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006523 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006524 oldE->prefix->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006525 for (i = 0; i < newE->nDefaultAtts; i++) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006526 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006527 lookup(oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006528 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
6529 if (oldE->defaultAtts[i].value) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006530 newE->defaultAtts[i].value
6531 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
6532 if (!newE->defaultAtts[i].value)
6533 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006534 }
6535 else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006536 newE->defaultAtts[i].value = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006537 }
6538 }
6539
6540 /* Copy the entity tables. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006541 if (!copyEntityTable(oldParser,
6542 &(newDtd->generalEntities),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006543 &(newDtd->pool),
6544 &(oldDtd->generalEntities)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006545 return 0;
6546
6547#ifdef XML_DTD
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006548 if (!copyEntityTable(oldParser,
6549 &(newDtd->paramEntities),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006550 &(newDtd->pool),
6551 &(oldDtd->paramEntities)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006552 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006553 newDtd->paramEntityRead = oldDtd->paramEntityRead;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006554#endif /* XML_DTD */
6555
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006556 newDtd->keepProcessing = oldDtd->keepProcessing;
6557 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006558 newDtd->standalone = oldDtd->standalone;
6559
6560 /* Don't want deep copying for scaffolding */
6561 newDtd->in_eldecl = oldDtd->in_eldecl;
6562 newDtd->scaffold = oldDtd->scaffold;
6563 newDtd->contentStringLen = oldDtd->contentStringLen;
6564 newDtd->scaffSize = oldDtd->scaffSize;
6565 newDtd->scaffLevel = oldDtd->scaffLevel;
6566 newDtd->scaffIndex = oldDtd->scaffIndex;
6567
6568 return 1;
6569} /* End dtdCopy */
6570
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006571static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006572copyEntityTable(XML_Parser oldParser,
6573 HASH_TABLE *newTable,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006574 STRING_POOL *newPool,
6575 const HASH_TABLE *oldTable)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006576{
6577 HASH_TABLE_ITER iter;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006578 const XML_Char *cachedOldBase = NULL;
6579 const XML_Char *cachedNewBase = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006580
6581 hashTableIterInit(&iter, oldTable);
6582
6583 for (;;) {
6584 ENTITY *newE;
6585 const XML_Char *name;
6586 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
6587 if (!oldE)
6588 break;
6589 name = poolCopyString(newPool, oldE->name);
6590 if (!name)
6591 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006592 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006593 if (!newE)
6594 return 0;
6595 if (oldE->systemId) {
6596 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
6597 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006598 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006599 newE->systemId = tem;
6600 if (oldE->base) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006601 if (oldE->base == cachedOldBase)
6602 newE->base = cachedNewBase;
6603 else {
6604 cachedOldBase = oldE->base;
6605 tem = poolCopyString(newPool, cachedOldBase);
6606 if (!tem)
6607 return 0;
6608 cachedNewBase = newE->base = tem;
6609 }
6610 }
6611 if (oldE->publicId) {
6612 tem = poolCopyString(newPool, oldE->publicId);
6613 if (!tem)
6614 return 0;
6615 newE->publicId = tem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006616 }
6617 }
6618 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006619 const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr,
6620 oldE->textLen);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006621 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006622 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006623 newE->textPtr = tem;
6624 newE->textLen = oldE->textLen;
6625 }
6626 if (oldE->notation) {
6627 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
6628 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006629 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006630 newE->notation = tem;
6631 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006632 newE->is_param = oldE->is_param;
6633 newE->is_internal = oldE->is_internal;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006634 }
6635 return 1;
6636}
6637
Fred Drake08317ae2003-10-21 15:38:55 +00006638#define INIT_POWER 6
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006639
Fred Drake08317ae2003-10-21 15:38:55 +00006640static XML_Bool FASTCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006641keyeq(KEY s1, KEY s2)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006642{
6643 for (; *s1 == *s2; s1++, s2++)
6644 if (*s1 == 0)
Fred Drake08317ae2003-10-21 15:38:55 +00006645 return XML_TRUE;
6646 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006647}
6648
Victor Stinner5ff71322017-06-21 14:39:22 +02006649static size_t
6650keylen(KEY s)
6651{
6652 size_t len = 0;
6653 for (; *s; s++, len++);
6654 return len;
6655}
6656
6657static void
6658copy_salt_to_sipkey(XML_Parser parser, struct sipkey * key)
6659{
6660 key->k[0] = 0;
6661 key->k[1] = get_hash_secret_salt(parser);
6662}
6663
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006664static unsigned long FASTCALL
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006665hash(XML_Parser parser, KEY s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006666{
Victor Stinner5ff71322017-06-21 14:39:22 +02006667 struct siphash state;
6668 struct sipkey key;
Victor Stinner5ff71322017-06-21 14:39:22 +02006669 (void)sip24_valid;
6670 copy_salt_to_sipkey(parser, &key);
6671 sip24_init(&state, &key);
6672 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
6673 return (unsigned long)sip24_final(&state);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006674}
6675
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006676static NAMED *
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006677lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006678{
6679 size_t i;
6680 if (table->size == 0) {
6681 size_t tsize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006682 if (!createSize)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006683 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006684 table->power = INIT_POWER;
6685 /* table->size is a power of 2 */
6686 table->size = (size_t)1 << INIT_POWER;
6687 tsize = table->size * sizeof(NAMED *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006688 table->v = (NAMED **)table->mem->malloc_fcn(tsize);
Fred Drake31d485c2004-08-03 07:06:22 +00006689 if (!table->v) {
6690 table->size = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006691 return NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00006692 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006693 memset(table->v, 0, tsize);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006694 i = hash(parser, name) & ((unsigned long)table->size - 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006695 }
6696 else {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006697 unsigned long h = hash(parser, name);
Fred Drake08317ae2003-10-21 15:38:55 +00006698 unsigned long mask = (unsigned long)table->size - 1;
6699 unsigned char step = 0;
6700 i = h & mask;
6701 while (table->v[i]) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006702 if (keyeq(name, table->v[i]->name))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006703 return table->v[i];
Fred Drake08317ae2003-10-21 15:38:55 +00006704 if (!step)
6705 step = PROBE_STEP(h, mask, table->power);
6706 i < step ? (i += table->size - step) : (i -= step);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006707 }
6708 if (!createSize)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006709 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006710
6711 /* check for overflow (table is half full) */
6712 if (table->used >> (table->power - 1)) {
6713 unsigned char newPower = table->power + 1;
6714 size_t newSize = (size_t)1 << newPower;
6715 unsigned long newMask = (unsigned long)newSize - 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006716 size_t tsize = newSize * sizeof(NAMED *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006717 NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006718 if (!newV)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006719 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006720 memset(newV, 0, tsize);
6721 for (i = 0; i < table->size; i++)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006722 if (table->v[i]) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006723 unsigned long newHash = hash(parser, table->v[i]->name);
Fred Drake08317ae2003-10-21 15:38:55 +00006724 size_t j = newHash & newMask;
6725 step = 0;
6726 while (newV[j]) {
6727 if (!step)
6728 step = PROBE_STEP(newHash, newMask, newPower);
6729 j < step ? (j += newSize - step) : (j -= step);
6730 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006731 newV[j] = table->v[i];
6732 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006733 table->mem->free_fcn(table->v);
6734 table->v = newV;
Fred Drake08317ae2003-10-21 15:38:55 +00006735 table->power = newPower;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006736 table->size = newSize;
Fred Drake08317ae2003-10-21 15:38:55 +00006737 i = h & newMask;
6738 step = 0;
6739 while (table->v[i]) {
6740 if (!step)
6741 step = PROBE_STEP(h, newMask, newPower);
6742 i < step ? (i += newSize - step) : (i -= step);
6743 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006744 }
6745 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006746 table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006747 if (!table->v[i])
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006748 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006749 memset(table->v[i], 0, createSize);
6750 table->v[i]->name = name;
6751 (table->used)++;
6752 return table->v[i];
6753}
6754
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006755static void FASTCALL
6756hashTableClear(HASH_TABLE *table)
6757{
6758 size_t i;
6759 for (i = 0; i < table->size; i++) {
Fred Drake08317ae2003-10-21 15:38:55 +00006760 table->mem->free_fcn(table->v[i]);
6761 table->v[i] = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006762 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006763 table->used = 0;
6764}
6765
6766static void FASTCALL
6767hashTableDestroy(HASH_TABLE *table)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006768{
6769 size_t i;
Fred Drake08317ae2003-10-21 15:38:55 +00006770 for (i = 0; i < table->size; i++)
6771 table->mem->free_fcn(table->v[i]);
6772 table->mem->free_fcn(table->v);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006773}
6774
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006775static void FASTCALL
6776hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006777{
Fred Drake08317ae2003-10-21 15:38:55 +00006778 p->power = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006779 p->size = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006780 p->used = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006781 p->v = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006782 p->mem = ms;
6783}
6784
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006785static void FASTCALL
6786hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006787{
6788 iter->p = table->v;
6789 iter->end = iter->p + table->size;
6790}
6791
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006792static NAMED * FASTCALL
6793hashTableIterNext(HASH_TABLE_ITER *iter)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006794{
6795 while (iter->p != iter->end) {
6796 NAMED *tem = *(iter->p)++;
6797 if (tem)
6798 return tem;
6799 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006800 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006801}
6802
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006803static void FASTCALL
6804poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006805{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006806 pool->blocks = NULL;
6807 pool->freeBlocks = NULL;
6808 pool->start = NULL;
6809 pool->ptr = NULL;
6810 pool->end = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006811 pool->mem = ms;
6812}
6813
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006814static void FASTCALL
6815poolClear(STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006816{
6817 if (!pool->freeBlocks)
6818 pool->freeBlocks = pool->blocks;
6819 else {
6820 BLOCK *p = pool->blocks;
6821 while (p) {
6822 BLOCK *tem = p->next;
6823 p->next = pool->freeBlocks;
6824 pool->freeBlocks = p;
6825 p = tem;
6826 }
6827 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006828 pool->blocks = NULL;
6829 pool->start = NULL;
6830 pool->ptr = NULL;
6831 pool->end = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006832}
6833
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006834static void FASTCALL
6835poolDestroy(STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006836{
6837 BLOCK *p = pool->blocks;
6838 while (p) {
6839 BLOCK *tem = p->next;
6840 pool->mem->free_fcn(p);
6841 p = tem;
6842 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006843 p = pool->freeBlocks;
6844 while (p) {
6845 BLOCK *tem = p->next;
6846 pool->mem->free_fcn(p);
6847 p = tem;
6848 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006849}
6850
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006851static XML_Char *
6852poolAppend(STRING_POOL *pool, const ENCODING *enc,
6853 const char *ptr, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006854{
6855 if (!pool->ptr && !poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006856 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006857 for (;;) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02006858 const enum XML_Convert_Result convert_res = XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
6859 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006860 break;
6861 if (!poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006862 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006863 }
6864 return pool->start;
6865}
6866
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006867static const XML_Char * FASTCALL
6868poolCopyString(STRING_POOL *pool, const XML_Char *s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006869{
6870 do {
6871 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006872 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006873 } while (*s++);
6874 s = pool->start;
6875 poolFinish(pool);
6876 return s;
6877}
6878
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006879static const XML_Char *
6880poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006881{
Victor Stinner93d0cb52017-08-18 23:43:54 +02006882 if (!pool->ptr && !poolGrow(pool)) {
6883 /* The following line is unreachable given the current usage of
6884 * poolCopyStringN(). Currently it is called from exactly one
6885 * place to copy the text of a simple general entity. By that
6886 * point, the name of the entity is already stored in the pool, so
6887 * pool->ptr cannot be NULL.
6888 *
6889 * If poolCopyStringN() is used elsewhere as it well might be,
6890 * this line may well become executable again. Regardless, this
6891 * sort of check shouldn't be removed lightly, so we just exclude
6892 * it from the coverage statistics.
6893 */
6894 return NULL; /* LCOV_EXCL_LINE */
6895 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006896 for (; n > 0; --n, s++) {
6897 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006898 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006899 }
6900 s = pool->start;
6901 poolFinish(pool);
6902 return s;
6903}
6904
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006905static const XML_Char * FASTCALL
6906poolAppendString(STRING_POOL *pool, const XML_Char *s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006907{
6908 while (*s) {
6909 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006910 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006911 s++;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006912 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006913 return pool->start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006914}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006915
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006916static XML_Char *
6917poolStoreString(STRING_POOL *pool, const ENCODING *enc,
6918 const char *ptr, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006919{
6920 if (!poolAppend(pool, enc, ptr, end))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006921 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006922 if (pool->ptr == pool->end && !poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006923 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006924 *(pool->ptr)++ = 0;
6925 return pool->start;
6926}
6927
Victor Stinner5ff71322017-06-21 14:39:22 +02006928static size_t
6929poolBytesToAllocateFor(int blockSize)
6930{
6931 /* Unprotected math would be:
6932 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
6933 **
6934 ** Detect overflow, avoiding _signed_ overflow undefined behavior
6935 ** For a + b * c we check b * c in isolation first, so that addition of a
6936 ** on top has no chance of making us accept a small non-negative number
6937 */
6938 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
6939
6940 if (blockSize <= 0)
6941 return 0;
6942
6943 if (blockSize > (int)(INT_MAX / stretch))
6944 return 0;
6945
6946 {
6947 const int stretchedBlockSize = blockSize * (int)stretch;
6948 const int bytesToAllocate = (int)(
6949 offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
6950 if (bytesToAllocate < 0)
6951 return 0;
6952
6953 return (size_t)bytesToAllocate;
6954 }
6955}
6956
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006957static XML_Bool FASTCALL
6958poolGrow(STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006959{
6960 if (pool->freeBlocks) {
6961 if (pool->start == 0) {
6962 pool->blocks = pool->freeBlocks;
6963 pool->freeBlocks = pool->freeBlocks->next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006964 pool->blocks->next = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006965 pool->start = pool->blocks->s;
6966 pool->end = pool->start + pool->blocks->size;
6967 pool->ptr = pool->start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006968 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006969 }
6970 if (pool->end - pool->start < pool->freeBlocks->size) {
6971 BLOCK *tem = pool->freeBlocks->next;
6972 pool->freeBlocks->next = pool->blocks;
6973 pool->blocks = pool->freeBlocks;
6974 pool->freeBlocks = tem;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006975 memcpy(pool->blocks->s, pool->start,
6976 (pool->end - pool->start) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006977 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
6978 pool->start = pool->blocks->s;
6979 pool->end = pool->start + pool->blocks->size;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006980 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006981 }
6982 }
6983 if (pool->blocks && pool->start == pool->blocks->s) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02006984 BLOCK *temp;
6985 int blockSize = (int)((unsigned)(pool->end - pool->start)*2U);
Victor Stinner5ff71322017-06-21 14:39:22 +02006986 size_t bytesToAllocate;
Victor Stinner23ec4b52017-06-15 00:54:36 +02006987
Benjamin Peterson4e211002018-06-26 19:25:45 -07006988 /* NOTE: Needs to be calculated prior to calling `realloc`
6989 to avoid dangling pointers: */
Victor Stinner93d0cb52017-08-18 23:43:54 +02006990 const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
6991
6992 if (blockSize < 0) {
6993 /* This condition traps a situation where either more than
6994 * INT_MAX/2 bytes have already been allocated. This isn't
6995 * readily testable, since it is unlikely that an average
6996 * machine will have that much memory, so we exclude it from the
6997 * coverage statistics.
6998 */
6999 return XML_FALSE; /* LCOV_EXCL_LINE */
7000 }
Victor Stinner23ec4b52017-06-15 00:54:36 +02007001
Victor Stinner5ff71322017-06-21 14:39:22 +02007002 bytesToAllocate = poolBytesToAllocateFor(blockSize);
7003 if (bytesToAllocate == 0)
7004 return XML_FALSE;
7005
Victor Stinner23ec4b52017-06-15 00:54:36 +02007006 temp = (BLOCK *)
Victor Stinner5ff71322017-06-21 14:39:22 +02007007 pool->mem->realloc_fcn(pool->blocks, (unsigned)bytesToAllocate);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07007008 if (temp == NULL)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007009 return XML_FALSE;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07007010 pool->blocks = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007011 pool->blocks->size = blockSize;
Victor Stinner93d0cb52017-08-18 23:43:54 +02007012 pool->ptr = pool->blocks->s + offsetInsideBlock;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007013 pool->start = pool->blocks->s;
7014 pool->end = pool->start + blockSize;
7015 }
7016 else {
7017 BLOCK *tem;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00007018 int blockSize = (int)(pool->end - pool->start);
Victor Stinner5ff71322017-06-21 14:39:22 +02007019 size_t bytesToAllocate;
Victor Stinner23ec4b52017-06-15 00:54:36 +02007020
Victor Stinner93d0cb52017-08-18 23:43:54 +02007021 if (blockSize < 0) {
7022 /* This condition traps a situation where either more than
7023 * INT_MAX bytes have already been allocated (which is prevented
7024 * by various pieces of program logic, not least this one, never
7025 * mind the unlikelihood of actually having that much memory) or
7026 * the pool control fields have been corrupted (which could
7027 * conceivably happen in an extremely buggy user handler
7028 * function). Either way it isn't readily testable, so we
7029 * exclude it from the coverage statistics.
7030 */
7031 return XML_FALSE; /* LCOV_EXCL_LINE */
7032 }
Victor Stinner23ec4b52017-06-15 00:54:36 +02007033
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007034 if (blockSize < INIT_BLOCK_SIZE)
7035 blockSize = INIT_BLOCK_SIZE;
Victor Stinner5ff71322017-06-21 14:39:22 +02007036 else {
7037 /* Detect overflow, avoiding _signed_ overflow undefined behavior */
7038 if ((int)((unsigned)blockSize * 2U) < 0) {
7039 return XML_FALSE;
7040 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007041 blockSize *= 2;
Victor Stinner5ff71322017-06-21 14:39:22 +02007042 }
7043
7044 bytesToAllocate = poolBytesToAllocateFor(blockSize);
7045 if (bytesToAllocate == 0)
7046 return XML_FALSE;
7047
7048 tem = (BLOCK *)pool->mem->malloc_fcn(bytesToAllocate);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007049 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007050 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007051 tem->size = blockSize;
7052 tem->next = pool->blocks;
7053 pool->blocks = tem;
7054 if (pool->ptr != pool->start)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007055 memcpy(tem->s, pool->start,
7056 (pool->ptr - pool->start) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007057 pool->ptr = tem->s + (pool->ptr - pool->start);
7058 pool->start = tem->s;
7059 pool->end = tem->s + blockSize;
7060 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007061 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007062}
7063
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007064static int FASTCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007065nextScaffoldPart(XML_Parser parser)
7066{
Benjamin Peterson4e211002018-06-26 19:25:45 -07007067 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007068 CONTENT_SCAFFOLD * me;
7069 int next;
7070
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007071 if (!dtd->scaffIndex) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07007072 dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007073 if (!dtd->scaffIndex)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007074 return -1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007075 dtd->scaffIndex[0] = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007076 }
7077
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007078 if (dtd->scaffCount >= dtd->scaffSize) {
7079 CONTENT_SCAFFOLD *temp;
7080 if (dtd->scaffold) {
7081 temp = (CONTENT_SCAFFOLD *)
Benjamin Peterson4e211002018-06-26 19:25:45 -07007082 REALLOC(parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007083 if (temp == NULL)
7084 return -1;
7085 dtd->scaffSize *= 2;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007086 }
7087 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07007088 temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007089 * sizeof(CONTENT_SCAFFOLD));
7090 if (temp == NULL)
7091 return -1;
7092 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007093 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007094 dtd->scaffold = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007095 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007096 next = dtd->scaffCount++;
7097 me = &dtd->scaffold[next];
7098 if (dtd->scaffLevel) {
7099 CONTENT_SCAFFOLD *parent = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel-1]];
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007100 if (parent->lastchild) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007101 dtd->scaffold[parent->lastchild].nextsib = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007102 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007103 if (!parent->childcnt)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007104 parent->firstchild = next;
7105 parent->lastchild = next;
7106 parent->childcnt++;
7107 }
7108 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
7109 return next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007110}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007111
7112static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007113build_node(XML_Parser parser,
7114 int src_node,
7115 XML_Content *dest,
7116 XML_Content **contpos,
7117 XML_Char **strpos)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007118{
Benjamin Peterson4e211002018-06-26 19:25:45 -07007119 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007120 dest->type = dtd->scaffold[src_node].type;
7121 dest->quant = dtd->scaffold[src_node].quant;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007122 if (dest->type == XML_CTYPE_NAME) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007123 const XML_Char *src;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007124 dest->name = *strpos;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007125 src = dtd->scaffold[src_node].name;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007126 for (;;) {
7127 *(*strpos)++ = *src;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007128 if (!*src)
7129 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007130 src++;
7131 }
7132 dest->numchildren = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007133 dest->children = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007134 }
7135 else {
7136 unsigned int i;
7137 int cn;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007138 dest->numchildren = dtd->scaffold[src_node].childcnt;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007139 dest->children = *contpos;
7140 *contpos += dest->numchildren;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007141 for (i = 0, cn = dtd->scaffold[src_node].firstchild;
7142 i < dest->numchildren;
7143 i++, cn = dtd->scaffold[cn].nextsib) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007144 build_node(parser, cn, &(dest->children[i]), contpos, strpos);
7145 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007146 dest->name = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007147 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007148}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007149
7150static XML_Content *
7151build_model (XML_Parser parser)
7152{
Benjamin Peterson4e211002018-06-26 19:25:45 -07007153 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007154 XML_Content *ret;
7155 XML_Content *cpos;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007156 XML_Char * str;
7157 int allocsize = (dtd->scaffCount * sizeof(XML_Content)
7158 + (dtd->contentStringLen * sizeof(XML_Char)));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007159
Benjamin Peterson4e211002018-06-26 19:25:45 -07007160 ret = (XML_Content *)MALLOC(parser, allocsize);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007161 if (!ret)
7162 return NULL;
7163
7164 str = (XML_Char *) (&ret[dtd->scaffCount]);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007165 cpos = &ret[1];
7166
7167 build_node(parser, 0, ret, &cpos, &str);
7168 return ret;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007169}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007170
7171static ELEMENT_TYPE *
7172getElementType(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007173 const ENCODING *enc,
7174 const char *ptr,
7175 const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007176{
Benjamin Peterson4e211002018-06-26 19:25:45 -07007177 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007178 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007179 ELEMENT_TYPE *ret;
7180
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007181 if (!name)
7182 return NULL;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07007183 ret = (ELEMENT_TYPE *) lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007184 if (!ret)
7185 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007186 if (ret->name != name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007187 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007188 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007189 poolFinish(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007190 if (!setElementTypePrefix(parser, ret))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007191 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007192 }
7193 return ret;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007194}
Victor Stinner93d0cb52017-08-18 23:43:54 +02007195
7196static XML_Char *
7197copyString(const XML_Char *s,
7198 const XML_Memory_Handling_Suite *memsuite)
7199{
7200 int charsRequired = 0;
7201 XML_Char *result;
7202
7203 /* First determine how long the string is */
7204 while (s[charsRequired] != 0) {
7205 charsRequired++;
7206 }
7207 /* Include the terminator */
7208 charsRequired++;
7209
7210 /* Now allocate space for the copy */
7211 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
7212 if (result == NULL)
7213 return NULL;
7214 /* Copy the original into place */
7215 memcpy(result, s, charsRequired * sizeof(XML_Char));
7216 return result;
7217}