blob: 90a237f30eb8cdcee92874d2cdc4b3d5e42e757c [file] [log] [blame]
Benjamin Peterson4e211002018-06-26 19:25:45 -07001/* 4b74aa710b4ed5ce464b0ce544852cb47bf905c85a49c7bae2749f5885cb966d (2.2.5+)
Victor Stinner759e30e2017-09-05 01:58:08 +02002 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
Victor Stinner5ff71322017-06-21 14:39:22 +02008
Victor Stinner759e30e2017-09-05 01:58:08 +02009 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000-2017 Expat development team
11 Licensed under the MIT license:
12
13 Permission is hereby granted, free of charge, to any person obtaining
14 a copy of this software and associated documentation files (the
15 "Software"), to deal in the Software without restriction, including
16 without limitation the rights to use, copy, modify, merge, publish,
17 distribute, sublicense, and/or sell copies of the Software, and to permit
18 persons to whom the Software is furnished to do so, subject to the
19 following conditions:
20
21 The above copyright notice and this permission notice shall be included
22 in all copies or substantial portions of the Software.
23
24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
29 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30 USE OR OTHER DEALINGS IN THE SOFTWARE.
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +000031*/
32
Victor Stinner93d0cb52017-08-18 23:43:54 +020033#if !defined(_GNU_SOURCE)
34# define _GNU_SOURCE 1 /* syscall prototype */
35#endif
Victor Stinner5ff71322017-06-21 14:39:22 +020036
Victor Stinner23ec4b52017-06-15 00:54:36 +020037#include <stddef.h>
38#include <string.h> /* memset(), memcpy() */
39#include <assert.h>
40#include <limits.h> /* UINT_MAX */
Victor Stinner5ff71322017-06-21 14:39:22 +020041#include <stdio.h> /* fprintf */
42#include <stdlib.h> /* getenv */
Victor Stinner23ec4b52017-06-15 00:54:36 +020043
Victor Stinner5ff71322017-06-21 14:39:22 +020044#ifdef _WIN32
Victor Stinner23ec4b52017-06-15 00:54:36 +020045#define getpid GetCurrentProcessId
46#else
47#include <sys/time.h> /* gettimeofday() */
48#include <sys/types.h> /* getpid() */
49#include <unistd.h> /* getpid() */
Victor Stinner93d0cb52017-08-18 23:43:54 +020050#include <fcntl.h> /* O_RDONLY */
51#include <errno.h>
Victor Stinner23ec4b52017-06-15 00:54:36 +020052#endif
53
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070054#define XML_BUILDING_EXPAT 1
55
Victor Stinner5ff71322017-06-21 14:39:22 +020056#ifdef _WIN32
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070057#include "winconfig.h"
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070058#elif defined(HAVE_EXPAT_CONFIG_H)
59#include <expat_config.h>
Victor Stinner5ff71322017-06-21 14:39:22 +020060#endif /* ndef _WIN32 */
Christian Heimesaa152762013-12-06 23:43:50 +010061
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070062#include "ascii.h"
Fred Drake08317ae2003-10-21 15:38:55 +000063#include "expat.h"
Victor Stinner5ff71322017-06-21 14:39:22 +020064#include "siphash.h"
Fred Drake08317ae2003-10-21 15:38:55 +000065
Victor Stinner93d0cb52017-08-18 23:43:54 +020066#if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
67# if defined(HAVE_GETRANDOM)
68# include <sys/random.h> /* getrandom */
69# else
70# include <unistd.h> /* syscall */
71# include <sys/syscall.h> /* SYS_getrandom */
72# endif
73# if ! defined(GRND_NONBLOCK)
74# define GRND_NONBLOCK 0x0001
75# endif /* ! defined(GRND_NONBLOCK) */
76#endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
77
78#if defined(HAVE_LIBBSD) \
79 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
80# include <bsd/stdlib.h>
81#endif
82
83#if defined(_WIN32) && !defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
84# define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
85#endif
86
87#if !defined(HAVE_GETRANDOM) && !defined(HAVE_SYSCALL_GETRANDOM) \
88 && !defined(HAVE_ARC4RANDOM_BUF) && !defined(HAVE_ARC4RANDOM) \
89 && !defined(XML_DEV_URANDOM) \
90 && !defined(_WIN32) \
91 && !defined(XML_POOR_ENTROPY)
92# error \
93 You do not have support for any sources of high quality entropy \
94 enabled. For end user security, that is probably not what you want. \
95 \
96 Your options include: \
97 * Linux + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
98 * Linux + glibc <2.25 (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
99 * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
100 * BSD / macOS <10.7 (arc4random): HAVE_ARC4RANDOM, \
101 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
102 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
103 * Linux / BSD / macOS (/dev/urandom): XML_DEV_URANDOM \
104 * Windows (RtlGenRandom): _WIN32. \
105 \
106 If you insist on not using any of these, bypass this error by defining \
107 XML_POOR_ENTROPY; you have been warned. \
108 \
Victor Stinner93d0cb52017-08-18 23:43:54 +0200109 If you have reasons to patch this detection code away or need changes \
110 to the build system, please open a bug. Thank you!
111#endif
112
113
#ifdef XML_UNICODE
/* XML_UNICODE build: map the generic Xml* names onto the UTF-16 variants
   and use a 16-bit unit for ICHAR. */
#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#define XmlConvert XmlUtf16Convert
#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
#define XmlEncode XmlUtf16Encode
/* Conversion is needed unless the encoding is flagged isUtf16 and s sits at
   an even address.  The parity of the address is taken through an integer
   cast; subtracting a null pointer from s is undefined behavior in ISO C. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((size_t)(s)) & 1))
typedef unsigned short ICHAR;
#else
/* Default build: map the generic Xml* names onto the UTF-8 variants and
   use char for ICHAR. */
#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#define XmlConvert XmlUtf8Convert
#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
#define XmlEncode XmlUtf8Encode
/* Conversion is needed unless the encoding is flagged isUtf8. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
typedef char ICHAR;
#endif
132
133
134#ifndef XML_NS
135
136#define XmlInitEncodingNS XmlInitEncoding
137#define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
138#undef XmlGetInternalEncodingNS
139#define XmlGetInternalEncodingNS XmlGetInternalEncoding
140#define XmlParseXmlDeclNS XmlParseXmlDecl
141
142#endif
143
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000144#ifdef XML_UNICODE
145
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000146#ifdef XML_UNICODE_WCHAR_T
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000147#define XML_T(x) (const wchar_t)x
148#define XML_L(x) L ## x
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000149#else
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000150#define XML_T(x) (const unsigned short)x
151#define XML_L(x) x
152#endif
153
154#else
155
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000156#define XML_T(x) x
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000157#define XML_L(x) x
158
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000159#endif
160
161/* Round up n to be a multiple of sz, where sz is a power of 2. */
162#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
163
Fred Drake08317ae2003-10-21 15:38:55 +0000164/* Handle the case where memmove() doesn't exist. */
165#ifndef HAVE_MEMMOVE
166#ifdef HAVE_BCOPY
167#define memmove(d,s,l) bcopy((s),(d),(l))
168#else
169#error memmove does not exist on this platform, nor is a substitute available
170#endif /* HAVE_BCOPY */
171#endif /* HAVE_MEMMOVE */
172
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000173#include "internal.h"
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000174#include "xmltok.h"
175#include "xmlrole.h"
176
177typedef const XML_Char *KEY;
178
179typedef struct {
180 KEY name;
181} NAMED;
182
183typedef struct {
184 NAMED **v;
Fred Drake08317ae2003-10-21 15:38:55 +0000185 unsigned char power;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000186 size_t size;
187 size_t used;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000188 const XML_Memory_Handling_Suite *mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000189} HASH_TABLE;
190
Victor Stinner5ff71322017-06-21 14:39:22 +0200191static size_t
192keylen(KEY s);
Fred Drake08317ae2003-10-21 15:38:55 +0000193
Victor Stinner5ff71322017-06-21 14:39:22 +0200194static void
195copy_salt_to_sipkey(XML_Parser parser, struct sipkey * key);
Fred Drake08317ae2003-10-21 15:38:55 +0000196
197/* For probing (after a collision) we need a step size relatively prime
198 to the hash table size, which is a power of 2. We use double-hashing,
199 since we can calculate a second hash value cheaply by taking those bits
200 of the first hash value that were discarded (masked out) when the table
201 index was calculated: index = hash & mask, where mask = table->size - 1.
202 We limit the maximum step size to table->size / 4 (mask >> 2) and make
203 it odd, since odd numbers are always relatively prime to a power of 2.
204*/
205#define SECOND_HASH(hash, mask, power) \
206 ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
207#define PROBE_STEP(hash, mask, power) \
208 ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
209
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000210typedef struct {
211 NAMED **p;
212 NAMED **end;
213} HASH_TABLE_ITER;
214
215#define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
216#define INIT_DATA_BUF_SIZE 1024
217#define INIT_ATTS_SIZE 16
Fred Drake08317ae2003-10-21 15:38:55 +0000218#define INIT_ATTS_VERSION 0xFFFFFFFF
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000219#define INIT_BLOCK_SIZE 1024
220#define INIT_BUFFER_SIZE 1024
221
222#define EXPAND_SPARE 24
223
224typedef struct binding {
225 struct prefix *prefix;
226 struct binding *nextTagBinding;
227 struct binding *prevPrefixBinding;
228 const struct attribute_id *attId;
229 XML_Char *uri;
230 int uriLen;
231 int uriAlloc;
232} BINDING;
233
234typedef struct prefix {
235 const XML_Char *name;
236 BINDING *binding;
237} PREFIX;
238
239typedef struct {
240 const XML_Char *str;
241 const XML_Char *localPart;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000242 const XML_Char *prefix;
243 int strLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000244 int uriLen;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000245 int prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000246} TAG_NAME;
247
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000248/* TAG represents an open element.
249 The name of the element is stored in both the document and API
250 encodings. The memory buffer 'buf' is a separately-allocated
251 memory area which stores the name. During the XML_Parse()/
252 XML_ParseBuffer() when the element is open, the memory for the 'raw'
253 version of the name (in the document encoding) is shared with the
254 document buffer. If the element is open across calls to
255 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
256 contain the 'raw' name as well.
257
258 A parser re-uses these structures, maintaining a list of allocated
259 TAG objects in a free list.
260*/
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000261typedef struct tag {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000262 struct tag *parent; /* parent of this element */
263 const char *rawName; /* tagName in the original encoding */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000264 int rawNameLength;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000265 TAG_NAME name; /* tagName in the API encoding */
266 char *buf; /* buffer for name components */
267 char *bufEnd; /* end of the buffer */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000268 BINDING *bindings;
269} TAG;
270
271typedef struct {
272 const XML_Char *name;
273 const XML_Char *textPtr;
Fred Drake31d485c2004-08-03 07:06:22 +0000274 int textLen; /* length in XML_Chars */
275 int processed; /* # of processed bytes - when suspended */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000276 const XML_Char *systemId;
277 const XML_Char *base;
278 const XML_Char *publicId;
279 const XML_Char *notation;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000280 XML_Bool open;
281 XML_Bool is_param;
282 XML_Bool is_internal; /* true if declared in internal subset outside PE */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000283} ENTITY;
284
285typedef struct {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000286 enum XML_Content_Type type;
287 enum XML_Content_Quant quant;
288 const XML_Char * name;
289 int firstchild;
290 int lastchild;
291 int childcnt;
292 int nextsib;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000293} CONTENT_SCAFFOLD;
294
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000295#define INIT_SCAFFOLD_ELEMENTS 32
296
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000297typedef struct block {
298 struct block *next;
299 int size;
300 XML_Char s[1];
301} BLOCK;
302
303typedef struct {
304 BLOCK *blocks;
305 BLOCK *freeBlocks;
306 const XML_Char *end;
307 XML_Char *ptr;
308 XML_Char *start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000309 const XML_Memory_Handling_Suite *mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000310} STRING_POOL;
311
312/* The XML_Char before the name is used to determine whether
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000313 an attribute has been specified. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000314typedef struct attribute_id {
315 XML_Char *name;
316 PREFIX *prefix;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000317 XML_Bool maybeTokenized;
318 XML_Bool xmlns;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000319} ATTRIBUTE_ID;
320
321typedef struct {
322 const ATTRIBUTE_ID *id;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000323 XML_Bool isCdata;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000324 const XML_Char *value;
325} DEFAULT_ATTRIBUTE;
326
327typedef struct {
Fred Drake08317ae2003-10-21 15:38:55 +0000328 unsigned long version;
329 unsigned long hash;
330 const XML_Char *uriName;
331} NS_ATT;
332
333typedef struct {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000334 const XML_Char *name;
335 PREFIX *prefix;
336 const ATTRIBUTE_ID *idAtt;
337 int nDefaultAtts;
338 int allocDefaultAtts;
339 DEFAULT_ATTRIBUTE *defaultAtts;
340} ELEMENT_TYPE;
341
342typedef struct {
343 HASH_TABLE generalEntities;
344 HASH_TABLE elementTypes;
345 HASH_TABLE attributeIds;
346 HASH_TABLE prefixes;
347 STRING_POOL pool;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000348 STRING_POOL entityValuePool;
349 /* false once a parameter entity reference has been skipped */
350 XML_Bool keepProcessing;
351 /* true once an internal or external PE reference has been encountered;
352 this includes the reference to an external subset */
353 XML_Bool hasParamEntityRefs;
354 XML_Bool standalone;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000355#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000356 /* indicates if external PE has been read */
357 XML_Bool paramEntityRead;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000358 HASH_TABLE paramEntities;
359#endif /* XML_DTD */
360 PREFIX defaultPrefix;
361 /* === scaffolding for building content model === */
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000362 XML_Bool in_eldecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000363 CONTENT_SCAFFOLD *scaffold;
364 unsigned contentStringLen;
365 unsigned scaffSize;
366 unsigned scaffCount;
367 int scaffLevel;
368 int *scaffIndex;
369} DTD;
370
371typedef struct open_internal_entity {
372 const char *internalEventPtr;
373 const char *internalEventEndPtr;
374 struct open_internal_entity *next;
375 ENTITY *entity;
Fred Drake31d485c2004-08-03 07:06:22 +0000376 int startTagLevel;
377 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000378} OPEN_INTERNAL_ENTITY;
379
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000380typedef enum XML_Error PTRCALL Processor(XML_Parser parser,
381 const char *start,
382 const char *end,
383 const char **endPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000384
385static Processor prologProcessor;
386static Processor prologInitProcessor;
387static Processor contentProcessor;
388static Processor cdataSectionProcessor;
389#ifdef XML_DTD
390static Processor ignoreSectionProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000391static Processor externalParEntProcessor;
392static Processor externalParEntInitProcessor;
393static Processor entityValueProcessor;
394static Processor entityValueInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000395#endif /* XML_DTD */
396static Processor epilogProcessor;
397static Processor errorProcessor;
398static Processor externalEntityInitProcessor;
399static Processor externalEntityInitProcessor2;
400static Processor externalEntityInitProcessor3;
401static Processor externalEntityContentProcessor;
Fred Drake31d485c2004-08-03 07:06:22 +0000402static Processor internalEntityProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000403
404static enum XML_Error
405handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
406static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000407processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
Fred Drake31d485c2004-08-03 07:06:22 +0000408 const char *s, const char *next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000409static enum XML_Error
410initializeEncoding(XML_Parser parser);
411static enum XML_Error
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700412doProlog(XML_Parser parser, const ENCODING *enc, const char *s,
413 const char *end, int tok, const char *next, const char **nextPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000414 XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000415static enum XML_Error
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700416processInternalEntity(XML_Parser parser, ENTITY *entity,
Fred Drake31d485c2004-08-03 07:06:22 +0000417 XML_Bool betweenDecl);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000418static enum XML_Error
419doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700420 const char *start, const char *end, const char **endPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000421 XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000422static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000423doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000424 const char *end, const char **nextPtr, XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000425#ifdef XML_DTD
426static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000427doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000428 const char *end, const char **nextPtr, XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000429#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000430
Victor Stinner5ff71322017-06-21 14:39:22 +0200431static void
432freeBindings(XML_Parser parser, BINDING *bindings);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000433static enum XML_Error
Fred Drake4faea012003-01-28 06:42:40 +0000434storeAtts(XML_Parser parser, const ENCODING *, const char *s,
435 TAG_NAME *tagNamePtr, BINDING **bindingsPtr);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000436static enum XML_Error
437addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
438 const XML_Char *uri, BINDING **bindingsPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000439static int
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700440defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
Fred Drake31d485c2004-08-03 07:06:22 +0000441 XML_Bool isId, const XML_Char *dfltValue, XML_Parser parser);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000442static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000443storeAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
444 const char *, const char *, STRING_POOL *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000445static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000446appendAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
447 const char *, const char *, STRING_POOL *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000448static ATTRIBUTE_ID *
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000449getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
450 const char *end);
451static int
452setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000453static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000454storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start,
455 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000456static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000457reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
458 const char *start, const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000459static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000460reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
461 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000462static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000463reportDefault(XML_Parser parser, const ENCODING *enc, const char *start,
464 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000465
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000466static const XML_Char * getContext(XML_Parser parser);
467static XML_Bool
468setContext(XML_Parser parser, const XML_Char *context);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000469
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000470static void FASTCALL normalizePublicId(XML_Char *s);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000471
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000472static DTD * dtdCreate(const XML_Memory_Handling_Suite *ms);
Benjamin Peterson4e211002018-06-26 19:25:45 -0700473/* do not call if m_parentParser != NULL */
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000474static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
475static void
476dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms);
477static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700478dtdCopy(XML_Parser oldParser,
479 DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000480static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700481copyEntityTable(XML_Parser oldParser,
482 HASH_TABLE *, STRING_POOL *, const HASH_TABLE *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000483static NAMED *
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700484lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000485static void FASTCALL
486hashTableInit(HASH_TABLE *, const XML_Memory_Handling_Suite *ms);
487static void FASTCALL hashTableClear(HASH_TABLE *);
488static void FASTCALL hashTableDestroy(HASH_TABLE *);
489static void FASTCALL
490hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
491static NAMED * FASTCALL hashTableIterNext(HASH_TABLE_ITER *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000492
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000493static void FASTCALL
494poolInit(STRING_POOL *, const XML_Memory_Handling_Suite *ms);
495static void FASTCALL poolClear(STRING_POOL *);
496static void FASTCALL poolDestroy(STRING_POOL *);
497static XML_Char *
498poolAppend(STRING_POOL *pool, const ENCODING *enc,
499 const char *ptr, const char *end);
500static XML_Char *
501poolStoreString(STRING_POOL *pool, const ENCODING *enc,
502 const char *ptr, const char *end);
503static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
504static const XML_Char * FASTCALL
505poolCopyString(STRING_POOL *pool, const XML_Char *s);
506static const XML_Char *
507poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
508static const XML_Char * FASTCALL
509poolAppendString(STRING_POOL *pool, const XML_Char *s);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000510
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000511static int FASTCALL nextScaffoldPart(XML_Parser parser);
512static XML_Content * build_model(XML_Parser parser);
513static ELEMENT_TYPE *
514getElementType(XML_Parser parser, const ENCODING *enc,
515 const char *ptr, const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000516
Victor Stinner93d0cb52017-08-18 23:43:54 +0200517static XML_Char *copyString(const XML_Char *s,
518 const XML_Memory_Handling_Suite *memsuite);
519
Victor Stinner23ec4b52017-06-15 00:54:36 +0200520static unsigned long generate_hash_secret_salt(XML_Parser parser);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700521static XML_Bool startParsing(XML_Parser parser);
522
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000523static XML_Parser
524parserCreate(const XML_Char *encodingName,
525 const XML_Memory_Handling_Suite *memsuite,
526 const XML_Char *nameSep,
527 DTD *dtd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700528
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000529static void
530parserInit(XML_Parser parser, const XML_Char *encodingName);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000531
/* Accessor macros for a STRING_POOL.  The string under construction spans
   [start, ptr).  Every use of the `pool` argument is parenthesized so that
   any pointer-valued expression (for example &parser->m_tempPool) expands
   correctly.  Note that several macros evaluate `pool` more than once. */
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Step ptr back by one XML_Char, dropping the last appended character. */
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
/* Reset ptr to start, abandoning the string under construction. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Move start up to ptr, so the next string begins after the current one. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Append c at ptr.  When ptr has reached end, poolGrow() is called first;
   the expression yields 0 if that call returns false and 1 otherwise. */
#define poolAppendChar(pool, c) \
  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
   ? 0 \
   : ((*((pool)->ptr)++ = (c)), 1))
543
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000544struct XML_ParserStruct {
Benjamin Peterson4e211002018-06-26 19:25:45 -0700545 /* The first member must be m_userData so that the XML_GetUserData
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000546 macro works. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000547 void *m_userData;
548 void *m_handlerArg;
549 char *m_buffer;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000550 const XML_Memory_Handling_Suite m_mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000551 /* first character to be parsed */
552 const char *m_bufferPtr;
553 /* past last character to be parsed */
554 char *m_bufferEnd;
Benjamin Peterson4e211002018-06-26 19:25:45 -0700555 /* allocated end of m_buffer */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000556 const char *m_bufferLim;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000557 XML_Index m_parseEndByteIndex;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000558 const char *m_parseEndPtr;
559 XML_Char *m_dataBuf;
560 XML_Char *m_dataBufEnd;
561 XML_StartElementHandler m_startElementHandler;
562 XML_EndElementHandler m_endElementHandler;
563 XML_CharacterDataHandler m_characterDataHandler;
564 XML_ProcessingInstructionHandler m_processingInstructionHandler;
565 XML_CommentHandler m_commentHandler;
566 XML_StartCdataSectionHandler m_startCdataSectionHandler;
567 XML_EndCdataSectionHandler m_endCdataSectionHandler;
568 XML_DefaultHandler m_defaultHandler;
569 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
570 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
571 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
572 XML_NotationDeclHandler m_notationDeclHandler;
573 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
574 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
575 XML_NotStandaloneHandler m_notStandaloneHandler;
576 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000577 XML_Parser m_externalEntityRefHandlerArg;
578 XML_SkippedEntityHandler m_skippedEntityHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000579 XML_UnknownEncodingHandler m_unknownEncodingHandler;
580 XML_ElementDeclHandler m_elementDeclHandler;
581 XML_AttlistDeclHandler m_attlistDeclHandler;
582 XML_EntityDeclHandler m_entityDeclHandler;
583 XML_XmlDeclHandler m_xmlDeclHandler;
584 const ENCODING *m_encoding;
585 INIT_ENCODING m_initEncoding;
586 const ENCODING *m_internalEncoding;
587 const XML_Char *m_protocolEncodingName;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000588 XML_Bool m_ns;
589 XML_Bool m_ns_triplets;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000590 void *m_unknownEncodingMem;
591 void *m_unknownEncodingData;
592 void *m_unknownEncodingHandlerData;
Fred Drake31d485c2004-08-03 07:06:22 +0000593 void (XMLCALL *m_unknownEncodingRelease)(void *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000594 PROLOG_STATE m_prologState;
595 Processor *m_processor;
596 enum XML_Error m_errorCode;
597 const char *m_eventPtr;
598 const char *m_eventEndPtr;
599 const char *m_positionPtr;
600 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
Fred Drake31d485c2004-08-03 07:06:22 +0000601 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000602 XML_Bool m_defaultExpandInternalEntities;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000603 int m_tagLevel;
604 ENTITY *m_declEntity;
605 const XML_Char *m_doctypeName;
606 const XML_Char *m_doctypeSysid;
607 const XML_Char *m_doctypePubid;
608 const XML_Char *m_declAttributeType;
609 const XML_Char *m_declNotationName;
610 const XML_Char *m_declNotationPublicId;
611 ELEMENT_TYPE *m_declElementType;
612 ATTRIBUTE_ID *m_declAttributeId;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000613 XML_Bool m_declAttributeIsCdata;
614 XML_Bool m_declAttributeIsId;
615 DTD *m_dtd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000616 const XML_Char *m_curBase;
617 TAG *m_tagStack;
618 TAG *m_freeTagList;
619 BINDING *m_inheritedBindings;
620 BINDING *m_freeBindingList;
621 int m_attsSize;
622 int m_nSpecifiedAtts;
623 int m_idAttIndex;
624 ATTRIBUTE *m_atts;
Fred Drake08317ae2003-10-21 15:38:55 +0000625 NS_ATT *m_nsAtts;
626 unsigned long m_nsAttsVersion;
627 unsigned char m_nsAttsPower;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700628#ifdef XML_ATTR_INFO
629 XML_AttrInfo *m_attInfo;
630#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000631 POSITION m_position;
632 STRING_POOL m_tempPool;
633 STRING_POOL m_temp2Pool;
634 char *m_groupConnector;
Fred Drake08317ae2003-10-21 15:38:55 +0000635 unsigned int m_groupSize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000636 XML_Char m_namespaceSeparator;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000637 XML_Parser m_parentParser;
Fred Drake31d485c2004-08-03 07:06:22 +0000638 XML_ParsingStatus m_parsingStatus;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000639#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000640 XML_Bool m_isParamEntity;
641 XML_Bool m_useForeignDTD;
642 enum XML_ParamEntityParsing m_paramEntityParsing;
643#endif
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700644 unsigned long m_hash_secret_salt;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000645};
646
/* Allocation helpers that call through the function pointers stored in the
   parser's m_mem member.  The `parser` argument is parenthesized so that
   expressions such as &obj or *pp can be passed without mis-parsing. */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000650
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000651
Fred Drake08317ae2003-10-21 15:38:55 +0000652XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000653XML_ParserCreate(const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000654{
655 return XML_ParserCreate_MM(encodingName, NULL, NULL);
656}
657
Fred Drake08317ae2003-10-21 15:38:55 +0000658XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000659XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000660{
661 XML_Char tmp[2];
662 *tmp = nsSep;
663 return XML_ParserCreate_MM(encodingName, NULL, tmp);
664}
665
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000666static const XML_Char implicitContext[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700667 ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, ASCII_t, ASCII_t, ASCII_p,
668 ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
669 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g,
670 ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9,
671 ASCII_9, ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
672 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0'
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000673};
674
Victor Stinner5ff71322017-06-21 14:39:22 +0200675
Benjamin Peterson4e211002018-06-26 19:25:45 -0700676/* To avoid warnings about unused functions: */
677#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
678
Victor Stinner5ff71322017-06-21 14:39:22 +0200679#if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
Victor Stinner5ff71322017-06-21 14:39:22 +0200680
/* Obtain entropy on Linux 3.17+.
 *
 * Fills `target` with `count` bytes obtained from getrandom() (or the raw
 * SYS_getrandom system call) using the GRND_NONBLOCK flag.
 *
 * Returns 1 if all `count` bytes were written, 0 otherwise.
 *
 * errno is only meaningful after a call that reported failure, so it is
 * consulted only when the call returned a negative value: EINTR retries,
 * any other failure (or a zero-byte result) gives up.  A short but
 * successful read continues with the remaining bytes.
 */
static int
writeRandomBytes_getrandom_nonblock(void * target, size_t count) {
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = GRND_NONBLOCK;

  for (;;) {
    void * const currentTarget = (void*)((char*)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    /* long holds the result of either call without narrowing */
    const long bytesWrittenMore =
#if defined(HAVE_GETRANDOM)
        getrandom(currentTarget, bytesToWrite, getrandomFlags);
#else
        syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
#endif

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count)
        return 1;
    } else if (! (bytesWrittenMore < 0 && errno == EINTR)) {
      /* hard failure, or nothing was delivered: do not spin */
      return 0;
    }
  }
}
708
709#endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
710
711
Victor Stinner93d0cb52017-08-18 23:43:54 +0200712#if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
713
/* Extract entropy from /dev/urandom.
 *
 * Opens the device read-only and reads until `count` bytes have been stored
 * in `target`.  Returns 1 if all `count` bytes were written, 0 otherwise
 * (including when the device cannot be opened).  The descriptor is closed on
 * every path after a successful open.
 *
 * errno is only meaningful after read() reported failure, so it is consulted
 * only when read() returned a negative value: EINTR retries, any other
 * failure (or a zero-byte read) gives up.  A short but successful read
 * continues with the remaining bytes.
 */
static int
writeRandomBytes_dev_urandom(void * target, size_t count) {
  int success = 0;  /* full count bytes written? */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  for (;;) {
    void * const currentTarget = (void*)((char*)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count) {
        success = 1;
        break;
      }
    } else if (! (bytesWrittenMore < 0 && errno == EINTR)) {
      /* hard failure, or nothing was delivered: do not spin */
      break;
    }
  }

  close(fd);
  return success;
}
741
742#endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
743
Benjamin Peterson4e211002018-06-26 19:25:45 -0700744#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
745
Victor Stinner93d0cb52017-08-18 23:43:54 +0200746
747#if defined(HAVE_ARC4RANDOM)
748
749static void
750writeRandomBytes_arc4random(void * target, size_t count) {
751 size_t bytesWrittenTotal = 0;
752
753 while (bytesWrittenTotal < count) {
754 const uint32_t random32 = arc4random();
755 size_t i = 0;
756
757 for (; (i < sizeof(random32)) && (bytesWrittenTotal < count);
758 i++, bytesWrittenTotal++) {
759 const uint8_t random8 = (uint8_t)(random32 >> (i * 8));
760 ((uint8_t *)target)[bytesWrittenTotal] = random8;
761 }
762 }
763}
764
765#endif /* defined(HAVE_ARC4RANDOM) */
766
767
Victor Stinner5ff71322017-06-21 14:39:22 +0200768#ifdef _WIN32
769
770typedef BOOLEAN (APIENTRY *RTLGENRANDOM_FUNC)(PVOID, ULONG);
Victor Stinner93d0cb52017-08-18 23:43:54 +0200771HMODULE _Expat_LoadLibrary(LPCTSTR filename); /* see loadlibrary.c */
Victor Stinner5ff71322017-06-21 14:39:22 +0200772
773/* Obtain entropy on Windows XP / Windows Server 2003 and later.
Victor Stinner93d0cb52017-08-18 23:43:54 +0200774 * Hint on RtlGenRandom and the following article from libsodium.
Victor Stinner5ff71322017-06-21 14:39:22 +0200775 *
776 * Michael Howard: Cryptographically Secure Random number on Windows without using CryptoAPI
777 * https://blogs.msdn.microsoft.com/michael_howard/2005/01/14/cryptographically-secure-random-number-on-windows-without-using-cryptoapi/
778 */
779static int
780writeRandomBytes_RtlGenRandom(void * target, size_t count) {
781 int success = 0; /* full count bytes written? */
Victor Stinner93d0cb52017-08-18 23:43:54 +0200782 const HMODULE advapi32 = _Expat_LoadLibrary(TEXT("ADVAPI32.DLL"));
Victor Stinner5ff71322017-06-21 14:39:22 +0200783
784 if (advapi32) {
785 const RTLGENRANDOM_FUNC RtlGenRandom
786 = (RTLGENRANDOM_FUNC)GetProcAddress(advapi32, "SystemFunction036");
787 if (RtlGenRandom) {
788 if (RtlGenRandom((PVOID)target, (ULONG)count) == TRUE) {
789 success = 1;
790 }
791 }
792 FreeLibrary(advapi32);
793 }
794
795 return success;
796}
797
798#endif /* _WIN32 */
799
800
Victor Stinner93d0cb52017-08-18 23:43:54 +0200801#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
802
/* Time-derived value used by the low-quality fallback salt.
   On Windows: the XOR of the high and low parts of the current FILETIME.
   Elsewhere: the microsecond field reported by gettimeofday(). */
static unsigned long
gather_time_entropy(void)
{
#ifdef _WIN32
  FILETIME now;
  GetSystemTimeAsFileTime(&now); /* never fails */
  return now.dwHighDateTime ^ now.dwLowDateTime;
#else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  /* With NDEBUG the assert vanishes, so silence the unused variable. */
#if defined(NDEBUG)
  (void)rc;
#else
  assert (rc == 0);
#endif  /* defined(NDEBUG) */

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#endif
}
826
Victor Stinner93d0cb52017-08-18 23:43:54 +0200827#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
828
Victor Stinner5ff71322017-06-21 14:39:22 +0200829
/* Pass-through tracing hook: always returns `entropy` unchanged.
   When the environment variable EXPAT_ENTROPY_DEBUG is exactly "1", the
   provider `label`, the value as zero-padded hex and its size in bytes are
   written to stderr first. */
static unsigned long
ENTROPY_DEBUG(const char * label, unsigned long entropy) {
  const char * const setting = getenv("EXPAT_ENTROPY_DEBUG");
  const int tracing = (setting != NULL) && (strcmp(setting, "1") == 0);

  if (tracing) {
    fprintf(stderr, "Entropy: %s --> 0x%0*lx (%lu bytes)\n",
        label,
        (int)sizeof(entropy) * 2, entropy,
        (unsigned long)sizeof(entropy));
  }
  return entropy;
}
841
Victor Stinner23ec4b52017-06-15 00:54:36 +0200842static unsigned long
843generate_hash_secret_salt(XML_Parser parser)
844{
Victor Stinner5ff71322017-06-21 14:39:22 +0200845 unsigned long entropy;
846 (void)parser;
Benjamin Peterson4e211002018-06-26 19:25:45 -0700847
848 /* "Failproof" high quality providers: */
Victor Stinner93d0cb52017-08-18 23:43:54 +0200849#if defined(HAVE_ARC4RANDOM_BUF)
Victor Stinner5ff71322017-06-21 14:39:22 +0200850 arc4random_buf(&entropy, sizeof(entropy));
851 return ENTROPY_DEBUG("arc4random_buf", entropy);
Victor Stinner93d0cb52017-08-18 23:43:54 +0200852#elif defined(HAVE_ARC4RANDOM)
853 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
854 return ENTROPY_DEBUG("arc4random", entropy);
Victor Stinner5ff71322017-06-21 14:39:22 +0200855#else
856 /* Try high quality providers first .. */
857#ifdef _WIN32
858 if (writeRandomBytes_RtlGenRandom((void *)&entropy, sizeof(entropy))) {
859 return ENTROPY_DEBUG("RtlGenRandom", entropy);
860 }
861#elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
Victor Stinner93d0cb52017-08-18 23:43:54 +0200862 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
Victor Stinner5ff71322017-06-21 14:39:22 +0200863 return ENTROPY_DEBUG("getrandom", entropy);
864 }
865#endif
Victor Stinner93d0cb52017-08-18 23:43:54 +0200866#if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
867 if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
868 return ENTROPY_DEBUG("/dev/urandom", entropy);
869 }
870#endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
Victor Stinner5ff71322017-06-21 14:39:22 +0200871 /* .. and self-made low quality for backup: */
872
873 /* Process ID is 0 bits entropy if attacker has local access */
874 entropy = gather_time_entropy() ^ getpid();
Victor Stinner23ec4b52017-06-15 00:54:36 +0200875
876 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
877 if (sizeof(unsigned long) == 4) {
Victor Stinner5ff71322017-06-21 14:39:22 +0200878 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200879 } else {
Victor Stinner5ff71322017-06-21 14:39:22 +0200880 return ENTROPY_DEBUG("fallback(8)",
Victor Stinner93d0cb52017-08-18 23:43:54 +0200881 entropy * (unsigned long)2305843009213693951ULL);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200882 }
Victor Stinner5ff71322017-06-21 14:39:22 +0200883#endif
884}
885
886static unsigned long
887get_hash_secret_salt(XML_Parser parser) {
888 if (parser->m_parentParser != NULL)
889 return get_hash_secret_salt(parser->m_parentParser);
890 return parser->m_hash_secret_salt;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700891}
892
893static XML_Bool /* only valid for root parser */
894startParsing(XML_Parser parser)
895{
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700896 /* hash functions must be initialized before setContext() is called */
Benjamin Peterson4e211002018-06-26 19:25:45 -0700897 if (parser->m_hash_secret_salt == 0)
898 parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
899 if (parser->m_ns) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700900 /* implicit context only set for root parser, since child
901 parsers (i.e. external entity parsers) will inherit it
902 */
903 return setContext(parser, implicitContext);
904 }
905 return XML_TRUE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700906}
907
908XML_Parser XMLCALL
909XML_ParserCreate_MM(const XML_Char *encodingName,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700910 const XML_Memory_Handling_Suite *memsuite,
911 const XML_Char *nameSep)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700912{
913 return parserCreate(encodingName, memsuite, nameSep, NULL);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000914}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000915
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000916static XML_Parser
917parserCreate(const XML_Char *encodingName,
918 const XML_Memory_Handling_Suite *memsuite,
919 const XML_Char *nameSep,
920 DTD *dtd)
921{
922 XML_Parser parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000923
924 if (memsuite) {
925 XML_Memory_Handling_Suite *mtemp;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000926 parser = (XML_Parser)
927 memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
928 if (parser != NULL) {
929 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
930 mtemp->malloc_fcn = memsuite->malloc_fcn;
931 mtemp->realloc_fcn = memsuite->realloc_fcn;
932 mtemp->free_fcn = memsuite->free_fcn;
933 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000934 }
935 else {
936 XML_Memory_Handling_Suite *mtemp;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000937 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
938 if (parser != NULL) {
939 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
940 mtemp->malloc_fcn = malloc;
941 mtemp->realloc_fcn = realloc;
942 mtemp->free_fcn = free;
943 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000944 }
945
946 if (!parser)
947 return parser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000948
Benjamin Peterson4e211002018-06-26 19:25:45 -0700949 parser->m_buffer = NULL;
950 parser->m_bufferLim = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000951
Benjamin Peterson4e211002018-06-26 19:25:45 -0700952 parser->m_attsSize = INIT_ATTS_SIZE;
953 parser->m_atts = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
954 if (parser->m_atts == NULL) {
955 FREE(parser, parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000956 return NULL;
957 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700958#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -0700959 parser->m_attInfo = (XML_AttrInfo*)MALLOC(parser, parser->m_attsSize * sizeof(XML_AttrInfo));
960 if (parser->m_attInfo == NULL) {
961 FREE(parser, parser->m_atts);
962 FREE(parser, parser);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700963 return NULL;
964 }
965#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -0700966 parser->m_dataBuf = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
967 if (parser->m_dataBuf == NULL) {
968 FREE(parser, parser->m_atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700969#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -0700970 FREE(parser, parser->m_attInfo);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700971#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -0700972 FREE(parser, parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000973 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000974 }
Benjamin Peterson4e211002018-06-26 19:25:45 -0700975 parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000976
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000977 if (dtd)
Benjamin Peterson4e211002018-06-26 19:25:45 -0700978 parser->m_dtd = dtd;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000979 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -0700980 parser->m_dtd = dtdCreate(&parser->m_mem);
981 if (parser->m_dtd == NULL) {
982 FREE(parser, parser->m_dataBuf);
983 FREE(parser, parser->m_atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700984#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -0700985 FREE(parser, parser->m_attInfo);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700986#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -0700987 FREE(parser, parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000988 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000989 }
990 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000991
Benjamin Peterson4e211002018-06-26 19:25:45 -0700992 parser->m_freeBindingList = NULL;
993 parser->m_freeTagList = NULL;
994 parser->m_freeInternalEntities = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000995
Benjamin Peterson4e211002018-06-26 19:25:45 -0700996 parser->m_groupSize = 0;
997 parser->m_groupConnector = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000998
Benjamin Peterson4e211002018-06-26 19:25:45 -0700999 parser->m_unknownEncodingHandler = NULL;
1000 parser->m_unknownEncodingHandlerData = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001001
Benjamin Peterson4e211002018-06-26 19:25:45 -07001002 parser->m_namespaceSeparator = ASCII_EXCL;
1003 parser->m_ns = XML_FALSE;
1004 parser->m_ns_triplets = XML_FALSE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001005
Benjamin Peterson4e211002018-06-26 19:25:45 -07001006 parser->m_nsAtts = NULL;
1007 parser->m_nsAttsVersion = 0;
1008 parser->m_nsAttsPower = 0;
Fred Drake08317ae2003-10-21 15:38:55 +00001009
Benjamin Peterson4e211002018-06-26 19:25:45 -07001010 parser->m_protocolEncodingName = NULL;
Victor Stinner93d0cb52017-08-18 23:43:54 +02001011
Benjamin Peterson4e211002018-06-26 19:25:45 -07001012 poolInit(&parser->m_tempPool, &(parser->m_mem));
1013 poolInit(&parser->m_temp2Pool, &(parser->m_mem));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001014 parserInit(parser, encodingName);
1015
Benjamin Peterson4e211002018-06-26 19:25:45 -07001016 if (encodingName && !parser->m_protocolEncodingName) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001017 XML_ParserFree(parser);
1018 return NULL;
1019 }
1020
1021 if (nameSep) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001022 parser->m_ns = XML_TRUE;
1023 parser->m_internalEncoding = XmlGetInternalEncodingNS();
1024 parser->m_namespaceSeparator = *nameSep;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001025 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001026 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001027 parser->m_internalEncoding = XmlGetInternalEncoding();
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001028 }
1029
1030 return parser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001031}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001032
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001033static void
1034parserInit(XML_Parser parser, const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001035{
Benjamin Peterson4e211002018-06-26 19:25:45 -07001036 parser->m_processor = prologInitProcessor;
1037 XmlPrologStateInit(&parser->m_prologState);
Victor Stinner93d0cb52017-08-18 23:43:54 +02001038 if (encodingName != NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001039 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
Victor Stinner93d0cb52017-08-18 23:43:54 +02001040 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001041 parser->m_curBase = NULL;
1042 XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1043 parser->m_userData = NULL;
1044 parser->m_handlerArg = NULL;
1045 parser->m_startElementHandler = NULL;
1046 parser->m_endElementHandler = NULL;
1047 parser->m_characterDataHandler = NULL;
1048 parser->m_processingInstructionHandler = NULL;
1049 parser->m_commentHandler = NULL;
1050 parser->m_startCdataSectionHandler = NULL;
1051 parser->m_endCdataSectionHandler = NULL;
1052 parser->m_defaultHandler = NULL;
1053 parser->m_startDoctypeDeclHandler = NULL;
1054 parser->m_endDoctypeDeclHandler = NULL;
1055 parser->m_unparsedEntityDeclHandler = NULL;
1056 parser->m_notationDeclHandler = NULL;
1057 parser->m_startNamespaceDeclHandler = NULL;
1058 parser->m_endNamespaceDeclHandler = NULL;
1059 parser->m_notStandaloneHandler = NULL;
1060 parser->m_externalEntityRefHandler = NULL;
1061 parser->m_externalEntityRefHandlerArg = parser;
1062 parser->m_skippedEntityHandler = NULL;
1063 parser->m_elementDeclHandler = NULL;
1064 parser->m_attlistDeclHandler = NULL;
1065 parser->m_entityDeclHandler = NULL;
1066 parser->m_xmlDeclHandler = NULL;
1067 parser->m_bufferPtr = parser->m_buffer;
1068 parser->m_bufferEnd = parser->m_buffer;
1069 parser->m_parseEndByteIndex = 0;
1070 parser->m_parseEndPtr = NULL;
1071 parser->m_declElementType = NULL;
1072 parser->m_declAttributeId = NULL;
1073 parser->m_declEntity = NULL;
1074 parser->m_doctypeName = NULL;
1075 parser->m_doctypeSysid = NULL;
1076 parser->m_doctypePubid = NULL;
1077 parser->m_declAttributeType = NULL;
1078 parser->m_declNotationName = NULL;
1079 parser->m_declNotationPublicId = NULL;
1080 parser->m_declAttributeIsCdata = XML_FALSE;
1081 parser->m_declAttributeIsId = XML_FALSE;
1082 memset(&parser->m_position, 0, sizeof(POSITION));
1083 parser->m_errorCode = XML_ERROR_NONE;
1084 parser->m_eventPtr = NULL;
1085 parser->m_eventEndPtr = NULL;
1086 parser->m_positionPtr = NULL;
1087 parser->m_openInternalEntities = NULL;
1088 parser->m_defaultExpandInternalEntities = XML_TRUE;
1089 parser->m_tagLevel = 0;
1090 parser->m_tagStack = NULL;
1091 parser->m_inheritedBindings = NULL;
1092 parser->m_nSpecifiedAtts = 0;
1093 parser->m_unknownEncodingMem = NULL;
1094 parser->m_unknownEncodingRelease = NULL;
1095 parser->m_unknownEncodingData = NULL;
1096 parser->m_parentParser = NULL;
1097 parser->m_parsingStatus.parsing = XML_INITIALIZED;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001098#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07001099 parser->m_isParamEntity = XML_FALSE;
1100 parser->m_useForeignDTD = XML_FALSE;
1101 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001102#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07001103 parser->m_hash_secret_salt = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001104}
1105
Benjamin Peterson4e211002018-06-26 19:25:45 -07001106/* moves list of bindings to m_freeBindingList */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001107static void FASTCALL
1108moveToFreeBindingList(XML_Parser parser, BINDING *bindings)
1109{
1110 while (bindings) {
1111 BINDING *b = bindings;
1112 bindings = bindings->nextTagBinding;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001113 b->nextTagBinding = parser->m_freeBindingList;
1114 parser->m_freeBindingList = b;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001115 }
1116}
1117
Fred Drake08317ae2003-10-21 15:38:55 +00001118XML_Bool XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001119XML_ParserReset(XML_Parser parser, const XML_Char *encodingName)
1120{
1121 TAG *tStk;
Fred Drake31d485c2004-08-03 07:06:22 +00001122 OPEN_INTERNAL_ENTITY *openEntityList;
Victor Stinner5ff71322017-06-21 14:39:22 +02001123
1124 if (parser == NULL)
1125 return XML_FALSE;
1126
Benjamin Peterson4e211002018-06-26 19:25:45 -07001127 if (parser->m_parentParser)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001128 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001129 /* move m_tagStack to m_freeTagList */
1130 tStk = parser->m_tagStack;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001131 while (tStk) {
1132 TAG *tag = tStk;
1133 tStk = tStk->parent;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001134 tag->parent = parser->m_freeTagList;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001135 moveToFreeBindingList(parser, tag->bindings);
1136 tag->bindings = NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001137 parser->m_freeTagList = tag;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001138 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001139 /* move m_openInternalEntities to m_freeInternalEntities */
1140 openEntityList = parser->m_openInternalEntities;
Fred Drake31d485c2004-08-03 07:06:22 +00001141 while (openEntityList) {
1142 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1143 openEntityList = openEntity->next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001144 openEntity->next = parser->m_freeInternalEntities;
1145 parser->m_freeInternalEntities = openEntity;
Fred Drake31d485c2004-08-03 07:06:22 +00001146 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001147 moveToFreeBindingList(parser, parser->m_inheritedBindings);
1148 FREE(parser, parser->m_unknownEncodingMem);
1149 if (parser->m_unknownEncodingRelease)
1150 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1151 poolClear(&parser->m_tempPool);
1152 poolClear(&parser->m_temp2Pool);
1153 FREE(parser, (void *)parser->m_protocolEncodingName);
1154 parser->m_protocolEncodingName = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001155 parserInit(parser, encodingName);
Benjamin Peterson4e211002018-06-26 19:25:45 -07001156 dtdReset(parser->m_dtd, &parser->m_mem);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001157 return XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001158}
1159
Fred Drake08317ae2003-10-21 15:38:55 +00001160enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001161XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
1162{
Victor Stinner5ff71322017-06-21 14:39:22 +02001163 if (parser == NULL)
1164 return XML_STATUS_ERROR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001165 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1166 XXX There's no way for the caller to determine which of the
1167 XXX possible error cases caused the XML_STATUS_ERROR return.
1168 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001169 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001170 return XML_STATUS_ERROR;
Victor Stinner93d0cb52017-08-18 23:43:54 +02001171
1172 /* Get rid of any previous encoding name */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001173 FREE(parser, (void *)parser->m_protocolEncodingName);
Victor Stinner93d0cb52017-08-18 23:43:54 +02001174
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001175 if (encodingName == NULL)
Victor Stinner93d0cb52017-08-18 23:43:54 +02001176 /* No new encoding name */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001177 parser->m_protocolEncodingName = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001178 else {
Victor Stinner93d0cb52017-08-18 23:43:54 +02001179 /* Copy the new encoding name into allocated memory */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001180 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1181 if (!parser->m_protocolEncodingName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001182 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001183 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001184 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001185}
1186
Fred Drake08317ae2003-10-21 15:38:55 +00001187XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001188XML_ExternalEntityParserCreate(XML_Parser oldParser,
1189 const XML_Char *context,
1190 const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001191{
1192 XML_Parser parser = oldParser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001193 DTD *newDtd = NULL;
Victor Stinner5ff71322017-06-21 14:39:22 +02001194 DTD *oldDtd;
1195 XML_StartElementHandler oldStartElementHandler;
1196 XML_EndElementHandler oldEndElementHandler;
1197 XML_CharacterDataHandler oldCharacterDataHandler;
1198 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1199 XML_CommentHandler oldCommentHandler;
1200 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1201 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1202 XML_DefaultHandler oldDefaultHandler;
1203 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1204 XML_NotationDeclHandler oldNotationDeclHandler;
1205 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1206 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1207 XML_NotStandaloneHandler oldNotStandaloneHandler;
1208 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1209 XML_SkippedEntityHandler oldSkippedEntityHandler;
1210 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1211 XML_ElementDeclHandler oldElementDeclHandler;
1212 XML_AttlistDeclHandler oldAttlistDeclHandler;
1213 XML_EntityDeclHandler oldEntityDeclHandler;
1214 XML_XmlDeclHandler oldXmlDeclHandler;
1215 ELEMENT_TYPE * oldDeclElementType;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001216
Victor Stinner5ff71322017-06-21 14:39:22 +02001217 void *oldUserData;
1218 void *oldHandlerArg;
1219 XML_Bool oldDefaultExpandInternalEntities;
1220 XML_Parser oldExternalEntityRefHandlerArg;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001221#ifdef XML_DTD
Victor Stinner5ff71322017-06-21 14:39:22 +02001222 enum XML_ParamEntityParsing oldParamEntityParsing;
1223 int oldInEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001224#endif
Victor Stinner5ff71322017-06-21 14:39:22 +02001225 XML_Bool oldns_triplets;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001226 /* Note that the new parser shares the same hash secret as the old
1227 parser, so that dtdCopy and copyEntityTable can lookup values
1228 from hash tables associated with either parser without us having
1229 to worry which hash secrets each table has.
1230 */
Victor Stinner5ff71322017-06-21 14:39:22 +02001231 unsigned long oldhash_secret_salt;
1232
1233 /* Validate the oldParser parameter before we pull everything out of it */
1234 if (oldParser == NULL)
1235 return NULL;
1236
1237 /* Stash the original parser contents on the stack */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001238 oldDtd = parser->m_dtd;
1239 oldStartElementHandler = parser->m_startElementHandler;
1240 oldEndElementHandler = parser->m_endElementHandler;
1241 oldCharacterDataHandler = parser->m_characterDataHandler;
1242 oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1243 oldCommentHandler = parser->m_commentHandler;
1244 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1245 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1246 oldDefaultHandler = parser->m_defaultHandler;
1247 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1248 oldNotationDeclHandler = parser->m_notationDeclHandler;
1249 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1250 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1251 oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1252 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1253 oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1254 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1255 oldElementDeclHandler = parser->m_elementDeclHandler;
1256 oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1257 oldEntityDeclHandler = parser->m_entityDeclHandler;
1258 oldXmlDeclHandler = parser->m_xmlDeclHandler;
1259 oldDeclElementType = parser->m_declElementType;
Victor Stinner5ff71322017-06-21 14:39:22 +02001260
Benjamin Peterson4e211002018-06-26 19:25:45 -07001261 oldUserData = parser->m_userData;
1262 oldHandlerArg = parser->m_handlerArg;
1263 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1264 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
Victor Stinner5ff71322017-06-21 14:39:22 +02001265#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07001266 oldParamEntityParsing = parser->m_paramEntityParsing;
1267 oldInEntityValue = parser->m_prologState.inEntityValue;
Victor Stinner5ff71322017-06-21 14:39:22 +02001268#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07001269 oldns_triplets = parser->m_ns_triplets;
Victor Stinner5ff71322017-06-21 14:39:22 +02001270 /* Note that the new parser shares the same hash secret as the old
1271 parser, so that dtdCopy and copyEntityTable can lookup values
1272 from hash tables associated with either parser without us having
1273 to worry which hash secrets each table has.
1274 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001275 oldhash_secret_salt = parser->m_hash_secret_salt;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001276
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001277#ifdef XML_DTD
1278 if (!context)
1279 newDtd = oldDtd;
1280#endif /* XML_DTD */
1281
1282 /* Note that the magical uses of the pre-processor to make field
1283 access look more like C++ require that `parser' be overwritten
1284 here. This makes this function more painful to follow than it
1285 would be otherwise.
1286 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001287 if (parser->m_ns) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001288 XML_Char tmp[2];
Benjamin Peterson4e211002018-06-26 19:25:45 -07001289 *tmp = parser->m_namespaceSeparator;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001290 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001291 }
1292 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001293 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001294 }
1295
1296 if (!parser)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001297 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001298
Benjamin Peterson4e211002018-06-26 19:25:45 -07001299 parser->m_startElementHandler = oldStartElementHandler;
1300 parser->m_endElementHandler = oldEndElementHandler;
1301 parser->m_characterDataHandler = oldCharacterDataHandler;
1302 parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1303 parser->m_commentHandler = oldCommentHandler;
1304 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1305 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1306 parser->m_defaultHandler = oldDefaultHandler;
1307 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1308 parser->m_notationDeclHandler = oldNotationDeclHandler;
1309 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1310 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1311 parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1312 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1313 parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1314 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1315 parser->m_elementDeclHandler = oldElementDeclHandler;
1316 parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1317 parser->m_entityDeclHandler = oldEntityDeclHandler;
1318 parser->m_xmlDeclHandler = oldXmlDeclHandler;
1319 parser->m_declElementType = oldDeclElementType;
1320 parser->m_userData = oldUserData;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001321 if (oldUserData == oldHandlerArg)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001322 parser->m_handlerArg = parser->m_userData;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001323 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001324 parser->m_handlerArg = parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001325 if (oldExternalEntityRefHandlerArg != oldParser)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001326 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1327 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1328 parser->m_ns_triplets = oldns_triplets;
1329 parser->m_hash_secret_salt = oldhash_secret_salt;
1330 parser->m_parentParser = oldParser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001331#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07001332 parser->m_paramEntityParsing = oldParamEntityParsing;
1333 parser->m_prologState.inEntityValue = oldInEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001334 if (context) {
1335#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001336 if (!dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001337 || !setContext(parser, context)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001338 XML_ParserFree(parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001339 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001340 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001341 parser->m_processor = externalEntityInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001342#ifdef XML_DTD
1343 }
1344 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001345 /* The DTD instance referenced by parser->m_dtd is shared between the document's
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001346 root parser and external PE parsers, therefore one does not need to
1347 call setContext. In addition, one also *must* not call setContext,
1348 because this would overwrite existing prefix->binding pointers in
Benjamin Peterson4e211002018-06-26 19:25:45 -07001349 parser->m_dtd with ones that get destroyed with the external PE parser.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001350 This would leave those prefixes with dangling pointers.
1351 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001352 parser->m_isParamEntity = XML_TRUE;
1353 XmlPrologStateInitExternalEntity(&parser->m_prologState);
1354 parser->m_processor = externalParEntInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001355 }
1356#endif /* XML_DTD */
1357 return parser;
1358}
1359
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001360static void FASTCALL
1361destroyBindings(BINDING *bindings, XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001362{
1363 for (;;) {
1364 BINDING *b = bindings;
1365 if (!b)
1366 break;
1367 bindings = b->nextTagBinding;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001368 FREE(parser, b->uri);
1369 FREE(parser, b);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001370 }
1371}
1372
Fred Drake08317ae2003-10-21 15:38:55 +00001373void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001374XML_ParserFree(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001375{
Fred Drake31d485c2004-08-03 07:06:22 +00001376 TAG *tagList;
1377 OPEN_INTERNAL_ENTITY *entityList;
1378 if (parser == NULL)
1379 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001380 /* free m_tagStack and m_freeTagList */
1381 tagList = parser->m_tagStack;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001382 for (;;) {
1383 TAG *p;
Fred Drake31d485c2004-08-03 07:06:22 +00001384 if (tagList == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001385 if (parser->m_freeTagList == NULL)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001386 break;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001387 tagList = parser->m_freeTagList;
1388 parser->m_freeTagList = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001389 }
Fred Drake31d485c2004-08-03 07:06:22 +00001390 p = tagList;
1391 tagList = tagList->parent;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001392 FREE(parser, p->buf);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001393 destroyBindings(p->bindings, parser);
Benjamin Peterson4e211002018-06-26 19:25:45 -07001394 FREE(parser, p);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001395 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001396 /* free m_openInternalEntities and m_freeInternalEntities */
1397 entityList = parser->m_openInternalEntities;
Fred Drake31d485c2004-08-03 07:06:22 +00001398 for (;;) {
1399 OPEN_INTERNAL_ENTITY *openEntity;
1400 if (entityList == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001401 if (parser->m_freeInternalEntities == NULL)
Fred Drake31d485c2004-08-03 07:06:22 +00001402 break;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001403 entityList = parser->m_freeInternalEntities;
1404 parser->m_freeInternalEntities = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00001405 }
1406 openEntity = entityList;
1407 entityList = entityList->next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001408 FREE(parser, openEntity);
Fred Drake31d485c2004-08-03 07:06:22 +00001409 }
1410
Benjamin Peterson4e211002018-06-26 19:25:45 -07001411 destroyBindings(parser->m_freeBindingList, parser);
1412 destroyBindings(parser->m_inheritedBindings, parser);
1413 poolDestroy(&parser->m_tempPool);
1414 poolDestroy(&parser->m_temp2Pool);
1415 FREE(parser, (void *)parser->m_protocolEncodingName);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001416#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001417 /* external parameter entity parsers share the DTD structure
1418 parser->m_dtd with the root parser, so we must not destroy it
1419 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001420 if (!parser->m_isParamEntity && parser->m_dtd)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001421#else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001422 if (parser->m_dtd)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001423#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001424 dtdDestroy(parser->m_dtd, (XML_Bool)!parser->m_parentParser, &parser->m_mem);
1425 FREE(parser, (void *)parser->m_atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001426#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -07001427 FREE(parser, (void *)parser->m_attInfo);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001428#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07001429 FREE(parser, parser->m_groupConnector);
1430 FREE(parser, parser->m_buffer);
1431 FREE(parser, parser->m_dataBuf);
1432 FREE(parser, parser->m_nsAtts);
1433 FREE(parser, parser->m_unknownEncodingMem);
1434 if (parser->m_unknownEncodingRelease)
1435 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1436 FREE(parser, parser);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001437}
1438
Fred Drake08317ae2003-10-21 15:38:55 +00001439void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001440XML_UseParserAsHandlerArg(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001441{
Victor Stinner5ff71322017-06-21 14:39:22 +02001442 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001443 parser->m_handlerArg = parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001444}
1445
Fred Drake08317ae2003-10-21 15:38:55 +00001446enum XML_Error XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001447XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD)
1448{
Victor Stinner5ff71322017-06-21 14:39:22 +02001449 if (parser == NULL)
1450 return XML_ERROR_INVALID_ARGUMENT;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001451#ifdef XML_DTD
1452 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001453 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001454 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001455 parser->m_useForeignDTD = useDTD;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001456 return XML_ERROR_NONE;
1457#else
1458 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1459#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001460}
1461
Fred Drake08317ae2003-10-21 15:38:55 +00001462void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001463XML_SetReturnNSTriplet(XML_Parser parser, int do_nst)
1464{
Victor Stinner5ff71322017-06-21 14:39:22 +02001465 if (parser == NULL)
1466 return;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001467 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001468 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001469 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001470 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001471}
1472
Fred Drake08317ae2003-10-21 15:38:55 +00001473void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001474XML_SetUserData(XML_Parser parser, void *p)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001475{
Victor Stinner5ff71322017-06-21 14:39:22 +02001476 if (parser == NULL)
1477 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001478 if (parser->m_handlerArg == parser->m_userData)
1479 parser->m_handlerArg = parser->m_userData = p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001480 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001481 parser->m_userData = p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001482}
1483
Fred Drake08317ae2003-10-21 15:38:55 +00001484enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001485XML_SetBase(XML_Parser parser, const XML_Char *p)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001486{
Victor Stinner5ff71322017-06-21 14:39:22 +02001487 if (parser == NULL)
1488 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001489 if (p) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001490 p = poolCopyString(&parser->m_dtd->pool, p);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001491 if (!p)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001492 return XML_STATUS_ERROR;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001493 parser->m_curBase = p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001494 }
1495 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001496 parser->m_curBase = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001497 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001498}
1499
Fred Drake08317ae2003-10-21 15:38:55 +00001500const XML_Char * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001501XML_GetBase(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001502{
Victor Stinner5ff71322017-06-21 14:39:22 +02001503 if (parser == NULL)
1504 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001505 return parser->m_curBase;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001506}
1507
Fred Drake08317ae2003-10-21 15:38:55 +00001508int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001509XML_GetSpecifiedAttributeCount(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001510{
Victor Stinner5ff71322017-06-21 14:39:22 +02001511 if (parser == NULL)
1512 return -1;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001513 return parser->m_nSpecifiedAtts;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001514}
1515
Fred Drake08317ae2003-10-21 15:38:55 +00001516int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001517XML_GetIdAttributeIndex(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001518{
Victor Stinner5ff71322017-06-21 14:39:22 +02001519 if (parser == NULL)
1520 return -1;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001521 return parser->m_idAttIndex;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001522}
1523
#ifdef XML_ATTR_INFO
/* Return m_attInfo, or NULL when `parser' is NULL.
   Only compiled when XML_ATTR_INFO is defined.
*/
const XML_AttrInfo * XMLCALL
XML_GetAttributeInfo(XML_Parser parser)
{
  return (parser == NULL) ? NULL : parser->m_attInfo;
}
#endif
1533
Fred Drake08317ae2003-10-21 15:38:55 +00001534void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001535XML_SetElementHandler(XML_Parser parser,
1536 XML_StartElementHandler start,
1537 XML_EndElementHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001538{
Victor Stinner5ff71322017-06-21 14:39:22 +02001539 if (parser == NULL)
1540 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001541 parser->m_startElementHandler = start;
1542 parser->m_endElementHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001543}
1544
Fred Drake08317ae2003-10-21 15:38:55 +00001545void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001546XML_SetStartElementHandler(XML_Parser parser,
1547 XML_StartElementHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001548 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001549 parser->m_startElementHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001550}
1551
Fred Drake08317ae2003-10-21 15:38:55 +00001552void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001553XML_SetEndElementHandler(XML_Parser parser,
1554 XML_EndElementHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001555 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001556 parser->m_endElementHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001557}
1558
Fred Drake08317ae2003-10-21 15:38:55 +00001559void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001560XML_SetCharacterDataHandler(XML_Parser parser,
1561 XML_CharacterDataHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001562{
Victor Stinner5ff71322017-06-21 14:39:22 +02001563 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001564 parser->m_characterDataHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001565}
1566
Fred Drake08317ae2003-10-21 15:38:55 +00001567void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001568XML_SetProcessingInstructionHandler(XML_Parser parser,
1569 XML_ProcessingInstructionHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001570{
Victor Stinner5ff71322017-06-21 14:39:22 +02001571 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001572 parser->m_processingInstructionHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001573}
1574
Fred Drake08317ae2003-10-21 15:38:55 +00001575void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001576XML_SetCommentHandler(XML_Parser parser,
1577 XML_CommentHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001578{
Victor Stinner5ff71322017-06-21 14:39:22 +02001579 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001580 parser->m_commentHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001581}
1582
Fred Drake08317ae2003-10-21 15:38:55 +00001583void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001584XML_SetCdataSectionHandler(XML_Parser parser,
1585 XML_StartCdataSectionHandler start,
1586 XML_EndCdataSectionHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001587{
Victor Stinner5ff71322017-06-21 14:39:22 +02001588 if (parser == NULL)
1589 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001590 parser->m_startCdataSectionHandler = start;
1591 parser->m_endCdataSectionHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001592}
1593
Fred Drake08317ae2003-10-21 15:38:55 +00001594void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001595XML_SetStartCdataSectionHandler(XML_Parser parser,
1596 XML_StartCdataSectionHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001597 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001598 parser->m_startCdataSectionHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001599}
1600
Fred Drake08317ae2003-10-21 15:38:55 +00001601void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001602XML_SetEndCdataSectionHandler(XML_Parser parser,
1603 XML_EndCdataSectionHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001604 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001605 parser->m_endCdataSectionHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001606}
1607
Fred Drake08317ae2003-10-21 15:38:55 +00001608void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001609XML_SetDefaultHandler(XML_Parser parser,
1610 XML_DefaultHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001611{
Victor Stinner5ff71322017-06-21 14:39:22 +02001612 if (parser == NULL)
1613 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001614 parser->m_defaultHandler = handler;
1615 parser->m_defaultExpandInternalEntities = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001616}
1617
Fred Drake08317ae2003-10-21 15:38:55 +00001618void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001619XML_SetDefaultHandlerExpand(XML_Parser parser,
1620 XML_DefaultHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001621{
Victor Stinner5ff71322017-06-21 14:39:22 +02001622 if (parser == NULL)
1623 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001624 parser->m_defaultHandler = handler;
1625 parser->m_defaultExpandInternalEntities = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001626}
1627
Fred Drake08317ae2003-10-21 15:38:55 +00001628void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001629XML_SetDoctypeDeclHandler(XML_Parser parser,
1630 XML_StartDoctypeDeclHandler start,
1631 XML_EndDoctypeDeclHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001632{
Victor Stinner5ff71322017-06-21 14:39:22 +02001633 if (parser == NULL)
1634 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001635 parser->m_startDoctypeDeclHandler = start;
1636 parser->m_endDoctypeDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001637}
1638
Fred Drake08317ae2003-10-21 15:38:55 +00001639void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001640XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1641 XML_StartDoctypeDeclHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001642 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001643 parser->m_startDoctypeDeclHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001644}
1645
Fred Drake08317ae2003-10-21 15:38:55 +00001646void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001647XML_SetEndDoctypeDeclHandler(XML_Parser parser,
1648 XML_EndDoctypeDeclHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001649 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001650 parser->m_endDoctypeDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001651}
1652
Fred Drake08317ae2003-10-21 15:38:55 +00001653void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001654XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1655 XML_UnparsedEntityDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001656{
Victor Stinner5ff71322017-06-21 14:39:22 +02001657 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001658 parser->m_unparsedEntityDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001659}
1660
Fred Drake08317ae2003-10-21 15:38:55 +00001661void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001662XML_SetNotationDeclHandler(XML_Parser parser,
1663 XML_NotationDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001664{
Victor Stinner5ff71322017-06-21 14:39:22 +02001665 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001666 parser->m_notationDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001667}
1668
Fred Drake08317ae2003-10-21 15:38:55 +00001669void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001670XML_SetNamespaceDeclHandler(XML_Parser parser,
1671 XML_StartNamespaceDeclHandler start,
1672 XML_EndNamespaceDeclHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001673{
Victor Stinner5ff71322017-06-21 14:39:22 +02001674 if (parser == NULL)
1675 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001676 parser->m_startNamespaceDeclHandler = start;
1677 parser->m_endNamespaceDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001678}
1679
Fred Drake08317ae2003-10-21 15:38:55 +00001680void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001681XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1682 XML_StartNamespaceDeclHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001683 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001684 parser->m_startNamespaceDeclHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001685}
1686
Fred Drake08317ae2003-10-21 15:38:55 +00001687void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001688XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1689 XML_EndNamespaceDeclHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001690 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001691 parser->m_endNamespaceDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001692}
1693
Fred Drake08317ae2003-10-21 15:38:55 +00001694void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001695XML_SetNotStandaloneHandler(XML_Parser parser,
1696 XML_NotStandaloneHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001697{
Victor Stinner5ff71322017-06-21 14:39:22 +02001698 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001699 parser->m_notStandaloneHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001700}
1701
Fred Drake08317ae2003-10-21 15:38:55 +00001702void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001703XML_SetExternalEntityRefHandler(XML_Parser parser,
1704 XML_ExternalEntityRefHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001705{
Victor Stinner5ff71322017-06-21 14:39:22 +02001706 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001707 parser->m_externalEntityRefHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001708}
1709
Fred Drake08317ae2003-10-21 15:38:55 +00001710void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001711XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001712{
Victor Stinner5ff71322017-06-21 14:39:22 +02001713 if (parser == NULL)
1714 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001715 if (arg)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001716 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001717 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001718 parser->m_externalEntityRefHandlerArg = parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001719}
1720
Fred Drake08317ae2003-10-21 15:38:55 +00001721void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001722XML_SetSkippedEntityHandler(XML_Parser parser,
1723 XML_SkippedEntityHandler handler)
1724{
Victor Stinner5ff71322017-06-21 14:39:22 +02001725 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001726 parser->m_skippedEntityHandler = handler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001727}
1728
Fred Drake08317ae2003-10-21 15:38:55 +00001729void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001730XML_SetUnknownEncodingHandler(XML_Parser parser,
1731 XML_UnknownEncodingHandler handler,
1732 void *data)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001733{
Victor Stinner5ff71322017-06-21 14:39:22 +02001734 if (parser == NULL)
1735 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001736 parser->m_unknownEncodingHandler = handler;
1737 parser->m_unknownEncodingHandlerData = data;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001738}
1739
Fred Drake08317ae2003-10-21 15:38:55 +00001740void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001741XML_SetElementDeclHandler(XML_Parser parser,
1742 XML_ElementDeclHandler eldecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001743{
Victor Stinner5ff71322017-06-21 14:39:22 +02001744 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001745 parser->m_elementDeclHandler = eldecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001746}
1747
Fred Drake08317ae2003-10-21 15:38:55 +00001748void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001749XML_SetAttlistDeclHandler(XML_Parser parser,
1750 XML_AttlistDeclHandler attdecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001751{
Victor Stinner5ff71322017-06-21 14:39:22 +02001752 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001753 parser->m_attlistDeclHandler = attdecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001754}
1755
Fred Drake08317ae2003-10-21 15:38:55 +00001756void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001757XML_SetEntityDeclHandler(XML_Parser parser,
1758 XML_EntityDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001759{
Victor Stinner5ff71322017-06-21 14:39:22 +02001760 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001761 parser->m_entityDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001762}
1763
Fred Drake08317ae2003-10-21 15:38:55 +00001764void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001765XML_SetXmlDeclHandler(XML_Parser parser,
1766 XML_XmlDeclHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001767 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001768 parser->m_xmlDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001769}
1770
Fred Drake08317ae2003-10-21 15:38:55 +00001771int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001772XML_SetParamEntityParsing(XML_Parser parser,
1773 enum XML_ParamEntityParsing peParsing)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001774{
Victor Stinner5ff71322017-06-21 14:39:22 +02001775 if (parser == NULL)
1776 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001777 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001778 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001779 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001780#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07001781 parser->m_paramEntityParsing = peParsing;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001782 return 1;
1783#else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001784 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001785#endif
1786}
1787
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001788int XMLCALL
1789XML_SetHashSalt(XML_Parser parser,
1790 unsigned long hash_salt)
1791{
Victor Stinner5ff71322017-06-21 14:39:22 +02001792 if (parser == NULL)
1793 return 0;
1794 if (parser->m_parentParser)
1795 return XML_SetHashSalt(parser->m_parentParser, hash_salt);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001796 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001797 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001798 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001799 parser->m_hash_secret_salt = hash_salt;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001800 return 1;
1801}
1802
Fred Drake08317ae2003-10-21 15:38:55 +00001803enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001804XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001805{
Victor Stinner5ff71322017-06-21 14:39:22 +02001806 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02001807 if (parser != NULL)
1808 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
Victor Stinner5ff71322017-06-21 14:39:22 +02001809 return XML_STATUS_ERROR;
1810 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001811 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001812 case XML_SUSPENDED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001813 parser->m_errorCode = XML_ERROR_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00001814 return XML_STATUS_ERROR;
1815 case XML_FINISHED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001816 parser->m_errorCode = XML_ERROR_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001817 return XML_STATUS_ERROR;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001818 case XML_INITIALIZED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001819 if (parser->m_parentParser == NULL && !startParsing(parser)) {
1820 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001821 return XML_STATUS_ERROR;
1822 }
Fred Drake31d485c2004-08-03 07:06:22 +00001823 default:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001824 parser->m_parsingStatus.parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00001825 }
1826
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001827 if (len == 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001828 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001829 if (!isFinal)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001830 return XML_STATUS_OK;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001831 parser->m_positionPtr = parser->m_bufferPtr;
1832 parser->m_parseEndPtr = parser->m_bufferEnd;
Fred Drake31d485c2004-08-03 07:06:22 +00001833
1834 /* If data are left over from last buffer, and we now know that these
1835 data are the final chunk of input, then we have to check them again
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001836 to detect errors based on that fact.
Fred Drake31d485c2004-08-03 07:06:22 +00001837 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001838 parser->m_errorCode = parser->m_processor(parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00001839
Benjamin Peterson4e211002018-06-26 19:25:45 -07001840 if (parser->m_errorCode == XML_ERROR_NONE) {
1841 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001842 case XML_SUSPENDED:
Victor Stinner93d0cb52017-08-18 23:43:54 +02001843 /* It is hard to be certain, but it seems that this case
1844 * cannot occur. This code is cleaning up a previous parse
1845 * with no new data (since len == 0). Changing the parsing
1846 * state requires getting to execute a handler function, and
1847 * there doesn't seem to be an opportunity for that while in
1848 * this circumstance.
1849 *
1850 * Given the uncertainty, we retain the code but exclude it
1851 * from coverage tests.
1852 *
1853 * LCOV_EXCL_START
1854 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001855 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position);
1856 parser->m_positionPtr = parser->m_bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00001857 return XML_STATUS_SUSPENDED;
Victor Stinner93d0cb52017-08-18 23:43:54 +02001858 /* LCOV_EXCL_STOP */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001859 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00001860 case XML_PARSING:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001861 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001862 /* fall through */
1863 default:
1864 return XML_STATUS_OK;
1865 }
1866 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001867 parser->m_eventEndPtr = parser->m_eventPtr;
1868 parser->m_processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001869 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001870 }
1871#ifndef XML_CONTEXT_BYTES
Benjamin Peterson4e211002018-06-26 19:25:45 -07001872 else if (parser->m_bufferPtr == parser->m_bufferEnd) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001873 const char *end;
1874 int nLeftOver;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001875 enum XML_Status result;
Victor Stinner5ff71322017-06-21 14:39:22 +02001876 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001877 if (len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
1878 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1879 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1880 parser->m_processor = errorProcessor;
Victor Stinner5ff71322017-06-21 14:39:22 +02001881 return XML_STATUS_ERROR;
1882 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001883 parser->m_parseEndByteIndex += len;
1884 parser->m_positionPtr = s;
1885 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
Fred Drake31d485c2004-08-03 07:06:22 +00001886
Benjamin Peterson4e211002018-06-26 19:25:45 -07001887 parser->m_errorCode = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end);
Fred Drake31d485c2004-08-03 07:06:22 +00001888
Benjamin Peterson4e211002018-06-26 19:25:45 -07001889 if (parser->m_errorCode != XML_ERROR_NONE) {
1890 parser->m_eventEndPtr = parser->m_eventPtr;
1891 parser->m_processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001892 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001893 }
Fred Drake31d485c2004-08-03 07:06:22 +00001894 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001895 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001896 case XML_SUSPENDED:
1897 result = XML_STATUS_SUSPENDED;
1898 break;
1899 case XML_INITIALIZED:
1900 case XML_PARSING:
Fred Drake31d485c2004-08-03 07:06:22 +00001901 if (isFinal) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001902 parser->m_parsingStatus.parsing = XML_FINISHED;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001903 return XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001904 }
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001905 /* fall through */
1906 default:
1907 result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001908 }
1909 }
1910
Benjamin Peterson4e211002018-06-26 19:25:45 -07001911 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end, &parser->m_position);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001912 nLeftOver = s + len - end;
1913 if (nLeftOver) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001914 if (parser->m_buffer == NULL || nLeftOver > parser->m_bufferLim - parser->m_buffer) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001915 /* avoid _signed_ integer overflow */
1916 char *temp = NULL;
1917 const int bytesToAllocate = (int)((unsigned)len * 2U);
1918 if (bytesToAllocate > 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001919 temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate);
Victor Stinner5ff71322017-06-21 14:39:22 +02001920 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001921 if (temp == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001922 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1923 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1924 parser->m_processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001925 return XML_STATUS_ERROR;
1926 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001927 parser->m_buffer = temp;
1928 parser->m_bufferLim = parser->m_buffer + bytesToAllocate;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001929 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001930 memcpy(parser->m_buffer, end, nLeftOver);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001931 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001932 parser->m_bufferPtr = parser->m_buffer;
1933 parser->m_bufferEnd = parser->m_buffer + nLeftOver;
1934 parser->m_positionPtr = parser->m_bufferPtr;
1935 parser->m_parseEndPtr = parser->m_bufferEnd;
1936 parser->m_eventPtr = parser->m_bufferPtr;
1937 parser->m_eventEndPtr = parser->m_bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00001938 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001939 }
1940#endif /* not defined XML_CONTEXT_BYTES */
1941 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001942 void *buff = XML_GetBuffer(parser, len);
1943 if (buff == NULL)
1944 return XML_STATUS_ERROR;
1945 else {
1946 memcpy(buff, s, len);
1947 return XML_ParseBuffer(parser, len, isFinal);
1948 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001949 }
1950}
1951
Fred Drake08317ae2003-10-21 15:38:55 +00001952enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001953XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001954{
Fred Drake31d485c2004-08-03 07:06:22 +00001955 const char *start;
Neal Norwitz52ca0dd2006-01-07 21:21:16 +00001956 enum XML_Status result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001957
Victor Stinner5ff71322017-06-21 14:39:22 +02001958 if (parser == NULL)
1959 return XML_STATUS_ERROR;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001960 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001961 case XML_SUSPENDED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001962 parser->m_errorCode = XML_ERROR_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00001963 return XML_STATUS_ERROR;
1964 case XML_FINISHED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001965 parser->m_errorCode = XML_ERROR_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001966 return XML_STATUS_ERROR;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001967 case XML_INITIALIZED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001968 if (parser->m_parentParser == NULL && !startParsing(parser)) {
1969 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001970 return XML_STATUS_ERROR;
1971 }
Fred Drake31d485c2004-08-03 07:06:22 +00001972 default:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001973 parser->m_parsingStatus.parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00001974 }
1975
Benjamin Peterson4e211002018-06-26 19:25:45 -07001976 start = parser->m_bufferPtr;
1977 parser->m_positionPtr = start;
1978 parser->m_bufferEnd += len;
1979 parser->m_parseEndPtr = parser->m_bufferEnd;
1980 parser->m_parseEndByteIndex += len;
1981 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
Fred Drake31d485c2004-08-03 07:06:22 +00001982
Benjamin Peterson4e211002018-06-26 19:25:45 -07001983 parser->m_errorCode = parser->m_processor(parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00001984
Benjamin Peterson4e211002018-06-26 19:25:45 -07001985 if (parser->m_errorCode != XML_ERROR_NONE) {
1986 parser->m_eventEndPtr = parser->m_eventPtr;
1987 parser->m_processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001988 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001989 }
Fred Drake31d485c2004-08-03 07:06:22 +00001990 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001991 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001992 case XML_SUSPENDED:
1993 result = XML_STATUS_SUSPENDED;
1994 break;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001995 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00001996 case XML_PARSING:
1997 if (isFinal) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001998 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001999 return result;
2000 }
2001 default: ; /* should not happen */
2002 }
2003 }
2004
Benjamin Peterson4e211002018-06-26 19:25:45 -07002005 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position);
2006 parser->m_positionPtr = parser->m_bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00002007 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002008}
2009
Fred Drake08317ae2003-10-21 15:38:55 +00002010void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002011XML_GetBuffer(XML_Parser parser, int len)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002012{
Victor Stinner5ff71322017-06-21 14:39:22 +02002013 if (parser == NULL)
2014 return NULL;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002015 if (len < 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002016 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002017 return NULL;
2018 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002019 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002020 case XML_SUSPENDED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002021 parser->m_errorCode = XML_ERROR_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00002022 return NULL;
2023 case XML_FINISHED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002024 parser->m_errorCode = XML_ERROR_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002025 return NULL;
2026 default: ;
2027 }
2028
Benjamin Peterson4e211002018-06-26 19:25:45 -07002029 if (len > parser->m_bufferLim - parser->m_bufferEnd) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002030#ifdef XML_CONTEXT_BYTES
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002031 int keep;
Victor Stinner23ec4b52017-06-15 00:54:36 +02002032#endif /* defined XML_CONTEXT_BYTES */
2033 /* Do not invoke signed arithmetic overflow: */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002034 int neededSize = (int) ((unsigned)len + (unsigned)(parser->m_bufferEnd - parser->m_bufferPtr));
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002035 if (neededSize < 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002036 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002037 return NULL;
2038 }
2039#ifdef XML_CONTEXT_BYTES
Benjamin Peterson4e211002018-06-26 19:25:45 -07002040 keep = (int)(parser->m_bufferPtr - parser->m_buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002041 if (keep > XML_CONTEXT_BYTES)
2042 keep = XML_CONTEXT_BYTES;
2043 neededSize += keep;
2044#endif /* defined XML_CONTEXT_BYTES */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002045 if (neededSize <= parser->m_bufferLim - parser->m_buffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002046#ifdef XML_CONTEXT_BYTES
Benjamin Peterson4e211002018-06-26 19:25:45 -07002047 if (keep < parser->m_bufferPtr - parser->m_buffer) {
2048 int offset = (int)(parser->m_bufferPtr - parser->m_buffer) - keep;
2049 memmove(parser->m_buffer, &parser->m_buffer[offset], parser->m_bufferEnd - parser->m_bufferPtr + keep);
2050 parser->m_bufferEnd -= offset;
2051 parser->m_bufferPtr -= offset;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002052 }
2053#else
Benjamin Peterson4e211002018-06-26 19:25:45 -07002054 memmove(parser->m_buffer, parser->m_bufferPtr, parser->m_bufferEnd - parser->m_bufferPtr);
2055 parser->m_bufferEnd = parser->m_buffer + (parser->m_bufferEnd - parser->m_bufferPtr);
2056 parser->m_bufferPtr = parser->m_buffer;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002057#endif /* not defined XML_CONTEXT_BYTES */
2058 }
2059 else {
2060 char *newBuf;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002061 int bufferSize = (int)(parser->m_bufferLim - parser->m_bufferPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002062 if (bufferSize == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002063 bufferSize = INIT_BUFFER_SIZE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002064 do {
Victor Stinner23ec4b52017-06-15 00:54:36 +02002065 /* Do not invoke signed arithmetic overflow: */
2066 bufferSize = (int) (2U * (unsigned) bufferSize);
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002067 } while (bufferSize < neededSize && bufferSize > 0);
2068 if (bufferSize <= 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002069 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002070 return NULL;
2071 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002072 newBuf = (char *)MALLOC(parser, bufferSize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002073 if (newBuf == 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002074 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002075 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002076 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002077 parser->m_bufferLim = newBuf + bufferSize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002078#ifdef XML_CONTEXT_BYTES
Benjamin Peterson4e211002018-06-26 19:25:45 -07002079 if (parser->m_bufferPtr) {
2080 int keep = (int)(parser->m_bufferPtr - parser->m_buffer);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002081 if (keep > XML_CONTEXT_BYTES)
2082 keep = XML_CONTEXT_BYTES;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002083 memcpy(newBuf, &parser->m_bufferPtr[-keep], parser->m_bufferEnd - parser->m_bufferPtr + keep);
2084 FREE(parser, parser->m_buffer);
2085 parser->m_buffer = newBuf;
2086 parser->m_bufferEnd = parser->m_buffer + (parser->m_bufferEnd - parser->m_bufferPtr) + keep;
2087 parser->m_bufferPtr = parser->m_buffer + keep;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002088 }
2089 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002090 parser->m_bufferEnd = newBuf + (parser->m_bufferEnd - parser->m_bufferPtr);
2091 parser->m_bufferPtr = parser->m_buffer = newBuf;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002092 }
2093#else
Benjamin Peterson4e211002018-06-26 19:25:45 -07002094 if (parser->m_bufferPtr) {
2095 memcpy(newBuf, parser->m_bufferPtr, parser->m_bufferEnd - parser->m_bufferPtr);
2096 FREE(parser, parser->m_buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002097 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002098 parser->m_bufferEnd = newBuf + (parser->m_bufferEnd - parser->m_bufferPtr);
2099 parser->m_bufferPtr = parser->m_buffer = newBuf;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002100#endif /* not defined XML_CONTEXT_BYTES */
2101 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002102 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2103 parser->m_positionPtr = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002104 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002105 return parser->m_bufferEnd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002106}
2107
Fred Drake31d485c2004-08-03 07:06:22 +00002108enum XML_Status XMLCALL
2109XML_StopParser(XML_Parser parser, XML_Bool resumable)
2110{
Victor Stinner5ff71322017-06-21 14:39:22 +02002111 if (parser == NULL)
2112 return XML_STATUS_ERROR;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002113 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002114 case XML_SUSPENDED:
2115 if (resumable) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002116 parser->m_errorCode = XML_ERROR_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00002117 return XML_STATUS_ERROR;
2118 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002119 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002120 break;
2121 case XML_FINISHED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002122 parser->m_errorCode = XML_ERROR_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002123 return XML_STATUS_ERROR;
2124 default:
2125 if (resumable) {
2126#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07002127 if (parser->m_isParamEntity) {
2128 parser->m_errorCode = XML_ERROR_SUSPEND_PE;
Fred Drake31d485c2004-08-03 07:06:22 +00002129 return XML_STATUS_ERROR;
2130 }
2131#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07002132 parser->m_parsingStatus.parsing = XML_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00002133 }
2134 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07002135 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002136 }
2137 return XML_STATUS_OK;
2138}
2139
2140enum XML_Status XMLCALL
2141XML_ResumeParser(XML_Parser parser)
2142{
Neal Norwitz52ca0dd2006-01-07 21:21:16 +00002143 enum XML_Status result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00002144
Victor Stinner5ff71322017-06-21 14:39:22 +02002145 if (parser == NULL)
2146 return XML_STATUS_ERROR;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002147 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2148 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00002149 return XML_STATUS_ERROR;
2150 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002151 parser->m_parsingStatus.parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00002152
Benjamin Peterson4e211002018-06-26 19:25:45 -07002153 parser->m_errorCode = parser->m_processor(parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00002154
Benjamin Peterson4e211002018-06-26 19:25:45 -07002155 if (parser->m_errorCode != XML_ERROR_NONE) {
2156 parser->m_eventEndPtr = parser->m_eventPtr;
2157 parser->m_processor = errorProcessor;
Fred Drake31d485c2004-08-03 07:06:22 +00002158 return XML_STATUS_ERROR;
2159 }
2160 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002161 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002162 case XML_SUSPENDED:
2163 result = XML_STATUS_SUSPENDED;
2164 break;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002165 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00002166 case XML_PARSING:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002167 if (parser->m_parsingStatus.finalBuffer) {
2168 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002169 return result;
2170 }
2171 default: ;
2172 }
2173 }
2174
Benjamin Peterson4e211002018-06-26 19:25:45 -07002175 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position);
2176 parser->m_positionPtr = parser->m_bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00002177 return result;
2178}
2179
2180void XMLCALL
2181XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status)
2182{
Victor Stinner5ff71322017-06-21 14:39:22 +02002183 if (parser == NULL)
2184 return;
Fred Drake31d485c2004-08-03 07:06:22 +00002185 assert(status != NULL);
2186 *status = parser->m_parsingStatus;
2187}
2188
Fred Drake08317ae2003-10-21 15:38:55 +00002189enum XML_Error XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002190XML_GetErrorCode(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002191{
Victor Stinner5ff71322017-06-21 14:39:22 +02002192 if (parser == NULL)
2193 return XML_ERROR_INVALID_ARGUMENT;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002194 return parser->m_errorCode;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002195}
2196
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002197XML_Index XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002198XML_GetCurrentByteIndex(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002199{
Victor Stinner5ff71322017-06-21 14:39:22 +02002200 if (parser == NULL)
2201 return -1;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002202 if (parser->m_eventPtr)
2203 return (XML_Index)(parser->m_parseEndByteIndex - (parser->m_parseEndPtr - parser->m_eventPtr));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002204 return -1;
2205}
2206
Fred Drake08317ae2003-10-21 15:38:55 +00002207int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002208XML_GetCurrentByteCount(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002209{
Victor Stinner5ff71322017-06-21 14:39:22 +02002210 if (parser == NULL)
2211 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002212 if (parser->m_eventEndPtr && parser->m_eventPtr)
2213 return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002214 return 0;
2215}
2216
Fred Drake08317ae2003-10-21 15:38:55 +00002217const char * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002218XML_GetInputContext(XML_Parser parser, int *offset, int *size)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002219{
2220#ifdef XML_CONTEXT_BYTES
Victor Stinner5ff71322017-06-21 14:39:22 +02002221 if (parser == NULL)
2222 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002223 if (parser->m_eventPtr && parser->m_buffer) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002224 if (offset != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07002225 *offset = (int)(parser->m_eventPtr - parser->m_buffer);
Victor Stinner5ff71322017-06-21 14:39:22 +02002226 if (size != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07002227 *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2228 return parser->m_buffer;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002229 }
Victor Stinner5ff71322017-06-21 14:39:22 +02002230#else
2231 (void)parser;
2232 (void)offset;
2233 (void)size;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002234#endif /* defined XML_CONTEXT_BYTES */
2235 return (char *) 0;
2236}
2237
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002238XML_Size XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002239XML_GetCurrentLineNumber(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002240{
Victor Stinner5ff71322017-06-21 14:39:22 +02002241 if (parser == NULL)
2242 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002243 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2244 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_eventPtr, &parser->m_position);
2245 parser->m_positionPtr = parser->m_eventPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002246 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002247 return parser->m_position.lineNumber + 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002248}
2249
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002250XML_Size XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002251XML_GetCurrentColumnNumber(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002252{
Victor Stinner5ff71322017-06-21 14:39:22 +02002253 if (parser == NULL)
2254 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002255 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2256 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_eventPtr, &parser->m_position);
2257 parser->m_positionPtr = parser->m_eventPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002258 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002259 return parser->m_position.columnNumber;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002260}
2261
Fred Drake08317ae2003-10-21 15:38:55 +00002262void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002263XML_FreeContentModel(XML_Parser parser, XML_Content *model)
2264{
Victor Stinner5ff71322017-06-21 14:39:22 +02002265 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07002266 FREE(parser, model);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002267}
2268
Fred Drake08317ae2003-10-21 15:38:55 +00002269void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002270XML_MemMalloc(XML_Parser parser, size_t size)
2271{
Victor Stinner5ff71322017-06-21 14:39:22 +02002272 if (parser == NULL)
2273 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002274 return MALLOC(parser, size);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002275}
2276
Fred Drake08317ae2003-10-21 15:38:55 +00002277void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002278XML_MemRealloc(XML_Parser parser, void *ptr, size_t size)
2279{
Victor Stinner5ff71322017-06-21 14:39:22 +02002280 if (parser == NULL)
2281 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002282 return REALLOC(parser, ptr, size);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002283}
2284
Fred Drake08317ae2003-10-21 15:38:55 +00002285void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002286XML_MemFree(XML_Parser parser, void *ptr)
2287{
Victor Stinner5ff71322017-06-21 14:39:22 +02002288 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07002289 FREE(parser, ptr);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002290}
2291
Fred Drake08317ae2003-10-21 15:38:55 +00002292void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002293XML_DefaultCurrent(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002294{
Victor Stinner5ff71322017-06-21 14:39:22 +02002295 if (parser == NULL)
2296 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002297 if (parser->m_defaultHandler) {
2298 if (parser->m_openInternalEntities)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002299 reportDefault(parser,
Benjamin Peterson4e211002018-06-26 19:25:45 -07002300 parser->m_internalEncoding,
2301 parser->m_openInternalEntities->internalEventPtr,
2302 parser->m_openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002303 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07002304 reportDefault(parser, parser->m_encoding, parser->m_eventPtr, parser->m_eventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002305 }
2306}
2307
Fred Drake08317ae2003-10-21 15:38:55 +00002308const XML_LChar * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002309XML_ErrorString(enum XML_Error code)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002310{
Benjamin Peterson4e211002018-06-26 19:25:45 -07002311 switch (code) {
2312 case XML_ERROR_NONE:
2313 return NULL;
2314 case XML_ERROR_NO_MEMORY:
2315 return XML_L("out of memory");
2316 case XML_ERROR_SYNTAX:
2317 return XML_L("syntax error");
2318 case XML_ERROR_NO_ELEMENTS:
2319 return XML_L("no element found");
2320 case XML_ERROR_INVALID_TOKEN:
2321 return XML_L("not well-formed (invalid token)");
2322 case XML_ERROR_UNCLOSED_TOKEN:
2323 return XML_L("unclosed token");
2324 case XML_ERROR_PARTIAL_CHAR:
2325 return XML_L("partial character");
2326 case XML_ERROR_TAG_MISMATCH:
2327 return XML_L("mismatched tag");
2328 case XML_ERROR_DUPLICATE_ATTRIBUTE:
2329 return XML_L("duplicate attribute");
2330 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2331 return XML_L("junk after document element");
2332 case XML_ERROR_PARAM_ENTITY_REF:
2333 return XML_L("illegal parameter entity reference");
2334 case XML_ERROR_UNDEFINED_ENTITY:
2335 return XML_L("undefined entity");
2336 case XML_ERROR_RECURSIVE_ENTITY_REF:
2337 return XML_L("recursive entity reference");
2338 case XML_ERROR_ASYNC_ENTITY:
2339 return XML_L("asynchronous entity");
2340 case XML_ERROR_BAD_CHAR_REF:
2341 return XML_L("reference to invalid character number");
2342 case XML_ERROR_BINARY_ENTITY_REF:
2343 return XML_L("reference to binary entity");
2344 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2345 return XML_L("reference to external entity in attribute");
2346 case XML_ERROR_MISPLACED_XML_PI:
2347 return XML_L("XML or text declaration not at start of entity");
2348 case XML_ERROR_UNKNOWN_ENCODING:
2349 return XML_L("unknown encoding");
2350 case XML_ERROR_INCORRECT_ENCODING:
2351 return XML_L("encoding specified in XML declaration is incorrect");
2352 case XML_ERROR_UNCLOSED_CDATA_SECTION:
2353 return XML_L("unclosed CDATA section");
2354 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2355 return XML_L("error in processing external entity reference");
2356 case XML_ERROR_NOT_STANDALONE:
2357 return XML_L("document is not standalone");
2358 case XML_ERROR_UNEXPECTED_STATE:
2359 return XML_L("unexpected parser state - please send a bug report");
2360 case XML_ERROR_ENTITY_DECLARED_IN_PE:
2361 return XML_L("entity declared in parameter entity");
2362 case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2363 return XML_L("requested feature requires XML_DTD support in Expat");
2364 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2365 return XML_L("cannot change setting once parsing has begun");
2366 /* Added in 1.95.7. */
2367 case XML_ERROR_UNBOUND_PREFIX:
2368 return XML_L("unbound prefix");
2369 /* Added in 1.95.8. */
2370 case XML_ERROR_UNDECLARING_PREFIX:
2371 return XML_L("must not undeclare prefix");
2372 case XML_ERROR_INCOMPLETE_PE:
2373 return XML_L("incomplete markup in parameter entity");
2374 case XML_ERROR_XML_DECL:
2375 return XML_L("XML declaration not well-formed");
2376 case XML_ERROR_TEXT_DECL:
2377 return XML_L("text declaration not well-formed");
2378 case XML_ERROR_PUBLICID:
2379 return XML_L("illegal character(s) in public id");
2380 case XML_ERROR_SUSPENDED:
2381 return XML_L("parser suspended");
2382 case XML_ERROR_NOT_SUSPENDED:
2383 return XML_L("parser not suspended");
2384 case XML_ERROR_ABORTED:
2385 return XML_L("parsing aborted");
2386 case XML_ERROR_FINISHED:
2387 return XML_L("parsing finished");
2388 case XML_ERROR_SUSPEND_PE:
2389 return XML_L("cannot suspend in external parameter entity");
2390 /* Added in 2.0.0. */
2391 case XML_ERROR_RESERVED_PREFIX_XML:
2392 return XML_L("reserved prefix (xml) must not be undeclared or bound to another namespace name");
2393 case XML_ERROR_RESERVED_PREFIX_XMLNS:
2394 return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2395 case XML_ERROR_RESERVED_NAMESPACE_URI:
2396 return XML_L("prefix must not be bound to one of the reserved namespace names");
2397 /* Added in 2.2.5. */
2398 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2399 return XML_L("invalid argument");
2400 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002401 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002402}
2403
Fred Drake08317ae2003-10-21 15:38:55 +00002404const XML_LChar * XMLCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002405XML_ExpatVersion(void) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002406
2407 /* V1 is used to string-ize the version number. However, it would
2408 string-ize the actual version macro *names* unless we get them
2409 substituted before being passed to V1. CPP is defined to expand
2410 a macro, then rescan for more expansions. Thus, we use V2 to expand
2411 the version macros, then CPP will expand the resulting V1() macro
2412 with the correct numerals. */
2413 /* ### I'm assuming cpp is portable in this respect... */
2414
2415#define V1(a,b,c) XML_L(#a)XML_L(".")XML_L(#b)XML_L(".")XML_L(#c)
2416#define V2(a,b,c) XML_L("expat_")V1(a,b,c)
2417
2418 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2419
2420#undef V1
2421#undef V2
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002422}
2423
Fred Drake08317ae2003-10-21 15:38:55 +00002424XML_Expat_Version XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002425XML_ExpatVersionInfo(void)
2426{
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002427 XML_Expat_Version version;
2428
2429 version.major = XML_MAJOR_VERSION;
2430 version.minor = XML_MINOR_VERSION;
2431 version.micro = XML_MICRO_VERSION;
2432
2433 return version;
2434}
2435
Fred Drake08317ae2003-10-21 15:38:55 +00002436const XML_Feature * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002437XML_GetFeatureList(void)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002438{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002439 static const XML_Feature features[] = {
2440 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2441 sizeof(XML_Char)},
2442 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2443 sizeof(XML_LChar)},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002444#ifdef XML_UNICODE
Fred Drake08317ae2003-10-21 15:38:55 +00002445 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002446#endif
2447#ifdef XML_UNICODE_WCHAR_T
Fred Drake08317ae2003-10-21 15:38:55 +00002448 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002449#endif
2450#ifdef XML_DTD
Fred Drake08317ae2003-10-21 15:38:55 +00002451 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002452#endif
2453#ifdef XML_CONTEXT_BYTES
2454 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2455 XML_CONTEXT_BYTES},
2456#endif
2457#ifdef XML_MIN_SIZE
Fred Drake08317ae2003-10-21 15:38:55 +00002458 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002459#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002460#ifdef XML_NS
2461 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2462#endif
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002463#ifdef XML_LARGE_SIZE
2464 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2465#endif
2466#ifdef XML_ATTR_INFO
2467 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2468#endif
Fred Drake08317ae2003-10-21 15:38:55 +00002469 {XML_FEATURE_END, NULL, 0}
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002470 };
2471
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002472 return features;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002473}
2474
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002475/* Initially tag->rawName always points into the parse buffer;
2476 for those TAG instances opened while the current parse buffer was
2477 processed, and not yet closed, we need to store tag->rawName in a more
2478 permanent location, since the parse buffer is about to be discarded.
2479*/
2480static XML_Bool
2481storeRawNames(XML_Parser parser)
2482{
Benjamin Peterson4e211002018-06-26 19:25:45 -07002483 TAG *tag = parser->m_tagStack;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002484 while (tag) {
2485 int bufSize;
2486 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2487 char *rawNameBuf = tag->buf + nameLen;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002488 /* Stop if already stored. Since m_tagStack is a stack, we can stop
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002489 at the first entry that has already been copied; everything
2490 below it in the stack is already been accounted for in a
2491 previous call to this function.
2492 */
2493 if (tag->rawName == rawNameBuf)
2494 break;
2495 /* For re-use purposes we need to ensure that the
2496 size of tag->buf is a multiple of sizeof(XML_Char).
2497 */
2498 bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2499 if (bufSize > tag->bufEnd - tag->buf) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002500 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002501 if (temp == NULL)
2502 return XML_FALSE;
2503 /* if tag->name.str points to tag->buf (only when namespace
2504 processing is off) then we have to update it
2505 */
2506 if (tag->name.str == (XML_Char *)tag->buf)
2507 tag->name.str = (XML_Char *)temp;
2508 /* if tag->name.localPart is set (when namespace processing is on)
2509 then update it as well, since it will always point into tag->buf
2510 */
2511 if (tag->name.localPart)
2512 tag->name.localPart = (XML_Char *)temp + (tag->name.localPart -
2513 (XML_Char *)tag->buf);
2514 tag->buf = temp;
2515 tag->bufEnd = temp + bufSize;
2516 rawNameBuf = temp + nameLen;
2517 }
2518 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2519 tag->rawName = rawNameBuf;
2520 tag = tag->parent;
2521 }
2522 return XML_TRUE;
2523}
2524
2525static enum XML_Error PTRCALL
2526contentProcessor(XML_Parser parser,
2527 const char *start,
2528 const char *end,
2529 const char **endPtr)
2530{
Benjamin Peterson4e211002018-06-26 19:25:45 -07002531 enum XML_Error result = doContent(parser, 0, parser->m_encoding, start, end,
2532 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00002533 if (result == XML_ERROR_NONE) {
2534 if (!storeRawNames(parser))
2535 return XML_ERROR_NO_MEMORY;
2536 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002537 return result;
2538}
2539
2540static enum XML_Error PTRCALL
2541externalEntityInitProcessor(XML_Parser parser,
2542 const char *start,
2543 const char *end,
2544 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002545{
2546 enum XML_Error result = initializeEncoding(parser);
2547 if (result != XML_ERROR_NONE)
2548 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002549 parser->m_processor = externalEntityInitProcessor2;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002550 return externalEntityInitProcessor2(parser, start, end, endPtr);
2551}
2552
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002553static enum XML_Error PTRCALL
2554externalEntityInitProcessor2(XML_Parser parser,
2555 const char *start,
2556 const char *end,
2557 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002558{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002559 const char *next = start; /* XmlContentTok doesn't always set the last arg */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002560 int tok = XmlContentTok(parser->m_encoding, start, end, &next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002561 switch (tok) {
2562 case XML_TOK_BOM:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002563 /* If we are at the end of the buffer, this would cause the next stage,
2564 i.e. externalEntityInitProcessor3, to pass control directly to
2565 doContent (by detecting XML_TOK_NONE) without processing any xml text
2566 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2567 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002568 if (next == end && !parser->m_parsingStatus.finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002569 *endPtr = next;
2570 return XML_ERROR_NONE;
2571 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002572 start = next;
2573 break;
2574 case XML_TOK_PARTIAL:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002575 if (!parser->m_parsingStatus.finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002576 *endPtr = start;
2577 return XML_ERROR_NONE;
2578 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002579 parser->m_eventPtr = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002580 return XML_ERROR_UNCLOSED_TOKEN;
2581 case XML_TOK_PARTIAL_CHAR:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002582 if (!parser->m_parsingStatus.finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002583 *endPtr = start;
2584 return XML_ERROR_NONE;
2585 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002586 parser->m_eventPtr = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002587 return XML_ERROR_PARTIAL_CHAR;
2588 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002589 parser->m_processor = externalEntityInitProcessor3;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002590 return externalEntityInitProcessor3(parser, start, end, endPtr);
2591}
2592
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002593static enum XML_Error PTRCALL
2594externalEntityInitProcessor3(XML_Parser parser,
2595 const char *start,
2596 const char *end,
2597 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002598{
Fred Drake31d485c2004-08-03 07:06:22 +00002599 int tok;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002600 const char *next = start; /* XmlContentTok doesn't always set the last arg */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002601 parser->m_eventPtr = start;
2602 tok = XmlContentTok(parser->m_encoding, start, end, &next);
2603 parser->m_eventEndPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00002604
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002605 switch (tok) {
2606 case XML_TOK_XML_DECL:
2607 {
Fred Drake31d485c2004-08-03 07:06:22 +00002608 enum XML_Error result;
2609 result = processXmlDecl(parser, 1, start, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002610 if (result != XML_ERROR_NONE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002611 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002612 switch (parser->m_parsingStatus.parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002613 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00002614 *endPtr = next;
2615 return XML_ERROR_NONE;
2616 case XML_FINISHED:
2617 return XML_ERROR_ABORTED;
2618 default:
2619 start = next;
2620 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002621 }
2622 break;
2623 case XML_TOK_PARTIAL:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002624 if (!parser->m_parsingStatus.finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002625 *endPtr = start;
2626 return XML_ERROR_NONE;
2627 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002628 return XML_ERROR_UNCLOSED_TOKEN;
2629 case XML_TOK_PARTIAL_CHAR:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002630 if (!parser->m_parsingStatus.finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002631 *endPtr = start;
2632 return XML_ERROR_NONE;
2633 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002634 return XML_ERROR_PARTIAL_CHAR;
2635 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002636 parser->m_processor = externalEntityContentProcessor;
2637 parser->m_tagLevel = 1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002638 return externalEntityContentProcessor(parser, start, end, endPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002639}
2640
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002641static enum XML_Error PTRCALL
2642externalEntityContentProcessor(XML_Parser parser,
2643 const char *start,
2644 const char *end,
2645 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002646{
Benjamin Peterson4e211002018-06-26 19:25:45 -07002647 enum XML_Error result = doContent(parser, 1, parser->m_encoding, start, end,
2648 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00002649 if (result == XML_ERROR_NONE) {
2650 if (!storeRawNames(parser))
2651 return XML_ERROR_NO_MEMORY;
2652 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002653 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002654}
2655
2656static enum XML_Error
2657doContent(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002658 int startTagLevel,
2659 const ENCODING *enc,
2660 const char *s,
2661 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00002662 const char **nextPtr,
2663 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002664{
Fred Drake31d485c2004-08-03 07:06:22 +00002665 /* save one level of indirection */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002666 DTD * const dtd = parser->m_dtd;
Fred Drake31d485c2004-08-03 07:06:22 +00002667
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002668 const char **eventPP;
2669 const char **eventEndPP;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002670 if (enc == parser->m_encoding) {
2671 eventPP = &parser->m_eventPtr;
2672 eventEndPP = &parser->m_eventEndPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002673 }
2674 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002675 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
2676 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002677 }
2678 *eventPP = s;
Fred Drake31d485c2004-08-03 07:06:22 +00002679
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002680 for (;;) {
2681 const char *next = s; /* XmlContentTok doesn't always set the last arg */
2682 int tok = XmlContentTok(enc, s, end, &next);
2683 *eventEndPP = next;
2684 switch (tok) {
2685 case XML_TOK_TRAILING_CR:
Fred Drake31d485c2004-08-03 07:06:22 +00002686 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002687 *nextPtr = s;
2688 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002689 }
2690 *eventEndPP = end;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002691 if (parser->m_characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002692 XML_Char c = 0xA;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002693 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002694 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002695 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002696 reportDefault(parser, enc, s, end);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002697 /* We are at the end of the final buffer, should we check for
2698 XML_SUSPENDED, XML_FINISHED?
Fred Drake31d485c2004-08-03 07:06:22 +00002699 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002700 if (startTagLevel == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002701 return XML_ERROR_NO_ELEMENTS;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002702 if (parser->m_tagLevel != startTagLevel)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002703 return XML_ERROR_ASYNC_ENTITY;
Fred Drake31d485c2004-08-03 07:06:22 +00002704 *nextPtr = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002705 return XML_ERROR_NONE;
2706 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00002707 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002708 *nextPtr = s;
2709 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002710 }
2711 if (startTagLevel > 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002712 if (parser->m_tagLevel != startTagLevel)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002713 return XML_ERROR_ASYNC_ENTITY;
Fred Drake31d485c2004-08-03 07:06:22 +00002714 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002715 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002716 }
2717 return XML_ERROR_NO_ELEMENTS;
2718 case XML_TOK_INVALID:
2719 *eventPP = next;
2720 return XML_ERROR_INVALID_TOKEN;
2721 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00002722 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002723 *nextPtr = s;
2724 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002725 }
2726 return XML_ERROR_UNCLOSED_TOKEN;
2727 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00002728 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002729 *nextPtr = s;
2730 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002731 }
2732 return XML_ERROR_PARTIAL_CHAR;
2733 case XML_TOK_ENTITY_REF:
2734 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002735 const XML_Char *name;
2736 ENTITY *entity;
2737 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
2738 s + enc->minBytesPerChar,
2739 next - enc->minBytesPerChar);
2740 if (ch) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002741 if (parser->m_characterDataHandler)
2742 parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
2743 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002744 reportDefault(parser, enc, s, next);
2745 break;
2746 }
2747 name = poolStoreString(&dtd->pool, enc,
2748 s + enc->minBytesPerChar,
2749 next - enc->minBytesPerChar);
2750 if (!name)
2751 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07002752 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002753 poolDiscard(&dtd->pool);
2754 /* First, determine if a check for an existing declaration is needed;
2755 if yes, check that the entity exists, and that it is internal,
2756 otherwise call the skipped entity or default handler.
2757 */
2758 if (!dtd->hasParamEntityRefs || dtd->standalone) {
2759 if (!entity)
2760 return XML_ERROR_UNDEFINED_ENTITY;
2761 else if (!entity->is_internal)
2762 return XML_ERROR_ENTITY_DECLARED_IN_PE;
2763 }
2764 else if (!entity) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002765 if (parser->m_skippedEntityHandler)
2766 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
2767 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002768 reportDefault(parser, enc, s, next);
2769 break;
2770 }
2771 if (entity->open)
2772 return XML_ERROR_RECURSIVE_ENTITY_REF;
2773 if (entity->notation)
2774 return XML_ERROR_BINARY_ENTITY_REF;
2775 if (entity->textPtr) {
2776 enum XML_Error result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002777 if (!parser->m_defaultExpandInternalEntities) {
2778 if (parser->m_skippedEntityHandler)
2779 parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name, 0);
2780 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002781 reportDefault(parser, enc, s, next);
2782 break;
2783 }
Fred Drake31d485c2004-08-03 07:06:22 +00002784 result = processInternalEntity(parser, entity, XML_FALSE);
2785 if (result != XML_ERROR_NONE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002786 return result;
2787 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002788 else if (parser->m_externalEntityRefHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002789 const XML_Char *context;
2790 entity->open = XML_TRUE;
2791 context = getContext(parser);
2792 entity->open = XML_FALSE;
2793 if (!context)
2794 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002795 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002796 context,
2797 entity->base,
2798 entity->systemId,
2799 entity->publicId))
2800 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002801 poolDiscard(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002802 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002803 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002804 reportDefault(parser, enc, s, next);
2805 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002806 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002807 case XML_TOK_START_TAG_NO_ATTS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002808 /* fall through */
2809 case XML_TOK_START_TAG_WITH_ATTS:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002810 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002811 TAG *tag;
2812 enum XML_Error result;
2813 XML_Char *toPtr;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002814 if (parser->m_freeTagList) {
2815 tag = parser->m_freeTagList;
2816 parser->m_freeTagList = parser->m_freeTagList->parent;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002817 }
2818 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002819 tag = (TAG *)MALLOC(parser, sizeof(TAG));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002820 if (!tag)
2821 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002822 tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002823 if (!tag->buf) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002824 FREE(parser, tag);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002825 return XML_ERROR_NO_MEMORY;
2826 }
2827 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
2828 }
2829 tag->bindings = NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002830 tag->parent = parser->m_tagStack;
2831 parser->m_tagStack = tag;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002832 tag->name.localPart = NULL;
2833 tag->name.prefix = NULL;
2834 tag->rawName = s + enc->minBytesPerChar;
2835 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002836 ++parser->m_tagLevel;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002837 {
2838 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
2839 const char *fromPtr = tag->rawName;
2840 toPtr = (XML_Char *)tag->buf;
2841 for (;;) {
2842 int bufSize;
2843 int convLen;
Victor Stinner23ec4b52017-06-15 00:54:36 +02002844 const enum XML_Convert_Result convert_res = XmlConvert(enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002845 &fromPtr, rawNameEnd,
2846 (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002847 convLen = (int)(toPtr - (XML_Char *)tag->buf);
Victor Stinner5ff71322017-06-21 14:39:22 +02002848 if ((fromPtr >= rawNameEnd) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002849 tag->name.strLen = convLen;
2850 break;
2851 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002852 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002853 {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002854 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002855 if (temp == NULL)
2856 return XML_ERROR_NO_MEMORY;
2857 tag->buf = temp;
2858 tag->bufEnd = temp + bufSize;
2859 toPtr = (XML_Char *)temp + convLen;
2860 }
2861 }
2862 }
2863 tag->name.str = (XML_Char *)tag->buf;
2864 *toPtr = XML_T('\0');
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002865 result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
2866 if (result)
2867 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002868 if (parser->m_startElementHandler)
2869 parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
2870 (const XML_Char **)parser->m_atts);
2871 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002872 reportDefault(parser, enc, s, next);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002873 poolClear(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002874 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002875 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002876 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002877 /* fall through */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002878 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
2879 {
2880 const char *rawName = s + enc->minBytesPerChar;
2881 enum XML_Error result;
2882 BINDING *bindings = NULL;
2883 XML_Bool noElmHandlers = XML_TRUE;
2884 TAG_NAME name;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002885 name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002886 rawName + XmlNameLength(enc, rawName));
2887 if (!name.str)
2888 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002889 poolFinish(&parser->m_tempPool);
Fred Drake4faea012003-01-28 06:42:40 +00002890 result = storeAtts(parser, enc, s, &name, &bindings);
Victor Stinner5ff71322017-06-21 14:39:22 +02002891 if (result != XML_ERROR_NONE) {
2892 freeBindings(parser, bindings);
Fred Drake4faea012003-01-28 06:42:40 +00002893 return result;
Victor Stinner5ff71322017-06-21 14:39:22 +02002894 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002895 poolFinish(&parser->m_tempPool);
2896 if (parser->m_startElementHandler) {
2897 parser->m_startElementHandler(parser->m_handlerArg, name.str, (const XML_Char **)parser->m_atts);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002898 noElmHandlers = XML_FALSE;
2899 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002900 if (parser->m_endElementHandler) {
2901 if (parser->m_startElementHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002902 *eventPP = *eventEndPP;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002903 parser->m_endElementHandler(parser->m_handlerArg, name.str);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002904 noElmHandlers = XML_FALSE;
2905 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002906 if (noElmHandlers && parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002907 reportDefault(parser, enc, s, next);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002908 poolClear(&parser->m_tempPool);
Victor Stinner5ff71322017-06-21 14:39:22 +02002909 freeBindings(parser, bindings);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002910 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002911 if ((parser->m_tagLevel == 0) &&
2912 !((parser->m_parsingStatus.parsing == XML_FINISHED) || (parser->m_parsingStatus.parsing == XML_SUSPENDED))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002913 return epilogProcessor(parser, next, end, nextPtr);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002914 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002915 break;
2916 case XML_TOK_END_TAG:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002917 if (parser->m_tagLevel == startTagLevel)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002918 return XML_ERROR_ASYNC_ENTITY;
2919 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002920 int len;
2921 const char *rawName;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002922 TAG *tag = parser->m_tagStack;
2923 parser->m_tagStack = tag->parent;
2924 tag->parent = parser->m_freeTagList;
2925 parser->m_freeTagList = tag;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002926 rawName = s + enc->minBytesPerChar*2;
2927 len = XmlNameLength(enc, rawName);
2928 if (len != tag->rawNameLength
2929 || memcmp(tag->rawName, rawName, len) != 0) {
2930 *eventPP = rawName;
2931 return XML_ERROR_TAG_MISMATCH;
2932 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002933 --parser->m_tagLevel;
2934 if (parser->m_endElementHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002935 const XML_Char *localPart;
2936 const XML_Char *prefix;
2937 XML_Char *uri;
2938 localPart = tag->name.localPart;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002939 if (parser->m_ns && localPart) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002940 /* localPart and prefix may have been overwritten in
2941 tag->name.str, since this points to the binding->uri
2942 buffer which gets re-used; so we have to add them again
2943 */
2944 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
2945 /* don't need to check for space - already done in storeAtts() */
2946 while (*localPart) *uri++ = *localPart++;
2947 prefix = (XML_Char *)tag->name.prefix;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002948 if (parser->m_ns_triplets && prefix) {
2949 *uri++ = parser->m_namespaceSeparator;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002950 while (*prefix) *uri++ = *prefix++;
2951 }
2952 *uri = XML_T('\0');
2953 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002954 parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002955 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002956 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002957 reportDefault(parser, enc, s, next);
2958 while (tag->bindings) {
2959 BINDING *b = tag->bindings;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002960 if (parser->m_endNamespaceDeclHandler)
2961 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002962 tag->bindings = tag->bindings->nextTagBinding;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002963 b->nextTagBinding = parser->m_freeBindingList;
2964 parser->m_freeBindingList = b;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002965 b->prefix->binding = b->prevPrefixBinding;
2966 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002967 if (parser->m_tagLevel == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002968 return epilogProcessor(parser, next, end, nextPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002969 }
2970 break;
2971 case XML_TOK_CHAR_REF:
2972 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002973 int n = XmlCharRefNumber(enc, s);
2974 if (n < 0)
2975 return XML_ERROR_BAD_CHAR_REF;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002976 if (parser->m_characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002977 XML_Char buf[XML_ENCODE_MAX];
Benjamin Peterson4e211002018-06-26 19:25:45 -07002978 parser->m_characterDataHandler(parser->m_handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002979 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002980 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002981 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002982 }
2983 break;
2984 case XML_TOK_XML_DECL:
2985 return XML_ERROR_MISPLACED_XML_PI;
2986 case XML_TOK_DATA_NEWLINE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002987 if (parser->m_characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002988 XML_Char c = 0xA;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002989 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002990 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002991 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002992 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002993 break;
2994 case XML_TOK_CDATA_SECT_OPEN:
2995 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002996 enum XML_Error result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002997 if (parser->m_startCdataSectionHandler)
2998 parser->m_startCdataSectionHandler(parser->m_handlerArg);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002999#if 0
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003000 /* Suppose you doing a transformation on a document that involves
3001 changing only the character data. You set up a defaultHandler
3002 and a characterDataHandler. The defaultHandler simply copies
3003 characters through. The characterDataHandler does the
3004 transformation and writes the characters out escaping them as
3005 necessary. This case will fail to work if we leave out the
3006 following two lines (because & and < inside CDATA sections will
3007 be incorrectly escaped).
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003008
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003009 However, now we have a start/endCdataSectionHandler, so it seems
3010 easier to let the user deal with this.
3011 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003012 else if (parser->m_characterDataHandler)
3013 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003014#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07003015 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003016 reportDefault(parser, enc, s, next);
Fred Drake31d485c2004-08-03 07:06:22 +00003017 result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore);
3018 if (result != XML_ERROR_NONE)
3019 return result;
3020 else if (!next) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003021 parser->m_processor = cdataSectionProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003022 return result;
3023 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003024 }
3025 break;
3026 case XML_TOK_TRAILING_RSQB:
Fred Drake31d485c2004-08-03 07:06:22 +00003027 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003028 *nextPtr = s;
3029 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003030 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003031 if (parser->m_characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003032 if (MUST_CONVERT(enc, s)) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003033 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3034 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3035 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3036 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003037 }
3038 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07003039 parser->m_characterDataHandler(parser->m_handlerArg,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003040 (XML_Char *)s,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003041 (int)((XML_Char *)end - (XML_Char *)s));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003042 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003043 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003044 reportDefault(parser, enc, s, end);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003045 /* We are at the end of the final buffer, should we check for
3046 XML_SUSPENDED, XML_FINISHED?
Fred Drake31d485c2004-08-03 07:06:22 +00003047 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003048 if (startTagLevel == 0) {
3049 *eventPP = end;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003050 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003051 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003052 if (parser->m_tagLevel != startTagLevel) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003053 *eventPP = end;
3054 return XML_ERROR_ASYNC_ENTITY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003055 }
Fred Drake31d485c2004-08-03 07:06:22 +00003056 *nextPtr = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003057 return XML_ERROR_NONE;
3058 case XML_TOK_DATA_CHARS:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003059 {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003060 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003061 if (charDataHandler) {
3062 if (MUST_CONVERT(enc, s)) {
3063 for (;;) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003064 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3065 const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003066 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003067 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3068 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
Victor Stinner23ec4b52017-06-15 00:54:36 +02003069 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003070 break;
3071 *eventPP = s;
3072 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003073 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003074 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07003075 charDataHandler(parser->m_handlerArg,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003076 (XML_Char *)s,
3077 (int)((XML_Char *)next - (XML_Char *)s));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003078 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003079 else if (parser->m_defaultHandler)
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003080 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003081 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003082 break;
3083 case XML_TOK_PI:
3084 if (!reportProcessingInstruction(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003085 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003086 break;
3087 case XML_TOK_COMMENT:
3088 if (!reportComment(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003089 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003090 break;
3091 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02003092 /* All of the tokens produced by XmlContentTok() have their own
3093 * explicit cases, so this default is not strictly necessary.
3094 * However it is a useful safety net, so we retain the code and
3095 * simply exclude it from the coverage tests.
3096 *
3097 * LCOV_EXCL_START
3098 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003099 if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003100 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003101 break;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003102 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003103 }
3104 *eventPP = s = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003105 switch (parser->m_parsingStatus.parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003106 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00003107 *nextPtr = next;
3108 return XML_ERROR_NONE;
3109 case XML_FINISHED:
3110 return XML_ERROR_ABORTED;
3111 default: ;
3112 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003113 }
3114 /* not reached */
3115}
3116
Victor Stinner5ff71322017-06-21 14:39:22 +02003117/* This function does not call free() on the allocated memory, merely
Benjamin Peterson4e211002018-06-26 19:25:45 -07003118 * moving it to the parser's m_freeBindingList where it can be freed or
Victor Stinner5ff71322017-06-21 14:39:22 +02003119 * reused as appropriate.
3120 */
3121static void
3122freeBindings(XML_Parser parser, BINDING *bindings)
3123{
3124 while (bindings) {
3125 BINDING *b = bindings;
3126
Benjamin Peterson4e211002018-06-26 19:25:45 -07003127 /* m_startNamespaceDeclHandler will have been called for this
Victor Stinner5ff71322017-06-21 14:39:22 +02003128 * binding in addBindings(), so call the end handler now.
3129 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003130 if (parser->m_endNamespaceDeclHandler)
3131 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
Victor Stinner5ff71322017-06-21 14:39:22 +02003132
3133 bindings = bindings->nextTagBinding;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003134 b->nextTagBinding = parser->m_freeBindingList;
3135 parser->m_freeBindingList = b;
Victor Stinner5ff71322017-06-21 14:39:22 +02003136 b->prefix->binding = b->prevPrefixBinding;
3137 }
3138}
3139
Fred Drake4faea012003-01-28 06:42:40 +00003140/* Precondition: all arguments must be non-NULL;
3141 Purpose:
3142 - normalize attributes
3143 - check attributes for well-formedness
3144 - generate namespace aware attribute names (URI, prefix)
3145 - build list of attributes for startElementHandler
3146 - default attributes
3147 - process namespace declarations (check and report them)
3148 - generate namespace aware element name (URI, prefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003149*/
3150static enum XML_Error
3151storeAtts(XML_Parser parser, const ENCODING *enc,
3152 const char *attStr, TAG_NAME *tagNamePtr,
3153 BINDING **bindingsPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003154{
Benjamin Peterson4e211002018-06-26 19:25:45 -07003155 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Fred Drake08317ae2003-10-21 15:38:55 +00003156 ELEMENT_TYPE *elementType;
3157 int nDefaultAtts;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003158 const XML_Char **appAtts; /* the attribute list for the application */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003159 int attIndex = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003160 int prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003161 int i;
3162 int n;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003163 XML_Char *uri;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003164 int nPrefixes = 0;
3165 BINDING *binding;
3166 const XML_Char *localPart;
3167
3168 /* lookup the element type name */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003169 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str,0);
Fred Drake4faea012003-01-28 06:42:40 +00003170 if (!elementType) {
3171 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3172 if (!name)
3173 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003174 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
Fred Drake4faea012003-01-28 06:42:40 +00003175 sizeof(ELEMENT_TYPE));
3176 if (!elementType)
3177 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003178 if (parser->m_ns && !setElementTypePrefix(parser, elementType))
Fred Drake4faea012003-01-28 06:42:40 +00003179 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003180 }
Fred Drake4faea012003-01-28 06:42:40 +00003181 nDefaultAtts = elementType->nDefaultAtts;
3182
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003183 /* get the attributes from the tokenizer */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003184 n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3185 if (n + nDefaultAtts > parser->m_attsSize) {
3186 int oldAttsSize = parser->m_attsSize;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003187 ATTRIBUTE *temp;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003188#ifdef XML_ATTR_INFO
3189 XML_AttrInfo *temp2;
3190#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07003191 parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3192 temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts, parser->m_attsSize * sizeof(ATTRIBUTE));
Victor Stinner93d0cb52017-08-18 23:43:54 +02003193 if (temp == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003194 parser->m_attsSize = oldAttsSize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003195 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003196 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003197 parser->m_atts = temp;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003198#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -07003199 temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo, parser->m_attsSize * sizeof(XML_AttrInfo));
Victor Stinner93d0cb52017-08-18 23:43:54 +02003200 if (temp2 == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003201 parser->m_attsSize = oldAttsSize;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003202 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003203 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003204 parser->m_attInfo = temp2;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003205#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003206 if (n > oldAttsSize)
Benjamin Peterson4e211002018-06-26 19:25:45 -07003207 XmlGetAttributes(enc, attStr, n, parser->m_atts);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003208 }
Fred Drake4faea012003-01-28 06:42:40 +00003209
Benjamin Peterson4e211002018-06-26 19:25:45 -07003210 appAtts = (const XML_Char **)parser->m_atts;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003211 for (i = 0; i < n; i++) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003212 ATTRIBUTE *currAtt = &parser->m_atts[i];
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003213#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -07003214 XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003215#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003216 /* add the name and value to the attribute list */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003217 ATTRIBUTE_ID *attId = getAttributeId(parser, enc, currAtt->name,
3218 currAtt->name
3219 + XmlNameLength(enc, currAtt->name));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003220 if (!attId)
3221 return XML_ERROR_NO_MEMORY;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003222#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -07003223 currAttInfo->nameStart = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003224 currAttInfo->nameEnd = currAttInfo->nameStart +
3225 XmlNameLength(enc, currAtt->name);
Benjamin Peterson4e211002018-06-26 19:25:45 -07003226 currAttInfo->valueStart = parser->m_parseEndByteIndex -
3227 (parser->m_parseEndPtr - currAtt->valuePtr);
3228 currAttInfo->valueEnd = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->valueEnd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003229#endif
Fred Drake08317ae2003-10-21 15:38:55 +00003230 /* Detect duplicate attributes by their QNames. This does not work when
3231 namespace processing is turned on and different prefixes for the same
3232 namespace are used. For this case we have a check further down.
3233 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003234 if ((attId->name)[-1]) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003235 if (enc == parser->m_encoding)
3236 parser->m_eventPtr = parser->m_atts[i].name;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003237 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3238 }
3239 (attId->name)[-1] = 1;
3240 appAtts[attIndex++] = attId->name;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003241 if (!parser->m_atts[i].normalized) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003242 enum XML_Error result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003243 XML_Bool isCdata = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003244
3245 /* figure out whether declared as other than CDATA */
3246 if (attId->maybeTokenized) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003247 int j;
3248 for (j = 0; j < nDefaultAtts; j++) {
3249 if (attId == elementType->defaultAtts[j].id) {
3250 isCdata = elementType->defaultAtts[j].isCdata;
3251 break;
3252 }
3253 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003254 }
3255
3256 /* normalize the attribute value */
3257 result = storeAttributeValue(parser, enc, isCdata,
Benjamin Peterson4e211002018-06-26 19:25:45 -07003258 parser->m_atts[i].valuePtr, parser->m_atts[i].valueEnd,
3259 &parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003260 if (result)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003261 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003262 appAtts[attIndex] = poolStart(&parser->m_tempPool);
3263 poolFinish(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003264 }
Fred Drake4faea012003-01-28 06:42:40 +00003265 else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003266 /* the value did not need normalizing */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003267 appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc, parser->m_atts[i].valuePtr,
3268 parser->m_atts[i].valueEnd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003269 if (appAtts[attIndex] == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003270 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003271 poolFinish(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003272 }
3273 /* handle prefixed attribute names */
Fred Drake4faea012003-01-28 06:42:40 +00003274 if (attId->prefix) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003275 if (attId->xmlns) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003276 /* deal with namespace declarations here */
3277 enum XML_Error result = addBinding(parser, attId->prefix, attId,
3278 appAtts[attIndex], bindingsPtr);
3279 if (result)
3280 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003281 --attIndex;
3282 }
3283 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003284 /* deal with other prefixed names later */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003285 attIndex++;
3286 nPrefixes++;
3287 (attId->name)[-1] = 2;
3288 }
3289 }
3290 else
3291 attIndex++;
3292 }
Fred Drake4faea012003-01-28 06:42:40 +00003293
3294 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003295 parser->m_nSpecifiedAtts = attIndex;
Fred Drake4faea012003-01-28 06:42:40 +00003296 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3297 for (i = 0; i < attIndex; i += 2)
3298 if (appAtts[i] == elementType->idAtt->name) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003299 parser->m_idAttIndex = i;
Fred Drake4faea012003-01-28 06:42:40 +00003300 break;
3301 }
3302 }
3303 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07003304 parser->m_idAttIndex = -1;
Fred Drake4faea012003-01-28 06:42:40 +00003305
3306 /* do attribute defaulting */
3307 for (i = 0; i < nDefaultAtts; i++) {
3308 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3309 if (!(da->id->name)[-1] && da->value) {
3310 if (da->id->prefix) {
3311 if (da->id->xmlns) {
3312 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3313 da->value, bindingsPtr);
3314 if (result)
3315 return result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003316 }
3317 else {
Fred Drake4faea012003-01-28 06:42:40 +00003318 (da->id->name)[-1] = 2;
3319 nPrefixes++;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003320 appAtts[attIndex++] = da->id->name;
3321 appAtts[attIndex++] = da->value;
3322 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003323 }
Fred Drake4faea012003-01-28 06:42:40 +00003324 else {
3325 (da->id->name)[-1] = 1;
3326 appAtts[attIndex++] = da->id->name;
3327 appAtts[attIndex++] = da->value;
3328 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003329 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003330 }
Fred Drake4faea012003-01-28 06:42:40 +00003331 appAtts[attIndex] = 0;
3332
Fred Drake08317ae2003-10-21 15:38:55 +00003333 /* expand prefixed attribute names, check for duplicates,
3334 and clear flags that say whether attributes were specified */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003335 i = 0;
3336 if (nPrefixes) {
Fred Drake08317ae2003-10-21 15:38:55 +00003337 int j; /* hash table index */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003338 unsigned long version = parser->m_nsAttsVersion;
3339 int nsAttsSize = (int)1 << parser->m_nsAttsPower;
3340 unsigned char oldNsAttsPower = parser->m_nsAttsPower;
Fred Drake08317ae2003-10-21 15:38:55 +00003341 /* size of hash table must be at least 2 * (# of prefixed attributes) */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003342 if ((nPrefixes << 1) >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
Fred Drake08317ae2003-10-21 15:38:55 +00003343 NS_ATT *temp;
3344 /* hash table size must also be a power of 2 and >= 8 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003345 while (nPrefixes >> parser->m_nsAttsPower++);
3346 if (parser->m_nsAttsPower < 3)
3347 parser->m_nsAttsPower = 3;
3348 nsAttsSize = (int)1 << parser->m_nsAttsPower;
3349 temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts, nsAttsSize * sizeof(NS_ATT));
Victor Stinner93d0cb52017-08-18 23:43:54 +02003350 if (!temp) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003351 /* Restore actual size of memory in m_nsAtts */
3352 parser->m_nsAttsPower = oldNsAttsPower;
Fred Drake08317ae2003-10-21 15:38:55 +00003353 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003354 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003355 parser->m_nsAtts = temp;
3356 version = 0; /* force re-initialization of m_nsAtts hash table */
Fred Drake08317ae2003-10-21 15:38:55 +00003357 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003358 /* using a version flag saves us from initializing m_nsAtts every time */
Fred Drake08317ae2003-10-21 15:38:55 +00003359 if (!version) { /* initialize version flags when version wraps around */
3360 version = INIT_ATTS_VERSION;
3361 for (j = nsAttsSize; j != 0; )
Benjamin Peterson4e211002018-06-26 19:25:45 -07003362 parser->m_nsAtts[--j].version = version;
Fred Drake08317ae2003-10-21 15:38:55 +00003363 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003364 parser->m_nsAttsVersion = --version;
Fred Drake08317ae2003-10-21 15:38:55 +00003365
3366 /* expand prefixed names and check for duplicates */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003367 for (; i < attIndex; i += 2) {
Fred Drake08317ae2003-10-21 15:38:55 +00003368 const XML_Char *s = appAtts[i];
3369 if (s[-1] == 2) { /* prefixed */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003370 ATTRIBUTE_ID *id;
Fred Drake08317ae2003-10-21 15:38:55 +00003371 const BINDING *b;
Victor Stinner5ff71322017-06-21 14:39:22 +02003372 unsigned long uriHash;
3373 struct siphash sip_state;
3374 struct sipkey sip_key;
3375
3376 copy_salt_to_sipkey(parser, &sip_key);
3377 sip24_init(&sip_state, &sip_key);
3378
Fred Drake08317ae2003-10-21 15:38:55 +00003379 ((XML_Char *)s)[-1] = 0; /* clear flag */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003380 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
Victor Stinner93d0cb52017-08-18 23:43:54 +02003381 if (!id || !id->prefix) {
3382 /* This code is walking through the appAtts array, dealing
3383 * with (in this case) a prefixed attribute name. To be in
3384 * the array, the attribute must have already been bound, so
3385 * has to have passed through the hash table lookup once
3386 * already. That implies that an entry for it already
3387 * exists, so the lookup above will return a pointer to
3388 * already allocated memory. There is no opportunaity for
3389 * the allocator to fail, so the condition above cannot be
3390 * fulfilled.
3391 *
3392 * Since it is difficult to be certain that the above
3393 * analysis is complete, we retain the test and merely
3394 * remove the code from coverage tests.
3395 */
3396 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3397 }
Fred Drake08317ae2003-10-21 15:38:55 +00003398 b = id->prefix->binding;
3399 if (!b)
3400 return XML_ERROR_UNBOUND_PREFIX;
3401
Fred Drake08317ae2003-10-21 15:38:55 +00003402 for (j = 0; j < b->uriLen; j++) {
3403 const XML_Char c = b->uri[j];
Benjamin Peterson4e211002018-06-26 19:25:45 -07003404 if (!poolAppendChar(&parser->m_tempPool, c))
Fred Drake08317ae2003-10-21 15:38:55 +00003405 return XML_ERROR_NO_MEMORY;
Fred Drake08317ae2003-10-21 15:38:55 +00003406 }
Victor Stinner5ff71322017-06-21 14:39:22 +02003407
3408 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3409
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003410 while (*s++ != XML_T(ASCII_COLON))
Fred Drake08317ae2003-10-21 15:38:55 +00003411 ;
Victor Stinner5ff71322017-06-21 14:39:22 +02003412
3413 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3414
Fred Drake08317ae2003-10-21 15:38:55 +00003415 do { /* copies null terminator */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003416 if (!poolAppendChar(&parser->m_tempPool, *s))
Fred Drake08317ae2003-10-21 15:38:55 +00003417 return XML_ERROR_NO_MEMORY;
Fred Drake08317ae2003-10-21 15:38:55 +00003418 } while (*s++);
3419
Victor Stinner5ff71322017-06-21 14:39:22 +02003420 uriHash = (unsigned long)sip24_final(&sip_state);
3421
Fred Drake08317ae2003-10-21 15:38:55 +00003422 { /* Check hash table for duplicate of expanded name (uriName).
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003423 Derived from code in lookup(parser, HASH_TABLE *table, ...).
Fred Drake08317ae2003-10-21 15:38:55 +00003424 */
3425 unsigned char step = 0;
3426 unsigned long mask = nsAttsSize - 1;
3427 j = uriHash & mask; /* index into hash table */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003428 while (parser->m_nsAtts[j].version == version) {
Fred Drake08317ae2003-10-21 15:38:55 +00003429 /* for speed we compare stored hash values first */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003430 if (uriHash == parser->m_nsAtts[j].hash) {
3431 const XML_Char *s1 = poolStart(&parser->m_tempPool);
3432 const XML_Char *s2 = parser->m_nsAtts[j].uriName;
Fred Drake08317ae2003-10-21 15:38:55 +00003433 /* s1 is null terminated, but not s2 */
3434 for (; *s1 == *s2 && *s1 != 0; s1++, s2++);
3435 if (*s1 == 0)
3436 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3437 }
3438 if (!step)
Benjamin Peterson4e211002018-06-26 19:25:45 -07003439 step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003440 j < step ? (j += nsAttsSize - step) : (j -= step);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003441 }
Fred Drake08317ae2003-10-21 15:38:55 +00003442 }
3443
Benjamin Peterson4e211002018-06-26 19:25:45 -07003444 if (parser->m_ns_triplets) { /* append namespace separator and prefix */
3445 parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
Fred Drake08317ae2003-10-21 15:38:55 +00003446 s = b->prefix->name;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003447 do {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003448 if (!poolAppendChar(&parser->m_tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003449 return XML_ERROR_NO_MEMORY;
3450 } while (*s++);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003451 }
Fred Drake08317ae2003-10-21 15:38:55 +00003452
3453 /* store expanded name in attribute list */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003454 s = poolStart(&parser->m_tempPool);
3455 poolFinish(&parser->m_tempPool);
Fred Drake08317ae2003-10-21 15:38:55 +00003456 appAtts[i] = s;
3457
3458 /* fill empty slot with new version, uriName and hash value */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003459 parser->m_nsAtts[j].version = version;
3460 parser->m_nsAtts[j].hash = uriHash;
3461 parser->m_nsAtts[j].uriName = s;
Fred Drake08317ae2003-10-21 15:38:55 +00003462
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003463 if (!--nPrefixes) {
3464 i += 2;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003465 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003466 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003467 }
Fred Drake08317ae2003-10-21 15:38:55 +00003468 else /* not prefixed */
3469 ((XML_Char *)s)[-1] = 0; /* clear flag */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003470 }
3471 }
Fred Drake08317ae2003-10-21 15:38:55 +00003472 /* clear flags for the remaining attributes */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003473 for (; i < attIndex; i += 2)
3474 ((XML_Char *)(appAtts[i]))[-1] = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003475 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3476 binding->attId->name[-1] = 0;
Fred Drake4faea012003-01-28 06:42:40 +00003477
Benjamin Peterson4e211002018-06-26 19:25:45 -07003478 if (!parser->m_ns)
Fred Drake08317ae2003-10-21 15:38:55 +00003479 return XML_ERROR_NONE;
3480
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003481 /* expand the element type name */
3482 if (elementType->prefix) {
3483 binding = elementType->prefix->binding;
3484 if (!binding)
Fred Drake08317ae2003-10-21 15:38:55 +00003485 return XML_ERROR_UNBOUND_PREFIX;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003486 localPart = tagNamePtr->str;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003487 while (*localPart++ != XML_T(ASCII_COLON))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003488 ;
3489 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003490 else if (dtd->defaultPrefix.binding) {
3491 binding = dtd->defaultPrefix.binding;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003492 localPart = tagNamePtr->str;
3493 }
3494 else
3495 return XML_ERROR_NONE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003496 prefixLen = 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003497 if (parser->m_ns_triplets && binding->prefix->name) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003498 for (; binding->prefix->name[prefixLen++];)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003499 ; /* prefixLen includes null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003500 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003501 tagNamePtr->localPart = localPart;
3502 tagNamePtr->uriLen = binding->uriLen;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003503 tagNamePtr->prefix = binding->prefix->name;
3504 tagNamePtr->prefixLen = prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003505 for (i = 0; localPart[i++];)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003506 ; /* i includes null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003507 n = i + binding->uriLen + prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003508 if (n > binding->uriAlloc) {
3509 TAG *p;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003510 uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003511 if (!uri)
3512 return XML_ERROR_NO_MEMORY;
3513 binding->uriAlloc = n + EXPAND_SPARE;
3514 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
Benjamin Peterson4e211002018-06-26 19:25:45 -07003515 for (p = parser->m_tagStack; p; p = p->parent)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003516 if (p->name.str == binding->uri)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003517 p->name.str = uri;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003518 FREE(parser, binding->uri);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003519 binding->uri = uri;
3520 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003521 /* if m_namespaceSeparator != '\0' then uri includes it already */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003522 uri = binding->uri + binding->uriLen;
3523 memcpy(uri, localPart, i * sizeof(XML_Char));
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003524 /* we always have a namespace separator between localPart and prefix */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003525 if (prefixLen) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003526 uri += i - 1;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003527 *uri = parser->m_namespaceSeparator; /* replace null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003528 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3529 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003530 tagNamePtr->str = binding->uri;
3531 return XML_ERROR_NONE;
3532}
3533
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003534/* addBinding() overwrites the value of prefix->binding without checking.
3535 Therefore one must keep track of the old value outside of addBinding().
3536*/
3537static enum XML_Error
3538addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3539 const XML_Char *uri, BINDING **bindingsPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003540{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003541 static const XML_Char xmlNamespace[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003542 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3543 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3544 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
3545 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, ASCII_SLASH,
3546 ASCII_n, ASCII_a, ASCII_m, ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3547 ASCII_e, '\0'
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003548 };
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003549 static const int xmlLen =
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003550 (int)sizeof(xmlNamespace)/sizeof(XML_Char) - 1;
3551 static const XML_Char xmlnsNamespace[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003552 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3553 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3554 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_2, ASCII_0, ASCII_0,
3555 ASCII_0, ASCII_SLASH, ASCII_x, ASCII_m, ASCII_l, ASCII_n, ASCII_s,
3556 ASCII_SLASH, '\0'
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003557 };
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003558 static const int xmlnsLen =
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003559 (int)sizeof(xmlnsNamespace)/sizeof(XML_Char) - 1;
3560
3561 XML_Bool mustBeXML = XML_FALSE;
3562 XML_Bool isXML = XML_TRUE;
3563 XML_Bool isXMLNS = XML_TRUE;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003564
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003565 BINDING *b;
3566 int len;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003567
Fred Drake31d485c2004-08-03 07:06:22 +00003568 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003569 if (*uri == XML_T('\0') && prefix->name)
Fred Drake31d485c2004-08-03 07:06:22 +00003570 return XML_ERROR_UNDECLARING_PREFIX;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003571
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003572 if (prefix->name
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003573 && prefix->name[0] == XML_T(ASCII_x)
3574 && prefix->name[1] == XML_T(ASCII_m)
3575 && prefix->name[2] == XML_T(ASCII_l)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003576
3577 /* Not allowed to bind xmlns */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003578 if (prefix->name[3] == XML_T(ASCII_n)
3579 && prefix->name[4] == XML_T(ASCII_s)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003580 && prefix->name[5] == XML_T('\0'))
3581 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3582
3583 if (prefix->name[3] == XML_T('\0'))
3584 mustBeXML = XML_TRUE;
3585 }
3586
3587 for (len = 0; uri[len]; len++) {
3588 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3589 isXML = XML_FALSE;
3590
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003591 if (!mustBeXML && isXMLNS
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003592 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3593 isXMLNS = XML_FALSE;
3594 }
3595 isXML = isXML && len == xmlLen;
3596 isXMLNS = isXMLNS && len == xmlnsLen;
3597
3598 if (mustBeXML != isXML)
3599 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3600 : XML_ERROR_RESERVED_NAMESPACE_URI;
3601
3602 if (isXMLNS)
3603 return XML_ERROR_RESERVED_NAMESPACE_URI;
3604
Benjamin Peterson4e211002018-06-26 19:25:45 -07003605 if (parser->m_namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003606 len++;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003607 if (parser->m_freeBindingList) {
3608 b = parser->m_freeBindingList;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003609 if (len > b->uriAlloc) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003610 XML_Char *temp = (XML_Char *)REALLOC(parser, b->uri,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003611 sizeof(XML_Char) * (len + EXPAND_SPARE));
3612 if (temp == NULL)
3613 return XML_ERROR_NO_MEMORY;
3614 b->uri = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003615 b->uriAlloc = len + EXPAND_SPARE;
3616 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003617 parser->m_freeBindingList = b->nextTagBinding;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003618 }
3619 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003620 b = (BINDING *)MALLOC(parser, sizeof(BINDING));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003621 if (!b)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003622 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003623 b->uri = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003624 if (!b->uri) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003625 FREE(parser, b);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003626 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003627 }
3628 b->uriAlloc = len + EXPAND_SPARE;
3629 }
3630 b->uriLen = len;
3631 memcpy(b->uri, uri, len * sizeof(XML_Char));
Benjamin Peterson4e211002018-06-26 19:25:45 -07003632 if (parser->m_namespaceSeparator)
3633 b->uri[len - 1] = parser->m_namespaceSeparator;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003634 b->prefix = prefix;
3635 b->attId = attId;
3636 b->prevPrefixBinding = prefix->binding;
Fred Drake08317ae2003-10-21 15:38:55 +00003637 /* NULL binding when default namespace undeclared */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003638 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003639 prefix->binding = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003640 else
3641 prefix->binding = b;
3642 b->nextTagBinding = *bindingsPtr;
3643 *bindingsPtr = b;
Fred Drake31d485c2004-08-03 07:06:22 +00003644 /* if attId == NULL then we are not starting a namespace scope */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003645 if (attId && parser->m_startNamespaceDeclHandler)
3646 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003647 prefix->binding ? uri : 0);
3648 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003649}
3650
3651/* The idea here is to avoid using stack for each CDATA section when
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003652 the whole file is parsed with one call.
3653*/
3654static enum XML_Error PTRCALL
3655cdataSectionProcessor(XML_Parser parser,
3656 const char *start,
3657 const char *end,
3658 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003659{
Benjamin Peterson4e211002018-06-26 19:25:45 -07003660 enum XML_Error result = doCdataSection(parser, parser->m_encoding, &start, end,
3661 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00003662 if (result != XML_ERROR_NONE)
3663 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003664 if (start) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003665 if (parser->m_parentParser) { /* we are parsing an external entity */
3666 parser->m_processor = externalEntityContentProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003667 return externalEntityContentProcessor(parser, start, end, endPtr);
3668 }
3669 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003670 parser->m_processor = contentProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003671 return contentProcessor(parser, start, end, endPtr);
3672 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003673 }
3674 return result;
3675}
3676
Fred Drake31d485c2004-08-03 07:06:22 +00003677/* startPtr gets set to non-null if the section is closed, and to null if
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003678 the section is not yet closed.
3679*/
3680static enum XML_Error
3681doCdataSection(XML_Parser parser,
3682 const ENCODING *enc,
3683 const char **startPtr,
3684 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00003685 const char **nextPtr,
3686 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003687{
3688 const char *s = *startPtr;
3689 const char **eventPP;
3690 const char **eventEndPP;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003691 if (enc == parser->m_encoding) {
3692 eventPP = &parser->m_eventPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003693 *eventPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003694 eventEndPP = &parser->m_eventEndPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003695 }
3696 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003697 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3698 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003699 }
3700 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003701 *startPtr = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00003702
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003703 for (;;) {
3704 const char *next;
3705 int tok = XmlCdataSectionTok(enc, s, end, &next);
3706 *eventEndPP = next;
3707 switch (tok) {
3708 case XML_TOK_CDATA_SECT_CLOSE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07003709 if (parser->m_endCdataSectionHandler)
3710 parser->m_endCdataSectionHandler(parser->m_handlerArg);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003711#if 0
3712 /* see comment under XML_TOK_CDATA_SECT_OPEN */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003713 else if (parser->m_characterDataHandler)
3714 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003715#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07003716 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003717 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003718 *startPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00003719 *nextPtr = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003720 if (parser->m_parsingStatus.parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00003721 return XML_ERROR_ABORTED;
3722 else
3723 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003724 case XML_TOK_DATA_NEWLINE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07003725 if (parser->m_characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003726 XML_Char c = 0xA;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003727 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003728 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003729 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003730 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003731 break;
3732 case XML_TOK_DATA_CHARS:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003733 {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003734 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003735 if (charDataHandler) {
3736 if (MUST_CONVERT(enc, s)) {
3737 for (;;) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003738 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3739 const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003740 *eventEndPP = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003741 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3742 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
Victor Stinner23ec4b52017-06-15 00:54:36 +02003743 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003744 break;
3745 *eventPP = s;
3746 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003747 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003748 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07003749 charDataHandler(parser->m_handlerArg,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003750 (XML_Char *)s,
3751 (int)((XML_Char *)next - (XML_Char *)s));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003752 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003753 else if (parser->m_defaultHandler)
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003754 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003755 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003756 break;
3757 case XML_TOK_INVALID:
3758 *eventPP = next;
3759 return XML_ERROR_INVALID_TOKEN;
3760 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003761 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003762 *nextPtr = s;
3763 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003764 }
3765 return XML_ERROR_PARTIAL_CHAR;
3766 case XML_TOK_PARTIAL:
3767 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00003768 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003769 *nextPtr = s;
3770 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003771 }
3772 return XML_ERROR_UNCLOSED_CDATA_SECTION;
3773 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02003774 /* Every token returned by XmlCdataSectionTok() has its own
3775 * explicit case, so this default case will never be executed.
3776 * We retain it as a safety net and exclude it from the coverage
3777 * statistics.
3778 *
3779 * LCOV_EXCL_START
3780 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003781 *eventPP = next;
3782 return XML_ERROR_UNEXPECTED_STATE;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003783 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003784 }
Fred Drake31d485c2004-08-03 07:06:22 +00003785
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003786 *eventPP = s = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003787 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00003788 case XML_SUSPENDED:
3789 *nextPtr = next;
3790 return XML_ERROR_NONE;
3791 case XML_FINISHED:
3792 return XML_ERROR_ABORTED;
3793 default: ;
3794 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003795 }
3796 /* not reached */
3797}
3798
3799#ifdef XML_DTD
3800
3801/* The idea here is to avoid using stack for each IGNORE section when
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003802 the whole file is parsed with one call.
3803*/
3804static enum XML_Error PTRCALL
3805ignoreSectionProcessor(XML_Parser parser,
3806 const char *start,
3807 const char *end,
3808 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003809{
Benjamin Peterson4e211002018-06-26 19:25:45 -07003810 enum XML_Error result = doIgnoreSection(parser, parser->m_encoding, &start, end,
3811 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00003812 if (result != XML_ERROR_NONE)
3813 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003814 if (start) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003815 parser->m_processor = prologProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003816 return prologProcessor(parser, start, end, endPtr);
3817 }
3818 return result;
3819}
3820
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003821/* startPtr gets set to non-null is the section is closed, and to null
3822 if the section is not yet closed.
3823*/
3824static enum XML_Error
3825doIgnoreSection(XML_Parser parser,
3826 const ENCODING *enc,
3827 const char **startPtr,
3828 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00003829 const char **nextPtr,
3830 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003831{
3832 const char *next;
3833 int tok;
3834 const char *s = *startPtr;
3835 const char **eventPP;
3836 const char **eventEndPP;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003837 if (enc == parser->m_encoding) {
3838 eventPP = &parser->m_eventPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003839 *eventPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003840 eventEndPP = &parser->m_eventEndPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003841 }
3842 else {
Victor Stinner93d0cb52017-08-18 23:43:54 +02003843 /* It's not entirely clear, but it seems the following two lines
3844 * of code cannot be executed. The only occasions on which 'enc'
Benjamin Peterson4e211002018-06-26 19:25:45 -07003845 * is not 'encoding' are when this function is called
Victor Stinner93d0cb52017-08-18 23:43:54 +02003846 * from the internal entity processing, and IGNORE sections are an
3847 * error in internal entities.
3848 *
3849 * Since it really isn't clear that this is true, we keep the code
3850 * and just remove it from our coverage tests.
3851 *
3852 * LCOV_EXCL_START
3853 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003854 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3855 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Victor Stinner93d0cb52017-08-18 23:43:54 +02003856 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003857 }
3858 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003859 *startPtr = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003860 tok = XmlIgnoreSectionTok(enc, s, end, &next);
3861 *eventEndPP = next;
3862 switch (tok) {
3863 case XML_TOK_IGNORE_SECT:
Benjamin Peterson4e211002018-06-26 19:25:45 -07003864 if (parser->m_defaultHandler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003865 reportDefault(parser, enc, s, next);
3866 *startPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00003867 *nextPtr = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003868 if (parser->m_parsingStatus.parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00003869 return XML_ERROR_ABORTED;
3870 else
3871 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003872 case XML_TOK_INVALID:
3873 *eventPP = next;
3874 return XML_ERROR_INVALID_TOKEN;
3875 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003876 if (haveMore) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003877 *nextPtr = s;
3878 return XML_ERROR_NONE;
3879 }
3880 return XML_ERROR_PARTIAL_CHAR;
3881 case XML_TOK_PARTIAL:
3882 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00003883 if (haveMore) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003884 *nextPtr = s;
3885 return XML_ERROR_NONE;
3886 }
3887 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
3888 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02003889 /* All of the tokens that XmlIgnoreSectionTok() returns have
3890 * explicit cases to handle them, so this default case is never
3891 * executed. We keep it as a safety net anyway, and remove it
3892 * from our test coverage statistics.
3893 *
3894 * LCOV_EXCL_START
3895 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003896 *eventPP = next;
3897 return XML_ERROR_UNEXPECTED_STATE;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003898 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003899 }
3900 /* not reached */
3901}
3902
3903#endif /* XML_DTD */
3904
3905static enum XML_Error
3906initializeEncoding(XML_Parser parser)
3907{
3908 const char *s;
3909#ifdef XML_UNICODE
3910 char encodingBuf[128];
Victor Stinner93d0cb52017-08-18 23:43:54 +02003911 /* See comments abount `protoclEncodingName` in parserInit() */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003912 if (!parser->m_protocolEncodingName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003913 s = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003914 else {
3915 int i;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003916 for (i = 0; parser->m_protocolEncodingName[i]; i++) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003917 if (i == sizeof(encodingBuf) - 1
Benjamin Peterson4e211002018-06-26 19:25:45 -07003918 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003919 encodingBuf[0] = '\0';
3920 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003921 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003922 encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003923 }
3924 encodingBuf[i] = '\0';
3925 s = encodingBuf;
3926 }
3927#else
Benjamin Peterson4e211002018-06-26 19:25:45 -07003928 s = parser->m_protocolEncodingName;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003929#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07003930 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(&parser->m_initEncoding, &parser->m_encoding, s))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003931 return XML_ERROR_NONE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003932 return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003933}
3934
3935static enum XML_Error
3936processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003937 const char *s, const char *next)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003938{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003939 const char *encodingName = NULL;
3940 const XML_Char *storedEncName = NULL;
3941 const ENCODING *newEncoding = NULL;
3942 const char *version = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003943 const char *versionend;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003944 const XML_Char *storedversion = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003945 int standalone = -1;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003946 if (!(parser->m_ns
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003947 ? XmlParseXmlDeclNS
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003948 : XmlParseXmlDecl)(isGeneralTextEntity,
Benjamin Peterson4e211002018-06-26 19:25:45 -07003949 parser->m_encoding,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003950 s,
3951 next,
Benjamin Peterson4e211002018-06-26 19:25:45 -07003952 &parser->m_eventPtr,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003953 &version,
3954 &versionend,
3955 &encodingName,
3956 &newEncoding,
Fred Drake31d485c2004-08-03 07:06:22 +00003957 &standalone)) {
3958 if (isGeneralTextEntity)
3959 return XML_ERROR_TEXT_DECL;
3960 else
3961 return XML_ERROR_XML_DECL;
3962 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003963 if (!isGeneralTextEntity && standalone == 1) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003964 parser->m_dtd->standalone = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003965#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07003966 if (parser->m_paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
3967 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003968#endif /* XML_DTD */
3969 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003970 if (parser->m_xmlDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003971 if (encodingName != NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003972 storedEncName = poolStoreString(&parser->m_temp2Pool,
3973 parser->m_encoding,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003974 encodingName,
3975 encodingName
Benjamin Peterson4e211002018-06-26 19:25:45 -07003976 + XmlNameLength(parser->m_encoding, encodingName));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003977 if (!storedEncName)
3978 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003979 poolFinish(&parser->m_temp2Pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003980 }
3981 if (version) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003982 storedversion = poolStoreString(&parser->m_temp2Pool,
3983 parser->m_encoding,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003984 version,
Benjamin Peterson4e211002018-06-26 19:25:45 -07003985 versionend - parser->m_encoding->minBytesPerChar);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003986 if (!storedversion)
3987 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003988 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003989 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName, standalone);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003990 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003991 else if (parser->m_defaultHandler)
3992 reportDefault(parser, parser->m_encoding, s, next);
3993 if (parser->m_protocolEncodingName == NULL) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003994 if (newEncoding) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02003995 /* Check that the specified encoding does not conflict with what
3996 * the parser has already deduced. Do we have the same number
3997 * of bytes in the smallest representation of a character? If
3998 * this is UTF-16, is it the same endianness?
3999 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004000 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
Victor Stinner93d0cb52017-08-18 23:43:54 +02004001 || (newEncoding->minBytesPerChar == 2 &&
Benjamin Peterson4e211002018-06-26 19:25:45 -07004002 newEncoding != parser->m_encoding)) {
4003 parser->m_eventPtr = encodingName;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004004 return XML_ERROR_INCORRECT_ENCODING;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004005 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004006 parser->m_encoding = newEncoding;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004007 }
4008 else if (encodingName) {
4009 enum XML_Error result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004010 if (!storedEncName) {
4011 storedEncName = poolStoreString(
Benjamin Peterson4e211002018-06-26 19:25:45 -07004012 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4013 encodingName + XmlNameLength(parser->m_encoding, encodingName));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004014 if (!storedEncName)
4015 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004016 }
4017 result = handleUnknownEncoding(parser, storedEncName);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004018 poolClear(&parser->m_temp2Pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004019 if (result == XML_ERROR_UNKNOWN_ENCODING)
Benjamin Peterson4e211002018-06-26 19:25:45 -07004020 parser->m_eventPtr = encodingName;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004021 return result;
4022 }
4023 }
4024
4025 if (storedEncName || storedversion)
Benjamin Peterson4e211002018-06-26 19:25:45 -07004026 poolClear(&parser->m_temp2Pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004027
4028 return XML_ERROR_NONE;
4029}
4030
4031static enum XML_Error
4032handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
4033{
Benjamin Peterson4e211002018-06-26 19:25:45 -07004034 if (parser->m_unknownEncodingHandler) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004035 XML_Encoding info;
4036 int i;
4037 for (i = 0; i < 256; i++)
4038 info.map[i] = -1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004039 info.convert = NULL;
4040 info.data = NULL;
4041 info.release = NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004042 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData, encodingName,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004043 &info)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004044 ENCODING *enc;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004045 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4046 if (!parser->m_unknownEncodingMem) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004047 if (info.release)
4048 info.release(info.data);
4049 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004050 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004051 enc = (parser->m_ns
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004052 ? XmlInitUnknownEncodingNS
Benjamin Peterson4e211002018-06-26 19:25:45 -07004053 : XmlInitUnknownEncoding)(parser->m_unknownEncodingMem,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004054 info.map,
4055 info.convert,
4056 info.data);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004057 if (enc) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004058 parser->m_unknownEncodingData = info.data;
4059 parser->m_unknownEncodingRelease = info.release;
4060 parser->m_encoding = enc;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004061 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004062 }
4063 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004064 if (info.release != NULL)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004065 info.release(info.data);
4066 }
4067 return XML_ERROR_UNKNOWN_ENCODING;
4068}
4069
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004070static enum XML_Error PTRCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004071prologInitProcessor(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004072 const char *s,
4073 const char *end,
4074 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004075{
4076 enum XML_Error result = initializeEncoding(parser);
4077 if (result != XML_ERROR_NONE)
4078 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004079 parser->m_processor = prologProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004080 return prologProcessor(parser, s, end, nextPtr);
4081}
4082
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004083#ifdef XML_DTD
4084
4085static enum XML_Error PTRCALL
4086externalParEntInitProcessor(XML_Parser parser,
4087 const char *s,
4088 const char *end,
4089 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004090{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004091 enum XML_Error result = initializeEncoding(parser);
4092 if (result != XML_ERROR_NONE)
4093 return result;
4094
4095 /* we know now that XML_Parse(Buffer) has been called,
4096 so we consider the external parameter entity read */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004097 parser->m_dtd->paramEntityRead = XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004098
Benjamin Peterson4e211002018-06-26 19:25:45 -07004099 if (parser->m_prologState.inEntityValue) {
4100 parser->m_processor = entityValueInitProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004101 return entityValueInitProcessor(parser, s, end, nextPtr);
4102 }
4103 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004104 parser->m_processor = externalParEntProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004105 return externalParEntProcessor(parser, s, end, nextPtr);
4106 }
4107}
4108
4109static enum XML_Error PTRCALL
4110entityValueInitProcessor(XML_Parser parser,
4111 const char *s,
4112 const char *end,
4113 const char **nextPtr)
4114{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004115 int tok;
Fred Drake31d485c2004-08-03 07:06:22 +00004116 const char *start = s;
4117 const char *next = start;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004118 parser->m_eventPtr = start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004119
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004120 for (;;) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004121 tok = XmlPrologTok(parser->m_encoding, start, end, &next);
4122 parser->m_eventEndPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004123 if (tok <= 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004124 if (!parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
Fred Drake31d485c2004-08-03 07:06:22 +00004125 *nextPtr = s;
4126 return XML_ERROR_NONE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004127 }
4128 switch (tok) {
4129 case XML_TOK_INVALID:
Fred Drake31d485c2004-08-03 07:06:22 +00004130 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004131 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00004132 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004133 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00004134 return XML_ERROR_PARTIAL_CHAR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004135 case XML_TOK_NONE: /* start == end */
4136 default:
4137 break;
4138 }
Fred Drake31d485c2004-08-03 07:06:22 +00004139 /* found end of entity value - can store it now */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004140 return storeEntityValue(parser, parser->m_encoding, s, end);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004141 }
4142 else if (tok == XML_TOK_XML_DECL) {
Fred Drake31d485c2004-08-03 07:06:22 +00004143 enum XML_Error result;
4144 result = processXmlDecl(parser, 0, start, next);
4145 if (result != XML_ERROR_NONE)
4146 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004147 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For that
Victor Stinner93d0cb52017-08-18 23:43:54 +02004148 * to happen, a parameter entity parsing handler must have
4149 * attempted to suspend the parser, which fails and raises an
4150 * error. The parser can be aborted, but can't be suspended.
4151 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004152 if (parser->m_parsingStatus.parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00004153 return XML_ERROR_ABORTED;
Victor Stinner93d0cb52017-08-18 23:43:54 +02004154 *nextPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004155 /* stop scanning for text declaration - we found one */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004156 parser->m_processor = entityValueProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004157 return entityValueProcessor(parser, next, end, nextPtr);
4158 }
4159 /* If we are at the end of the buffer, this would cause XmlPrologTok to
4160 return XML_TOK_NONE on the next call, which would then cause the
4161 function to exit with *nextPtr set to s - that is what we want for other
4162 tokens, but not for the BOM - we would rather like to skip it;
4163 then, when this routine is entered the next time, XmlPrologTok will
4164 return XML_TOK_INVALID, since the BOM is still in the buffer
4165 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004166 else if (tok == XML_TOK_BOM && next == end && !parser->m_parsingStatus.finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004167 *nextPtr = next;
4168 return XML_ERROR_NONE;
4169 }
Victor Stinner5ff71322017-06-21 14:39:22 +02004170 /* If we get this token, we have the start of what might be a
4171 normal tag, but not a declaration (i.e. it doesn't begin with
4172 "<!"). In a DTD context, that isn't legal.
4173 */
4174 else if (tok == XML_TOK_INSTANCE_START) {
4175 *nextPtr = next;
4176 return XML_ERROR_SYNTAX;
4177 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004178 start = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004179 parser->m_eventPtr = start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004180 }
4181}
4182
4183static enum XML_Error PTRCALL
4184externalParEntProcessor(XML_Parser parser,
4185 const char *s,
4186 const char *end,
4187 const char **nextPtr)
4188{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004189 const char *next = s;
4190 int tok;
4191
Benjamin Peterson4e211002018-06-26 19:25:45 -07004192 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004193 if (tok <= 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004194 if (!parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004195 *nextPtr = s;
4196 return XML_ERROR_NONE;
4197 }
4198 switch (tok) {
4199 case XML_TOK_INVALID:
4200 return XML_ERROR_INVALID_TOKEN;
4201 case XML_TOK_PARTIAL:
4202 return XML_ERROR_UNCLOSED_TOKEN;
4203 case XML_TOK_PARTIAL_CHAR:
4204 return XML_ERROR_PARTIAL_CHAR;
4205 case XML_TOK_NONE: /* start == end */
4206 default:
4207 break;
4208 }
4209 }
4210 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4211 However, when parsing an external subset, doProlog will not accept a BOM
4212 as valid, and report a syntax error, so we have to skip the BOM
4213 */
4214 else if (tok == XML_TOK_BOM) {
4215 s = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004216 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004217 }
4218
Benjamin Peterson4e211002018-06-26 19:25:45 -07004219 parser->m_processor = prologProcessor;
4220 return doProlog(parser, parser->m_encoding, s, end, tok, next,
4221 nextPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004222}
4223
4224static enum XML_Error PTRCALL
4225entityValueProcessor(XML_Parser parser,
4226 const char *s,
4227 const char *end,
4228 const char **nextPtr)
4229{
4230 const char *start = s;
4231 const char *next = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004232 const ENCODING *enc = parser->m_encoding;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004233 int tok;
4234
4235 for (;;) {
4236 tok = XmlPrologTok(enc, start, end, &next);
4237 if (tok <= 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004238 if (!parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004239 *nextPtr = s;
4240 return XML_ERROR_NONE;
4241 }
4242 switch (tok) {
4243 case XML_TOK_INVALID:
Fred Drake31d485c2004-08-03 07:06:22 +00004244 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004245 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00004246 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004247 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00004248 return XML_ERROR_PARTIAL_CHAR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004249 case XML_TOK_NONE: /* start == end */
4250 default:
4251 break;
4252 }
Fred Drake31d485c2004-08-03 07:06:22 +00004253 /* found end of entity value - can store it now */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004254 return storeEntityValue(parser, enc, s, end);
4255 }
4256 start = next;
4257 }
4258}
4259
4260#endif /* XML_DTD */
4261
4262static enum XML_Error PTRCALL
4263prologProcessor(XML_Parser parser,
4264 const char *s,
4265 const char *end,
4266 const char **nextPtr)
4267{
4268 const char *next = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004269 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4270 return doProlog(parser, parser->m_encoding, s, end, tok, next,
4271 nextPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004272}
4273
4274static enum XML_Error
4275doProlog(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004276 const ENCODING *enc,
4277 const char *s,
4278 const char *end,
4279 int tok,
4280 const char *next,
Fred Drake31d485c2004-08-03 07:06:22 +00004281 const char **nextPtr,
4282 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004283{
4284#ifdef XML_DTD
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004285 static const XML_Char externalSubsetName[] = { ASCII_HASH , '\0' };
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004286#endif /* XML_DTD */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004287 static const XML_Char atypeCDATA[] =
4288 { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
4289 static const XML_Char atypeID[] = { ASCII_I, ASCII_D, '\0' };
4290 static const XML_Char atypeIDREF[] =
4291 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
4292 static const XML_Char atypeIDREFS[] =
4293 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
4294 static const XML_Char atypeENTITY[] =
4295 { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
4296 static const XML_Char atypeENTITIES[] = { ASCII_E, ASCII_N,
4297 ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' };
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004298 static const XML_Char atypeNMTOKEN[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004299 ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
4300 static const XML_Char atypeNMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T,
4301 ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' };
4302 static const XML_Char notationPrefix[] = { ASCII_N, ASCII_O, ASCII_T,
4303 ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0' };
4304 static const XML_Char enumValueSep[] = { ASCII_PIPE, '\0' };
4305 static const XML_Char enumValueStart[] = { ASCII_LPAREN, '\0' };
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004306
Fred Drake31d485c2004-08-03 07:06:22 +00004307 /* save one level of indirection */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004308 DTD * const dtd = parser->m_dtd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004309
4310 const char **eventPP;
4311 const char **eventEndPP;
4312 enum XML_Content_Quant quant;
4313
Benjamin Peterson4e211002018-06-26 19:25:45 -07004314 if (enc == parser->m_encoding) {
4315 eventPP = &parser->m_eventPtr;
4316 eventEndPP = &parser->m_eventEndPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004317 }
4318 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004319 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4320 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004321 }
Fred Drake31d485c2004-08-03 07:06:22 +00004322
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004323 for (;;) {
4324 int role;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004325 XML_Bool handleDefault = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004326 *eventPP = s;
4327 *eventEndPP = next;
4328 if (tok <= 0) {
Fred Drake31d485c2004-08-03 07:06:22 +00004329 if (haveMore && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004330 *nextPtr = s;
4331 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004332 }
4333 switch (tok) {
4334 case XML_TOK_INVALID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004335 *eventPP = next;
4336 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004337 case XML_TOK_PARTIAL:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004338 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004339 case XML_TOK_PARTIAL_CHAR:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004340 return XML_ERROR_PARTIAL_CHAR;
Matthias Klose865e33b2010-01-22 01:13:15 +00004341 case -XML_TOK_PROLOG_S:
4342 tok = -tok;
4343 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004344 case XML_TOK_NONE:
4345#ifdef XML_DTD
Fred Drake31d485c2004-08-03 07:06:22 +00004346 /* for internal PE NOT referenced between declarations */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004347 if (enc != parser->m_encoding && !parser->m_openInternalEntities->betweenDecl) {
Fred Drake31d485c2004-08-03 07:06:22 +00004348 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004349 return XML_ERROR_NONE;
Fred Drake31d485c2004-08-03 07:06:22 +00004350 }
4351 /* WFC: PE Between Declarations - must check that PE contains
4352 complete markup, not only for external PEs, but also for
4353 internal PEs if the reference occurs between declarations.
4354 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004355 if (parser->m_isParamEntity || enc != parser->m_encoding) {
4356 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004357 == XML_ROLE_ERROR)
Fred Drake31d485c2004-08-03 07:06:22 +00004358 return XML_ERROR_INCOMPLETE_PE;
4359 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004360 return XML_ERROR_NONE;
4361 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004362#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004363 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004364 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004365 tok = -tok;
4366 next = end;
4367 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004368 }
4369 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004370 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004371 switch (role) {
4372 case XML_ROLE_XML_DECL:
4373 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004374 enum XML_Error result = processXmlDecl(parser, 0, s, next);
4375 if (result != XML_ERROR_NONE)
4376 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004377 enc = parser->m_encoding;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004378 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004379 }
4380 break;
4381 case XML_ROLE_DOCTYPE_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004382 if (parser->m_startDoctypeDeclHandler) {
4383 parser->m_doctypeName = poolStoreString(&parser->m_tempPool, enc, s, next);
4384 if (!parser->m_doctypeName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004385 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004386 poolFinish(&parser->m_tempPool);
4387 parser->m_doctypePubid = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004388 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004389 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004390 parser->m_doctypeSysid = NULL; /* always initialize to NULL */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004391 break;
4392 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004393 if (parser->m_startDoctypeDeclHandler) {
4394 parser->m_startDoctypeDeclHandler(parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4395 parser->m_doctypePubid, 1);
4396 parser->m_doctypeName = NULL;
4397 poolClear(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004398 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004399 }
4400 break;
4401#ifdef XML_DTD
4402 case XML_ROLE_TEXT_DECL:
4403 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004404 enum XML_Error result = processXmlDecl(parser, 1, s, next);
4405 if (result != XML_ERROR_NONE)
4406 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004407 enc = parser->m_encoding;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004408 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004409 }
4410 break;
4411#endif /* XML_DTD */
4412 case XML_ROLE_DOCTYPE_PUBLIC_ID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004413#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07004414 parser->m_useForeignDTD = XML_FALSE;
4415 parser->m_declEntity = (ENTITY *)lookup(parser,
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004416 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004417 externalSubsetName,
4418 sizeof(ENTITY));
Benjamin Peterson4e211002018-06-26 19:25:45 -07004419 if (!parser->m_declEntity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004420 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004421#endif /* XML_DTD */
Fred Drake31d485c2004-08-03 07:06:22 +00004422 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004423 if (parser->m_startDoctypeDeclHandler) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004424 XML_Char *pubId;
Fred Drake31d485c2004-08-03 07:06:22 +00004425 if (!XmlIsPublicId(enc, s, next, eventPP))
4426 return XML_ERROR_PUBLICID;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004427 pubId = poolStoreString(&parser->m_tempPool, enc,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004428 s + enc->minBytesPerChar,
4429 next - enc->minBytesPerChar);
4430 if (!pubId)
Fred Drake31d485c2004-08-03 07:06:22 +00004431 return XML_ERROR_NO_MEMORY;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004432 normalizePublicId(pubId);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004433 poolFinish(&parser->m_tempPool);
4434 parser->m_doctypePubid = pubId;
Fred Drake31d485c2004-08-03 07:06:22 +00004435 handleDefault = XML_FALSE;
4436 goto alreadyChecked;
4437 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004438 /* fall through */
4439 case XML_ROLE_ENTITY_PUBLIC_ID:
4440 if (!XmlIsPublicId(enc, s, next, eventPP))
Fred Drake31d485c2004-08-03 07:06:22 +00004441 return XML_ERROR_PUBLICID;
4442 alreadyChecked:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004443 if (dtd->keepProcessing && parser->m_declEntity) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004444 XML_Char *tem = poolStoreString(&dtd->pool,
4445 enc,
4446 s + enc->minBytesPerChar,
4447 next - enc->minBytesPerChar);
4448 if (!tem)
4449 return XML_ERROR_NO_MEMORY;
4450 normalizePublicId(tem);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004451 parser->m_declEntity->publicId = tem;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004452 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004453 /* Don't suppress the default handler if we fell through from
4454 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4455 */
4456 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004457 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004458 }
4459 break;
4460 case XML_ROLE_DOCTYPE_CLOSE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004461 if (parser->m_doctypeName) {
4462 parser->m_startDoctypeDeclHandler(parser->m_handlerArg, parser->m_doctypeName,
4463 parser->m_doctypeSysid, parser->m_doctypePubid, 0);
4464 poolClear(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004465 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004466 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004467 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
4468 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004469 was not set, indicating an external subset
4470 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004471#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07004472 if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004473 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4474 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004475 if (parser->m_paramEntityParsing && parser->m_externalEntityRefHandler) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004476 ENTITY *entity = (ENTITY *)lookup(parser,
4477 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004478 externalSubsetName,
4479 sizeof(ENTITY));
Victor Stinner93d0cb52017-08-18 23:43:54 +02004480 if (!entity) {
4481 /* The external subset name "#" will have already been
4482 * inserted into the hash table at the start of the
4483 * external entity parsing, so no allocation will happen
4484 * and lookup() cannot fail.
4485 */
4486 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4487 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004488 if (parser->m_useForeignDTD)
4489 entity->base = parser->m_curBase;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004490 dtd->paramEntityRead = XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004491 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004492 0,
4493 entity->base,
4494 entity->systemId,
4495 entity->publicId))
4496 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004497 if (dtd->paramEntityRead) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004498 if (!dtd->standalone &&
Benjamin Peterson4e211002018-06-26 19:25:45 -07004499 parser->m_notStandaloneHandler &&
4500 !parser->m_notStandaloneHandler(parser->m_handlerArg))
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004501 return XML_ERROR_NOT_STANDALONE;
4502 }
4503 /* if we didn't read the foreign DTD then this means that there
4504 is no external subset and we must reset dtd->hasParamEntityRefs
4505 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004506 else if (!parser->m_doctypeSysid)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004507 dtd->hasParamEntityRefs = hadParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004508 /* end of DTD - no need to update dtd->keepProcessing */
4509 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004510 parser->m_useForeignDTD = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004511 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004512#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004513 if (parser->m_endDoctypeDeclHandler) {
4514 parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004515 handleDefault = XML_FALSE;
4516 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004517 break;
4518 case XML_ROLE_INSTANCE_START:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004519#ifdef XML_DTD
4520 /* if there is no DOCTYPE declaration then now is the
4521 last chance to read the foreign DTD
4522 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004523 if (parser->m_useForeignDTD) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004524 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004525 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004526 if (parser->m_paramEntityParsing && parser->m_externalEntityRefHandler) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004527 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004528 externalSubsetName,
4529 sizeof(ENTITY));
4530 if (!entity)
4531 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004532 entity->base = parser->m_curBase;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004533 dtd->paramEntityRead = XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004534 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004535 0,
4536 entity->base,
4537 entity->systemId,
4538 entity->publicId))
4539 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004540 if (dtd->paramEntityRead) {
4541 if (!dtd->standalone &&
Benjamin Peterson4e211002018-06-26 19:25:45 -07004542 parser->m_notStandaloneHandler &&
4543 !parser->m_notStandaloneHandler(parser->m_handlerArg))
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004544 return XML_ERROR_NOT_STANDALONE;
4545 }
4546 /* if we didn't read the foreign DTD then this means that there
4547 is no external subset and we must reset dtd->hasParamEntityRefs
4548 */
4549 else
4550 dtd->hasParamEntityRefs = hadParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004551 /* end of DTD - no need to update dtd->keepProcessing */
4552 }
4553 }
4554#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004555 parser->m_processor = contentProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004556 return contentProcessor(parser, s, end, nextPtr);
4557 case XML_ROLE_ATTLIST_ELEMENT_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004558 parser->m_declElementType = getElementType(parser, enc, s, next);
4559 if (!parser->m_declElementType)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004560 return XML_ERROR_NO_MEMORY;
4561 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004562 case XML_ROLE_ATTRIBUTE_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004563 parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
4564 if (!parser->m_declAttributeId)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004565 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004566 parser->m_declAttributeIsCdata = XML_FALSE;
4567 parser->m_declAttributeType = NULL;
4568 parser->m_declAttributeIsId = XML_FALSE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004569 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004570 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004571 parser->m_declAttributeIsCdata = XML_TRUE;
4572 parser->m_declAttributeType = atypeCDATA;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004573 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004574 case XML_ROLE_ATTRIBUTE_TYPE_ID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004575 parser->m_declAttributeIsId = XML_TRUE;
4576 parser->m_declAttributeType = atypeID;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004577 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004578 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004579 parser->m_declAttributeType = atypeIDREF;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004580 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004581 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004582 parser->m_declAttributeType = atypeIDREFS;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004583 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004584 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004585 parser->m_declAttributeType = atypeENTITY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004586 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004587 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004588 parser->m_declAttributeType = atypeENTITIES;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004589 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004590 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004591 parser->m_declAttributeType = atypeNMTOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004592 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004593 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004594 parser->m_declAttributeType = atypeNMTOKENS;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004595 checkAttListDeclHandler:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004596 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004597 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004598 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004599 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
4600 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004601 if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004602 const XML_Char *prefix;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004603 if (parser->m_declAttributeType) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004604 prefix = enumValueSep;
4605 }
4606 else {
4607 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE
4608 ? notationPrefix
4609 : enumValueStart);
4610 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004611 if (!poolAppendString(&parser->m_tempPool, prefix))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004612 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004613 if (!poolAppend(&parser->m_tempPool, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004614 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004615 parser->m_declAttributeType = parser->m_tempPool.start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004616 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004617 }
4618 break;
4619 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
4620 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004621 if (dtd->keepProcessing) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004622 if (!defineAttribute(parser->m_declElementType, parser->m_declAttributeId,
4623 parser->m_declAttributeIsCdata, parser->m_declAttributeIsId,
Fred Drake08317ae2003-10-21 15:38:55 +00004624 0, parser))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004625 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004626 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4627 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4628 || (*parser->m_declAttributeType == XML_T(ASCII_N)
4629 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004630 /* Enumerated or Notation type */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004631 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4632 || !poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004633 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004634 parser->m_declAttributeType = parser->m_tempPool.start;
4635 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004636 }
4637 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004638 parser->m_attlistDeclHandler(parser->m_handlerArg, parser->m_declElementType->name,
4639 parser->m_declAttributeId->name, parser->m_declAttributeType,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004640 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004641 poolClear(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004642 handleDefault = XML_FALSE;
4643 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004644 }
4645 break;
4646 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
4647 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004648 if (dtd->keepProcessing) {
4649 const XML_Char *attVal;
Fred Drake08317ae2003-10-21 15:38:55 +00004650 enum XML_Error result =
Benjamin Peterson4e211002018-06-26 19:25:45 -07004651 storeAttributeValue(parser, enc, parser->m_declAttributeIsCdata,
Fred Drake08317ae2003-10-21 15:38:55 +00004652 s + enc->minBytesPerChar,
4653 next - enc->minBytesPerChar,
4654 &dtd->pool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004655 if (result)
4656 return result;
4657 attVal = poolStart(&dtd->pool);
4658 poolFinish(&dtd->pool);
4659 /* ID attributes aren't allowed to have a default */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004660 if (!defineAttribute(parser->m_declElementType, parser->m_declAttributeId,
4661 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004662 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004663 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4664 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4665 || (*parser->m_declAttributeType == XML_T(ASCII_N)
4666 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004667 /* Enumerated or Notation type */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004668 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4669 || !poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004670 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004671 parser->m_declAttributeType = parser->m_tempPool.start;
4672 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004673 }
4674 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004675 parser->m_attlistDeclHandler(parser->m_handlerArg, parser->m_declElementType->name,
4676 parser->m_declAttributeId->name, parser->m_declAttributeType,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004677 attVal,
4678 role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004679 poolClear(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004680 handleDefault = XML_FALSE;
4681 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004682 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004683 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004684 case XML_ROLE_ENTITY_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004685 if (dtd->keepProcessing) {
4686 enum XML_Error result = storeEntityValue(parser, enc,
4687 s + enc->minBytesPerChar,
4688 next - enc->minBytesPerChar);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004689 if (parser->m_declEntity) {
4690 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
4691 parser->m_declEntity->textLen = (int)(poolLength(&dtd->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004692 poolFinish(&dtd->entityValuePool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004693 if (parser->m_entityDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004694 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004695 parser->m_entityDeclHandler(parser->m_handlerArg,
4696 parser->m_declEntity->name,
4697 parser->m_declEntity->is_param,
4698 parser->m_declEntity->textPtr,
4699 parser->m_declEntity->textLen,
4700 parser->m_curBase, 0, 0, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004701 handleDefault = XML_FALSE;
4702 }
4703 }
4704 else
4705 poolDiscard(&dtd->entityValuePool);
4706 if (result != XML_ERROR_NONE)
4707 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004708 }
4709 break;
4710 case XML_ROLE_DOCTYPE_SYSTEM_ID:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004711#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07004712 parser->m_useForeignDTD = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004713#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004714 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004715 if (parser->m_startDoctypeDeclHandler) {
4716 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004717 s + enc->minBytesPerChar,
4718 next - enc->minBytesPerChar);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004719 if (parser->m_doctypeSysid == NULL)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004720 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004721 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004722 handleDefault = XML_FALSE;
4723 }
4724#ifdef XML_DTD
4725 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07004726 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
4727 for the case where no parser->m_startDoctypeDeclHandler is set */
4728 parser->m_doctypeSysid = externalSubsetName;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004729#endif /* XML_DTD */
4730 if (!dtd->standalone
4731#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07004732 && !parser->m_paramEntityParsing
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004733#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004734 && parser->m_notStandaloneHandler
4735 && !parser->m_notStandaloneHandler(parser->m_handlerArg))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004736 return XML_ERROR_NOT_STANDALONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004737#ifndef XML_DTD
4738 break;
4739#else /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004740 if (!parser->m_declEntity) {
4741 parser->m_declEntity = (ENTITY *)lookup(parser,
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004742 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004743 externalSubsetName,
4744 sizeof(ENTITY));
Benjamin Peterson4e211002018-06-26 19:25:45 -07004745 if (!parser->m_declEntity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004746 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004747 parser->m_declEntity->publicId = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004748 }
4749 /* fall through */
4750#endif /* XML_DTD */
4751 case XML_ROLE_ENTITY_SYSTEM_ID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004752 if (dtd->keepProcessing && parser->m_declEntity) {
4753 parser->m_declEntity->systemId = poolStoreString(&dtd->pool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004754 s + enc->minBytesPerChar,
4755 next - enc->minBytesPerChar);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004756 if (!parser->m_declEntity->systemId)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004757 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004758 parser->m_declEntity->base = parser->m_curBase;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004759 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004760 /* Don't suppress the default handler if we fell through from
4761 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
4762 */
4763 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004764 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004765 }
4766 break;
4767 case XML_ROLE_ENTITY_COMPLETE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004768 if (dtd->keepProcessing && parser->m_declEntity && parser->m_entityDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004769 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004770 parser->m_entityDeclHandler(parser->m_handlerArg,
4771 parser->m_declEntity->name,
4772 parser->m_declEntity->is_param,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004773 0,0,
Benjamin Peterson4e211002018-06-26 19:25:45 -07004774 parser->m_declEntity->base,
4775 parser->m_declEntity->systemId,
4776 parser->m_declEntity->publicId,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004777 0);
4778 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004779 }
4780 break;
4781 case XML_ROLE_ENTITY_NOTATION_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004782 if (dtd->keepProcessing && parser->m_declEntity) {
4783 parser->m_declEntity->notation = poolStoreString(&dtd->pool, enc, s, next);
4784 if (!parser->m_declEntity->notation)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004785 return XML_ERROR_NO_MEMORY;
4786 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004787 if (parser->m_unparsedEntityDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004788 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004789 parser->m_unparsedEntityDeclHandler(parser->m_handlerArg,
4790 parser->m_declEntity->name,
4791 parser->m_declEntity->base,
4792 parser->m_declEntity->systemId,
4793 parser->m_declEntity->publicId,
4794 parser->m_declEntity->notation);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004795 handleDefault = XML_FALSE;
4796 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004797 else if (parser->m_entityDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004798 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004799 parser->m_entityDeclHandler(parser->m_handlerArg,
4800 parser->m_declEntity->name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004801 0,0,0,
Benjamin Peterson4e211002018-06-26 19:25:45 -07004802 parser->m_declEntity->base,
4803 parser->m_declEntity->systemId,
4804 parser->m_declEntity->publicId,
4805 parser->m_declEntity->notation);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004806 handleDefault = XML_FALSE;
4807 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004808 }
4809 break;
4810 case XML_ROLE_GENERAL_ENTITY_NAME:
4811 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004812 if (XmlPredefinedEntityName(enc, s, next)) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004813 parser->m_declEntity = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004814 break;
4815 }
4816 if (dtd->keepProcessing) {
4817 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4818 if (!name)
4819 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004820 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004821 sizeof(ENTITY));
Benjamin Peterson4e211002018-06-26 19:25:45 -07004822 if (!parser->m_declEntity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004823 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004824 if (parser->m_declEntity->name != name) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004825 poolDiscard(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004826 parser->m_declEntity = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004827 }
4828 else {
4829 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004830 parser->m_declEntity->publicId = NULL;
4831 parser->m_declEntity->is_param = XML_FALSE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004832 /* if we have a parent parser or are reading an internal parameter
4833 entity, then the entity declaration is not considered "internal"
4834 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004835 parser->m_declEntity->is_internal = !(parser->m_parentParser || parser->m_openInternalEntities);
4836 if (parser->m_entityDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004837 handleDefault = XML_FALSE;
4838 }
4839 }
4840 else {
4841 poolDiscard(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004842 parser->m_declEntity = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004843 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004844 }
4845 break;
4846 case XML_ROLE_PARAM_ENTITY_NAME:
4847#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004848 if (dtd->keepProcessing) {
4849 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4850 if (!name)
4851 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004852 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004853 name, sizeof(ENTITY));
Benjamin Peterson4e211002018-06-26 19:25:45 -07004854 if (!parser->m_declEntity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004855 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004856 if (parser->m_declEntity->name != name) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004857 poolDiscard(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004858 parser->m_declEntity = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004859 }
4860 else {
4861 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004862 parser->m_declEntity->publicId = NULL;
4863 parser->m_declEntity->is_param = XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004864 /* if we have a parent parser or are reading an internal parameter
4865 entity, then the entity declaration is not considered "internal"
4866 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004867 parser->m_declEntity->is_internal = !(parser->m_parentParser || parser->m_openInternalEntities);
4868 if (parser->m_entityDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004869 handleDefault = XML_FALSE;
4870 }
4871 }
4872 else {
4873 poolDiscard(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004874 parser->m_declEntity = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004875 }
4876#else /* not XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004877 parser->m_declEntity = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004878#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004879 break;
4880 case XML_ROLE_NOTATION_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004881 parser->m_declNotationPublicId = NULL;
4882 parser->m_declNotationName = NULL;
4883 if (parser->m_notationDeclHandler) {
4884 parser->m_declNotationName = poolStoreString(&parser->m_tempPool, enc, s, next);
4885 if (!parser->m_declNotationName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004886 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004887 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004888 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004889 }
4890 break;
4891 case XML_ROLE_NOTATION_PUBLIC_ID:
4892 if (!XmlIsPublicId(enc, s, next, eventPP))
Fred Drake31d485c2004-08-03 07:06:22 +00004893 return XML_ERROR_PUBLICID;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004894 if (parser->m_declNotationName) { /* means m_notationDeclHandler != NULL */
4895 XML_Char *tem = poolStoreString(&parser->m_tempPool,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004896 enc,
4897 s + enc->minBytesPerChar,
4898 next - enc->minBytesPerChar);
4899 if (!tem)
4900 return XML_ERROR_NO_MEMORY;
4901 normalizePublicId(tem);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004902 parser->m_declNotationPublicId = tem;
4903 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004904 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004905 }
4906 break;
4907 case XML_ROLE_NOTATION_SYSTEM_ID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004908 if (parser->m_declNotationName && parser->m_notationDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004909 const XML_Char *systemId
Benjamin Peterson4e211002018-06-26 19:25:45 -07004910 = poolStoreString(&parser->m_tempPool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004911 s + enc->minBytesPerChar,
4912 next - enc->minBytesPerChar);
4913 if (!systemId)
4914 return XML_ERROR_NO_MEMORY;
4915 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004916 parser->m_notationDeclHandler(parser->m_handlerArg,
4917 parser->m_declNotationName,
4918 parser->m_curBase,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004919 systemId,
Benjamin Peterson4e211002018-06-26 19:25:45 -07004920 parser->m_declNotationPublicId);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004921 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004922 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004923 poolClear(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004924 break;
4925 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004926 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004927 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004928 parser->m_notationDeclHandler(parser->m_handlerArg,
4929 parser->m_declNotationName,
4930 parser->m_curBase,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004931 0,
Benjamin Peterson4e211002018-06-26 19:25:45 -07004932 parser->m_declNotationPublicId);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004933 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004934 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004935 poolClear(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004936 break;
4937 case XML_ROLE_ERROR:
4938 switch (tok) {
4939 case XML_TOK_PARAM_ENTITY_REF:
Fred Drake31d485c2004-08-03 07:06:22 +00004940 /* PE references in internal subset are
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004941 not allowed within declarations. */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004942 return XML_ERROR_PARAM_ENTITY_REF;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004943 case XML_TOK_XML_DECL:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004944 return XML_ERROR_MISPLACED_XML_PI;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004945 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004946 return XML_ERROR_SYNTAX;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004947 }
4948#ifdef XML_DTD
4949 case XML_ROLE_IGNORE_SECT:
4950 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004951 enum XML_Error result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004952 if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004953 reportDefault(parser, enc, s, next);
4954 handleDefault = XML_FALSE;
Fred Drake31d485c2004-08-03 07:06:22 +00004955 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
4956 if (result != XML_ERROR_NONE)
4957 return result;
4958 else if (!next) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004959 parser->m_processor = ignoreSectionProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004960 return result;
4961 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004962 }
4963 break;
4964#endif /* XML_DTD */
4965 case XML_ROLE_GROUP_OPEN:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004966 if (parser->m_prologState.level >= parser->m_groupSize) {
4967 if (parser->m_groupSize) {
4968 char *temp = (char *)REALLOC(parser, parser->m_groupConnector, parser->m_groupSize *= 2);
Victor Stinner93d0cb52017-08-18 23:43:54 +02004969 if (temp == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004970 parser->m_groupSize /= 2;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004971 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02004972 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004973 parser->m_groupConnector = temp;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004974 if (dtd->scaffIndex) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004975 int *temp = (int *)REALLOC(parser, dtd->scaffIndex,
4976 parser->m_groupSize * sizeof(int));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004977 if (temp == NULL)
4978 return XML_ERROR_NO_MEMORY;
4979 dtd->scaffIndex = temp;
4980 }
4981 }
4982 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004983 parser->m_groupConnector = (char *)MALLOC(parser, parser->m_groupSize = 32);
4984 if (!parser->m_groupConnector) {
4985 parser->m_groupSize = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004986 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02004987 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004988 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004989 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004990 parser->m_groupConnector[parser->m_prologState.level] = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004991 if (dtd->in_eldecl) {
4992 int myindex = nextScaffoldPart(parser);
4993 if (myindex < 0)
4994 return XML_ERROR_NO_MEMORY;
4995 dtd->scaffIndex[dtd->scaffLevel] = myindex;
4996 dtd->scaffLevel++;
4997 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004998 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004999 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005000 }
5001 break;
5002 case XML_ROLE_GROUP_SEQUENCE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005003 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005004 return XML_ERROR_SYNTAX;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005005 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5006 if (dtd->in_eldecl && parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005007 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005008 break;
5009 case XML_ROLE_GROUP_CHOICE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005010 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005011 return XML_ERROR_SYNTAX;
5012 if (dtd->in_eldecl
Benjamin Peterson4e211002018-06-26 19:25:45 -07005013 && !parser->m_groupConnector[parser->m_prologState.level]
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005014 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5015 != XML_CTYPE_MIXED)
5016 ) {
5017 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5018 = XML_CTYPE_CHOICE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005019 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005020 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005021 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07005022 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005023 break;
5024 case XML_ROLE_PARAM_ENTITY_REF:
5025#ifdef XML_DTD
5026 case XML_ROLE_INNER_PARAM_ENTITY_REF:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005027 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005028 if (!parser->m_paramEntityParsing)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005029 dtd->keepProcessing = dtd->standalone;
5030 else {
5031 const XML_Char *name;
5032 ENTITY *entity;
5033 name = poolStoreString(&dtd->pool, enc,
5034 s + enc->minBytesPerChar,
5035 next - enc->minBytesPerChar);
5036 if (!name)
5037 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005038 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005039 poolDiscard(&dtd->pool);
5040 /* first, determine if a check for an existing declaration is needed;
5041 if yes, check that the entity exists, and that it is internal,
5042 otherwise call the skipped entity handler
5043 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005044 if (parser->m_prologState.documentEntity &&
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005045 (dtd->standalone
Benjamin Peterson4e211002018-06-26 19:25:45 -07005046 ? !parser->m_openInternalEntities
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005047 : !dtd->hasParamEntityRefs)) {
5048 if (!entity)
5049 return XML_ERROR_UNDEFINED_ENTITY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005050 else if (!entity->is_internal) {
5051 /* It's hard to exhaustively search the code to be sure,
5052 * but there doesn't seem to be a way of executing the
5053 * following line. There are two cases:
5054 *
5055 * If 'standalone' is false, the DTD must have no
5056 * parameter entities or we wouldn't have passed the outer
5057 * 'if' statement. That means the only entity in the hash
5058 * table is the external subset name "#" which cannot be
5059 * given as a parameter entity name in XML syntax, so the
5060 * lookup must have returned NULL and we don't even reach
5061 * the test for an internal entity.
5062 *
5063 * If 'standalone' is true, it does not seem to be
5064 * possible to create entities taking this code path that
5065 * are not internal entities, so fail the test above.
5066 *
5067 * Because this analysis is very uncertain, the code is
5068 * being left in place and merely removed from the
5069 * coverage test statistics.
5070 */
5071 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5072 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005073 }
5074 else if (!entity) {
5075 dtd->keepProcessing = dtd->standalone;
5076 /* cannot report skipped entities in declarations */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005077 if ((role == XML_ROLE_PARAM_ENTITY_REF) && parser->m_skippedEntityHandler) {
5078 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005079 handleDefault = XML_FALSE;
5080 }
5081 break;
5082 }
5083 if (entity->open)
5084 return XML_ERROR_RECURSIVE_ENTITY_REF;
5085 if (entity->textPtr) {
5086 enum XML_Error result;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005087 XML_Bool betweenDecl =
Fred Drake31d485c2004-08-03 07:06:22 +00005088 (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5089 result = processInternalEntity(parser, entity, betweenDecl);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005090 if (result != XML_ERROR_NONE)
5091 return result;
5092 handleDefault = XML_FALSE;
5093 break;
5094 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07005095 if (parser->m_externalEntityRefHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005096 dtd->paramEntityRead = XML_FALSE;
5097 entity->open = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005098 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005099 0,
5100 entity->base,
5101 entity->systemId,
5102 entity->publicId)) {
5103 entity->open = XML_FALSE;
5104 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5105 }
5106 entity->open = XML_FALSE;
5107 handleDefault = XML_FALSE;
5108 if (!dtd->paramEntityRead) {
5109 dtd->keepProcessing = dtd->standalone;
5110 break;
5111 }
5112 }
5113 else {
5114 dtd->keepProcessing = dtd->standalone;
5115 break;
5116 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005117 }
5118#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005119 if (!dtd->standalone &&
Benjamin Peterson4e211002018-06-26 19:25:45 -07005120 parser->m_notStandaloneHandler &&
5121 !parser->m_notStandaloneHandler(parser->m_handlerArg))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005122 return XML_ERROR_NOT_STANDALONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005123 break;
5124
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005125 /* Element declaration stuff */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005126
5127 case XML_ROLE_ELEMENT_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005128 if (parser->m_elementDeclHandler) {
5129 parser->m_declElementType = getElementType(parser, enc, s, next);
5130 if (!parser->m_declElementType)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005131 return XML_ERROR_NO_MEMORY;
5132 dtd->scaffLevel = 0;
5133 dtd->scaffCount = 0;
5134 dtd->in_eldecl = XML_TRUE;
5135 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005136 }
5137 break;
5138
5139 case XML_ROLE_CONTENT_ANY:
5140 case XML_ROLE_CONTENT_EMPTY:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005141 if (dtd->in_eldecl) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005142 if (parser->m_elementDeclHandler) {
5143 XML_Content * content = (XML_Content *) MALLOC(parser, sizeof(XML_Content));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005144 if (!content)
5145 return XML_ERROR_NO_MEMORY;
5146 content->quant = XML_CQUANT_NONE;
5147 content->name = NULL;
5148 content->numchildren = 0;
5149 content->children = NULL;
5150 content->type = ((role == XML_ROLE_CONTENT_ANY) ?
5151 XML_CTYPE_ANY :
5152 XML_CTYPE_EMPTY);
5153 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005154 parser->m_elementDeclHandler(parser->m_handlerArg, parser->m_declElementType->name, content);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005155 handleDefault = XML_FALSE;
5156 }
5157 dtd->in_eldecl = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005158 }
5159 break;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005160
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005161 case XML_ROLE_CONTENT_PCDATA:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005162 if (dtd->in_eldecl) {
5163 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5164 = XML_CTYPE_MIXED;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005165 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005166 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005167 }
5168 break;
5169
5170 case XML_ROLE_CONTENT_ELEMENT:
5171 quant = XML_CQUANT_NONE;
5172 goto elementContent;
5173 case XML_ROLE_CONTENT_ELEMENT_OPT:
5174 quant = XML_CQUANT_OPT;
5175 goto elementContent;
5176 case XML_ROLE_CONTENT_ELEMENT_REP:
5177 quant = XML_CQUANT_REP;
5178 goto elementContent;
5179 case XML_ROLE_CONTENT_ELEMENT_PLUS:
5180 quant = XML_CQUANT_PLUS;
5181 elementContent:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005182 if (dtd->in_eldecl) {
5183 ELEMENT_TYPE *el;
5184 const XML_Char *name;
5185 int nameLen;
5186 const char *nxt = (quant == XML_CQUANT_NONE
5187 ? next
5188 : next - enc->minBytesPerChar);
5189 int myindex = nextScaffoldPart(parser);
5190 if (myindex < 0)
5191 return XML_ERROR_NO_MEMORY;
5192 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5193 dtd->scaffold[myindex].quant = quant;
5194 el = getElementType(parser, enc, s, nxt);
5195 if (!el)
5196 return XML_ERROR_NO_MEMORY;
5197 name = el->name;
5198 dtd->scaffold[myindex].name = name;
5199 nameLen = 0;
5200 for (; name[nameLen++]; );
5201 dtd->contentStringLen += nameLen;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005202 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005203 handleDefault = XML_FALSE;
5204 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005205 break;
5206
5207 case XML_ROLE_GROUP_CLOSE:
5208 quant = XML_CQUANT_NONE;
5209 goto closeGroup;
5210 case XML_ROLE_GROUP_CLOSE_OPT:
5211 quant = XML_CQUANT_OPT;
5212 goto closeGroup;
5213 case XML_ROLE_GROUP_CLOSE_REP:
5214 quant = XML_CQUANT_REP;
5215 goto closeGroup;
5216 case XML_ROLE_GROUP_CLOSE_PLUS:
5217 quant = XML_CQUANT_PLUS;
5218 closeGroup:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005219 if (dtd->in_eldecl) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005220 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005221 handleDefault = XML_FALSE;
5222 dtd->scaffLevel--;
5223 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5224 if (dtd->scaffLevel == 0) {
5225 if (!handleDefault) {
5226 XML_Content *model = build_model(parser);
5227 if (!model)
5228 return XML_ERROR_NO_MEMORY;
5229 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005230 parser->m_elementDeclHandler(parser->m_handlerArg, parser->m_declElementType->name, model);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005231 }
5232 dtd->in_eldecl = XML_FALSE;
5233 dtd->contentStringLen = 0;
5234 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005235 }
5236 break;
5237 /* End element declaration stuff */
5238
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005239 case XML_ROLE_PI:
5240 if (!reportProcessingInstruction(parser, enc, s, next))
5241 return XML_ERROR_NO_MEMORY;
5242 handleDefault = XML_FALSE;
5243 break;
5244 case XML_ROLE_COMMENT:
5245 if (!reportComment(parser, enc, s, next))
5246 return XML_ERROR_NO_MEMORY;
5247 handleDefault = XML_FALSE;
5248 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005249 case XML_ROLE_NONE:
5250 switch (tok) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005251 case XML_TOK_BOM:
5252 handleDefault = XML_FALSE;
5253 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005254 }
5255 break;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005256 case XML_ROLE_DOCTYPE_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005257 if (parser->m_startDoctypeDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005258 handleDefault = XML_FALSE;
5259 break;
5260 case XML_ROLE_ENTITY_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005261 if (dtd->keepProcessing && parser->m_entityDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005262 handleDefault = XML_FALSE;
5263 break;
5264 case XML_ROLE_NOTATION_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005265 if (parser->m_notationDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005266 handleDefault = XML_FALSE;
5267 break;
5268 case XML_ROLE_ATTLIST_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005269 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005270 handleDefault = XML_FALSE;
5271 break;
5272 case XML_ROLE_ELEMENT_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005273 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005274 handleDefault = XML_FALSE;
5275 break;
5276 } /* end of big switch */
5277
Benjamin Peterson4e211002018-06-26 19:25:45 -07005278 if (handleDefault && parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005279 reportDefault(parser, enc, s, next);
5280
Benjamin Peterson4e211002018-06-26 19:25:45 -07005281 switch (parser->m_parsingStatus.parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005282 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00005283 *nextPtr = next;
5284 return XML_ERROR_NONE;
5285 case XML_FINISHED:
5286 return XML_ERROR_ABORTED;
5287 default:
5288 s = next;
5289 tok = XmlPrologTok(enc, s, end, &next);
5290 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005291 }
5292 /* not reached */
5293}
5294
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005295static enum XML_Error PTRCALL
5296epilogProcessor(XML_Parser parser,
5297 const char *s,
5298 const char *end,
5299 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005300{
Benjamin Peterson4e211002018-06-26 19:25:45 -07005301 parser->m_processor = epilogProcessor;
5302 parser->m_eventPtr = s;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005303 for (;;) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005304 const char *next = NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005305 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5306 parser->m_eventEndPtr = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005307 switch (tok) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005308 /* report partial linebreak - it might be the last token */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005309 case -XML_TOK_PROLOG_S:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005310 if (parser->m_defaultHandler) {
5311 reportDefault(parser, parser->m_encoding, s, next);
5312 if (parser->m_parsingStatus.parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00005313 return XML_ERROR_ABORTED;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005314 }
Fred Drake31d485c2004-08-03 07:06:22 +00005315 *nextPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005316 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005317 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00005318 *nextPtr = s;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005319 return XML_ERROR_NONE;
5320 case XML_TOK_PROLOG_S:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005321 if (parser->m_defaultHandler)
5322 reportDefault(parser, parser->m_encoding, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005323 break;
5324 case XML_TOK_PI:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005325 if (!reportProcessingInstruction(parser, parser->m_encoding, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005326 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005327 break;
5328 case XML_TOK_COMMENT:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005329 if (!reportComment(parser, parser->m_encoding, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005330 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005331 break;
5332 case XML_TOK_INVALID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005333 parser->m_eventPtr = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005334 return XML_ERROR_INVALID_TOKEN;
5335 case XML_TOK_PARTIAL:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005336 if (!parser->m_parsingStatus.finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005337 *nextPtr = s;
5338 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005339 }
5340 return XML_ERROR_UNCLOSED_TOKEN;
5341 case XML_TOK_PARTIAL_CHAR:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005342 if (!parser->m_parsingStatus.finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005343 *nextPtr = s;
5344 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005345 }
5346 return XML_ERROR_PARTIAL_CHAR;
5347 default:
5348 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5349 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07005350 parser->m_eventPtr = s = next;
5351 switch (parser->m_parsingStatus.parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005352 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00005353 *nextPtr = next;
5354 return XML_ERROR_NONE;
5355 case XML_FINISHED:
5356 return XML_ERROR_ABORTED;
5357 default: ;
5358 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005359 }
5360}
5361
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005362static enum XML_Error
Fred Drake31d485c2004-08-03 07:06:22 +00005363processInternalEntity(XML_Parser parser, ENTITY *entity,
5364 XML_Bool betweenDecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005365{
Fred Drake31d485c2004-08-03 07:06:22 +00005366 const char *textStart, *textEnd;
5367 const char *next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005368 enum XML_Error result;
Fred Drake31d485c2004-08-03 07:06:22 +00005369 OPEN_INTERNAL_ENTITY *openEntity;
5370
Benjamin Peterson4e211002018-06-26 19:25:45 -07005371 if (parser->m_freeInternalEntities) {
5372 openEntity = parser->m_freeInternalEntities;
5373 parser->m_freeInternalEntities = openEntity->next;
Fred Drake31d485c2004-08-03 07:06:22 +00005374 }
5375 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005376 openEntity = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
Fred Drake31d485c2004-08-03 07:06:22 +00005377 if (!openEntity)
5378 return XML_ERROR_NO_MEMORY;
5379 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005380 entity->open = XML_TRUE;
Fred Drake31d485c2004-08-03 07:06:22 +00005381 entity->processed = 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005382 openEntity->next = parser->m_openInternalEntities;
5383 parser->m_openInternalEntities = openEntity;
Fred Drake31d485c2004-08-03 07:06:22 +00005384 openEntity->entity = entity;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005385 openEntity->startTagLevel = parser->m_tagLevel;
Fred Drake31d485c2004-08-03 07:06:22 +00005386 openEntity->betweenDecl = betweenDecl;
5387 openEntity->internalEventPtr = NULL;
5388 openEntity->internalEventEndPtr = NULL;
5389 textStart = (char *)entity->textPtr;
5390 textEnd = (char *)(entity->textPtr + entity->textLen);
Victor Stinner5ff71322017-06-21 14:39:22 +02005391 /* Set a safe default value in case 'next' does not get set */
5392 next = textStart;
Fred Drake31d485c2004-08-03 07:06:22 +00005393
5394#ifdef XML_DTD
5395 if (entity->is_param) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005396 int tok = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5397 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, tok,
Fred Drake31d485c2004-08-03 07:06:22 +00005398 next, &next, XML_FALSE);
5399 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005400 else
Fred Drake31d485c2004-08-03 07:06:22 +00005401#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005402 result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding, textStart,
Fred Drake31d485c2004-08-03 07:06:22 +00005403 textEnd, &next, XML_FALSE);
5404
5405 if (result == XML_ERROR_NONE) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005406 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005407 entity->processed = (int)(next - textStart);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005408 parser->m_processor = internalEntityProcessor;
Fred Drake31d485c2004-08-03 07:06:22 +00005409 }
5410 else {
5411 entity->open = XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005412 parser->m_openInternalEntities = openEntity->next;
Fred Drake31d485c2004-08-03 07:06:22 +00005413 /* put openEntity back in list of free instances */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005414 openEntity->next = parser->m_freeInternalEntities;
5415 parser->m_freeInternalEntities = openEntity;
Fred Drake31d485c2004-08-03 07:06:22 +00005416 }
5417 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005418 return result;
5419}
5420
Fred Drake31d485c2004-08-03 07:06:22 +00005421static enum XML_Error PTRCALL
5422internalEntityProcessor(XML_Parser parser,
5423 const char *s,
5424 const char *end,
5425 const char **nextPtr)
5426{
5427 ENTITY *entity;
5428 const char *textStart, *textEnd;
5429 const char *next;
5430 enum XML_Error result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005431 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
Fred Drake31d485c2004-08-03 07:06:22 +00005432 if (!openEntity)
5433 return XML_ERROR_UNEXPECTED_STATE;
5434
5435 entity = openEntity->entity;
5436 textStart = ((char *)entity->textPtr) + entity->processed;
5437 textEnd = (char *)(entity->textPtr + entity->textLen);
Victor Stinner5ff71322017-06-21 14:39:22 +02005438 /* Set a safe default value in case 'next' does not get set */
5439 next = textStart;
Fred Drake31d485c2004-08-03 07:06:22 +00005440
5441#ifdef XML_DTD
5442 if (entity->is_param) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005443 int tok = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5444 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, tok,
Fred Drake31d485c2004-08-03 07:06:22 +00005445 next, &next, XML_FALSE);
5446 }
5447 else
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005448#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005449 result = doContent(parser, openEntity->startTagLevel, parser->m_internalEncoding,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005450 textStart, textEnd, &next, XML_FALSE);
Fred Drake31d485c2004-08-03 07:06:22 +00005451
5452 if (result != XML_ERROR_NONE)
5453 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005454 else if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005455 entity->processed = (int)(next - (char *)entity->textPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00005456 return result;
5457 }
5458 else {
5459 entity->open = XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005460 parser->m_openInternalEntities = openEntity->next;
Fred Drake31d485c2004-08-03 07:06:22 +00005461 /* put openEntity back in list of free instances */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005462 openEntity->next = parser->m_freeInternalEntities;
5463 parser->m_freeInternalEntities = openEntity;
Fred Drake31d485c2004-08-03 07:06:22 +00005464 }
5465
5466#ifdef XML_DTD
5467 if (entity->is_param) {
5468 int tok;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005469 parser->m_processor = prologProcessor;
5470 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5471 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5472 (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00005473 }
5474 else
5475#endif /* XML_DTD */
5476 {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005477 parser->m_processor = contentProcessor;
Fred Drake31d485c2004-08-03 07:06:22 +00005478 /* see externalEntityContentProcessor vs contentProcessor */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005479 return doContent(parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, s, end,
5480 nextPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005481 }
Fred Drake31d485c2004-08-03 07:06:22 +00005482}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005483
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005484static enum XML_Error PTRCALL
5485errorProcessor(XML_Parser parser,
Victor Stinner23ec4b52017-06-15 00:54:36 +02005486 const char *UNUSED_P(s),
5487 const char *UNUSED_P(end),
5488 const char **UNUSED_P(nextPtr))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005489{
Benjamin Peterson4e211002018-06-26 19:25:45 -07005490 return parser->m_errorCode;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005491}
5492
5493static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005494storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5495 const char *ptr, const char *end,
5496 STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005497{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005498 enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr,
5499 end, pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005500 if (result)
5501 return result;
5502 if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5503 poolChop(pool);
5504 if (!poolAppendChar(pool, XML_T('\0')))
5505 return XML_ERROR_NO_MEMORY;
5506 return XML_ERROR_NONE;
5507}
5508
5509static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005510appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5511 const char *ptr, const char *end,
5512 STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005513{
Benjamin Peterson4e211002018-06-26 19:25:45 -07005514 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005515 for (;;) {
5516 const char *next;
5517 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
5518 switch (tok) {
5519 case XML_TOK_NONE:
5520 return XML_ERROR_NONE;
5521 case XML_TOK_INVALID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005522 if (enc == parser->m_encoding)
5523 parser->m_eventPtr = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005524 return XML_ERROR_INVALID_TOKEN;
5525 case XML_TOK_PARTIAL:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005526 if (enc == parser->m_encoding)
5527 parser->m_eventPtr = ptr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005528 return XML_ERROR_INVALID_TOKEN;
5529 case XML_TOK_CHAR_REF:
5530 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005531 XML_Char buf[XML_ENCODE_MAX];
5532 int i;
5533 int n = XmlCharRefNumber(enc, ptr);
5534 if (n < 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005535 if (enc == parser->m_encoding)
5536 parser->m_eventPtr = ptr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005537 return XML_ERROR_BAD_CHAR_REF;
5538 }
5539 if (!isCdata
5540 && n == 0x20 /* space */
5541 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5542 break;
5543 n = XmlEncode(n, (ICHAR *)buf);
Victor Stinner93d0cb52017-08-18 23:43:54 +02005544 /* The XmlEncode() functions can never return 0 here. That
5545 * error return happens if the code point passed in is either
5546 * negative or greater than or equal to 0x110000. The
5547 * XmlCharRefNumber() functions will all return a number
5548 * strictly less than 0x110000 or a negative value if an error
5549 * occurred. The negative value is intercepted above, so
5550 * XmlEncode() is never passed a value it might return an
5551 * error for.
5552 */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005553 for (i = 0; i < n; i++) {
5554 if (!poolAppendChar(pool, buf[i]))
5555 return XML_ERROR_NO_MEMORY;
5556 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005557 }
5558 break;
5559 case XML_TOK_DATA_CHARS:
5560 if (!poolAppend(pool, enc, ptr, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005561 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005562 break;
5563 case XML_TOK_TRAILING_CR:
5564 next = ptr + enc->minBytesPerChar;
5565 /* fall through */
5566 case XML_TOK_ATTRIBUTE_VALUE_S:
5567 case XML_TOK_DATA_NEWLINE:
5568 if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005569 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005570 if (!poolAppendChar(pool, 0x20))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005571 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005572 break;
5573 case XML_TOK_ENTITY_REF:
5574 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005575 const XML_Char *name;
5576 ENTITY *entity;
5577 char checkEntityDecl;
5578 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
5579 ptr + enc->minBytesPerChar,
5580 next - enc->minBytesPerChar);
5581 if (ch) {
5582 if (!poolAppendChar(pool, ch))
5583 return XML_ERROR_NO_MEMORY;
5584 break;
5585 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07005586 name = poolStoreString(&parser->m_temp2Pool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005587 ptr + enc->minBytesPerChar,
5588 next - enc->minBytesPerChar);
5589 if (!name)
5590 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005591 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005592 poolDiscard(&parser->m_temp2Pool);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005593 /* First, determine if a check for an existing declaration is needed;
5594 if yes, check that the entity exists, and that it is internal.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005595 */
5596 if (pool == &dtd->pool) /* are we called from prolog? */
5597 checkEntityDecl =
5598#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07005599 parser->m_prologState.documentEntity &&
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005600#endif /* XML_DTD */
5601 (dtd->standalone
Benjamin Peterson4e211002018-06-26 19:25:45 -07005602 ? !parser->m_openInternalEntities
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005603 : !dtd->hasParamEntityRefs);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005604 else /* if (pool == &parser->m_tempPool): we are called from content */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005605 checkEntityDecl = !dtd->hasParamEntityRefs || dtd->standalone;
5606 if (checkEntityDecl) {
5607 if (!entity)
5608 return XML_ERROR_UNDEFINED_ENTITY;
5609 else if (!entity->is_internal)
5610 return XML_ERROR_ENTITY_DECLARED_IN_PE;
5611 }
5612 else if (!entity) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005613 /* Cannot report skipped entity here - see comments on
Benjamin Peterson4e211002018-06-26 19:25:45 -07005614 parser->m_skippedEntityHandler.
5615 if (parser->m_skippedEntityHandler)
5616 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005617 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005618 /* Cannot call the default handler because this would be
5619 out of sync with the call to the startElementHandler.
Benjamin Peterson4e211002018-06-26 19:25:45 -07005620 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005621 reportDefault(parser, enc, ptr, next);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005622 */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005623 break;
5624 }
5625 if (entity->open) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005626 if (enc == parser->m_encoding) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02005627 /* It does not appear that this line can be executed.
5628 *
5629 * The "if (entity->open)" check catches recursive entity
5630 * definitions. In order to be called with an open
5631 * entity, it must have gone through this code before and
5632 * been through the recursive call to
5633 * appendAttributeValue() some lines below. That call
5634 * sets the local encoding ("enc") to the parser's
5635 * internal encoding (internal_utf8 or internal_utf16),
5636 * which can never be the same as the principle encoding.
5637 * It doesn't appear there is another code path that gets
5638 * here with entity->open being TRUE.
5639 *
5640 * Since it is not certain that this logic is watertight,
5641 * we keep the line and merely exclude it from coverage
5642 * tests.
5643 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005644 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
Victor Stinner93d0cb52017-08-18 23:43:54 +02005645 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005646 return XML_ERROR_RECURSIVE_ENTITY_REF;
5647 }
5648 if (entity->notation) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005649 if (enc == parser->m_encoding)
5650 parser->m_eventPtr = ptr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005651 return XML_ERROR_BINARY_ENTITY_REF;
5652 }
5653 if (!entity->textPtr) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005654 if (enc == parser->m_encoding)
5655 parser->m_eventPtr = ptr;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005656 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005657 }
5658 else {
5659 enum XML_Error result;
5660 const XML_Char *textEnd = entity->textPtr + entity->textLen;
5661 entity->open = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005662 result = appendAttributeValue(parser, parser->m_internalEncoding, isCdata,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005663 (char *)entity->textPtr,
5664 (char *)textEnd, pool);
5665 entity->open = XML_FALSE;
5666 if (result)
5667 return result;
5668 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005669 }
5670 break;
5671 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02005672 /* The only token returned by XmlAttributeValueTok() that does
5673 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
5674 * Getting that would require an entity name to contain an
5675 * incomplete XML character (e.g. \xE2\x82); however previous
5676 * tokenisers will have already recognised and rejected such
5677 * names before XmlAttributeValueTok() gets a look-in. This
5678 * default case should be retained as a safety net, but the code
5679 * excluded from coverage tests.
5680 *
5681 * LCOV_EXCL_START
5682 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005683 if (enc == parser->m_encoding)
5684 parser->m_eventPtr = ptr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005685 return XML_ERROR_UNEXPECTED_STATE;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005686 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005687 }
5688 ptr = next;
5689 }
5690 /* not reached */
5691}
5692
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005693static enum XML_Error
5694storeEntityValue(XML_Parser parser,
5695 const ENCODING *enc,
5696 const char *entityTextPtr,
5697 const char *entityTextEnd)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005698{
Benjamin Peterson4e211002018-06-26 19:25:45 -07005699 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005700 STRING_POOL *pool = &(dtd->entityValuePool);
5701 enum XML_Error result = XML_ERROR_NONE;
5702#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07005703 int oldInEntityValue = parser->m_prologState.inEntityValue;
5704 parser->m_prologState.inEntityValue = 1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005705#endif /* XML_DTD */
5706 /* never return Null for the value argument in EntityDeclHandler,
5707 since this would indicate an external entity; therefore we
5708 have to make sure that entityValuePool.start is not null */
5709 if (!pool->blocks) {
5710 if (!poolGrow(pool))
5711 return XML_ERROR_NO_MEMORY;
5712 }
5713
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005714 for (;;) {
5715 const char *next;
5716 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
5717 switch (tok) {
5718 case XML_TOK_PARAM_ENTITY_REF:
5719#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07005720 if (parser->m_isParamEntity || enc != parser->m_encoding) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005721 const XML_Char *name;
5722 ENTITY *entity;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005723 name = poolStoreString(&parser->m_tempPool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005724 entityTextPtr + enc->minBytesPerChar,
5725 next - enc->minBytesPerChar);
5726 if (!name) {
5727 result = XML_ERROR_NO_MEMORY;
5728 goto endEntityValue;
5729 }
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005730 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005731 poolDiscard(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005732 if (!entity) {
5733 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
5734 /* cannot report skipped entity here - see comments on
Benjamin Peterson4e211002018-06-26 19:25:45 -07005735 parser->m_skippedEntityHandler
5736 if (parser->m_skippedEntityHandler)
5737 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005738 */
5739 dtd->keepProcessing = dtd->standalone;
5740 goto endEntityValue;
5741 }
5742 if (entity->open) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005743 if (enc == parser->m_encoding)
5744 parser->m_eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005745 result = XML_ERROR_RECURSIVE_ENTITY_REF;
5746 goto endEntityValue;
5747 }
5748 if (entity->systemId) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005749 if (parser->m_externalEntityRefHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005750 dtd->paramEntityRead = XML_FALSE;
5751 entity->open = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005752 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005753 0,
5754 entity->base,
5755 entity->systemId,
5756 entity->publicId)) {
5757 entity->open = XML_FALSE;
5758 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5759 goto endEntityValue;
5760 }
5761 entity->open = XML_FALSE;
5762 if (!dtd->paramEntityRead)
5763 dtd->keepProcessing = dtd->standalone;
5764 }
5765 else
5766 dtd->keepProcessing = dtd->standalone;
5767 }
5768 else {
5769 entity->open = XML_TRUE;
5770 result = storeEntityValue(parser,
Benjamin Peterson4e211002018-06-26 19:25:45 -07005771 parser->m_internalEncoding,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005772 (char *)entity->textPtr,
5773 (char *)(entity->textPtr
5774 + entity->textLen));
5775 entity->open = XML_FALSE;
5776 if (result)
5777 goto endEntityValue;
5778 }
5779 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005780 }
5781#endif /* XML_DTD */
Fred Drake31d485c2004-08-03 07:06:22 +00005782 /* In the internal subset, PE references are not legal
5783 within markup declarations, e.g entity values in this case. */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005784 parser->m_eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005785 result = XML_ERROR_PARAM_ENTITY_REF;
5786 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005787 case XML_TOK_NONE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005788 result = XML_ERROR_NONE;
5789 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005790 case XML_TOK_ENTITY_REF:
5791 case XML_TOK_DATA_CHARS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005792 if (!poolAppend(pool, enc, entityTextPtr, next)) {
5793 result = XML_ERROR_NO_MEMORY;
5794 goto endEntityValue;
5795 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005796 break;
5797 case XML_TOK_TRAILING_CR:
5798 next = entityTextPtr + enc->minBytesPerChar;
5799 /* fall through */
5800 case XML_TOK_DATA_NEWLINE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005801 if (pool->end == pool->ptr && !poolGrow(pool)) {
5802 result = XML_ERROR_NO_MEMORY;
5803 goto endEntityValue;
5804 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005805 *(pool->ptr)++ = 0xA;
5806 break;
5807 case XML_TOK_CHAR_REF:
5808 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005809 XML_Char buf[XML_ENCODE_MAX];
5810 int i;
5811 int n = XmlCharRefNumber(enc, entityTextPtr);
5812 if (n < 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005813 if (enc == parser->m_encoding)
5814 parser->m_eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005815 result = XML_ERROR_BAD_CHAR_REF;
5816 goto endEntityValue;
5817 }
5818 n = XmlEncode(n, (ICHAR *)buf);
Victor Stinner93d0cb52017-08-18 23:43:54 +02005819 /* The XmlEncode() functions can never return 0 here. That
5820 * error return happens if the code point passed in is either
5821 * negative or greater than or equal to 0x110000. The
5822 * XmlCharRefNumber() functions will all return a number
5823 * strictly less than 0x110000 or a negative value if an error
5824 * occurred. The negative value is intercepted above, so
5825 * XmlEncode() is never passed a value it might return an
5826 * error for.
5827 */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005828 for (i = 0; i < n; i++) {
5829 if (pool->end == pool->ptr && !poolGrow(pool)) {
5830 result = XML_ERROR_NO_MEMORY;
5831 goto endEntityValue;
5832 }
5833 *(pool->ptr)++ = buf[i];
5834 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005835 }
5836 break;
5837 case XML_TOK_PARTIAL:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005838 if (enc == parser->m_encoding)
5839 parser->m_eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005840 result = XML_ERROR_INVALID_TOKEN;
5841 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005842 case XML_TOK_INVALID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005843 if (enc == parser->m_encoding)
5844 parser->m_eventPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005845 result = XML_ERROR_INVALID_TOKEN;
5846 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005847 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02005848 /* This default case should be unnecessary -- all the tokens
5849 * that XmlEntityValueTok() can return have their own explicit
5850 * cases -- but should be retained for safety. We do however
5851 * exclude it from the coverage statistics.
5852 *
5853 * LCOV_EXCL_START
5854 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005855 if (enc == parser->m_encoding)
5856 parser->m_eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005857 result = XML_ERROR_UNEXPECTED_STATE;
5858 goto endEntityValue;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005859 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005860 }
5861 entityTextPtr = next;
5862 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005863endEntityValue:
5864#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07005865 parser->m_prologState.inEntityValue = oldInEntityValue;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005866#endif /* XML_DTD */
5867 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005868}
5869
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005870static void FASTCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005871normalizeLines(XML_Char *s)
5872{
5873 XML_Char *p;
5874 for (;; s++) {
5875 if (*s == XML_T('\0'))
5876 return;
5877 if (*s == 0xD)
5878 break;
5879 }
5880 p = s;
5881 do {
5882 if (*s == 0xD) {
5883 *p++ = 0xA;
5884 if (*++s == 0xA)
5885 s++;
5886 }
5887 else
5888 *p++ = *s++;
5889 } while (*s);
5890 *p = XML_T('\0');
5891}
5892
5893static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005894reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
5895 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005896{
5897 const XML_Char *target;
5898 XML_Char *data;
5899 const char *tem;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005900 if (!parser->m_processingInstructionHandler) {
5901 if (parser->m_defaultHandler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005902 reportDefault(parser, enc, start, end);
5903 return 1;
5904 }
5905 start += enc->minBytesPerChar * 2;
5906 tem = start + XmlNameLength(enc, start);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005907 target = poolStoreString(&parser->m_tempPool, enc, start, tem);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005908 if (!target)
5909 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005910 poolFinish(&parser->m_tempPool);
5911 data = poolStoreString(&parser->m_tempPool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005912 XmlSkipS(enc, tem),
5913 end - enc->minBytesPerChar*2);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005914 if (!data)
5915 return 0;
5916 normalizeLines(data);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005917 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
5918 poolClear(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005919 return 1;
5920}
5921
5922static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005923reportComment(XML_Parser parser, const ENCODING *enc,
5924 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005925{
5926 XML_Char *data;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005927 if (!parser->m_commentHandler) {
5928 if (parser->m_defaultHandler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005929 reportDefault(parser, enc, start, end);
5930 return 1;
5931 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07005932 data = poolStoreString(&parser->m_tempPool,
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005933 enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005934 start + enc->minBytesPerChar * 4,
5935 end - enc->minBytesPerChar * 3);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005936 if (!data)
5937 return 0;
5938 normalizeLines(data);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005939 parser->m_commentHandler(parser->m_handlerArg, data);
5940 poolClear(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005941 return 1;
5942}
5943
5944static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005945reportDefault(XML_Parser parser, const ENCODING *enc,
5946 const char *s, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005947{
5948 if (MUST_CONVERT(enc, s)) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02005949 enum XML_Convert_Result convert_res;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005950 const char **eventPP;
5951 const char **eventEndPP;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005952 if (enc == parser->m_encoding) {
5953 eventPP = &parser->m_eventPtr;
5954 eventEndPP = &parser->m_eventEndPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005955 }
5956 else {
Victor Stinner93d0cb52017-08-18 23:43:54 +02005957 /* To get here, two things must be true; the parser must be
5958 * using a character encoding that is not the same as the
5959 * encoding passed in, and the encoding passed in must need
5960 * conversion to the internal format (UTF-8 unless XML_UNICODE
5961 * is defined). The only occasions on which the encoding passed
5962 * in is not the same as the parser's encoding are when it is
5963 * the internal encoding (e.g. a previously defined parameter
5964 * entity, already converted to internal format). This by
5965 * definition doesn't need conversion, so the whole branch never
5966 * gets executed.
5967 *
5968 * For safety's sake we don't delete these lines and merely
5969 * exclude them from coverage statistics.
5970 *
5971 * LCOV_EXCL_START
5972 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005973 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
5974 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Victor Stinner93d0cb52017-08-18 23:43:54 +02005975 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005976 }
5977 do {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005978 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
5979 convert_res = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005980 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005981 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf, (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005982 *eventPP = s;
Victor Stinner23ec4b52017-06-15 00:54:36 +02005983 } while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005984 }
5985 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07005986 parser->m_defaultHandler(parser->m_handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005987}
5988
5989
5990static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005991defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
5992 XML_Bool isId, const XML_Char *value, XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005993{
5994 DEFAULT_ATTRIBUTE *att;
5995 if (value || isId) {
5996 /* The handling of default attributes gets messed up if we have
5997 a default which duplicates a non-default. */
5998 int i;
5999 for (i = 0; i < type->nDefaultAtts; i++)
6000 if (attId == type->defaultAtts[i].id)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006001 return 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006002 if (isId && !type->idAtt && !attId->xmlns)
6003 type->idAtt = attId;
6004 }
6005 if (type->nDefaultAtts == type->allocDefaultAtts) {
6006 if (type->allocDefaultAtts == 0) {
6007 type->allocDefaultAtts = 8;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006008 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(parser, type->allocDefaultAtts
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006009 * sizeof(DEFAULT_ATTRIBUTE));
Benjamin Peterson4e211002018-06-26 19:25:45 -07006010 if (!type->defaultAtts) {
6011 type->allocDefaultAtts = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006012 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006013 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006014 }
6015 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006016 DEFAULT_ATTRIBUTE *temp;
6017 int count = type->allocDefaultAtts * 2;
6018 temp = (DEFAULT_ATTRIBUTE *)
Benjamin Peterson4e211002018-06-26 19:25:45 -07006019 REALLOC(parser, type->defaultAtts, (count * sizeof(DEFAULT_ATTRIBUTE)));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006020 if (temp == NULL)
6021 return 0;
6022 type->allocDefaultAtts = count;
6023 type->defaultAtts = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006024 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006025 }
6026 att = type->defaultAtts + type->nDefaultAtts;
6027 att->id = attId;
6028 att->value = value;
6029 att->isCdata = isCdata;
6030 if (!isCdata)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006031 attId->maybeTokenized = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006032 type->nDefaultAtts += 1;
6033 return 1;
6034}
6035
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006036static int
6037setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006038{
Benjamin Peterson4e211002018-06-26 19:25:45 -07006039 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006040 const XML_Char *name;
6041 for (name = elementType->name; *name; name++) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006042 if (*name == XML_T(ASCII_COLON)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006043 PREFIX *prefix;
6044 const XML_Char *s;
6045 for (s = elementType->name; s != name; s++) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006046 if (!poolAppendChar(&dtd->pool, *s))
6047 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006048 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006049 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6050 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006051 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006052 sizeof(PREFIX));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006053 if (!prefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006054 return 0;
6055 if (prefix->name == poolStart(&dtd->pool))
6056 poolFinish(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006057 else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006058 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006059 elementType->prefix = prefix;
6060
6061 }
6062 }
6063 return 1;
6064}
6065
6066static ATTRIBUTE_ID *
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006067getAttributeId(XML_Parser parser, const ENCODING *enc,
6068 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006069{
Benjamin Peterson4e211002018-06-26 19:25:45 -07006070 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006071 ATTRIBUTE_ID *id;
6072 const XML_Char *name;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006073 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6074 return NULL;
6075 name = poolStoreString(&dtd->pool, enc, start, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006076 if (!name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006077 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006078 /* skip quotation mark - its storage will be re-used (like in name[-1]) */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006079 ++name;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006080 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006081 if (!id)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006082 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006083 if (id->name != name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006084 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006085 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006086 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07006087 if (!parser->m_ns)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006088 ;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006089 else if (name[0] == XML_T(ASCII_x)
6090 && name[1] == XML_T(ASCII_m)
6091 && name[2] == XML_T(ASCII_l)
6092 && name[3] == XML_T(ASCII_n)
6093 && name[4] == XML_T(ASCII_s)
6094 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006095 if (name[5] == XML_T('\0'))
6096 id->prefix = &dtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006097 else
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006098 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, sizeof(PREFIX));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006099 id->xmlns = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006100 }
6101 else {
6102 int i;
6103 for (i = 0; name[i]; i++) {
Fred Drake08317ae2003-10-21 15:38:55 +00006104 /* attributes without prefix are *not* in the default namespace */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006105 if (name[i] == XML_T(ASCII_COLON)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006106 int j;
6107 for (j = 0; j < i; j++) {
6108 if (!poolAppendChar(&dtd->pool, name[j]))
6109 return NULL;
6110 }
6111 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6112 return NULL;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006113 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006114 sizeof(PREFIX));
Benjamin Peterson196d7db2016-06-11 13:28:56 -07006115 if (!id->prefix)
6116 return NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006117 if (id->prefix->name == poolStart(&dtd->pool))
6118 poolFinish(&dtd->pool);
6119 else
6120 poolDiscard(&dtd->pool);
6121 break;
6122 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006123 }
6124 }
6125 }
6126 return id;
6127}
6128
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006129#define CONTEXT_SEP XML_T(ASCII_FF)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006130
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006131static const XML_Char *
6132getContext(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006133{
Benjamin Peterson4e211002018-06-26 19:25:45 -07006134 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006135 HASH_TABLE_ITER iter;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006136 XML_Bool needSep = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006137
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006138 if (dtd->defaultPrefix.binding) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006139 int i;
6140 int len;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006141 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006142 return NULL;
6143 len = dtd->defaultPrefix.binding->uriLen;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006144 if (parser->m_namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006145 len--;
Victor Stinner93d0cb52017-08-18 23:43:54 +02006146 for (i = 0; i < len; i++) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07006147 if (!poolAppendChar(&parser->m_tempPool, dtd->defaultPrefix.binding->uri[i])) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02006148 /* Because of memory caching, I don't believe this line can be
6149 * executed.
6150 *
6151 * This is part of a loop copying the default prefix binding
6152 * URI into the parser's temporary string pool. Previously,
6153 * that URI was copied into the same string pool, with a
6154 * terminating NUL character, as part of setContext(). When
6155 * the pool was cleared, that leaves a block definitely big
6156 * enough to hold the URI on the free block list of the pool.
6157 * The URI copy in getContext() therefore cannot run out of
6158 * memory.
6159 *
6160 * If the pool is used between the setContext() and
6161 * getContext() calls, the worst it can do is leave a bigger
6162 * block on the front of the free list. Given that this is
6163 * all somewhat inobvious and program logic can be changed, we
6164 * don't delete the line but we do exclude it from the test
6165 * coverage statistics.
6166 */
6167 return NULL; /* LCOV_EXCL_LINE */
6168 }
6169 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006170 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006171 }
6172
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006173 hashTableIterInit(&iter, &(dtd->prefixes));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006174 for (;;) {
6175 int i;
6176 int len;
6177 const XML_Char *s;
6178 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6179 if (!prefix)
6180 break;
Victor Stinner93d0cb52017-08-18 23:43:54 +02006181 if (!prefix->binding) {
6182 /* This test appears to be (justifiable) paranoia. There does
6183 * not seem to be a way of injecting a prefix without a binding
6184 * that doesn't get errored long before this function is called.
6185 * The test should remain for safety's sake, so we instead
6186 * exclude the following line from the coverage statistics.
6187 */
6188 continue; /* LCOV_EXCL_LINE */
6189 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07006190 if (needSep && !poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006191 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006192 for (s = prefix->name; *s; s++)
Benjamin Peterson4e211002018-06-26 19:25:45 -07006193 if (!poolAppendChar(&parser->m_tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006194 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006195 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006196 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006197 len = prefix->binding->uriLen;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006198 if (parser->m_namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006199 len--;
6200 for (i = 0; i < len; i++)
Benjamin Peterson4e211002018-06-26 19:25:45 -07006201 if (!poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006202 return NULL;
6203 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006204 }
6205
6206
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006207 hashTableIterInit(&iter, &(dtd->generalEntities));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006208 for (;;) {
6209 const XML_Char *s;
6210 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
6211 if (!e)
6212 break;
6213 if (!e->open)
6214 continue;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006215 if (needSep && !poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006216 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006217 for (s = e->name; *s; s++)
Benjamin Peterson4e211002018-06-26 19:25:45 -07006218 if (!poolAppendChar(&parser->m_tempPool, *s))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006219 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006220 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006221 }
6222
Benjamin Peterson4e211002018-06-26 19:25:45 -07006223 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006224 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006225 return parser->m_tempPool.start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006226}
6227
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006228static XML_Bool
6229setContext(XML_Parser parser, const XML_Char *context)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006230{
Benjamin Peterson4e211002018-06-26 19:25:45 -07006231 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006232 const XML_Char *s = context;
6233
6234 while (*context != XML_T('\0')) {
6235 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
6236 ENTITY *e;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006237 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006238 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006239 e = (ENTITY *)lookup(parser, &dtd->generalEntities, poolStart(&parser->m_tempPool), 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006240 if (e)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006241 e->open = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006242 if (*s != XML_T('\0'))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006243 s++;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006244 context = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006245 poolDiscard(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006246 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006247 else if (*s == XML_T(ASCII_EQUALS)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006248 PREFIX *prefix;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006249 if (poolLength(&parser->m_tempPool) == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006250 prefix = &dtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006251 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07006252 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006253 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006254 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&parser->m_tempPool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006255 sizeof(PREFIX));
6256 if (!prefix)
6257 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006258 if (prefix->name == poolStart(&parser->m_tempPool)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006259 prefix->name = poolCopyString(&dtd->pool, prefix->name);
6260 if (!prefix->name)
6261 return XML_FALSE;
6262 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07006263 poolDiscard(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006264 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006265 for (context = s + 1;
6266 *context != CONTEXT_SEP && *context != XML_T('\0');
6267 context++)
Benjamin Peterson4e211002018-06-26 19:25:45 -07006268 if (!poolAppendChar(&parser->m_tempPool, *context))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006269 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006270 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006271 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006272 if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
6273 &parser->m_inheritedBindings) != XML_ERROR_NONE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006274 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006275 poolDiscard(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006276 if (*context != XML_T('\0'))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006277 ++context;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006278 s = context;
6279 }
6280 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07006281 if (!poolAppendChar(&parser->m_tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006282 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006283 s++;
6284 }
6285 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006286 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006287}
6288
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006289static void FASTCALL
6290normalizePublicId(XML_Char *publicId)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006291{
6292 XML_Char *p = publicId;
6293 XML_Char *s;
6294 for (s = publicId; *s; s++) {
6295 switch (*s) {
6296 case 0x20:
6297 case 0xD:
6298 case 0xA:
6299 if (p != publicId && p[-1] != 0x20)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006300 *p++ = 0x20;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006301 break;
6302 default:
6303 *p++ = *s;
6304 }
6305 }
6306 if (p != publicId && p[-1] == 0x20)
6307 --p;
6308 *p = XML_T('\0');
6309}
6310
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006311static DTD *
6312dtdCreate(const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006313{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006314 DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD));
6315 if (p == NULL)
6316 return p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006317 poolInit(&(p->pool), ms);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006318 poolInit(&(p->entityValuePool), ms);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006319 hashTableInit(&(p->generalEntities), ms);
6320 hashTableInit(&(p->elementTypes), ms);
6321 hashTableInit(&(p->attributeIds), ms);
6322 hashTableInit(&(p->prefixes), ms);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006323#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006324 p->paramEntityRead = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006325 hashTableInit(&(p->paramEntities), ms);
6326#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006327 p->defaultPrefix.name = NULL;
6328 p->defaultPrefix.binding = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006329
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006330 p->in_eldecl = XML_FALSE;
6331 p->scaffIndex = NULL;
6332 p->scaffold = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006333 p->scaffLevel = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006334 p->scaffSize = 0;
6335 p->scaffCount = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006336 p->contentStringLen = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006337
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006338 p->keepProcessing = XML_TRUE;
6339 p->hasParamEntityRefs = XML_FALSE;
6340 p->standalone = XML_FALSE;
6341 return p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006342}
6343
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006344static void
6345dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006346{
6347 HASH_TABLE_ITER iter;
6348 hashTableIterInit(&iter, &(p->elementTypes));
6349 for (;;) {
6350 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6351 if (!e)
6352 break;
6353 if (e->allocDefaultAtts != 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006354 ms->free_fcn(e->defaultAtts);
6355 }
6356 hashTableClear(&(p->generalEntities));
6357#ifdef XML_DTD
6358 p->paramEntityRead = XML_FALSE;
6359 hashTableClear(&(p->paramEntities));
6360#endif /* XML_DTD */
6361 hashTableClear(&(p->elementTypes));
6362 hashTableClear(&(p->attributeIds));
6363 hashTableClear(&(p->prefixes));
6364 poolClear(&(p->pool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006365 poolClear(&(p->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006366 p->defaultPrefix.name = NULL;
6367 p->defaultPrefix.binding = NULL;
6368
6369 p->in_eldecl = XML_FALSE;
Fred Drake08317ae2003-10-21 15:38:55 +00006370
6371 ms->free_fcn(p->scaffIndex);
6372 p->scaffIndex = NULL;
6373 ms->free_fcn(p->scaffold);
6374 p->scaffold = NULL;
6375
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006376 p->scaffLevel = 0;
6377 p->scaffSize = 0;
6378 p->scaffCount = 0;
6379 p->contentStringLen = 0;
6380
6381 p->keepProcessing = XML_TRUE;
6382 p->hasParamEntityRefs = XML_FALSE;
6383 p->standalone = XML_FALSE;
6384}
6385
6386static void
6387dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms)
6388{
6389 HASH_TABLE_ITER iter;
6390 hashTableIterInit(&iter, &(p->elementTypes));
6391 for (;;) {
6392 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6393 if (!e)
6394 break;
6395 if (e->allocDefaultAtts != 0)
6396 ms->free_fcn(e->defaultAtts);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006397 }
6398 hashTableDestroy(&(p->generalEntities));
6399#ifdef XML_DTD
6400 hashTableDestroy(&(p->paramEntities));
6401#endif /* XML_DTD */
6402 hashTableDestroy(&(p->elementTypes));
6403 hashTableDestroy(&(p->attributeIds));
6404 hashTableDestroy(&(p->prefixes));
6405 poolDestroy(&(p->pool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006406 poolDestroy(&(p->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006407 if (isDocEntity) {
Fred Drake08317ae2003-10-21 15:38:55 +00006408 ms->free_fcn(p->scaffIndex);
6409 ms->free_fcn(p->scaffold);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006410 }
6411 ms->free_fcn(p);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006412}
6413
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006414/* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6415 The new DTD has already been initialized.
6416*/
6417static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006418dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006419{
6420 HASH_TABLE_ITER iter;
6421
6422 /* Copy the prefix table. */
6423
6424 hashTableIterInit(&iter, &(oldDtd->prefixes));
6425 for (;;) {
6426 const XML_Char *name;
6427 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
6428 if (!oldP)
6429 break;
6430 name = poolCopyString(&(newDtd->pool), oldP->name);
6431 if (!name)
6432 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006433 if (!lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006434 return 0;
6435 }
6436
6437 hashTableIterInit(&iter, &(oldDtd->attributeIds));
6438
6439 /* Copy the attribute id table. */
6440
6441 for (;;) {
6442 ATTRIBUTE_ID *newA;
6443 const XML_Char *name;
6444 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
6445
6446 if (!oldA)
6447 break;
6448 /* Remember to allocate the scratch byte before the name. */
6449 if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
6450 return 0;
6451 name = poolCopyString(&(newDtd->pool), oldA->name);
6452 if (!name)
6453 return 0;
6454 ++name;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006455 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006456 sizeof(ATTRIBUTE_ID));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006457 if (!newA)
6458 return 0;
6459 newA->maybeTokenized = oldA->maybeTokenized;
6460 if (oldA->prefix) {
6461 newA->xmlns = oldA->xmlns;
6462 if (oldA->prefix == &oldDtd->defaultPrefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006463 newA->prefix = &newDtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006464 else
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006465 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006466 oldA->prefix->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006467 }
6468 }
6469
6470 /* Copy the element type table. */
6471
6472 hashTableIterInit(&iter, &(oldDtd->elementTypes));
6473
6474 for (;;) {
6475 int i;
6476 ELEMENT_TYPE *newE;
6477 const XML_Char *name;
6478 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6479 if (!oldE)
6480 break;
6481 name = poolCopyString(&(newDtd->pool), oldE->name);
6482 if (!name)
6483 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006484 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006485 sizeof(ELEMENT_TYPE));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006486 if (!newE)
6487 return 0;
6488 if (oldE->nDefaultAtts) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006489 newE->defaultAtts = (DEFAULT_ATTRIBUTE *)
6490 ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6491 if (!newE->defaultAtts) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006492 return 0;
6493 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006494 }
6495 if (oldE->idAtt)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006496 newE->idAtt = (ATTRIBUTE_ID *)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006497 lookup(oldParser, &(newDtd->attributeIds), oldE->idAtt->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006498 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
6499 if (oldE->prefix)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006500 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006501 oldE->prefix->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006502 for (i = 0; i < newE->nDefaultAtts; i++) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006503 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006504 lookup(oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006505 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
6506 if (oldE->defaultAtts[i].value) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006507 newE->defaultAtts[i].value
6508 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
6509 if (!newE->defaultAtts[i].value)
6510 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006511 }
6512 else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006513 newE->defaultAtts[i].value = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006514 }
6515 }
6516
6517 /* Copy the entity tables. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006518 if (!copyEntityTable(oldParser,
6519 &(newDtd->generalEntities),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006520 &(newDtd->pool),
6521 &(oldDtd->generalEntities)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006522 return 0;
6523
6524#ifdef XML_DTD
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006525 if (!copyEntityTable(oldParser,
6526 &(newDtd->paramEntities),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006527 &(newDtd->pool),
6528 &(oldDtd->paramEntities)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006529 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006530 newDtd->paramEntityRead = oldDtd->paramEntityRead;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006531#endif /* XML_DTD */
6532
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006533 newDtd->keepProcessing = oldDtd->keepProcessing;
6534 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006535 newDtd->standalone = oldDtd->standalone;
6536
6537 /* Don't want deep copying for scaffolding */
6538 newDtd->in_eldecl = oldDtd->in_eldecl;
6539 newDtd->scaffold = oldDtd->scaffold;
6540 newDtd->contentStringLen = oldDtd->contentStringLen;
6541 newDtd->scaffSize = oldDtd->scaffSize;
6542 newDtd->scaffLevel = oldDtd->scaffLevel;
6543 newDtd->scaffIndex = oldDtd->scaffIndex;
6544
6545 return 1;
6546} /* End dtdCopy */
6547
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006548static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006549copyEntityTable(XML_Parser oldParser,
6550 HASH_TABLE *newTable,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006551 STRING_POOL *newPool,
6552 const HASH_TABLE *oldTable)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006553{
6554 HASH_TABLE_ITER iter;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006555 const XML_Char *cachedOldBase = NULL;
6556 const XML_Char *cachedNewBase = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006557
6558 hashTableIterInit(&iter, oldTable);
6559
6560 for (;;) {
6561 ENTITY *newE;
6562 const XML_Char *name;
6563 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
6564 if (!oldE)
6565 break;
6566 name = poolCopyString(newPool, oldE->name);
6567 if (!name)
6568 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006569 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006570 if (!newE)
6571 return 0;
6572 if (oldE->systemId) {
6573 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
6574 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006575 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006576 newE->systemId = tem;
6577 if (oldE->base) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006578 if (oldE->base == cachedOldBase)
6579 newE->base = cachedNewBase;
6580 else {
6581 cachedOldBase = oldE->base;
6582 tem = poolCopyString(newPool, cachedOldBase);
6583 if (!tem)
6584 return 0;
6585 cachedNewBase = newE->base = tem;
6586 }
6587 }
6588 if (oldE->publicId) {
6589 tem = poolCopyString(newPool, oldE->publicId);
6590 if (!tem)
6591 return 0;
6592 newE->publicId = tem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006593 }
6594 }
6595 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006596 const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr,
6597 oldE->textLen);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006598 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006599 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006600 newE->textPtr = tem;
6601 newE->textLen = oldE->textLen;
6602 }
6603 if (oldE->notation) {
6604 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
6605 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006606 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006607 newE->notation = tem;
6608 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006609 newE->is_param = oldE->is_param;
6610 newE->is_internal = oldE->is_internal;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006611 }
6612 return 1;
6613}
6614
Fred Drake08317ae2003-10-21 15:38:55 +00006615#define INIT_POWER 6
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006616
Fred Drake08317ae2003-10-21 15:38:55 +00006617static XML_Bool FASTCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006618keyeq(KEY s1, KEY s2)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006619{
6620 for (; *s1 == *s2; s1++, s2++)
6621 if (*s1 == 0)
Fred Drake08317ae2003-10-21 15:38:55 +00006622 return XML_TRUE;
6623 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006624}
6625
Victor Stinner5ff71322017-06-21 14:39:22 +02006626static size_t
6627keylen(KEY s)
6628{
6629 size_t len = 0;
6630 for (; *s; s++, len++);
6631 return len;
6632}
6633
6634static void
6635copy_salt_to_sipkey(XML_Parser parser, struct sipkey * key)
6636{
6637 key->k[0] = 0;
6638 key->k[1] = get_hash_secret_salt(parser);
6639}
6640
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006641static unsigned long FASTCALL
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006642hash(XML_Parser parser, KEY s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006643{
Victor Stinner5ff71322017-06-21 14:39:22 +02006644 struct siphash state;
6645 struct sipkey key;
6646 (void)sip_tobin;
6647 (void)sip24_valid;
6648 copy_salt_to_sipkey(parser, &key);
6649 sip24_init(&state, &key);
6650 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
6651 return (unsigned long)sip24_final(&state);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006652}
6653
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006654static NAMED *
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006655lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006656{
6657 size_t i;
6658 if (table->size == 0) {
6659 size_t tsize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006660 if (!createSize)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006661 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006662 table->power = INIT_POWER;
6663 /* table->size is a power of 2 */
6664 table->size = (size_t)1 << INIT_POWER;
6665 tsize = table->size * sizeof(NAMED *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006666 table->v = (NAMED **)table->mem->malloc_fcn(tsize);
Fred Drake31d485c2004-08-03 07:06:22 +00006667 if (!table->v) {
6668 table->size = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006669 return NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00006670 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006671 memset(table->v, 0, tsize);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006672 i = hash(parser, name) & ((unsigned long)table->size - 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006673 }
6674 else {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006675 unsigned long h = hash(parser, name);
Fred Drake08317ae2003-10-21 15:38:55 +00006676 unsigned long mask = (unsigned long)table->size - 1;
6677 unsigned char step = 0;
6678 i = h & mask;
6679 while (table->v[i]) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006680 if (keyeq(name, table->v[i]->name))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006681 return table->v[i];
Fred Drake08317ae2003-10-21 15:38:55 +00006682 if (!step)
6683 step = PROBE_STEP(h, mask, table->power);
6684 i < step ? (i += table->size - step) : (i -= step);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006685 }
6686 if (!createSize)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006687 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006688
6689 /* check for overflow (table is half full) */
6690 if (table->used >> (table->power - 1)) {
6691 unsigned char newPower = table->power + 1;
6692 size_t newSize = (size_t)1 << newPower;
6693 unsigned long newMask = (unsigned long)newSize - 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006694 size_t tsize = newSize * sizeof(NAMED *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006695 NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006696 if (!newV)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006697 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006698 memset(newV, 0, tsize);
6699 for (i = 0; i < table->size; i++)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006700 if (table->v[i]) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006701 unsigned long newHash = hash(parser, table->v[i]->name);
Fred Drake08317ae2003-10-21 15:38:55 +00006702 size_t j = newHash & newMask;
6703 step = 0;
6704 while (newV[j]) {
6705 if (!step)
6706 step = PROBE_STEP(newHash, newMask, newPower);
6707 j < step ? (j += newSize - step) : (j -= step);
6708 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006709 newV[j] = table->v[i];
6710 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006711 table->mem->free_fcn(table->v);
6712 table->v = newV;
Fred Drake08317ae2003-10-21 15:38:55 +00006713 table->power = newPower;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006714 table->size = newSize;
Fred Drake08317ae2003-10-21 15:38:55 +00006715 i = h & newMask;
6716 step = 0;
6717 while (table->v[i]) {
6718 if (!step)
6719 step = PROBE_STEP(h, newMask, newPower);
6720 i < step ? (i += newSize - step) : (i -= step);
6721 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006722 }
6723 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006724 table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006725 if (!table->v[i])
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006726 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006727 memset(table->v[i], 0, createSize);
6728 table->v[i]->name = name;
6729 (table->used)++;
6730 return table->v[i];
6731}
6732
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006733static void FASTCALL
6734hashTableClear(HASH_TABLE *table)
6735{
6736 size_t i;
6737 for (i = 0; i < table->size; i++) {
Fred Drake08317ae2003-10-21 15:38:55 +00006738 table->mem->free_fcn(table->v[i]);
6739 table->v[i] = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006740 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006741 table->used = 0;
6742}
6743
6744static void FASTCALL
6745hashTableDestroy(HASH_TABLE *table)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006746{
6747 size_t i;
Fred Drake08317ae2003-10-21 15:38:55 +00006748 for (i = 0; i < table->size; i++)
6749 table->mem->free_fcn(table->v[i]);
6750 table->mem->free_fcn(table->v);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006751}
6752
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006753static void FASTCALL
6754hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006755{
Fred Drake08317ae2003-10-21 15:38:55 +00006756 p->power = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006757 p->size = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006758 p->used = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006759 p->v = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006760 p->mem = ms;
6761}
6762
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006763static void FASTCALL
6764hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006765{
6766 iter->p = table->v;
6767 iter->end = iter->p + table->size;
6768}
6769
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006770static NAMED * FASTCALL
6771hashTableIterNext(HASH_TABLE_ITER *iter)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006772{
6773 while (iter->p != iter->end) {
6774 NAMED *tem = *(iter->p)++;
6775 if (tem)
6776 return tem;
6777 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006778 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006779}
6780
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006781static void FASTCALL
6782poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006783{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006784 pool->blocks = NULL;
6785 pool->freeBlocks = NULL;
6786 pool->start = NULL;
6787 pool->ptr = NULL;
6788 pool->end = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006789 pool->mem = ms;
6790}
6791
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006792static void FASTCALL
6793poolClear(STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006794{
6795 if (!pool->freeBlocks)
6796 pool->freeBlocks = pool->blocks;
6797 else {
6798 BLOCK *p = pool->blocks;
6799 while (p) {
6800 BLOCK *tem = p->next;
6801 p->next = pool->freeBlocks;
6802 pool->freeBlocks = p;
6803 p = tem;
6804 }
6805 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006806 pool->blocks = NULL;
6807 pool->start = NULL;
6808 pool->ptr = NULL;
6809 pool->end = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006810}
6811
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006812static void FASTCALL
6813poolDestroy(STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006814{
6815 BLOCK *p = pool->blocks;
6816 while (p) {
6817 BLOCK *tem = p->next;
6818 pool->mem->free_fcn(p);
6819 p = tem;
6820 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006821 p = pool->freeBlocks;
6822 while (p) {
6823 BLOCK *tem = p->next;
6824 pool->mem->free_fcn(p);
6825 p = tem;
6826 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006827}
6828
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006829static XML_Char *
6830poolAppend(STRING_POOL *pool, const ENCODING *enc,
6831 const char *ptr, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006832{
6833 if (!pool->ptr && !poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006834 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006835 for (;;) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02006836 const enum XML_Convert_Result convert_res = XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
6837 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006838 break;
6839 if (!poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006840 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006841 }
6842 return pool->start;
6843}
6844
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006845static const XML_Char * FASTCALL
6846poolCopyString(STRING_POOL *pool, const XML_Char *s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006847{
6848 do {
6849 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006850 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006851 } while (*s++);
6852 s = pool->start;
6853 poolFinish(pool);
6854 return s;
6855}
6856
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006857static const XML_Char *
6858poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006859{
Victor Stinner93d0cb52017-08-18 23:43:54 +02006860 if (!pool->ptr && !poolGrow(pool)) {
6861 /* The following line is unreachable given the current usage of
6862 * poolCopyStringN(). Currently it is called from exactly one
6863 * place to copy the text of a simple general entity. By that
6864 * point, the name of the entity is already stored in the pool, so
6865 * pool->ptr cannot be NULL.
6866 *
6867 * If poolCopyStringN() is used elsewhere as it well might be,
6868 * this line may well become executable again. Regardless, this
6869 * sort of check shouldn't be removed lightly, so we just exclude
6870 * it from the coverage statistics.
6871 */
6872 return NULL; /* LCOV_EXCL_LINE */
6873 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006874 for (; n > 0; --n, s++) {
6875 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006876 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006877 }
6878 s = pool->start;
6879 poolFinish(pool);
6880 return s;
6881}
6882
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006883static const XML_Char * FASTCALL
6884poolAppendString(STRING_POOL *pool, const XML_Char *s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006885{
6886 while (*s) {
6887 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006888 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006889 s++;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006890 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006891 return pool->start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006892}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006893
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006894static XML_Char *
6895poolStoreString(STRING_POOL *pool, const ENCODING *enc,
6896 const char *ptr, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006897{
6898 if (!poolAppend(pool, enc, ptr, end))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006899 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006900 if (pool->ptr == pool->end && !poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006901 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006902 *(pool->ptr)++ = 0;
6903 return pool->start;
6904}
6905
Victor Stinner5ff71322017-06-21 14:39:22 +02006906static size_t
6907poolBytesToAllocateFor(int blockSize)
6908{
6909 /* Unprotected math would be:
6910 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
6911 **
6912 ** Detect overflow, avoiding _signed_ overflow undefined behavior
6913 ** For a + b * c we check b * c in isolation first, so that addition of a
6914 ** on top has no chance of making us accept a small non-negative number
6915 */
6916 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
6917
6918 if (blockSize <= 0)
6919 return 0;
6920
6921 if (blockSize > (int)(INT_MAX / stretch))
6922 return 0;
6923
6924 {
6925 const int stretchedBlockSize = blockSize * (int)stretch;
6926 const int bytesToAllocate = (int)(
6927 offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
6928 if (bytesToAllocate < 0)
6929 return 0;
6930
6931 return (size_t)bytesToAllocate;
6932 }
6933}
6934
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006935static XML_Bool FASTCALL
6936poolGrow(STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006937{
6938 if (pool->freeBlocks) {
6939 if (pool->start == 0) {
6940 pool->blocks = pool->freeBlocks;
6941 pool->freeBlocks = pool->freeBlocks->next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006942 pool->blocks->next = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006943 pool->start = pool->blocks->s;
6944 pool->end = pool->start + pool->blocks->size;
6945 pool->ptr = pool->start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006946 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006947 }
6948 if (pool->end - pool->start < pool->freeBlocks->size) {
6949 BLOCK *tem = pool->freeBlocks->next;
6950 pool->freeBlocks->next = pool->blocks;
6951 pool->blocks = pool->freeBlocks;
6952 pool->freeBlocks = tem;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006953 memcpy(pool->blocks->s, pool->start,
6954 (pool->end - pool->start) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006955 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
6956 pool->start = pool->blocks->s;
6957 pool->end = pool->start + pool->blocks->size;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006958 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006959 }
6960 }
6961 if (pool->blocks && pool->start == pool->blocks->s) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02006962 BLOCK *temp;
6963 int blockSize = (int)((unsigned)(pool->end - pool->start)*2U);
Victor Stinner5ff71322017-06-21 14:39:22 +02006964 size_t bytesToAllocate;
Victor Stinner23ec4b52017-06-15 00:54:36 +02006965
Benjamin Peterson4e211002018-06-26 19:25:45 -07006966 /* NOTE: Needs to be calculated prior to calling `realloc`
6967 to avoid dangling pointers: */
Victor Stinner93d0cb52017-08-18 23:43:54 +02006968 const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
6969
6970 if (blockSize < 0) {
6971 /* This condition traps a situation where either more than
6972 * INT_MAX/2 bytes have already been allocated. This isn't
6973 * readily testable, since it is unlikely that an average
6974 * machine will have that much memory, so we exclude it from the
6975 * coverage statistics.
6976 */
6977 return XML_FALSE; /* LCOV_EXCL_LINE */
6978 }
Victor Stinner23ec4b52017-06-15 00:54:36 +02006979
Victor Stinner5ff71322017-06-21 14:39:22 +02006980 bytesToAllocate = poolBytesToAllocateFor(blockSize);
6981 if (bytesToAllocate == 0)
6982 return XML_FALSE;
6983
Victor Stinner23ec4b52017-06-15 00:54:36 +02006984 temp = (BLOCK *)
Victor Stinner5ff71322017-06-21 14:39:22 +02006985 pool->mem->realloc_fcn(pool->blocks, (unsigned)bytesToAllocate);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006986 if (temp == NULL)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006987 return XML_FALSE;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006988 pool->blocks = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006989 pool->blocks->size = blockSize;
Victor Stinner93d0cb52017-08-18 23:43:54 +02006990 pool->ptr = pool->blocks->s + offsetInsideBlock;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006991 pool->start = pool->blocks->s;
6992 pool->end = pool->start + blockSize;
6993 }
6994 else {
6995 BLOCK *tem;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00006996 int blockSize = (int)(pool->end - pool->start);
Victor Stinner5ff71322017-06-21 14:39:22 +02006997 size_t bytesToAllocate;
Victor Stinner23ec4b52017-06-15 00:54:36 +02006998
Victor Stinner93d0cb52017-08-18 23:43:54 +02006999 if (blockSize < 0) {
7000 /* This condition traps a situation where either more than
7001 * INT_MAX bytes have already been allocated (which is prevented
7002 * by various pieces of program logic, not least this one, never
7003 * mind the unlikelihood of actually having that much memory) or
7004 * the pool control fields have been corrupted (which could
7005 * conceivably happen in an extremely buggy user handler
7006 * function). Either way it isn't readily testable, so we
7007 * exclude it from the coverage statistics.
7008 */
7009 return XML_FALSE; /* LCOV_EXCL_LINE */
7010 }
Victor Stinner23ec4b52017-06-15 00:54:36 +02007011
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007012 if (blockSize < INIT_BLOCK_SIZE)
7013 blockSize = INIT_BLOCK_SIZE;
Victor Stinner5ff71322017-06-21 14:39:22 +02007014 else {
7015 /* Detect overflow, avoiding _signed_ overflow undefined behavior */
7016 if ((int)((unsigned)blockSize * 2U) < 0) {
7017 return XML_FALSE;
7018 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007019 blockSize *= 2;
Victor Stinner5ff71322017-06-21 14:39:22 +02007020 }
7021
7022 bytesToAllocate = poolBytesToAllocateFor(blockSize);
7023 if (bytesToAllocate == 0)
7024 return XML_FALSE;
7025
7026 tem = (BLOCK *)pool->mem->malloc_fcn(bytesToAllocate);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007027 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007028 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007029 tem->size = blockSize;
7030 tem->next = pool->blocks;
7031 pool->blocks = tem;
7032 if (pool->ptr != pool->start)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007033 memcpy(tem->s, pool->start,
7034 (pool->ptr - pool->start) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007035 pool->ptr = tem->s + (pool->ptr - pool->start);
7036 pool->start = tem->s;
7037 pool->end = tem->s + blockSize;
7038 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007039 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007040}
7041
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007042static int FASTCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007043nextScaffoldPart(XML_Parser parser)
7044{
Benjamin Peterson4e211002018-06-26 19:25:45 -07007045 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007046 CONTENT_SCAFFOLD * me;
7047 int next;
7048
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007049 if (!dtd->scaffIndex) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07007050 dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007051 if (!dtd->scaffIndex)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007052 return -1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007053 dtd->scaffIndex[0] = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007054 }
7055
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007056 if (dtd->scaffCount >= dtd->scaffSize) {
7057 CONTENT_SCAFFOLD *temp;
7058 if (dtd->scaffold) {
7059 temp = (CONTENT_SCAFFOLD *)
Benjamin Peterson4e211002018-06-26 19:25:45 -07007060 REALLOC(parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007061 if (temp == NULL)
7062 return -1;
7063 dtd->scaffSize *= 2;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007064 }
7065 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07007066 temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007067 * sizeof(CONTENT_SCAFFOLD));
7068 if (temp == NULL)
7069 return -1;
7070 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007071 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007072 dtd->scaffold = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007073 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007074 next = dtd->scaffCount++;
7075 me = &dtd->scaffold[next];
7076 if (dtd->scaffLevel) {
7077 CONTENT_SCAFFOLD *parent = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel-1]];
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007078 if (parent->lastchild) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007079 dtd->scaffold[parent->lastchild].nextsib = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007080 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007081 if (!parent->childcnt)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007082 parent->firstchild = next;
7083 parent->lastchild = next;
7084 parent->childcnt++;
7085 }
7086 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
7087 return next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007088}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007089
7090static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007091build_node(XML_Parser parser,
7092 int src_node,
7093 XML_Content *dest,
7094 XML_Content **contpos,
7095 XML_Char **strpos)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007096{
Benjamin Peterson4e211002018-06-26 19:25:45 -07007097 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007098 dest->type = dtd->scaffold[src_node].type;
7099 dest->quant = dtd->scaffold[src_node].quant;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007100 if (dest->type == XML_CTYPE_NAME) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007101 const XML_Char *src;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007102 dest->name = *strpos;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007103 src = dtd->scaffold[src_node].name;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007104 for (;;) {
7105 *(*strpos)++ = *src;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007106 if (!*src)
7107 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007108 src++;
7109 }
7110 dest->numchildren = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007111 dest->children = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007112 }
7113 else {
7114 unsigned int i;
7115 int cn;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007116 dest->numchildren = dtd->scaffold[src_node].childcnt;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007117 dest->children = *contpos;
7118 *contpos += dest->numchildren;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007119 for (i = 0, cn = dtd->scaffold[src_node].firstchild;
7120 i < dest->numchildren;
7121 i++, cn = dtd->scaffold[cn].nextsib) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007122 build_node(parser, cn, &(dest->children[i]), contpos, strpos);
7123 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007124 dest->name = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007125 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007126}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007127
7128static XML_Content *
7129build_model (XML_Parser parser)
7130{
Benjamin Peterson4e211002018-06-26 19:25:45 -07007131 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007132 XML_Content *ret;
7133 XML_Content *cpos;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007134 XML_Char * str;
7135 int allocsize = (dtd->scaffCount * sizeof(XML_Content)
7136 + (dtd->contentStringLen * sizeof(XML_Char)));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007137
Benjamin Peterson4e211002018-06-26 19:25:45 -07007138 ret = (XML_Content *)MALLOC(parser, allocsize);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007139 if (!ret)
7140 return NULL;
7141
7142 str = (XML_Char *) (&ret[dtd->scaffCount]);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007143 cpos = &ret[1];
7144
7145 build_node(parser, 0, ret, &cpos, &str);
7146 return ret;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007147}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007148
7149static ELEMENT_TYPE *
7150getElementType(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007151 const ENCODING *enc,
7152 const char *ptr,
7153 const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007154{
Benjamin Peterson4e211002018-06-26 19:25:45 -07007155 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007156 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007157 ELEMENT_TYPE *ret;
7158
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007159 if (!name)
7160 return NULL;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07007161 ret = (ELEMENT_TYPE *) lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007162 if (!ret)
7163 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007164 if (ret->name != name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007165 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007166 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007167 poolFinish(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007168 if (!setElementTypePrefix(parser, ret))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007169 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007170 }
7171 return ret;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007172}
Victor Stinner93d0cb52017-08-18 23:43:54 +02007173
7174static XML_Char *
7175copyString(const XML_Char *s,
7176 const XML_Memory_Handling_Suite *memsuite)
7177{
7178 int charsRequired = 0;
7179 XML_Char *result;
7180
7181 /* First determine how long the string is */
7182 while (s[charsRequired] != 0) {
7183 charsRequired++;
7184 }
7185 /* Include the terminator */
7186 charsRequired++;
7187
7188 /* Now allocate space for the copy */
7189 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
7190 if (result == NULL)
7191 return NULL;
7192 /* Copy the original into place */
7193 memcpy(result, s, charsRequired * sizeof(XML_Char));
7194 return result;
7195}