blob: 9c0987f4f6d8749d2d3a0885219e366c1ac67393 [file] [log] [blame]
Benjamin Peterson3b03b092019-06-27 20:54:44 -07001/* 69df5be70289a11fb834869ce4a91c23c1d9dd04baffcbd10e86742d149a080c (2.2.7+)
Victor Stinner759e30e2017-09-05 01:58:08 +02002 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
Victor Stinner5ff71322017-06-21 14:39:22 +02008
Victor Stinner759e30e2017-09-05 01:58:08 +02009 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000-2017 Expat development team
11 Licensed under the MIT license:
12
13 Permission is hereby granted, free of charge, to any person obtaining
14 a copy of this software and associated documentation files (the
15 "Software"), to deal in the Software without restriction, including
16 without limitation the rights to use, copy, modify, merge, publish,
17 distribute, sublicense, and/or sell copies of the Software, and to permit
18 persons to whom the Software is furnished to do so, subject to the
19 following conditions:
20
21 The above copyright notice and this permission notice shall be included
22 in all copies or substantial portions of the Software.
23
24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
29 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30 USE OR OTHER DEALINGS IN THE SOFTWARE.
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +000031*/
32
Victor Stinner93d0cb52017-08-18 23:43:54 +020033#if !defined(_GNU_SOURCE)
34# define _GNU_SOURCE 1 /* syscall prototype */
35#endif
Victor Stinner5ff71322017-06-21 14:39:22 +020036
#include <stddef.h>
#include <stdint.h> /* uintptr_t */
#include <string.h> /* memset(), memcpy() */
#include <assert.h>
#include <limits.h> /* UINT_MAX */
#include <stdio.h> /* fprintf */
#include <stdlib.h> /* getenv */
Victor Stinner23ec4b52017-06-15 00:54:36 +020043
Victor Stinner5ff71322017-06-21 14:39:22 +020044#ifdef _WIN32
Victor Stinner23ec4b52017-06-15 00:54:36 +020045#define getpid GetCurrentProcessId
46#else
47#include <sys/time.h> /* gettimeofday() */
48#include <sys/types.h> /* getpid() */
49#include <unistd.h> /* getpid() */
Victor Stinner93d0cb52017-08-18 23:43:54 +020050#include <fcntl.h> /* O_RDONLY */
51#include <errno.h>
Victor Stinner23ec4b52017-06-15 00:54:36 +020052#endif
53
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070054#define XML_BUILDING_EXPAT 1
55
Victor Stinner5ff71322017-06-21 14:39:22 +020056#ifdef _WIN32
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070057#include "winconfig.h"
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070058#elif defined(HAVE_EXPAT_CONFIG_H)
59#include <expat_config.h>
Victor Stinner5ff71322017-06-21 14:39:22 +020060#endif /* ndef _WIN32 */
Christian Heimesaa152762013-12-06 23:43:50 +010061
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070062#include "ascii.h"
Fred Drake08317ae2003-10-21 15:38:55 +000063#include "expat.h"
Victor Stinner5ff71322017-06-21 14:39:22 +020064#include "siphash.h"
Fred Drake08317ae2003-10-21 15:38:55 +000065
Victor Stinner93d0cb52017-08-18 23:43:54 +020066#if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
67# if defined(HAVE_GETRANDOM)
68# include <sys/random.h> /* getrandom */
69# else
70# include <unistd.h> /* syscall */
71# include <sys/syscall.h> /* SYS_getrandom */
72# endif
73# if ! defined(GRND_NONBLOCK)
74# define GRND_NONBLOCK 0x0001
75# endif /* defined(GRND_NONBLOCK) */
76#endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
77
78#if defined(HAVE_LIBBSD) \
79 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
80# include <bsd/stdlib.h>
81#endif
82
83#if defined(_WIN32) && !defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
84# define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
85#endif
86
87#if !defined(HAVE_GETRANDOM) && !defined(HAVE_SYSCALL_GETRANDOM) \
88 && !defined(HAVE_ARC4RANDOM_BUF) && !defined(HAVE_ARC4RANDOM) \
89 && !defined(XML_DEV_URANDOM) \
90 && !defined(_WIN32) \
91 && !defined(XML_POOR_ENTROPY)
92# error \
93 You do not have support for any sources of high quality entropy \
94 enabled. For end user security, that is probably not what you want. \
95 \
96 Your options include: \
97 * Linux + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
98 * Linux + glibc <2.25 (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
99 * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
100 * BSD / macOS <10.7 (arc4random): HAVE_ARC4RANDOM, \
101 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
102 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
103 * Linux / BSD / macOS (/dev/urandom): XML_DEV_URANDOM \
104 * Windows (RtlGenRandom): _WIN32. \
105 \
106 If insist on not using any of these, bypass this error by defining \
107 XML_POOR_ENTROPY; you have been warned. \
108 \
Victor Stinner93d0cb52017-08-18 23:43:54 +0200109 If you have reasons to patch this detection code away or need changes \
110 to the build system, please open a bug. Thank you!
111#endif
112
113
/* Bind the generic Xml* names to the UTF-16 or UTF-8 variants, depending on
   whether XML_UNICODE is defined, and pick the matching ICHAR type. */
#ifdef XML_UNICODE
#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#define XmlConvert XmlUtf16Convert
#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
#define XmlEncode XmlUtf16Encode
/* Conversion is needed unless the encoding is UTF-16 and s has an even
   address.  The address is inspected through uintptr_t; subtracting a null
   pointer from s (as was done before) is undefined behavior. */
#define MUST_CONVERT(enc, s) \
  (!(enc)->isUtf16 || (((uintptr_t)(s)) & 1))
typedef unsigned short ICHAR;
#else
#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#define XmlConvert XmlUtf8Convert
#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
#define XmlEncode XmlUtf8Encode
/* Conversion is needed unless the encoding is already UTF-8. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
typedef char ICHAR;
#endif
132
133
134#ifndef XML_NS
135
136#define XmlInitEncodingNS XmlInitEncoding
137#define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
138#undef XmlGetInternalEncodingNS
139#define XmlGetInternalEncodingNS XmlGetInternalEncoding
140#define XmlParseXmlDeclNS XmlParseXmlDecl
141
142#endif
143
/* XML_T(x) yields a character constant and XML_L(x) a string literal in the
   XML_Char flavour selected by XML_UNICODE / XML_UNICODE_WCHAR_T.  The cast
   forms are fully parenthesized so they compose safely inside larger
   expressions. */
#ifdef XML_UNICODE

#ifdef XML_UNICODE_WCHAR_T
#define XML_T(x) ((const wchar_t)(x))
#define XML_L(x) L ## x
#else
#define XML_T(x) ((const unsigned short)(x))
#define XML_L(x) x
#endif

#else

#define XML_T(x) x
#define XML_L(x) x

#endif
160
/* Round up n to be a multiple of sz, where sz is a power of 2. */
#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))

/* Do safe (NULL-aware) pointer arithmetic: evaluates to p - q when both
   pointers are non-NULL and to 0 otherwise.  Both arguments are evaluated
   twice, so they must be free of side effects. */
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
166
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000167#include "internal.h"
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000168#include "xmltok.h"
169#include "xmlrole.h"
170
171typedef const XML_Char *KEY;
172
173typedef struct {
174 KEY name;
175} NAMED;
176
177typedef struct {
178 NAMED **v;
Fred Drake08317ae2003-10-21 15:38:55 +0000179 unsigned char power;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000180 size_t size;
181 size_t used;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000182 const XML_Memory_Handling_Suite *mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000183} HASH_TABLE;
184
Victor Stinner5ff71322017-06-21 14:39:22 +0200185static size_t
186keylen(KEY s);
Fred Drake08317ae2003-10-21 15:38:55 +0000187
Victor Stinner5ff71322017-06-21 14:39:22 +0200188static void
189copy_salt_to_sipkey(XML_Parser parser, struct sipkey * key);
Fred Drake08317ae2003-10-21 15:38:55 +0000190
/* For probing (after a collision) we need a step size that is relatively
   prime to the hash table size, which is a power of 2.  We use
   double-hashing, since we can calculate a second hash value cheaply by
   taking those bits of the first hash value that were discarded (masked
   out) when the table index was calculated: index = hash & mask, where
   mask = table->size - 1.
   We limit the maximum step size to table->size / 4 (mask >> 2) and make
   it odd, since odd numbers are always relatively prime to a power of 2.
   Note that PROBE_STEP narrows its result to unsigned char.
*/
#define SECOND_HASH(hash, mask, power) \
  ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power) \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
203
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000204typedef struct {
205 NAMED **p;
206 NAMED **end;
207} HASH_TABLE_ITER;
208
209#define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
210#define INIT_DATA_BUF_SIZE 1024
211#define INIT_ATTS_SIZE 16
Fred Drake08317ae2003-10-21 15:38:55 +0000212#define INIT_ATTS_VERSION 0xFFFFFFFF
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000213#define INIT_BLOCK_SIZE 1024
214#define INIT_BUFFER_SIZE 1024
215
216#define EXPAND_SPARE 24
217
218typedef struct binding {
219 struct prefix *prefix;
220 struct binding *nextTagBinding;
221 struct binding *prevPrefixBinding;
222 const struct attribute_id *attId;
223 XML_Char *uri;
224 int uriLen;
225 int uriAlloc;
226} BINDING;
227
228typedef struct prefix {
229 const XML_Char *name;
230 BINDING *binding;
231} PREFIX;
232
233typedef struct {
234 const XML_Char *str;
235 const XML_Char *localPart;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000236 const XML_Char *prefix;
237 int strLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000238 int uriLen;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000239 int prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000240} TAG_NAME;
241
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000242/* TAG represents an open element.
243 The name of the element is stored in both the document and API
244 encodings. The memory buffer 'buf' is a separately-allocated
245 memory area which stores the name. During the XML_Parse()/
246 XMLParseBuffer() when the element is open, the memory for the 'raw'
247 version of the name (in the document encoding) is shared with the
248 document buffer. If the element is open across calls to
249 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
250 contain the 'raw' name as well.
251
252 A parser re-uses these structures, maintaining a list of allocated
253 TAG objects in a free list.
254*/
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000255typedef struct tag {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000256 struct tag *parent; /* parent of this element */
257 const char *rawName; /* tagName in the original encoding */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000258 int rawNameLength;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000259 TAG_NAME name; /* tagName in the API encoding */
260 char *buf; /* buffer for name components */
261 char *bufEnd; /* end of the buffer */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000262 BINDING *bindings;
263} TAG;
264
265typedef struct {
266 const XML_Char *name;
267 const XML_Char *textPtr;
Fred Drake31d485c2004-08-03 07:06:22 +0000268 int textLen; /* length in XML_Chars */
269 int processed; /* # of processed bytes - when suspended */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000270 const XML_Char *systemId;
271 const XML_Char *base;
272 const XML_Char *publicId;
273 const XML_Char *notation;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000274 XML_Bool open;
275 XML_Bool is_param;
276 XML_Bool is_internal; /* true if declared in internal subset outside PE */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000277} ENTITY;
278
279typedef struct {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000280 enum XML_Content_Type type;
281 enum XML_Content_Quant quant;
282 const XML_Char * name;
283 int firstchild;
284 int lastchild;
285 int childcnt;
286 int nextsib;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000287} CONTENT_SCAFFOLD;
288
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000289#define INIT_SCAFFOLD_ELEMENTS 32
290
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000291typedef struct block {
292 struct block *next;
293 int size;
294 XML_Char s[1];
295} BLOCK;
296
297typedef struct {
298 BLOCK *blocks;
299 BLOCK *freeBlocks;
300 const XML_Char *end;
301 XML_Char *ptr;
302 XML_Char *start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000303 const XML_Memory_Handling_Suite *mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000304} STRING_POOL;
305
306/* The XML_Char before the name is used to determine whether
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000307 an attribute has been specified. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000308typedef struct attribute_id {
309 XML_Char *name;
310 PREFIX *prefix;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000311 XML_Bool maybeTokenized;
312 XML_Bool xmlns;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000313} ATTRIBUTE_ID;
314
315typedef struct {
316 const ATTRIBUTE_ID *id;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000317 XML_Bool isCdata;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000318 const XML_Char *value;
319} DEFAULT_ATTRIBUTE;
320
321typedef struct {
Fred Drake08317ae2003-10-21 15:38:55 +0000322 unsigned long version;
323 unsigned long hash;
324 const XML_Char *uriName;
325} NS_ATT;
326
327typedef struct {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000328 const XML_Char *name;
329 PREFIX *prefix;
330 const ATTRIBUTE_ID *idAtt;
331 int nDefaultAtts;
332 int allocDefaultAtts;
333 DEFAULT_ATTRIBUTE *defaultAtts;
334} ELEMENT_TYPE;
335
336typedef struct {
337 HASH_TABLE generalEntities;
338 HASH_TABLE elementTypes;
339 HASH_TABLE attributeIds;
340 HASH_TABLE prefixes;
341 STRING_POOL pool;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000342 STRING_POOL entityValuePool;
343 /* false once a parameter entity reference has been skipped */
344 XML_Bool keepProcessing;
345 /* true once an internal or external PE reference has been encountered;
346 this includes the reference to an external subset */
347 XML_Bool hasParamEntityRefs;
348 XML_Bool standalone;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000349#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000350 /* indicates if external PE has been read */
351 XML_Bool paramEntityRead;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000352 HASH_TABLE paramEntities;
353#endif /* XML_DTD */
354 PREFIX defaultPrefix;
355 /* === scaffolding for building content model === */
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000356 XML_Bool in_eldecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000357 CONTENT_SCAFFOLD *scaffold;
358 unsigned contentStringLen;
359 unsigned scaffSize;
360 unsigned scaffCount;
361 int scaffLevel;
362 int *scaffIndex;
363} DTD;
364
365typedef struct open_internal_entity {
366 const char *internalEventPtr;
367 const char *internalEventEndPtr;
368 struct open_internal_entity *next;
369 ENTITY *entity;
Fred Drake31d485c2004-08-03 07:06:22 +0000370 int startTagLevel;
371 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000372} OPEN_INTERNAL_ENTITY;
373
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000374typedef enum XML_Error PTRCALL Processor(XML_Parser parser,
375 const char *start,
376 const char *end,
377 const char **endPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000378
379static Processor prologProcessor;
380static Processor prologInitProcessor;
381static Processor contentProcessor;
382static Processor cdataSectionProcessor;
383#ifdef XML_DTD
384static Processor ignoreSectionProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000385static Processor externalParEntProcessor;
386static Processor externalParEntInitProcessor;
387static Processor entityValueProcessor;
388static Processor entityValueInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000389#endif /* XML_DTD */
390static Processor epilogProcessor;
391static Processor errorProcessor;
392static Processor externalEntityInitProcessor;
393static Processor externalEntityInitProcessor2;
394static Processor externalEntityInitProcessor3;
395static Processor externalEntityContentProcessor;
Fred Drake31d485c2004-08-03 07:06:22 +0000396static Processor internalEntityProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000397
398static enum XML_Error
399handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
400static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000401processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
Fred Drake31d485c2004-08-03 07:06:22 +0000402 const char *s, const char *next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000403static enum XML_Error
404initializeEncoding(XML_Parser parser);
405static enum XML_Error
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700406doProlog(XML_Parser parser, const ENCODING *enc, const char *s,
407 const char *end, int tok, const char *next, const char **nextPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000408 XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000409static enum XML_Error
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700410processInternalEntity(XML_Parser parser, ENTITY *entity,
Fred Drake31d485c2004-08-03 07:06:22 +0000411 XML_Bool betweenDecl);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000412static enum XML_Error
413doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700414 const char *start, const char *end, const char **endPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000415 XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000416static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000417doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000418 const char *end, const char **nextPtr, XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000419#ifdef XML_DTD
420static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000421doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000422 const char *end, const char **nextPtr, XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000423#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000424
Victor Stinner5ff71322017-06-21 14:39:22 +0200425static void
426freeBindings(XML_Parser parser, BINDING *bindings);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000427static enum XML_Error
Fred Drake4faea012003-01-28 06:42:40 +0000428storeAtts(XML_Parser parser, const ENCODING *, const char *s,
429 TAG_NAME *tagNamePtr, BINDING **bindingsPtr);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000430static enum XML_Error
431addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
432 const XML_Char *uri, BINDING **bindingsPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000433static int
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700434defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
Fred Drake31d485c2004-08-03 07:06:22 +0000435 XML_Bool isId, const XML_Char *dfltValue, XML_Parser parser);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000436static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000437storeAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
438 const char *, const char *, STRING_POOL *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000439static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000440appendAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
441 const char *, const char *, STRING_POOL *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000442static ATTRIBUTE_ID *
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000443getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
444 const char *end);
445static int
446setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000447static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000448storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start,
449 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000450static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000451reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
452 const char *start, const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000453static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000454reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
455 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000456static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000457reportDefault(XML_Parser parser, const ENCODING *enc, const char *start,
458 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000459
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000460static const XML_Char * getContext(XML_Parser parser);
461static XML_Bool
462setContext(XML_Parser parser, const XML_Char *context);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000463
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000464static void FASTCALL normalizePublicId(XML_Char *s);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000465
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000466static DTD * dtdCreate(const XML_Memory_Handling_Suite *ms);
Benjamin Peterson4e211002018-06-26 19:25:45 -0700467/* do not call if m_parentParser != NULL */
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000468static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
469static void
470dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms);
471static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700472dtdCopy(XML_Parser oldParser,
473 DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000474static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700475copyEntityTable(XML_Parser oldParser,
476 HASH_TABLE *, STRING_POOL *, const HASH_TABLE *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000477static NAMED *
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700478lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000479static void FASTCALL
480hashTableInit(HASH_TABLE *, const XML_Memory_Handling_Suite *ms);
481static void FASTCALL hashTableClear(HASH_TABLE *);
482static void FASTCALL hashTableDestroy(HASH_TABLE *);
483static void FASTCALL
484hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
485static NAMED * FASTCALL hashTableIterNext(HASH_TABLE_ITER *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000486
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000487static void FASTCALL
488poolInit(STRING_POOL *, const XML_Memory_Handling_Suite *ms);
489static void FASTCALL poolClear(STRING_POOL *);
490static void FASTCALL poolDestroy(STRING_POOL *);
491static XML_Char *
492poolAppend(STRING_POOL *pool, const ENCODING *enc,
493 const char *ptr, const char *end);
494static XML_Char *
495poolStoreString(STRING_POOL *pool, const ENCODING *enc,
496 const char *ptr, const char *end);
497static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
498static const XML_Char * FASTCALL
499poolCopyString(STRING_POOL *pool, const XML_Char *s);
500static const XML_Char *
501poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
502static const XML_Char * FASTCALL
503poolAppendString(STRING_POOL *pool, const XML_Char *s);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000504
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000505static int FASTCALL nextScaffoldPart(XML_Parser parser);
506static XML_Content * build_model(XML_Parser parser);
507static ELEMENT_TYPE *
508getElementType(XML_Parser parser, const ENCODING *enc,
509 const char *ptr, const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000510
Victor Stinner93d0cb52017-08-18 23:43:54 +0200511static XML_Char *copyString(const XML_Char *s,
512 const XML_Memory_Handling_Suite *memsuite);
513
Victor Stinner23ec4b52017-06-15 00:54:36 +0200514static unsigned long generate_hash_secret_salt(XML_Parser parser);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700515static XML_Bool startParsing(XML_Parser parser);
516
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000517static XML_Parser
518parserCreate(const XML_Char *encodingName,
519 const XML_Memory_Handling_Suite *memsuite,
520 const XML_Char *nameSep,
521 DTD *dtd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700522
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000523static void
524parserInit(XML_Parser parser, const XML_Char *encodingName);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000525
/* Accessors for a STRING_POOL pointer.  Every use of the macro argument is
   parenthesized so that any pointer-valued expression (e.g. &pool) works. */
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Drop the last character of the string being built. */
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
/* Abandon the string being built. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Start a new string right after the one just built. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Append c, growing the pool first when it is full; evaluates to 0 if
   poolGrow() fails and to 1 otherwise. */
#define poolAppendChar(pool, c) \
  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
   ? 0 \
   : ((*((pool)->ptr)++ = (c)), 1))
537
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000538struct XML_ParserStruct {
Benjamin Peterson4e211002018-06-26 19:25:45 -0700539 /* The first member must be m_userData so that the XML_GetUserData
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000540 macro works. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000541 void *m_userData;
542 void *m_handlerArg;
543 char *m_buffer;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000544 const XML_Memory_Handling_Suite m_mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000545 /* first character to be parsed */
546 const char *m_bufferPtr;
547 /* past last character to be parsed */
548 char *m_bufferEnd;
Benjamin Peterson4e211002018-06-26 19:25:45 -0700549 /* allocated end of m_buffer */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000550 const char *m_bufferLim;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000551 XML_Index m_parseEndByteIndex;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000552 const char *m_parseEndPtr;
553 XML_Char *m_dataBuf;
554 XML_Char *m_dataBufEnd;
555 XML_StartElementHandler m_startElementHandler;
556 XML_EndElementHandler m_endElementHandler;
557 XML_CharacterDataHandler m_characterDataHandler;
558 XML_ProcessingInstructionHandler m_processingInstructionHandler;
559 XML_CommentHandler m_commentHandler;
560 XML_StartCdataSectionHandler m_startCdataSectionHandler;
561 XML_EndCdataSectionHandler m_endCdataSectionHandler;
562 XML_DefaultHandler m_defaultHandler;
563 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
564 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
565 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
566 XML_NotationDeclHandler m_notationDeclHandler;
567 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
568 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
569 XML_NotStandaloneHandler m_notStandaloneHandler;
570 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000571 XML_Parser m_externalEntityRefHandlerArg;
572 XML_SkippedEntityHandler m_skippedEntityHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000573 XML_UnknownEncodingHandler m_unknownEncodingHandler;
574 XML_ElementDeclHandler m_elementDeclHandler;
575 XML_AttlistDeclHandler m_attlistDeclHandler;
576 XML_EntityDeclHandler m_entityDeclHandler;
577 XML_XmlDeclHandler m_xmlDeclHandler;
578 const ENCODING *m_encoding;
579 INIT_ENCODING m_initEncoding;
580 const ENCODING *m_internalEncoding;
581 const XML_Char *m_protocolEncodingName;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000582 XML_Bool m_ns;
583 XML_Bool m_ns_triplets;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000584 void *m_unknownEncodingMem;
585 void *m_unknownEncodingData;
586 void *m_unknownEncodingHandlerData;
Fred Drake31d485c2004-08-03 07:06:22 +0000587 void (XMLCALL *m_unknownEncodingRelease)(void *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000588 PROLOG_STATE m_prologState;
589 Processor *m_processor;
590 enum XML_Error m_errorCode;
591 const char *m_eventPtr;
592 const char *m_eventEndPtr;
593 const char *m_positionPtr;
594 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
Fred Drake31d485c2004-08-03 07:06:22 +0000595 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000596 XML_Bool m_defaultExpandInternalEntities;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000597 int m_tagLevel;
598 ENTITY *m_declEntity;
599 const XML_Char *m_doctypeName;
600 const XML_Char *m_doctypeSysid;
601 const XML_Char *m_doctypePubid;
602 const XML_Char *m_declAttributeType;
603 const XML_Char *m_declNotationName;
604 const XML_Char *m_declNotationPublicId;
605 ELEMENT_TYPE *m_declElementType;
606 ATTRIBUTE_ID *m_declAttributeId;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000607 XML_Bool m_declAttributeIsCdata;
608 XML_Bool m_declAttributeIsId;
609 DTD *m_dtd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000610 const XML_Char *m_curBase;
611 TAG *m_tagStack;
612 TAG *m_freeTagList;
613 BINDING *m_inheritedBindings;
614 BINDING *m_freeBindingList;
615 int m_attsSize;
616 int m_nSpecifiedAtts;
617 int m_idAttIndex;
618 ATTRIBUTE *m_atts;
Fred Drake08317ae2003-10-21 15:38:55 +0000619 NS_ATT *m_nsAtts;
620 unsigned long m_nsAttsVersion;
621 unsigned char m_nsAttsPower;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700622#ifdef XML_ATTR_INFO
623 XML_AttrInfo *m_attInfo;
624#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000625 POSITION m_position;
626 STRING_POOL m_tempPool;
627 STRING_POOL m_temp2Pool;
628 char *m_groupConnector;
Fred Drake08317ae2003-10-21 15:38:55 +0000629 unsigned int m_groupSize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000630 XML_Char m_namespaceSeparator;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000631 XML_Parser m_parentParser;
Fred Drake31d485c2004-08-03 07:06:22 +0000632 XML_ParsingStatus m_parsingStatus;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000633#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000634 XML_Bool m_isParamEntity;
635 XML_Bool m_useForeignDTD;
636 enum XML_ParamEntityParsing m_paramEntityParsing;
637#endif
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700638 unsigned long m_hash_secret_salt;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000639};
640
/* Allocate, resize and release memory through the parser's own memory
   suite (parser->m_mem).  The parser argument is parenthesized so that
   expressions such as &obj can be passed. */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000644
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000645
Fred Drake08317ae2003-10-21 15:38:55 +0000646XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000647XML_ParserCreate(const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000648{
649 return XML_ParserCreate_MM(encodingName, NULL, NULL);
650}
651
Fred Drake08317ae2003-10-21 15:38:55 +0000652XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000653XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000654{
655 XML_Char tmp[2];
656 *tmp = nsSep;
657 return XML_ParserCreate_MM(encodingName, NULL, tmp);
658}
659
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000660static const XML_Char implicitContext[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700661 ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, ASCII_t, ASCII_t, ASCII_p,
662 ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
663 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g,
664 ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9,
665 ASCII_9, ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
666 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0'
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000667};
668
Victor Stinner5ff71322017-06-21 14:39:22 +0200669
Benjamin Peterson4e211002018-06-26 19:25:45 -0700670/* To avoid warnings about unused functions: */
671#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
672
Victor Stinner5ff71322017-06-21 14:39:22 +0200673#if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
Victor Stinner5ff71322017-06-21 14:39:22 +0200674
/* Obtain entropy on Linux 3.17+
 *
 * Fills target with count random bytes using getrandom() (or the raw
 * SYS_getrandom syscall) in GRND_NONBLOCK mode.
 *
 * Returns 1 when all count bytes were written, 0 otherwise.  A count of 0
 * is trivially satisfied.
 *
 * errno is consulted only after a call has actually failed: a stale EINTR
 * left over from unrelated code can no longer keep the loop spinning, and
 * a short (partial) result is continued instead of being reported as a
 * failure.
 */
static int
writeRandomBytes_getrandom_nonblock(void * target, size_t count) {
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = GRND_NONBLOCK;

  while (bytesWrittenTotal < count) {
    void * const currentTarget = (void*)((char*)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    /* Kept in a long so the byte count is not narrowed to int */
    const long bytesWrittenMore =
#if defined(HAVE_GETRANDOM)
        (long)getrandom(currentTarget, bytesToWrite, getrandomFlags);
#else
        (long)syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
#endif

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
    } else if ((bytesWrittenMore < 0) && (errno == EINTR)) {
      continue;  /* interrupted by a signal: retry */
    } else {
      /* Real error (e.g. no entropy available yet in non-blocking mode)
         or an unexpected zero-length result: give up, the caller falls
         back to another provider. */
      return 0;
    }
  }

  return 1;
}
702
703#endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
704
705
Victor Stinner93d0cb52017-08-18 23:43:54 +0200706#if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
707
/* Extract entropy from /dev/urandom
 *
 * Fills target with count bytes read from /dev/urandom.
 *
 * Returns 1 when all count bytes were read, 0 when the device could not be
 * opened or a read failed.  A count of 0 is trivially satisfied.
 *
 * errno is consulted only after read() has actually failed: a stale EINTR
 * left over from unrelated code can no longer keep the loop spinning, and
 * a short read is continued instead of being reported as a failure.
 */
static int
writeRandomBytes_dev_urandom(void * target, size_t count) {
  int success = 1;  /* full count bytes written? */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  while (bytesWrittenTotal < count) {
    void * const currentTarget = (void*)((char*)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
    } else if ((bytesWrittenMore < 0) && (errno == EINTR)) {
      continue;  /* interrupted by a signal: retry */
    } else {
      /* Read error or unexpected end of file */
      success = 0;
      break;
    }
  }

  close(fd);
  return success;
}
735
736#endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
737
Benjamin Peterson4e211002018-06-26 19:25:45 -0700738#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
739
Victor Stinner93d0cb52017-08-18 23:43:54 +0200740
Benjamin Peterson3b03b092019-06-27 20:54:44 -0700741#if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
Victor Stinner93d0cb52017-08-18 23:43:54 +0200742
/* Fill target with count random bytes drawn from arc4random().
 * Each 32-bit result is consumed one byte at a time, least significant
 * byte first; surplus bytes of the last word are discarded. */
static void
writeRandomBytes_arc4random(void * target, size_t count) {
  uint8_t * const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    const uint32_t word = arc4random();
    size_t byteIndex;

    for (byteIndex = 0;
         (byteIndex < sizeof(word)) && (filled < count);
         byteIndex++) {
      out[filled++] = (uint8_t)(word >> (byteIndex * 8));
    }
  }
}
758
Benjamin Peterson3b03b092019-06-27 20:54:44 -0700759#endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
Victor Stinner93d0cb52017-08-18 23:43:54 +0200760
761
Victor Stinner5ff71322017-06-21 14:39:22 +0200762#ifdef _WIN32
763
764typedef BOOLEAN (APIENTRY *RTLGENRANDOM_FUNC)(PVOID, ULONG);
Victor Stinner93d0cb52017-08-18 23:43:54 +0200765HMODULE _Expat_LoadLibrary(LPCTSTR filename); /* see loadlibrary.c */
Victor Stinner5ff71322017-06-21 14:39:22 +0200766
767/* Obtain entropy on Windows XP / Windows Server 2003 and later.
Victor Stinner93d0cb52017-08-18 23:43:54 +0200768 * Hint on RtlGenRandom and the following article from libsodium.
Victor Stinner5ff71322017-06-21 14:39:22 +0200769 *
770 * Michael Howard: Cryptographically Secure Random number on Windows without using CryptoAPI
771 * https://blogs.msdn.microsoft.com/michael_howard/2005/01/14/cryptographically-secure-random-number-on-windows-without-using-cryptoapi/
772 */
773static int
774writeRandomBytes_RtlGenRandom(void * target, size_t count) {
775 int success = 0; /* full count bytes written? */
Victor Stinner93d0cb52017-08-18 23:43:54 +0200776 const HMODULE advapi32 = _Expat_LoadLibrary(TEXT("ADVAPI32.DLL"));
Victor Stinner5ff71322017-06-21 14:39:22 +0200777
778 if (advapi32) {
779 const RTLGENRANDOM_FUNC RtlGenRandom
780 = (RTLGENRANDOM_FUNC)GetProcAddress(advapi32, "SystemFunction036");
781 if (RtlGenRandom) {
782 if (RtlGenRandom((PVOID)target, (ULONG)count) == TRUE) {
783 success = 1;
784 }
785 }
786 FreeLibrary(advapi32);
787 }
788
789 return success;
790}
791
792#endif /* _WIN32 */
793
794
Victor Stinner93d0cb52017-08-18 23:43:54 +0200795#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
796
/* Low-quality, time-based entropy for the fallback path of
   generate_hash_secret_salt().

   Windows: XOR of the two 32-bit halves of the current FILETIME.
   Elsewhere: the microseconds field returned by gettimeofday(). */
static unsigned long
gather_time_entropy(void)
{
#ifdef _WIN32
  FILETIME now;
  GetSystemTimeAsFileTime(&now); /* never fails */
  return now.dwHighDateTime ^ now.dwLowDateTime;
#else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  /* The result is only checked in debug builds; the cast silences the
     unused-variable warning when assert() compiles away. */
#if defined(NDEBUG)
  (void)rc;
#else
  assert (rc == 0);
#endif /* defined(NDEBUG) */

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#endif
}
820
Victor Stinner93d0cb52017-08-18 23:43:54 +0200821#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
822
Victor Stinner5ff71322017-06-21 14:39:22 +0200823
/* Pass-through helper: returns entropy unchanged.  When the environment
   variable EXPAT_ENTROPY_DEBUG is set to exactly "1", the value and the
   label naming its source are additionally printed to stderr. */
static unsigned long
ENTROPY_DEBUG(const char * label, unsigned long entropy) {
  const char * const debugSetting = getenv("EXPAT_ENTROPY_DEBUG");
  const int tracingEnabled
      = (debugSetting != NULL) && (strcmp(debugSetting, "1") == 0);

  if (tracingEnabled) {
    /* Field width: two hex digits per byte of the value */
    const int hexDigits = (int)sizeof(entropy) * 2;
    fprintf(stderr, "Entropy: %s --> 0x%0*lx (%lu bytes)\n",
        label, hexDigits, entropy, (unsigned long)sizeof(entropy));
  }
  return entropy;
}
835
Victor Stinner23ec4b52017-06-15 00:54:36 +0200836static unsigned long
837generate_hash_secret_salt(XML_Parser parser)
838{
Victor Stinner5ff71322017-06-21 14:39:22 +0200839 unsigned long entropy;
840 (void)parser;
Benjamin Peterson4e211002018-06-26 19:25:45 -0700841
842 /* "Failproof" high quality providers: */
Victor Stinner93d0cb52017-08-18 23:43:54 +0200843#if defined(HAVE_ARC4RANDOM_BUF)
Victor Stinner5ff71322017-06-21 14:39:22 +0200844 arc4random_buf(&entropy, sizeof(entropy));
845 return ENTROPY_DEBUG("arc4random_buf", entropy);
Victor Stinner93d0cb52017-08-18 23:43:54 +0200846#elif defined(HAVE_ARC4RANDOM)
847 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
848 return ENTROPY_DEBUG("arc4random", entropy);
Victor Stinner5ff71322017-06-21 14:39:22 +0200849#else
850 /* Try high quality providers first .. */
851#ifdef _WIN32
852 if (writeRandomBytes_RtlGenRandom((void *)&entropy, sizeof(entropy))) {
853 return ENTROPY_DEBUG("RtlGenRandom", entropy);
854 }
855#elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
Victor Stinner93d0cb52017-08-18 23:43:54 +0200856 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
Victor Stinner5ff71322017-06-21 14:39:22 +0200857 return ENTROPY_DEBUG("getrandom", entropy);
858 }
859#endif
Victor Stinner93d0cb52017-08-18 23:43:54 +0200860#if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
861 if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
862 return ENTROPY_DEBUG("/dev/urandom", entropy);
863 }
864#endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
Victor Stinner5ff71322017-06-21 14:39:22 +0200865 /* .. and self-made low quality for backup: */
866
867 /* Process ID is 0 bits entropy if attacker has local access */
868 entropy = gather_time_entropy() ^ getpid();
Victor Stinner23ec4b52017-06-15 00:54:36 +0200869
870 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
871 if (sizeof(unsigned long) == 4) {
Victor Stinner5ff71322017-06-21 14:39:22 +0200872 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200873 } else {
Victor Stinner5ff71322017-06-21 14:39:22 +0200874 return ENTROPY_DEBUG("fallback(8)",
Victor Stinner93d0cb52017-08-18 23:43:54 +0200875 entropy * (unsigned long)2305843009213693951ULL);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200876 }
Victor Stinner5ff71322017-06-21 14:39:22 +0200877#endif
878}
879
880static unsigned long
881get_hash_secret_salt(XML_Parser parser) {
882 if (parser->m_parentParser != NULL)
883 return get_hash_secret_salt(parser->m_parentParser);
884 return parser->m_hash_secret_salt;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700885}
886
887static XML_Bool /* only valid for root parser */
888startParsing(XML_Parser parser)
889{
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700890 /* hash functions must be initialized before setContext() is called */
Benjamin Peterson4e211002018-06-26 19:25:45 -0700891 if (parser->m_hash_secret_salt == 0)
892 parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
893 if (parser->m_ns) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700894 /* implicit context only set for root parser, since child
895 parsers (i.e. external entity parsers) will inherit it
896 */
897 return setContext(parser, implicitContext);
898 }
899 return XML_TRUE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700900}
901
902XML_Parser XMLCALL
903XML_ParserCreate_MM(const XML_Char *encodingName,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700904 const XML_Memory_Handling_Suite *memsuite,
905 const XML_Char *nameSep)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700906{
907 return parserCreate(encodingName, memsuite, nameSep, NULL);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000908}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000909
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000910static XML_Parser
911parserCreate(const XML_Char *encodingName,
912 const XML_Memory_Handling_Suite *memsuite,
913 const XML_Char *nameSep,
914 DTD *dtd)
915{
916 XML_Parser parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000917
918 if (memsuite) {
919 XML_Memory_Handling_Suite *mtemp;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000920 parser = (XML_Parser)
921 memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
922 if (parser != NULL) {
923 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
924 mtemp->malloc_fcn = memsuite->malloc_fcn;
925 mtemp->realloc_fcn = memsuite->realloc_fcn;
926 mtemp->free_fcn = memsuite->free_fcn;
927 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000928 }
929 else {
930 XML_Memory_Handling_Suite *mtemp;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000931 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
932 if (parser != NULL) {
933 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
934 mtemp->malloc_fcn = malloc;
935 mtemp->realloc_fcn = realloc;
936 mtemp->free_fcn = free;
937 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000938 }
939
940 if (!parser)
941 return parser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000942
Benjamin Peterson4e211002018-06-26 19:25:45 -0700943 parser->m_buffer = NULL;
944 parser->m_bufferLim = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000945
Benjamin Peterson4e211002018-06-26 19:25:45 -0700946 parser->m_attsSize = INIT_ATTS_SIZE;
947 parser->m_atts = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
948 if (parser->m_atts == NULL) {
949 FREE(parser, parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000950 return NULL;
951 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700952#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -0700953 parser->m_attInfo = (XML_AttrInfo*)MALLOC(parser, parser->m_attsSize * sizeof(XML_AttrInfo));
954 if (parser->m_attInfo == NULL) {
955 FREE(parser, parser->m_atts);
956 FREE(parser, parser);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700957 return NULL;
958 }
959#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -0700960 parser->m_dataBuf = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
961 if (parser->m_dataBuf == NULL) {
962 FREE(parser, parser->m_atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700963#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -0700964 FREE(parser, parser->m_attInfo);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700965#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -0700966 FREE(parser, parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000967 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000968 }
Benjamin Peterson4e211002018-06-26 19:25:45 -0700969 parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000970
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000971 if (dtd)
Benjamin Peterson4e211002018-06-26 19:25:45 -0700972 parser->m_dtd = dtd;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000973 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -0700974 parser->m_dtd = dtdCreate(&parser->m_mem);
975 if (parser->m_dtd == NULL) {
976 FREE(parser, parser->m_dataBuf);
977 FREE(parser, parser->m_atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700978#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -0700979 FREE(parser, parser->m_attInfo);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700980#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -0700981 FREE(parser, parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000982 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000983 }
984 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000985
Benjamin Peterson4e211002018-06-26 19:25:45 -0700986 parser->m_freeBindingList = NULL;
987 parser->m_freeTagList = NULL;
988 parser->m_freeInternalEntities = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000989
Benjamin Peterson4e211002018-06-26 19:25:45 -0700990 parser->m_groupSize = 0;
991 parser->m_groupConnector = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000992
Benjamin Peterson4e211002018-06-26 19:25:45 -0700993 parser->m_unknownEncodingHandler = NULL;
994 parser->m_unknownEncodingHandlerData = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000995
Benjamin Peterson4e211002018-06-26 19:25:45 -0700996 parser->m_namespaceSeparator = ASCII_EXCL;
997 parser->m_ns = XML_FALSE;
998 parser->m_ns_triplets = XML_FALSE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000999
Benjamin Peterson4e211002018-06-26 19:25:45 -07001000 parser->m_nsAtts = NULL;
1001 parser->m_nsAttsVersion = 0;
1002 parser->m_nsAttsPower = 0;
Fred Drake08317ae2003-10-21 15:38:55 +00001003
Benjamin Peterson4e211002018-06-26 19:25:45 -07001004 parser->m_protocolEncodingName = NULL;
Victor Stinner93d0cb52017-08-18 23:43:54 +02001005
Benjamin Peterson4e211002018-06-26 19:25:45 -07001006 poolInit(&parser->m_tempPool, &(parser->m_mem));
1007 poolInit(&parser->m_temp2Pool, &(parser->m_mem));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001008 parserInit(parser, encodingName);
1009
Benjamin Peterson4e211002018-06-26 19:25:45 -07001010 if (encodingName && !parser->m_protocolEncodingName) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001011 XML_ParserFree(parser);
1012 return NULL;
1013 }
1014
1015 if (nameSep) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001016 parser->m_ns = XML_TRUE;
1017 parser->m_internalEncoding = XmlGetInternalEncodingNS();
1018 parser->m_namespaceSeparator = *nameSep;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001019 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001020 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001021 parser->m_internalEncoding = XmlGetInternalEncoding();
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001022 }
1023
1024 return parser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001025}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001026
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001027static void
1028parserInit(XML_Parser parser, const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001029{
Benjamin Peterson4e211002018-06-26 19:25:45 -07001030 parser->m_processor = prologInitProcessor;
1031 XmlPrologStateInit(&parser->m_prologState);
Victor Stinner93d0cb52017-08-18 23:43:54 +02001032 if (encodingName != NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001033 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
Victor Stinner93d0cb52017-08-18 23:43:54 +02001034 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001035 parser->m_curBase = NULL;
1036 XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1037 parser->m_userData = NULL;
1038 parser->m_handlerArg = NULL;
1039 parser->m_startElementHandler = NULL;
1040 parser->m_endElementHandler = NULL;
1041 parser->m_characterDataHandler = NULL;
1042 parser->m_processingInstructionHandler = NULL;
1043 parser->m_commentHandler = NULL;
1044 parser->m_startCdataSectionHandler = NULL;
1045 parser->m_endCdataSectionHandler = NULL;
1046 parser->m_defaultHandler = NULL;
1047 parser->m_startDoctypeDeclHandler = NULL;
1048 parser->m_endDoctypeDeclHandler = NULL;
1049 parser->m_unparsedEntityDeclHandler = NULL;
1050 parser->m_notationDeclHandler = NULL;
1051 parser->m_startNamespaceDeclHandler = NULL;
1052 parser->m_endNamespaceDeclHandler = NULL;
1053 parser->m_notStandaloneHandler = NULL;
1054 parser->m_externalEntityRefHandler = NULL;
1055 parser->m_externalEntityRefHandlerArg = parser;
1056 parser->m_skippedEntityHandler = NULL;
1057 parser->m_elementDeclHandler = NULL;
1058 parser->m_attlistDeclHandler = NULL;
1059 parser->m_entityDeclHandler = NULL;
1060 parser->m_xmlDeclHandler = NULL;
1061 parser->m_bufferPtr = parser->m_buffer;
1062 parser->m_bufferEnd = parser->m_buffer;
1063 parser->m_parseEndByteIndex = 0;
1064 parser->m_parseEndPtr = NULL;
1065 parser->m_declElementType = NULL;
1066 parser->m_declAttributeId = NULL;
1067 parser->m_declEntity = NULL;
1068 parser->m_doctypeName = NULL;
1069 parser->m_doctypeSysid = NULL;
1070 parser->m_doctypePubid = NULL;
1071 parser->m_declAttributeType = NULL;
1072 parser->m_declNotationName = NULL;
1073 parser->m_declNotationPublicId = NULL;
1074 parser->m_declAttributeIsCdata = XML_FALSE;
1075 parser->m_declAttributeIsId = XML_FALSE;
1076 memset(&parser->m_position, 0, sizeof(POSITION));
1077 parser->m_errorCode = XML_ERROR_NONE;
1078 parser->m_eventPtr = NULL;
1079 parser->m_eventEndPtr = NULL;
1080 parser->m_positionPtr = NULL;
1081 parser->m_openInternalEntities = NULL;
1082 parser->m_defaultExpandInternalEntities = XML_TRUE;
1083 parser->m_tagLevel = 0;
1084 parser->m_tagStack = NULL;
1085 parser->m_inheritedBindings = NULL;
1086 parser->m_nSpecifiedAtts = 0;
1087 parser->m_unknownEncodingMem = NULL;
1088 parser->m_unknownEncodingRelease = NULL;
1089 parser->m_unknownEncodingData = NULL;
1090 parser->m_parentParser = NULL;
1091 parser->m_parsingStatus.parsing = XML_INITIALIZED;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001092#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07001093 parser->m_isParamEntity = XML_FALSE;
1094 parser->m_useForeignDTD = XML_FALSE;
1095 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001096#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07001097 parser->m_hash_secret_salt = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001098}
1099
Benjamin Peterson4e211002018-06-26 19:25:45 -07001100/* moves list of bindings to m_freeBindingList */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001101static void FASTCALL
1102moveToFreeBindingList(XML_Parser parser, BINDING *bindings)
1103{
1104 while (bindings) {
1105 BINDING *b = bindings;
1106 bindings = bindings->nextTagBinding;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001107 b->nextTagBinding = parser->m_freeBindingList;
1108 parser->m_freeBindingList = b;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001109 }
1110}
1111
Fred Drake08317ae2003-10-21 15:38:55 +00001112XML_Bool XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001113XML_ParserReset(XML_Parser parser, const XML_Char *encodingName)
1114{
1115 TAG *tStk;
Fred Drake31d485c2004-08-03 07:06:22 +00001116 OPEN_INTERNAL_ENTITY *openEntityList;
Victor Stinner5ff71322017-06-21 14:39:22 +02001117
1118 if (parser == NULL)
1119 return XML_FALSE;
1120
Benjamin Peterson4e211002018-06-26 19:25:45 -07001121 if (parser->m_parentParser)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001122 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001123 /* move m_tagStack to m_freeTagList */
1124 tStk = parser->m_tagStack;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001125 while (tStk) {
1126 TAG *tag = tStk;
1127 tStk = tStk->parent;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001128 tag->parent = parser->m_freeTagList;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001129 moveToFreeBindingList(parser, tag->bindings);
1130 tag->bindings = NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001131 parser->m_freeTagList = tag;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001132 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001133 /* move m_openInternalEntities to m_freeInternalEntities */
1134 openEntityList = parser->m_openInternalEntities;
Fred Drake31d485c2004-08-03 07:06:22 +00001135 while (openEntityList) {
1136 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1137 openEntityList = openEntity->next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001138 openEntity->next = parser->m_freeInternalEntities;
1139 parser->m_freeInternalEntities = openEntity;
Fred Drake31d485c2004-08-03 07:06:22 +00001140 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001141 moveToFreeBindingList(parser, parser->m_inheritedBindings);
1142 FREE(parser, parser->m_unknownEncodingMem);
1143 if (parser->m_unknownEncodingRelease)
1144 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1145 poolClear(&parser->m_tempPool);
1146 poolClear(&parser->m_temp2Pool);
1147 FREE(parser, (void *)parser->m_protocolEncodingName);
1148 parser->m_protocolEncodingName = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001149 parserInit(parser, encodingName);
Benjamin Peterson4e211002018-06-26 19:25:45 -07001150 dtdReset(parser->m_dtd, &parser->m_mem);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001151 return XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001152}
1153
Fred Drake08317ae2003-10-21 15:38:55 +00001154enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001155XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
1156{
Victor Stinner5ff71322017-06-21 14:39:22 +02001157 if (parser == NULL)
1158 return XML_STATUS_ERROR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001159 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1160 XXX There's no way for the caller to determine which of the
1161 XXX possible error cases caused the XML_STATUS_ERROR return.
1162 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001163 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001164 return XML_STATUS_ERROR;
Victor Stinner93d0cb52017-08-18 23:43:54 +02001165
1166 /* Get rid of any previous encoding name */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001167 FREE(parser, (void *)parser->m_protocolEncodingName);
Victor Stinner93d0cb52017-08-18 23:43:54 +02001168
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001169 if (encodingName == NULL)
Victor Stinner93d0cb52017-08-18 23:43:54 +02001170 /* No new encoding name */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001171 parser->m_protocolEncodingName = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001172 else {
Victor Stinner93d0cb52017-08-18 23:43:54 +02001173 /* Copy the new encoding name into allocated memory */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001174 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1175 if (!parser->m_protocolEncodingName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001176 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001177 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001178 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001179}
1180
Fred Drake08317ae2003-10-21 15:38:55 +00001181XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001182XML_ExternalEntityParserCreate(XML_Parser oldParser,
1183 const XML_Char *context,
1184 const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001185{
1186 XML_Parser parser = oldParser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001187 DTD *newDtd = NULL;
Victor Stinner5ff71322017-06-21 14:39:22 +02001188 DTD *oldDtd;
1189 XML_StartElementHandler oldStartElementHandler;
1190 XML_EndElementHandler oldEndElementHandler;
1191 XML_CharacterDataHandler oldCharacterDataHandler;
1192 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1193 XML_CommentHandler oldCommentHandler;
1194 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1195 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1196 XML_DefaultHandler oldDefaultHandler;
1197 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1198 XML_NotationDeclHandler oldNotationDeclHandler;
1199 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1200 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1201 XML_NotStandaloneHandler oldNotStandaloneHandler;
1202 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1203 XML_SkippedEntityHandler oldSkippedEntityHandler;
1204 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1205 XML_ElementDeclHandler oldElementDeclHandler;
1206 XML_AttlistDeclHandler oldAttlistDeclHandler;
1207 XML_EntityDeclHandler oldEntityDeclHandler;
1208 XML_XmlDeclHandler oldXmlDeclHandler;
1209 ELEMENT_TYPE * oldDeclElementType;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001210
Victor Stinner5ff71322017-06-21 14:39:22 +02001211 void *oldUserData;
1212 void *oldHandlerArg;
1213 XML_Bool oldDefaultExpandInternalEntities;
1214 XML_Parser oldExternalEntityRefHandlerArg;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001215#ifdef XML_DTD
Victor Stinner5ff71322017-06-21 14:39:22 +02001216 enum XML_ParamEntityParsing oldParamEntityParsing;
1217 int oldInEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001218#endif
Victor Stinner5ff71322017-06-21 14:39:22 +02001219 XML_Bool oldns_triplets;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001220 /* Note that the new parser shares the same hash secret as the old
1221 parser, so that dtdCopy and copyEntityTable can lookup values
1222 from hash tables associated with either parser without us having
1223 to worry which hash secrets each table has.
1224 */
Victor Stinner5ff71322017-06-21 14:39:22 +02001225 unsigned long oldhash_secret_salt;
1226
1227 /* Validate the oldParser parameter before we pull everything out of it */
1228 if (oldParser == NULL)
1229 return NULL;
1230
1231 /* Stash the original parser contents on the stack */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001232 oldDtd = parser->m_dtd;
1233 oldStartElementHandler = parser->m_startElementHandler;
1234 oldEndElementHandler = parser->m_endElementHandler;
1235 oldCharacterDataHandler = parser->m_characterDataHandler;
1236 oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1237 oldCommentHandler = parser->m_commentHandler;
1238 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1239 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1240 oldDefaultHandler = parser->m_defaultHandler;
1241 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1242 oldNotationDeclHandler = parser->m_notationDeclHandler;
1243 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1244 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1245 oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1246 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1247 oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1248 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1249 oldElementDeclHandler = parser->m_elementDeclHandler;
1250 oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1251 oldEntityDeclHandler = parser->m_entityDeclHandler;
1252 oldXmlDeclHandler = parser->m_xmlDeclHandler;
1253 oldDeclElementType = parser->m_declElementType;
Victor Stinner5ff71322017-06-21 14:39:22 +02001254
Benjamin Peterson4e211002018-06-26 19:25:45 -07001255 oldUserData = parser->m_userData;
1256 oldHandlerArg = parser->m_handlerArg;
1257 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1258 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
Victor Stinner5ff71322017-06-21 14:39:22 +02001259#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07001260 oldParamEntityParsing = parser->m_paramEntityParsing;
1261 oldInEntityValue = parser->m_prologState.inEntityValue;
Victor Stinner5ff71322017-06-21 14:39:22 +02001262#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07001263 oldns_triplets = parser->m_ns_triplets;
Victor Stinner5ff71322017-06-21 14:39:22 +02001264 /* Note that the new parser shares the same hash secret as the old
1265 parser, so that dtdCopy and copyEntityTable can lookup values
1266 from hash tables associated with either parser without us having
1267 to worry which hash secrets each table has.
1268 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001269 oldhash_secret_salt = parser->m_hash_secret_salt;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001270
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001271#ifdef XML_DTD
1272 if (!context)
1273 newDtd = oldDtd;
1274#endif /* XML_DTD */
1275
1276 /* Note that the magical uses of the pre-processor to make field
1277 access look more like C++ require that `parser' be overwritten
1278 here. This makes this function more painful to follow than it
1279 would be otherwise.
1280 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001281 if (parser->m_ns) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001282 XML_Char tmp[2];
Benjamin Peterson4e211002018-06-26 19:25:45 -07001283 *tmp = parser->m_namespaceSeparator;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001284 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001285 }
1286 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001287 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001288 }
1289
1290 if (!parser)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001291 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001292
Benjamin Peterson4e211002018-06-26 19:25:45 -07001293 parser->m_startElementHandler = oldStartElementHandler;
1294 parser->m_endElementHandler = oldEndElementHandler;
1295 parser->m_characterDataHandler = oldCharacterDataHandler;
1296 parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1297 parser->m_commentHandler = oldCommentHandler;
1298 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1299 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1300 parser->m_defaultHandler = oldDefaultHandler;
1301 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1302 parser->m_notationDeclHandler = oldNotationDeclHandler;
1303 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1304 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1305 parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1306 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1307 parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1308 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1309 parser->m_elementDeclHandler = oldElementDeclHandler;
1310 parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1311 parser->m_entityDeclHandler = oldEntityDeclHandler;
1312 parser->m_xmlDeclHandler = oldXmlDeclHandler;
1313 parser->m_declElementType = oldDeclElementType;
1314 parser->m_userData = oldUserData;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001315 if (oldUserData == oldHandlerArg)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001316 parser->m_handlerArg = parser->m_userData;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001317 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001318 parser->m_handlerArg = parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001319 if (oldExternalEntityRefHandlerArg != oldParser)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001320 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1321 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1322 parser->m_ns_triplets = oldns_triplets;
1323 parser->m_hash_secret_salt = oldhash_secret_salt;
1324 parser->m_parentParser = oldParser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001325#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07001326 parser->m_paramEntityParsing = oldParamEntityParsing;
1327 parser->m_prologState.inEntityValue = oldInEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001328 if (context) {
1329#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001330 if (!dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001331 || !setContext(parser, context)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001332 XML_ParserFree(parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001333 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001334 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001335 parser->m_processor = externalEntityInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001336#ifdef XML_DTD
1337 }
1338 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001339 /* The DTD instance referenced by parser->m_dtd is shared between the document's
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001340 root parser and external PE parsers, therefore one does not need to
1341 call setContext. In addition, one also *must* not call setContext,
1342 because this would overwrite existing prefix->binding pointers in
Benjamin Peterson4e211002018-06-26 19:25:45 -07001343 parser->m_dtd with ones that get destroyed with the external PE parser.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001344 This would leave those prefixes with dangling pointers.
1345 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001346 parser->m_isParamEntity = XML_TRUE;
1347 XmlPrologStateInitExternalEntity(&parser->m_prologState);
1348 parser->m_processor = externalParEntInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001349 }
1350#endif /* XML_DTD */
1351 return parser;
1352}
1353
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001354static void FASTCALL
1355destroyBindings(BINDING *bindings, XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001356{
1357 for (;;) {
1358 BINDING *b = bindings;
1359 if (!b)
1360 break;
1361 bindings = b->nextTagBinding;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001362 FREE(parser, b->uri);
1363 FREE(parser, b);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001364 }
1365}
1366
Fred Drake08317ae2003-10-21 15:38:55 +00001367void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001368XML_ParserFree(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001369{
Fred Drake31d485c2004-08-03 07:06:22 +00001370 TAG *tagList;
1371 OPEN_INTERNAL_ENTITY *entityList;
1372 if (parser == NULL)
1373 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001374 /* free m_tagStack and m_freeTagList */
1375 tagList = parser->m_tagStack;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001376 for (;;) {
1377 TAG *p;
Fred Drake31d485c2004-08-03 07:06:22 +00001378 if (tagList == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001379 if (parser->m_freeTagList == NULL)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001380 break;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001381 tagList = parser->m_freeTagList;
1382 parser->m_freeTagList = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001383 }
Fred Drake31d485c2004-08-03 07:06:22 +00001384 p = tagList;
1385 tagList = tagList->parent;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001386 FREE(parser, p->buf);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001387 destroyBindings(p->bindings, parser);
Benjamin Peterson4e211002018-06-26 19:25:45 -07001388 FREE(parser, p);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001389 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001390 /* free m_openInternalEntities and m_freeInternalEntities */
1391 entityList = parser->m_openInternalEntities;
Fred Drake31d485c2004-08-03 07:06:22 +00001392 for (;;) {
1393 OPEN_INTERNAL_ENTITY *openEntity;
1394 if (entityList == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001395 if (parser->m_freeInternalEntities == NULL)
Fred Drake31d485c2004-08-03 07:06:22 +00001396 break;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001397 entityList = parser->m_freeInternalEntities;
1398 parser->m_freeInternalEntities = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00001399 }
1400 openEntity = entityList;
1401 entityList = entityList->next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001402 FREE(parser, openEntity);
Fred Drake31d485c2004-08-03 07:06:22 +00001403 }
1404
Benjamin Peterson4e211002018-06-26 19:25:45 -07001405 destroyBindings(parser->m_freeBindingList, parser);
1406 destroyBindings(parser->m_inheritedBindings, parser);
1407 poolDestroy(&parser->m_tempPool);
1408 poolDestroy(&parser->m_temp2Pool);
1409 FREE(parser, (void *)parser->m_protocolEncodingName);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001410#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001411 /* external parameter entity parsers share the DTD structure
1412 parser->m_dtd with the root parser, so we must not destroy it
1413 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001414 if (!parser->m_isParamEntity && parser->m_dtd)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001415#else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001416 if (parser->m_dtd)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001417#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001418 dtdDestroy(parser->m_dtd, (XML_Bool)!parser->m_parentParser, &parser->m_mem);
1419 FREE(parser, (void *)parser->m_atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001420#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -07001421 FREE(parser, (void *)parser->m_attInfo);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001422#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07001423 FREE(parser, parser->m_groupConnector);
1424 FREE(parser, parser->m_buffer);
1425 FREE(parser, parser->m_dataBuf);
1426 FREE(parser, parser->m_nsAtts);
1427 FREE(parser, parser->m_unknownEncodingMem);
1428 if (parser->m_unknownEncodingRelease)
1429 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1430 FREE(parser, parser);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001431}
1432
Fred Drake08317ae2003-10-21 15:38:55 +00001433void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001434XML_UseParserAsHandlerArg(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001435{
Victor Stinner5ff71322017-06-21 14:39:22 +02001436 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001437 parser->m_handlerArg = parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001438}
1439
Fred Drake08317ae2003-10-21 15:38:55 +00001440enum XML_Error XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001441XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD)
1442{
Victor Stinner5ff71322017-06-21 14:39:22 +02001443 if (parser == NULL)
1444 return XML_ERROR_INVALID_ARGUMENT;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001445#ifdef XML_DTD
1446 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001447 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001448 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001449 parser->m_useForeignDTD = useDTD;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001450 return XML_ERROR_NONE;
1451#else
1452 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1453#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001454}
1455
Fred Drake08317ae2003-10-21 15:38:55 +00001456void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001457XML_SetReturnNSTriplet(XML_Parser parser, int do_nst)
1458{
Victor Stinner5ff71322017-06-21 14:39:22 +02001459 if (parser == NULL)
1460 return;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001461 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001462 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001463 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001464 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001465}
1466
Fred Drake08317ae2003-10-21 15:38:55 +00001467void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001468XML_SetUserData(XML_Parser parser, void *p)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001469{
Victor Stinner5ff71322017-06-21 14:39:22 +02001470 if (parser == NULL)
1471 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001472 if (parser->m_handlerArg == parser->m_userData)
1473 parser->m_handlerArg = parser->m_userData = p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001474 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001475 parser->m_userData = p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001476}
1477
Fred Drake08317ae2003-10-21 15:38:55 +00001478enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001479XML_SetBase(XML_Parser parser, const XML_Char *p)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001480{
Victor Stinner5ff71322017-06-21 14:39:22 +02001481 if (parser == NULL)
1482 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001483 if (p) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001484 p = poolCopyString(&parser->m_dtd->pool, p);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001485 if (!p)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001486 return XML_STATUS_ERROR;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001487 parser->m_curBase = p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001488 }
1489 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001490 parser->m_curBase = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001491 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001492}
1493
Fred Drake08317ae2003-10-21 15:38:55 +00001494const XML_Char * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001495XML_GetBase(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001496{
Victor Stinner5ff71322017-06-21 14:39:22 +02001497 if (parser == NULL)
1498 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001499 return parser->m_curBase;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001500}
1501
Fred Drake08317ae2003-10-21 15:38:55 +00001502int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001503XML_GetSpecifiedAttributeCount(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001504{
Victor Stinner5ff71322017-06-21 14:39:22 +02001505 if (parser == NULL)
1506 return -1;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001507 return parser->m_nSpecifiedAtts;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001508}
1509
Fred Drake08317ae2003-10-21 15:38:55 +00001510int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001511XML_GetIdAttributeIndex(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001512{
Victor Stinner5ff71322017-06-21 14:39:22 +02001513 if (parser == NULL)
1514 return -1;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001515 return parser->m_idAttIndex;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001516}
1517
#ifdef XML_ATTR_INFO
/* Return the parser's m_attInfo array, or NULL for a NULL parser.
   Only compiled when XML_ATTR_INFO is defined.
*/
const XML_AttrInfo * XMLCALL
XML_GetAttributeInfo(XML_Parser parser)
{
  if (parser != NULL)
    return parser->m_attInfo;
  return NULL;
}
#endif
1527
Fred Drake08317ae2003-10-21 15:38:55 +00001528void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001529XML_SetElementHandler(XML_Parser parser,
1530 XML_StartElementHandler start,
1531 XML_EndElementHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001532{
Victor Stinner5ff71322017-06-21 14:39:22 +02001533 if (parser == NULL)
1534 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001535 parser->m_startElementHandler = start;
1536 parser->m_endElementHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001537}
1538
Fred Drake08317ae2003-10-21 15:38:55 +00001539void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001540XML_SetStartElementHandler(XML_Parser parser,
1541 XML_StartElementHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001542 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001543 parser->m_startElementHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001544}
1545
Fred Drake08317ae2003-10-21 15:38:55 +00001546void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001547XML_SetEndElementHandler(XML_Parser parser,
1548 XML_EndElementHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001549 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001550 parser->m_endElementHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001551}
1552
Fred Drake08317ae2003-10-21 15:38:55 +00001553void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001554XML_SetCharacterDataHandler(XML_Parser parser,
1555 XML_CharacterDataHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001556{
Victor Stinner5ff71322017-06-21 14:39:22 +02001557 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001558 parser->m_characterDataHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001559}
1560
Fred Drake08317ae2003-10-21 15:38:55 +00001561void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001562XML_SetProcessingInstructionHandler(XML_Parser parser,
1563 XML_ProcessingInstructionHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001564{
Victor Stinner5ff71322017-06-21 14:39:22 +02001565 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001566 parser->m_processingInstructionHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001567}
1568
Fred Drake08317ae2003-10-21 15:38:55 +00001569void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001570XML_SetCommentHandler(XML_Parser parser,
1571 XML_CommentHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001572{
Victor Stinner5ff71322017-06-21 14:39:22 +02001573 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001574 parser->m_commentHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001575}
1576
Fred Drake08317ae2003-10-21 15:38:55 +00001577void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001578XML_SetCdataSectionHandler(XML_Parser parser,
1579 XML_StartCdataSectionHandler start,
1580 XML_EndCdataSectionHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001581{
Victor Stinner5ff71322017-06-21 14:39:22 +02001582 if (parser == NULL)
1583 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001584 parser->m_startCdataSectionHandler = start;
1585 parser->m_endCdataSectionHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001586}
1587
Fred Drake08317ae2003-10-21 15:38:55 +00001588void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001589XML_SetStartCdataSectionHandler(XML_Parser parser,
1590 XML_StartCdataSectionHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001591 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001592 parser->m_startCdataSectionHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001593}
1594
Fred Drake08317ae2003-10-21 15:38:55 +00001595void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001596XML_SetEndCdataSectionHandler(XML_Parser parser,
1597 XML_EndCdataSectionHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001598 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001599 parser->m_endCdataSectionHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001600}
1601
Fred Drake08317ae2003-10-21 15:38:55 +00001602void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001603XML_SetDefaultHandler(XML_Parser parser,
1604 XML_DefaultHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001605{
Victor Stinner5ff71322017-06-21 14:39:22 +02001606 if (parser == NULL)
1607 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001608 parser->m_defaultHandler = handler;
1609 parser->m_defaultExpandInternalEntities = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001610}
1611
Fred Drake08317ae2003-10-21 15:38:55 +00001612void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001613XML_SetDefaultHandlerExpand(XML_Parser parser,
1614 XML_DefaultHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001615{
Victor Stinner5ff71322017-06-21 14:39:22 +02001616 if (parser == NULL)
1617 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001618 parser->m_defaultHandler = handler;
1619 parser->m_defaultExpandInternalEntities = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001620}
1621
Fred Drake08317ae2003-10-21 15:38:55 +00001622void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001623XML_SetDoctypeDeclHandler(XML_Parser parser,
1624 XML_StartDoctypeDeclHandler start,
1625 XML_EndDoctypeDeclHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001626{
Victor Stinner5ff71322017-06-21 14:39:22 +02001627 if (parser == NULL)
1628 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001629 parser->m_startDoctypeDeclHandler = start;
1630 parser->m_endDoctypeDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001631}
1632
Fred Drake08317ae2003-10-21 15:38:55 +00001633void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001634XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1635 XML_StartDoctypeDeclHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001636 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001637 parser->m_startDoctypeDeclHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001638}
1639
Fred Drake08317ae2003-10-21 15:38:55 +00001640void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001641XML_SetEndDoctypeDeclHandler(XML_Parser parser,
1642 XML_EndDoctypeDeclHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001643 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001644 parser->m_endDoctypeDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001645}
1646
Fred Drake08317ae2003-10-21 15:38:55 +00001647void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001648XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1649 XML_UnparsedEntityDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001650{
Victor Stinner5ff71322017-06-21 14:39:22 +02001651 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001652 parser->m_unparsedEntityDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001653}
1654
Fred Drake08317ae2003-10-21 15:38:55 +00001655void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001656XML_SetNotationDeclHandler(XML_Parser parser,
1657 XML_NotationDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001658{
Victor Stinner5ff71322017-06-21 14:39:22 +02001659 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001660 parser->m_notationDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001661}
1662
Fred Drake08317ae2003-10-21 15:38:55 +00001663void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001664XML_SetNamespaceDeclHandler(XML_Parser parser,
1665 XML_StartNamespaceDeclHandler start,
1666 XML_EndNamespaceDeclHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001667{
Victor Stinner5ff71322017-06-21 14:39:22 +02001668 if (parser == NULL)
1669 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001670 parser->m_startNamespaceDeclHandler = start;
1671 parser->m_endNamespaceDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001672}
1673
Fred Drake08317ae2003-10-21 15:38:55 +00001674void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001675XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1676 XML_StartNamespaceDeclHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001677 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001678 parser->m_startNamespaceDeclHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001679}
1680
Fred Drake08317ae2003-10-21 15:38:55 +00001681void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001682XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1683 XML_EndNamespaceDeclHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001684 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001685 parser->m_endNamespaceDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001686}
1687
Fred Drake08317ae2003-10-21 15:38:55 +00001688void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001689XML_SetNotStandaloneHandler(XML_Parser parser,
1690 XML_NotStandaloneHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001691{
Victor Stinner5ff71322017-06-21 14:39:22 +02001692 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001693 parser->m_notStandaloneHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001694}
1695
Fred Drake08317ae2003-10-21 15:38:55 +00001696void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001697XML_SetExternalEntityRefHandler(XML_Parser parser,
1698 XML_ExternalEntityRefHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001699{
Victor Stinner5ff71322017-06-21 14:39:22 +02001700 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001701 parser->m_externalEntityRefHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001702}
1703
Fred Drake08317ae2003-10-21 15:38:55 +00001704void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001705XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001706{
Victor Stinner5ff71322017-06-21 14:39:22 +02001707 if (parser == NULL)
1708 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001709 if (arg)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001710 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001711 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001712 parser->m_externalEntityRefHandlerArg = parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001713}
1714
Fred Drake08317ae2003-10-21 15:38:55 +00001715void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001716XML_SetSkippedEntityHandler(XML_Parser parser,
1717 XML_SkippedEntityHandler handler)
1718{
Victor Stinner5ff71322017-06-21 14:39:22 +02001719 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001720 parser->m_skippedEntityHandler = handler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001721}
1722
Fred Drake08317ae2003-10-21 15:38:55 +00001723void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001724XML_SetUnknownEncodingHandler(XML_Parser parser,
1725 XML_UnknownEncodingHandler handler,
1726 void *data)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001727{
Victor Stinner5ff71322017-06-21 14:39:22 +02001728 if (parser == NULL)
1729 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001730 parser->m_unknownEncodingHandler = handler;
1731 parser->m_unknownEncodingHandlerData = data;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001732}
1733
Fred Drake08317ae2003-10-21 15:38:55 +00001734void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001735XML_SetElementDeclHandler(XML_Parser parser,
1736 XML_ElementDeclHandler eldecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001737{
Victor Stinner5ff71322017-06-21 14:39:22 +02001738 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001739 parser->m_elementDeclHandler = eldecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001740}
1741
Fred Drake08317ae2003-10-21 15:38:55 +00001742void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001743XML_SetAttlistDeclHandler(XML_Parser parser,
1744 XML_AttlistDeclHandler attdecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001745{
Victor Stinner5ff71322017-06-21 14:39:22 +02001746 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001747 parser->m_attlistDeclHandler = attdecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001748}
1749
Fred Drake08317ae2003-10-21 15:38:55 +00001750void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001751XML_SetEntityDeclHandler(XML_Parser parser,
1752 XML_EntityDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001753{
Victor Stinner5ff71322017-06-21 14:39:22 +02001754 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001755 parser->m_entityDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001756}
1757
Fred Drake08317ae2003-10-21 15:38:55 +00001758void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001759XML_SetXmlDeclHandler(XML_Parser parser,
1760 XML_XmlDeclHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001761 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001762 parser->m_xmlDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001763}
1764
Fred Drake08317ae2003-10-21 15:38:55 +00001765int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001766XML_SetParamEntityParsing(XML_Parser parser,
1767 enum XML_ParamEntityParsing peParsing)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001768{
Victor Stinner5ff71322017-06-21 14:39:22 +02001769 if (parser == NULL)
1770 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001771 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001772 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001773 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001774#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07001775 parser->m_paramEntityParsing = peParsing;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001776 return 1;
1777#else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001778 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001779#endif
1780}
1781
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001782int XMLCALL
1783XML_SetHashSalt(XML_Parser parser,
1784 unsigned long hash_salt)
1785{
Victor Stinner5ff71322017-06-21 14:39:22 +02001786 if (parser == NULL)
1787 return 0;
1788 if (parser->m_parentParser)
1789 return XML_SetHashSalt(parser->m_parentParser, hash_salt);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001790 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001791 if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001792 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001793 parser->m_hash_secret_salt = hash_salt;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001794 return 1;
1795}
1796
Fred Drake08317ae2003-10-21 15:38:55 +00001797enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001798XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001799{
Victor Stinner5ff71322017-06-21 14:39:22 +02001800 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02001801 if (parser != NULL)
1802 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
Victor Stinner5ff71322017-06-21 14:39:22 +02001803 return XML_STATUS_ERROR;
1804 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001805 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001806 case XML_SUSPENDED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001807 parser->m_errorCode = XML_ERROR_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00001808 return XML_STATUS_ERROR;
1809 case XML_FINISHED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001810 parser->m_errorCode = XML_ERROR_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001811 return XML_STATUS_ERROR;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001812 case XML_INITIALIZED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001813 if (parser->m_parentParser == NULL && !startParsing(parser)) {
1814 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001815 return XML_STATUS_ERROR;
1816 }
Benjamin Peterson5033aa72018-09-10 21:04:00 -07001817 /* fall through */
Fred Drake31d485c2004-08-03 07:06:22 +00001818 default:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001819 parser->m_parsingStatus.parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00001820 }
1821
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001822 if (len == 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001823 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001824 if (!isFinal)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001825 return XML_STATUS_OK;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001826 parser->m_positionPtr = parser->m_bufferPtr;
1827 parser->m_parseEndPtr = parser->m_bufferEnd;
Fred Drake31d485c2004-08-03 07:06:22 +00001828
1829 /* If data are left over from last buffer, and we now know that these
1830 data are the final chunk of input, then we have to check them again
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001831 to detect errors based on that fact.
Fred Drake31d485c2004-08-03 07:06:22 +00001832 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001833 parser->m_errorCode = parser->m_processor(parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00001834
Benjamin Peterson4e211002018-06-26 19:25:45 -07001835 if (parser->m_errorCode == XML_ERROR_NONE) {
1836 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001837 case XML_SUSPENDED:
Victor Stinner93d0cb52017-08-18 23:43:54 +02001838 /* It is hard to be certain, but it seems that this case
1839 * cannot occur. This code is cleaning up a previous parse
1840 * with no new data (since len == 0). Changing the parsing
1841 * state requires getting to execute a handler function, and
1842 * there doesn't seem to be an opportunity for that while in
1843 * this circumstance.
1844 *
1845 * Given the uncertainty, we retain the code but exclude it
1846 * from coverage tests.
1847 *
1848 * LCOV_EXCL_START
1849 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001850 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position);
1851 parser->m_positionPtr = parser->m_bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00001852 return XML_STATUS_SUSPENDED;
Victor Stinner93d0cb52017-08-18 23:43:54 +02001853 /* LCOV_EXCL_STOP */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001854 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00001855 case XML_PARSING:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001856 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001857 /* fall through */
1858 default:
1859 return XML_STATUS_OK;
1860 }
1861 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001862 parser->m_eventEndPtr = parser->m_eventPtr;
1863 parser->m_processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001864 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001865 }
1866#ifndef XML_CONTEXT_BYTES
Benjamin Peterson4e211002018-06-26 19:25:45 -07001867 else if (parser->m_bufferPtr == parser->m_bufferEnd) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001868 const char *end;
1869 int nLeftOver;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001870 enum XML_Status result;
Victor Stinner5ff71322017-06-21 14:39:22 +02001871 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001872 if (len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
1873 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1874 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1875 parser->m_processor = errorProcessor;
Victor Stinner5ff71322017-06-21 14:39:22 +02001876 return XML_STATUS_ERROR;
1877 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001878 parser->m_parseEndByteIndex += len;
1879 parser->m_positionPtr = s;
1880 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
Fred Drake31d485c2004-08-03 07:06:22 +00001881
Benjamin Peterson4e211002018-06-26 19:25:45 -07001882 parser->m_errorCode = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end);
Fred Drake31d485c2004-08-03 07:06:22 +00001883
Benjamin Peterson4e211002018-06-26 19:25:45 -07001884 if (parser->m_errorCode != XML_ERROR_NONE) {
1885 parser->m_eventEndPtr = parser->m_eventPtr;
1886 parser->m_processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001887 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001888 }
Fred Drake31d485c2004-08-03 07:06:22 +00001889 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001890 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001891 case XML_SUSPENDED:
1892 result = XML_STATUS_SUSPENDED;
1893 break;
1894 case XML_INITIALIZED:
1895 case XML_PARSING:
Fred Drake31d485c2004-08-03 07:06:22 +00001896 if (isFinal) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001897 parser->m_parsingStatus.parsing = XML_FINISHED;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001898 return XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001899 }
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001900 /* fall through */
1901 default:
1902 result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001903 }
1904 }
1905
Benjamin Peterson4e211002018-06-26 19:25:45 -07001906 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end, &parser->m_position);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001907 nLeftOver = s + len - end;
1908 if (nLeftOver) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001909 if (parser->m_buffer == NULL || nLeftOver > parser->m_bufferLim - parser->m_buffer) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001910 /* avoid _signed_ integer overflow */
1911 char *temp = NULL;
1912 const int bytesToAllocate = (int)((unsigned)len * 2U);
1913 if (bytesToAllocate > 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001914 temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate);
Victor Stinner5ff71322017-06-21 14:39:22 +02001915 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001916 if (temp == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001917 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1918 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1919 parser->m_processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001920 return XML_STATUS_ERROR;
1921 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001922 parser->m_buffer = temp;
1923 parser->m_bufferLim = parser->m_buffer + bytesToAllocate;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001924 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001925 memcpy(parser->m_buffer, end, nLeftOver);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001926 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001927 parser->m_bufferPtr = parser->m_buffer;
1928 parser->m_bufferEnd = parser->m_buffer + nLeftOver;
1929 parser->m_positionPtr = parser->m_bufferPtr;
1930 parser->m_parseEndPtr = parser->m_bufferEnd;
1931 parser->m_eventPtr = parser->m_bufferPtr;
1932 parser->m_eventEndPtr = parser->m_bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00001933 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001934 }
1935#endif /* not defined XML_CONTEXT_BYTES */
1936 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001937 void *buff = XML_GetBuffer(parser, len);
1938 if (buff == NULL)
1939 return XML_STATUS_ERROR;
1940 else {
1941 memcpy(buff, s, len);
1942 return XML_ParseBuffer(parser, len, isFinal);
1943 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001944 }
1945}
1946
Fred Drake08317ae2003-10-21 15:38:55 +00001947enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001948XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001949{
Fred Drake31d485c2004-08-03 07:06:22 +00001950 const char *start;
Neal Norwitz52ca0dd2006-01-07 21:21:16 +00001951 enum XML_Status result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001952
Victor Stinner5ff71322017-06-21 14:39:22 +02001953 if (parser == NULL)
1954 return XML_STATUS_ERROR;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001955 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001956 case XML_SUSPENDED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001957 parser->m_errorCode = XML_ERROR_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00001958 return XML_STATUS_ERROR;
1959 case XML_FINISHED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001960 parser->m_errorCode = XML_ERROR_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001961 return XML_STATUS_ERROR;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001962 case XML_INITIALIZED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001963 if (parser->m_parentParser == NULL && !startParsing(parser)) {
1964 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001965 return XML_STATUS_ERROR;
1966 }
Benjamin Peterson5033aa72018-09-10 21:04:00 -07001967 /* fall through */
Fred Drake31d485c2004-08-03 07:06:22 +00001968 default:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001969 parser->m_parsingStatus.parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00001970 }
1971
Benjamin Peterson4e211002018-06-26 19:25:45 -07001972 start = parser->m_bufferPtr;
1973 parser->m_positionPtr = start;
1974 parser->m_bufferEnd += len;
1975 parser->m_parseEndPtr = parser->m_bufferEnd;
1976 parser->m_parseEndByteIndex += len;
1977 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
Fred Drake31d485c2004-08-03 07:06:22 +00001978
Benjamin Peterson4e211002018-06-26 19:25:45 -07001979 parser->m_errorCode = parser->m_processor(parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00001980
Benjamin Peterson4e211002018-06-26 19:25:45 -07001981 if (parser->m_errorCode != XML_ERROR_NONE) {
1982 parser->m_eventEndPtr = parser->m_eventPtr;
1983 parser->m_processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001984 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001985 }
Fred Drake31d485c2004-08-03 07:06:22 +00001986 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001987 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001988 case XML_SUSPENDED:
1989 result = XML_STATUS_SUSPENDED;
1990 break;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001991 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00001992 case XML_PARSING:
1993 if (isFinal) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001994 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001995 return result;
1996 }
1997 default: ; /* should not happen */
1998 }
1999 }
2000
Benjamin Peterson4e211002018-06-26 19:25:45 -07002001 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position);
2002 parser->m_positionPtr = parser->m_bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00002003 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002004}
2005
Fred Drake08317ae2003-10-21 15:38:55 +00002006void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002007XML_GetBuffer(XML_Parser parser, int len)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002008{
Victor Stinner5ff71322017-06-21 14:39:22 +02002009 if (parser == NULL)
2010 return NULL;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002011 if (len < 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002012 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002013 return NULL;
2014 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002015 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002016 case XML_SUSPENDED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002017 parser->m_errorCode = XML_ERROR_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00002018 return NULL;
2019 case XML_FINISHED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002020 parser->m_errorCode = XML_ERROR_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002021 return NULL;
2022 default: ;
2023 }
2024
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002025 if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002026#ifdef XML_CONTEXT_BYTES
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002027 int keep;
Victor Stinner23ec4b52017-06-15 00:54:36 +02002028#endif /* defined XML_CONTEXT_BYTES */
2029 /* Do not invoke signed arithmetic overflow: */
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002030 int neededSize = (int) ((unsigned)len +
2031 (unsigned)EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd,
2032 parser->m_bufferPtr));
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002033 if (neededSize < 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002034 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002035 return NULL;
2036 }
2037#ifdef XML_CONTEXT_BYTES
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002038 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002039 if (keep > XML_CONTEXT_BYTES)
2040 keep = XML_CONTEXT_BYTES;
2041 neededSize += keep;
2042#endif /* defined XML_CONTEXT_BYTES */
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002043 if (neededSize <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002044#ifdef XML_CONTEXT_BYTES
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002045 if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
2046 int offset = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer) - keep;
2047 /* The buffer pointers cannot be NULL here; we have at least some bytes in the buffer */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002048 memmove(parser->m_buffer, &parser->m_buffer[offset], parser->m_bufferEnd - parser->m_bufferPtr + keep);
2049 parser->m_bufferEnd -= offset;
2050 parser->m_bufferPtr -= offset;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002051 }
2052#else
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002053 if (parser->m_buffer && parser->m_bufferPtr) {
2054 memmove(parser->m_buffer, parser->m_bufferPtr,
2055 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2056 parser->m_bufferEnd = parser->m_buffer +
2057 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2058 parser->m_bufferPtr = parser->m_buffer;
2059 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002060#endif /* not defined XML_CONTEXT_BYTES */
2061 }
2062 else {
2063 char *newBuf;
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002064 int bufferSize = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002065 if (bufferSize == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002066 bufferSize = INIT_BUFFER_SIZE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002067 do {
Victor Stinner23ec4b52017-06-15 00:54:36 +02002068 /* Do not invoke signed arithmetic overflow: */
2069 bufferSize = (int) (2U * (unsigned) bufferSize);
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002070 } while (bufferSize < neededSize && bufferSize > 0);
2071 if (bufferSize <= 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002072 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002073 return NULL;
2074 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002075 newBuf = (char *)MALLOC(parser, bufferSize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002076 if (newBuf == 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002077 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002078 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002079 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002080 parser->m_bufferLim = newBuf + bufferSize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002081#ifdef XML_CONTEXT_BYTES
Benjamin Peterson4e211002018-06-26 19:25:45 -07002082 if (parser->m_bufferPtr) {
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002083 int keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002084 if (keep > XML_CONTEXT_BYTES)
2085 keep = XML_CONTEXT_BYTES;
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002086 memcpy(newBuf, &parser->m_bufferPtr[-keep],
2087 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) + keep);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002088 FREE(parser, parser->m_buffer);
2089 parser->m_buffer = newBuf;
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002090 parser->m_bufferEnd = parser->m_buffer +
2091 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) + keep;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002092 parser->m_bufferPtr = parser->m_buffer + keep;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002093 }
2094 else {
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002095 /* This must be a brand new buffer with no data in it yet */
2096 parser->m_bufferEnd = newBuf;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002097 parser->m_bufferPtr = parser->m_buffer = newBuf;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002098 }
2099#else
Benjamin Peterson4e211002018-06-26 19:25:45 -07002100 if (parser->m_bufferPtr) {
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002101 memcpy(newBuf, parser->m_bufferPtr,
2102 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
Benjamin Peterson4e211002018-06-26 19:25:45 -07002103 FREE(parser, parser->m_buffer);
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002104 parser->m_bufferEnd = newBuf +
2105 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002106 }
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002107 else {
2108 /* This must be a brand new buffer with no data in it yet */
2109 parser->m_bufferEnd = newBuf;
2110 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002111 parser->m_bufferPtr = parser->m_buffer = newBuf;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002112#endif /* not defined XML_CONTEXT_BYTES */
2113 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002114 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2115 parser->m_positionPtr = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002116 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002117 return parser->m_bufferEnd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002118}
2119
Fred Drake31d485c2004-08-03 07:06:22 +00002120enum XML_Status XMLCALL
2121XML_StopParser(XML_Parser parser, XML_Bool resumable)
2122{
Victor Stinner5ff71322017-06-21 14:39:22 +02002123 if (parser == NULL)
2124 return XML_STATUS_ERROR;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002125 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002126 case XML_SUSPENDED:
2127 if (resumable) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002128 parser->m_errorCode = XML_ERROR_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00002129 return XML_STATUS_ERROR;
2130 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002131 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002132 break;
2133 case XML_FINISHED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002134 parser->m_errorCode = XML_ERROR_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002135 return XML_STATUS_ERROR;
2136 default:
2137 if (resumable) {
2138#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07002139 if (parser->m_isParamEntity) {
2140 parser->m_errorCode = XML_ERROR_SUSPEND_PE;
Fred Drake31d485c2004-08-03 07:06:22 +00002141 return XML_STATUS_ERROR;
2142 }
2143#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07002144 parser->m_parsingStatus.parsing = XML_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00002145 }
2146 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07002147 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002148 }
2149 return XML_STATUS_OK;
2150}
2151
2152enum XML_Status XMLCALL
2153XML_ResumeParser(XML_Parser parser)
2154{
Neal Norwitz52ca0dd2006-01-07 21:21:16 +00002155 enum XML_Status result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00002156
Victor Stinner5ff71322017-06-21 14:39:22 +02002157 if (parser == NULL)
2158 return XML_STATUS_ERROR;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002159 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2160 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00002161 return XML_STATUS_ERROR;
2162 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002163 parser->m_parsingStatus.parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00002164
Benjamin Peterson4e211002018-06-26 19:25:45 -07002165 parser->m_errorCode = parser->m_processor(parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00002166
Benjamin Peterson4e211002018-06-26 19:25:45 -07002167 if (parser->m_errorCode != XML_ERROR_NONE) {
2168 parser->m_eventEndPtr = parser->m_eventPtr;
2169 parser->m_processor = errorProcessor;
Fred Drake31d485c2004-08-03 07:06:22 +00002170 return XML_STATUS_ERROR;
2171 }
2172 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002173 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002174 case XML_SUSPENDED:
2175 result = XML_STATUS_SUSPENDED;
2176 break;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002177 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00002178 case XML_PARSING:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002179 if (parser->m_parsingStatus.finalBuffer) {
2180 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002181 return result;
2182 }
2183 default: ;
2184 }
2185 }
2186
Benjamin Peterson4e211002018-06-26 19:25:45 -07002187 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position);
2188 parser->m_positionPtr = parser->m_bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00002189 return result;
2190}
2191
2192void XMLCALL
2193XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status)
2194{
Victor Stinner5ff71322017-06-21 14:39:22 +02002195 if (parser == NULL)
2196 return;
Fred Drake31d485c2004-08-03 07:06:22 +00002197 assert(status != NULL);
2198 *status = parser->m_parsingStatus;
2199}
2200
Fred Drake08317ae2003-10-21 15:38:55 +00002201enum XML_Error XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002202XML_GetErrorCode(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002203{
Victor Stinner5ff71322017-06-21 14:39:22 +02002204 if (parser == NULL)
2205 return XML_ERROR_INVALID_ARGUMENT;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002206 return parser->m_errorCode;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002207}
2208
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002209XML_Index XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002210XML_GetCurrentByteIndex(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002211{
Victor Stinner5ff71322017-06-21 14:39:22 +02002212 if (parser == NULL)
2213 return -1;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002214 if (parser->m_eventPtr)
2215 return (XML_Index)(parser->m_parseEndByteIndex - (parser->m_parseEndPtr - parser->m_eventPtr));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002216 return -1;
2217}
2218
Fred Drake08317ae2003-10-21 15:38:55 +00002219int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002220XML_GetCurrentByteCount(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002221{
Victor Stinner5ff71322017-06-21 14:39:22 +02002222 if (parser == NULL)
2223 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002224 if (parser->m_eventEndPtr && parser->m_eventPtr)
2225 return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002226 return 0;
2227}
2228
Fred Drake08317ae2003-10-21 15:38:55 +00002229const char * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002230XML_GetInputContext(XML_Parser parser, int *offset, int *size)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002231{
2232#ifdef XML_CONTEXT_BYTES
Victor Stinner5ff71322017-06-21 14:39:22 +02002233 if (parser == NULL)
2234 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002235 if (parser->m_eventPtr && parser->m_buffer) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002236 if (offset != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07002237 *offset = (int)(parser->m_eventPtr - parser->m_buffer);
Victor Stinner5ff71322017-06-21 14:39:22 +02002238 if (size != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07002239 *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2240 return parser->m_buffer;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002241 }
Victor Stinner5ff71322017-06-21 14:39:22 +02002242#else
2243 (void)parser;
2244 (void)offset;
2245 (void)size;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002246#endif /* defined XML_CONTEXT_BYTES */
2247 return (char *) 0;
2248}
2249
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002250XML_Size XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002251XML_GetCurrentLineNumber(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002252{
Victor Stinner5ff71322017-06-21 14:39:22 +02002253 if (parser == NULL)
2254 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002255 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2256 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_eventPtr, &parser->m_position);
2257 parser->m_positionPtr = parser->m_eventPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002258 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002259 return parser->m_position.lineNumber + 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002260}
2261
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002262XML_Size XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002263XML_GetCurrentColumnNumber(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002264{
Victor Stinner5ff71322017-06-21 14:39:22 +02002265 if (parser == NULL)
2266 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002267 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2268 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_eventPtr, &parser->m_position);
2269 parser->m_positionPtr = parser->m_eventPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002270 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002271 return parser->m_position.columnNumber;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002272}
2273
Fred Drake08317ae2003-10-21 15:38:55 +00002274void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002275XML_FreeContentModel(XML_Parser parser, XML_Content *model)
2276{
Victor Stinner5ff71322017-06-21 14:39:22 +02002277 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07002278 FREE(parser, model);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002279}
2280
Fred Drake08317ae2003-10-21 15:38:55 +00002281void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002282XML_MemMalloc(XML_Parser parser, size_t size)
2283{
Victor Stinner5ff71322017-06-21 14:39:22 +02002284 if (parser == NULL)
2285 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002286 return MALLOC(parser, size);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002287}
2288
Fred Drake08317ae2003-10-21 15:38:55 +00002289void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002290XML_MemRealloc(XML_Parser parser, void *ptr, size_t size)
2291{
Victor Stinner5ff71322017-06-21 14:39:22 +02002292 if (parser == NULL)
2293 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002294 return REALLOC(parser, ptr, size);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002295}
2296
Fred Drake08317ae2003-10-21 15:38:55 +00002297void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002298XML_MemFree(XML_Parser parser, void *ptr)
2299{
Victor Stinner5ff71322017-06-21 14:39:22 +02002300 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07002301 FREE(parser, ptr);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002302}
2303
Fred Drake08317ae2003-10-21 15:38:55 +00002304void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002305XML_DefaultCurrent(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002306{
Victor Stinner5ff71322017-06-21 14:39:22 +02002307 if (parser == NULL)
2308 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002309 if (parser->m_defaultHandler) {
2310 if (parser->m_openInternalEntities)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002311 reportDefault(parser,
Benjamin Peterson4e211002018-06-26 19:25:45 -07002312 parser->m_internalEncoding,
2313 parser->m_openInternalEntities->internalEventPtr,
2314 parser->m_openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002315 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07002316 reportDefault(parser, parser->m_encoding, parser->m_eventPtr, parser->m_eventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002317 }
2318}
2319
Fred Drake08317ae2003-10-21 15:38:55 +00002320const XML_LChar * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002321XML_ErrorString(enum XML_Error code)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002322{
Benjamin Peterson4e211002018-06-26 19:25:45 -07002323 switch (code) {
2324 case XML_ERROR_NONE:
2325 return NULL;
2326 case XML_ERROR_NO_MEMORY:
2327 return XML_L("out of memory");
2328 case XML_ERROR_SYNTAX:
2329 return XML_L("syntax error");
2330 case XML_ERROR_NO_ELEMENTS:
2331 return XML_L("no element found");
2332 case XML_ERROR_INVALID_TOKEN:
2333 return XML_L("not well-formed (invalid token)");
2334 case XML_ERROR_UNCLOSED_TOKEN:
2335 return XML_L("unclosed token");
2336 case XML_ERROR_PARTIAL_CHAR:
2337 return XML_L("partial character");
2338 case XML_ERROR_TAG_MISMATCH:
2339 return XML_L("mismatched tag");
2340 case XML_ERROR_DUPLICATE_ATTRIBUTE:
2341 return XML_L("duplicate attribute");
2342 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2343 return XML_L("junk after document element");
2344 case XML_ERROR_PARAM_ENTITY_REF:
2345 return XML_L("illegal parameter entity reference");
2346 case XML_ERROR_UNDEFINED_ENTITY:
2347 return XML_L("undefined entity");
2348 case XML_ERROR_RECURSIVE_ENTITY_REF:
2349 return XML_L("recursive entity reference");
2350 case XML_ERROR_ASYNC_ENTITY:
2351 return XML_L("asynchronous entity");
2352 case XML_ERROR_BAD_CHAR_REF:
2353 return XML_L("reference to invalid character number");
2354 case XML_ERROR_BINARY_ENTITY_REF:
2355 return XML_L("reference to binary entity");
2356 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2357 return XML_L("reference to external entity in attribute");
2358 case XML_ERROR_MISPLACED_XML_PI:
2359 return XML_L("XML or text declaration not at start of entity");
2360 case XML_ERROR_UNKNOWN_ENCODING:
2361 return XML_L("unknown encoding");
2362 case XML_ERROR_INCORRECT_ENCODING:
2363 return XML_L("encoding specified in XML declaration is incorrect");
2364 case XML_ERROR_UNCLOSED_CDATA_SECTION:
2365 return XML_L("unclosed CDATA section");
2366 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2367 return XML_L("error in processing external entity reference");
2368 case XML_ERROR_NOT_STANDALONE:
2369 return XML_L("document is not standalone");
2370 case XML_ERROR_UNEXPECTED_STATE:
2371 return XML_L("unexpected parser state - please send a bug report");
2372 case XML_ERROR_ENTITY_DECLARED_IN_PE:
2373 return XML_L("entity declared in parameter entity");
2374 case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2375 return XML_L("requested feature requires XML_DTD support in Expat");
2376 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2377 return XML_L("cannot change setting once parsing has begun");
2378 /* Added in 1.95.7. */
2379 case XML_ERROR_UNBOUND_PREFIX:
2380 return XML_L("unbound prefix");
2381 /* Added in 1.95.8. */
2382 case XML_ERROR_UNDECLARING_PREFIX:
2383 return XML_L("must not undeclare prefix");
2384 case XML_ERROR_INCOMPLETE_PE:
2385 return XML_L("incomplete markup in parameter entity");
2386 case XML_ERROR_XML_DECL:
2387 return XML_L("XML declaration not well-formed");
2388 case XML_ERROR_TEXT_DECL:
2389 return XML_L("text declaration not well-formed");
2390 case XML_ERROR_PUBLICID:
2391 return XML_L("illegal character(s) in public id");
2392 case XML_ERROR_SUSPENDED:
2393 return XML_L("parser suspended");
2394 case XML_ERROR_NOT_SUSPENDED:
2395 return XML_L("parser not suspended");
2396 case XML_ERROR_ABORTED:
2397 return XML_L("parsing aborted");
2398 case XML_ERROR_FINISHED:
2399 return XML_L("parsing finished");
2400 case XML_ERROR_SUSPEND_PE:
2401 return XML_L("cannot suspend in external parameter entity");
2402 /* Added in 2.0.0. */
2403 case XML_ERROR_RESERVED_PREFIX_XML:
2404 return XML_L("reserved prefix (xml) must not be undeclared or bound to another namespace name");
2405 case XML_ERROR_RESERVED_PREFIX_XMLNS:
2406 return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2407 case XML_ERROR_RESERVED_NAMESPACE_URI:
2408 return XML_L("prefix must not be bound to one of the reserved namespace names");
2409 /* Added in 2.2.5. */
2410 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2411 return XML_L("invalid argument");
2412 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002413 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002414}
2415
Fred Drake08317ae2003-10-21 15:38:55 +00002416const XML_LChar * XMLCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002417XML_ExpatVersion(void) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002418
2419 /* V1 is used to string-ize the version number. However, it would
2420 string-ize the actual version macro *names* unless we get them
2421 substituted before being passed to V1. CPP is defined to expand
2422 a macro, then rescan for more expansions. Thus, we use V2 to expand
2423 the version macros, then CPP will expand the resulting V1() macro
2424 with the correct numerals. */
2425 /* ### I'm assuming cpp is portable in this respect... */
2426
2427#define V1(a,b,c) XML_L(#a)XML_L(".")XML_L(#b)XML_L(".")XML_L(#c)
2428#define V2(a,b,c) XML_L("expat_")V1(a,b,c)
2429
2430 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2431
2432#undef V1
2433#undef V2
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002434}
2435
Fred Drake08317ae2003-10-21 15:38:55 +00002436XML_Expat_Version XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002437XML_ExpatVersionInfo(void)
2438{
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002439 XML_Expat_Version version;
2440
2441 version.major = XML_MAJOR_VERSION;
2442 version.minor = XML_MINOR_VERSION;
2443 version.micro = XML_MICRO_VERSION;
2444
2445 return version;
2446}
2447
Fred Drake08317ae2003-10-21 15:38:55 +00002448const XML_Feature * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002449XML_GetFeatureList(void)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002450{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002451 static const XML_Feature features[] = {
2452 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2453 sizeof(XML_Char)},
2454 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2455 sizeof(XML_LChar)},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002456#ifdef XML_UNICODE
Fred Drake08317ae2003-10-21 15:38:55 +00002457 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002458#endif
2459#ifdef XML_UNICODE_WCHAR_T
Fred Drake08317ae2003-10-21 15:38:55 +00002460 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002461#endif
2462#ifdef XML_DTD
Fred Drake08317ae2003-10-21 15:38:55 +00002463 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002464#endif
2465#ifdef XML_CONTEXT_BYTES
2466 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2467 XML_CONTEXT_BYTES},
2468#endif
2469#ifdef XML_MIN_SIZE
Fred Drake08317ae2003-10-21 15:38:55 +00002470 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002471#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002472#ifdef XML_NS
2473 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2474#endif
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002475#ifdef XML_LARGE_SIZE
2476 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2477#endif
2478#ifdef XML_ATTR_INFO
2479 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2480#endif
Fred Drake08317ae2003-10-21 15:38:55 +00002481 {XML_FEATURE_END, NULL, 0}
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002482 };
2483
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002484 return features;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002485}
2486
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002487/* Initially tag->rawName always points into the parse buffer;
2488 for those TAG instances opened while the current parse buffer was
2489 processed, and not yet closed, we need to store tag->rawName in a more
2490 permanent location, since the parse buffer is about to be discarded.
2491*/
2492static XML_Bool
2493storeRawNames(XML_Parser parser)
2494{
Benjamin Peterson4e211002018-06-26 19:25:45 -07002495 TAG *tag = parser->m_tagStack;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002496 while (tag) {
2497 int bufSize;
2498 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2499 char *rawNameBuf = tag->buf + nameLen;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002500 /* Stop if already stored. Since m_tagStack is a stack, we can stop
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002501 at the first entry that has already been copied; everything
2502 below it in the stack is already been accounted for in a
2503 previous call to this function.
2504 */
2505 if (tag->rawName == rawNameBuf)
2506 break;
2507 /* For re-use purposes we need to ensure that the
2508 size of tag->buf is a multiple of sizeof(XML_Char).
2509 */
2510 bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2511 if (bufSize > tag->bufEnd - tag->buf) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002512 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002513 if (temp == NULL)
2514 return XML_FALSE;
2515 /* if tag->name.str points to tag->buf (only when namespace
2516 processing is off) then we have to update it
2517 */
2518 if (tag->name.str == (XML_Char *)tag->buf)
2519 tag->name.str = (XML_Char *)temp;
2520 /* if tag->name.localPart is set (when namespace processing is on)
2521 then update it as well, since it will always point into tag->buf
2522 */
2523 if (tag->name.localPart)
2524 tag->name.localPart = (XML_Char *)temp + (tag->name.localPart -
2525 (XML_Char *)tag->buf);
2526 tag->buf = temp;
2527 tag->bufEnd = temp + bufSize;
2528 rawNameBuf = temp + nameLen;
2529 }
2530 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2531 tag->rawName = rawNameBuf;
2532 tag = tag->parent;
2533 }
2534 return XML_TRUE;
2535}
2536
2537static enum XML_Error PTRCALL
2538contentProcessor(XML_Parser parser,
2539 const char *start,
2540 const char *end,
2541 const char **endPtr)
2542{
Benjamin Peterson4e211002018-06-26 19:25:45 -07002543 enum XML_Error result = doContent(parser, 0, parser->m_encoding, start, end,
2544 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00002545 if (result == XML_ERROR_NONE) {
2546 if (!storeRawNames(parser))
2547 return XML_ERROR_NO_MEMORY;
2548 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002549 return result;
2550}
2551
2552static enum XML_Error PTRCALL
2553externalEntityInitProcessor(XML_Parser parser,
2554 const char *start,
2555 const char *end,
2556 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002557{
2558 enum XML_Error result = initializeEncoding(parser);
2559 if (result != XML_ERROR_NONE)
2560 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002561 parser->m_processor = externalEntityInitProcessor2;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002562 return externalEntityInitProcessor2(parser, start, end, endPtr);
2563}
2564
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002565static enum XML_Error PTRCALL
2566externalEntityInitProcessor2(XML_Parser parser,
2567 const char *start,
2568 const char *end,
2569 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002570{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002571 const char *next = start; /* XmlContentTok doesn't always set the last arg */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002572 int tok = XmlContentTok(parser->m_encoding, start, end, &next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002573 switch (tok) {
2574 case XML_TOK_BOM:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002575 /* If we are at the end of the buffer, this would cause the next stage,
2576 i.e. externalEntityInitProcessor3, to pass control directly to
2577 doContent (by detecting XML_TOK_NONE) without processing any xml text
2578 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2579 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002580 if (next == end && !parser->m_parsingStatus.finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002581 *endPtr = next;
2582 return XML_ERROR_NONE;
2583 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002584 start = next;
2585 break;
2586 case XML_TOK_PARTIAL:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002587 if (!parser->m_parsingStatus.finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002588 *endPtr = start;
2589 return XML_ERROR_NONE;
2590 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002591 parser->m_eventPtr = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002592 return XML_ERROR_UNCLOSED_TOKEN;
2593 case XML_TOK_PARTIAL_CHAR:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002594 if (!parser->m_parsingStatus.finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002595 *endPtr = start;
2596 return XML_ERROR_NONE;
2597 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002598 parser->m_eventPtr = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002599 return XML_ERROR_PARTIAL_CHAR;
2600 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002601 parser->m_processor = externalEntityInitProcessor3;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002602 return externalEntityInitProcessor3(parser, start, end, endPtr);
2603}
2604
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002605static enum XML_Error PTRCALL
2606externalEntityInitProcessor3(XML_Parser parser,
2607 const char *start,
2608 const char *end,
2609 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002610{
Fred Drake31d485c2004-08-03 07:06:22 +00002611 int tok;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002612 const char *next = start; /* XmlContentTok doesn't always set the last arg */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002613 parser->m_eventPtr = start;
2614 tok = XmlContentTok(parser->m_encoding, start, end, &next);
2615 parser->m_eventEndPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00002616
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002617 switch (tok) {
2618 case XML_TOK_XML_DECL:
2619 {
Fred Drake31d485c2004-08-03 07:06:22 +00002620 enum XML_Error result;
2621 result = processXmlDecl(parser, 1, start, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002622 if (result != XML_ERROR_NONE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002623 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002624 switch (parser->m_parsingStatus.parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002625 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00002626 *endPtr = next;
2627 return XML_ERROR_NONE;
2628 case XML_FINISHED:
2629 return XML_ERROR_ABORTED;
2630 default:
2631 start = next;
2632 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002633 }
2634 break;
2635 case XML_TOK_PARTIAL:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002636 if (!parser->m_parsingStatus.finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002637 *endPtr = start;
2638 return XML_ERROR_NONE;
2639 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002640 return XML_ERROR_UNCLOSED_TOKEN;
2641 case XML_TOK_PARTIAL_CHAR:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002642 if (!parser->m_parsingStatus.finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002643 *endPtr = start;
2644 return XML_ERROR_NONE;
2645 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002646 return XML_ERROR_PARTIAL_CHAR;
2647 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002648 parser->m_processor = externalEntityContentProcessor;
2649 parser->m_tagLevel = 1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002650 return externalEntityContentProcessor(parser, start, end, endPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002651}
2652
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002653static enum XML_Error PTRCALL
2654externalEntityContentProcessor(XML_Parser parser,
2655 const char *start,
2656 const char *end,
2657 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002658{
Benjamin Peterson4e211002018-06-26 19:25:45 -07002659 enum XML_Error result = doContent(parser, 1, parser->m_encoding, start, end,
2660 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00002661 if (result == XML_ERROR_NONE) {
2662 if (!storeRawNames(parser))
2663 return XML_ERROR_NO_MEMORY;
2664 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002665 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002666}
2667
2668static enum XML_Error
2669doContent(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002670 int startTagLevel,
2671 const ENCODING *enc,
2672 const char *s,
2673 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00002674 const char **nextPtr,
2675 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002676{
Fred Drake31d485c2004-08-03 07:06:22 +00002677 /* save one level of indirection */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002678 DTD * const dtd = parser->m_dtd;
Fred Drake31d485c2004-08-03 07:06:22 +00002679
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002680 const char **eventPP;
2681 const char **eventEndPP;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002682 if (enc == parser->m_encoding) {
2683 eventPP = &parser->m_eventPtr;
2684 eventEndPP = &parser->m_eventEndPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002685 }
2686 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002687 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
2688 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002689 }
2690 *eventPP = s;
Fred Drake31d485c2004-08-03 07:06:22 +00002691
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002692 for (;;) {
2693 const char *next = s; /* XmlContentTok doesn't always set the last arg */
2694 int tok = XmlContentTok(enc, s, end, &next);
2695 *eventEndPP = next;
2696 switch (tok) {
2697 case XML_TOK_TRAILING_CR:
Fred Drake31d485c2004-08-03 07:06:22 +00002698 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002699 *nextPtr = s;
2700 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002701 }
2702 *eventEndPP = end;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002703 if (parser->m_characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002704 XML_Char c = 0xA;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002705 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002706 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002707 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002708 reportDefault(parser, enc, s, end);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002709 /* We are at the end of the final buffer, should we check for
2710 XML_SUSPENDED, XML_FINISHED?
Fred Drake31d485c2004-08-03 07:06:22 +00002711 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002712 if (startTagLevel == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002713 return XML_ERROR_NO_ELEMENTS;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002714 if (parser->m_tagLevel != startTagLevel)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002715 return XML_ERROR_ASYNC_ENTITY;
Fred Drake31d485c2004-08-03 07:06:22 +00002716 *nextPtr = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002717 return XML_ERROR_NONE;
2718 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00002719 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002720 *nextPtr = s;
2721 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002722 }
2723 if (startTagLevel > 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002724 if (parser->m_tagLevel != startTagLevel)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002725 return XML_ERROR_ASYNC_ENTITY;
Fred Drake31d485c2004-08-03 07:06:22 +00002726 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002727 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002728 }
2729 return XML_ERROR_NO_ELEMENTS;
2730 case XML_TOK_INVALID:
2731 *eventPP = next;
2732 return XML_ERROR_INVALID_TOKEN;
2733 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00002734 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002735 *nextPtr = s;
2736 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002737 }
2738 return XML_ERROR_UNCLOSED_TOKEN;
2739 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00002740 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002741 *nextPtr = s;
2742 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002743 }
2744 return XML_ERROR_PARTIAL_CHAR;
2745 case XML_TOK_ENTITY_REF:
2746 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002747 const XML_Char *name;
2748 ENTITY *entity;
2749 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
2750 s + enc->minBytesPerChar,
2751 next - enc->minBytesPerChar);
2752 if (ch) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002753 if (parser->m_characterDataHandler)
2754 parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
2755 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002756 reportDefault(parser, enc, s, next);
2757 break;
2758 }
2759 name = poolStoreString(&dtd->pool, enc,
2760 s + enc->minBytesPerChar,
2761 next - enc->minBytesPerChar);
2762 if (!name)
2763 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07002764 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002765 poolDiscard(&dtd->pool);
2766 /* First, determine if a check for an existing declaration is needed;
2767 if yes, check that the entity exists, and that it is internal,
2768 otherwise call the skipped entity or default handler.
2769 */
2770 if (!dtd->hasParamEntityRefs || dtd->standalone) {
2771 if (!entity)
2772 return XML_ERROR_UNDEFINED_ENTITY;
2773 else if (!entity->is_internal)
2774 return XML_ERROR_ENTITY_DECLARED_IN_PE;
2775 }
2776 else if (!entity) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002777 if (parser->m_skippedEntityHandler)
2778 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
2779 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002780 reportDefault(parser, enc, s, next);
2781 break;
2782 }
2783 if (entity->open)
2784 return XML_ERROR_RECURSIVE_ENTITY_REF;
2785 if (entity->notation)
2786 return XML_ERROR_BINARY_ENTITY_REF;
2787 if (entity->textPtr) {
2788 enum XML_Error result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002789 if (!parser->m_defaultExpandInternalEntities) {
2790 if (parser->m_skippedEntityHandler)
2791 parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name, 0);
2792 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002793 reportDefault(parser, enc, s, next);
2794 break;
2795 }
Fred Drake31d485c2004-08-03 07:06:22 +00002796 result = processInternalEntity(parser, entity, XML_FALSE);
2797 if (result != XML_ERROR_NONE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002798 return result;
2799 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002800 else if (parser->m_externalEntityRefHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002801 const XML_Char *context;
2802 entity->open = XML_TRUE;
2803 context = getContext(parser);
2804 entity->open = XML_FALSE;
2805 if (!context)
2806 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002807 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002808 context,
2809 entity->base,
2810 entity->systemId,
2811 entity->publicId))
2812 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002813 poolDiscard(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002814 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002815 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002816 reportDefault(parser, enc, s, next);
2817 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002818 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002819 case XML_TOK_START_TAG_NO_ATTS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002820 /* fall through */
2821 case XML_TOK_START_TAG_WITH_ATTS:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002822 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002823 TAG *tag;
2824 enum XML_Error result;
2825 XML_Char *toPtr;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002826 if (parser->m_freeTagList) {
2827 tag = parser->m_freeTagList;
2828 parser->m_freeTagList = parser->m_freeTagList->parent;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002829 }
2830 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002831 tag = (TAG *)MALLOC(parser, sizeof(TAG));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002832 if (!tag)
2833 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002834 tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002835 if (!tag->buf) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002836 FREE(parser, tag);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002837 return XML_ERROR_NO_MEMORY;
2838 }
2839 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
2840 }
2841 tag->bindings = NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002842 tag->parent = parser->m_tagStack;
2843 parser->m_tagStack = tag;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002844 tag->name.localPart = NULL;
2845 tag->name.prefix = NULL;
2846 tag->rawName = s + enc->minBytesPerChar;
2847 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002848 ++parser->m_tagLevel;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002849 {
2850 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
2851 const char *fromPtr = tag->rawName;
2852 toPtr = (XML_Char *)tag->buf;
2853 for (;;) {
2854 int bufSize;
2855 int convLen;
Victor Stinner23ec4b52017-06-15 00:54:36 +02002856 const enum XML_Convert_Result convert_res = XmlConvert(enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002857 &fromPtr, rawNameEnd,
2858 (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002859 convLen = (int)(toPtr - (XML_Char *)tag->buf);
Victor Stinner5ff71322017-06-21 14:39:22 +02002860 if ((fromPtr >= rawNameEnd) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002861 tag->name.strLen = convLen;
2862 break;
2863 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002864 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002865 {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002866 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002867 if (temp == NULL)
2868 return XML_ERROR_NO_MEMORY;
2869 tag->buf = temp;
2870 tag->bufEnd = temp + bufSize;
2871 toPtr = (XML_Char *)temp + convLen;
2872 }
2873 }
2874 }
2875 tag->name.str = (XML_Char *)tag->buf;
2876 *toPtr = XML_T('\0');
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002877 result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
2878 if (result)
2879 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002880 if (parser->m_startElementHandler)
2881 parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
2882 (const XML_Char **)parser->m_atts);
2883 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002884 reportDefault(parser, enc, s, next);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002885 poolClear(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002886 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002887 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002888 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002889 /* fall through */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002890 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
2891 {
2892 const char *rawName = s + enc->minBytesPerChar;
2893 enum XML_Error result;
2894 BINDING *bindings = NULL;
2895 XML_Bool noElmHandlers = XML_TRUE;
2896 TAG_NAME name;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002897 name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002898 rawName + XmlNameLength(enc, rawName));
2899 if (!name.str)
2900 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002901 poolFinish(&parser->m_tempPool);
Fred Drake4faea012003-01-28 06:42:40 +00002902 result = storeAtts(parser, enc, s, &name, &bindings);
Victor Stinner5ff71322017-06-21 14:39:22 +02002903 if (result != XML_ERROR_NONE) {
2904 freeBindings(parser, bindings);
Fred Drake4faea012003-01-28 06:42:40 +00002905 return result;
Victor Stinner5ff71322017-06-21 14:39:22 +02002906 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002907 poolFinish(&parser->m_tempPool);
2908 if (parser->m_startElementHandler) {
2909 parser->m_startElementHandler(parser->m_handlerArg, name.str, (const XML_Char **)parser->m_atts);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002910 noElmHandlers = XML_FALSE;
2911 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002912 if (parser->m_endElementHandler) {
2913 if (parser->m_startElementHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002914 *eventPP = *eventEndPP;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002915 parser->m_endElementHandler(parser->m_handlerArg, name.str);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002916 noElmHandlers = XML_FALSE;
2917 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002918 if (noElmHandlers && parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002919 reportDefault(parser, enc, s, next);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002920 poolClear(&parser->m_tempPool);
Victor Stinner5ff71322017-06-21 14:39:22 +02002921 freeBindings(parser, bindings);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002922 }
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002923 if ((parser->m_tagLevel == 0) && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
2924 if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
2925 parser->m_processor = epilogProcessor;
2926 else
2927 return epilogProcessor(parser, next, end, nextPtr);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002928 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002929 break;
2930 case XML_TOK_END_TAG:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002931 if (parser->m_tagLevel == startTagLevel)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002932 return XML_ERROR_ASYNC_ENTITY;
2933 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002934 int len;
2935 const char *rawName;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002936 TAG *tag = parser->m_tagStack;
2937 parser->m_tagStack = tag->parent;
2938 tag->parent = parser->m_freeTagList;
2939 parser->m_freeTagList = tag;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002940 rawName = s + enc->minBytesPerChar*2;
2941 len = XmlNameLength(enc, rawName);
2942 if (len != tag->rawNameLength
2943 || memcmp(tag->rawName, rawName, len) != 0) {
2944 *eventPP = rawName;
2945 return XML_ERROR_TAG_MISMATCH;
2946 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002947 --parser->m_tagLevel;
2948 if (parser->m_endElementHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002949 const XML_Char *localPart;
2950 const XML_Char *prefix;
2951 XML_Char *uri;
2952 localPart = tag->name.localPart;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002953 if (parser->m_ns && localPart) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002954 /* localPart and prefix may have been overwritten in
2955 tag->name.str, since this points to the binding->uri
2956 buffer which gets re-used; so we have to add them again
2957 */
2958 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
2959 /* don't need to check for space - already done in storeAtts() */
2960 while (*localPart) *uri++ = *localPart++;
2961 prefix = (XML_Char *)tag->name.prefix;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002962 if (parser->m_ns_triplets && prefix) {
2963 *uri++ = parser->m_namespaceSeparator;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002964 while (*prefix) *uri++ = *prefix++;
2965 }
2966 *uri = XML_T('\0');
2967 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002968 parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002969 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002970 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002971 reportDefault(parser, enc, s, next);
2972 while (tag->bindings) {
2973 BINDING *b = tag->bindings;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002974 if (parser->m_endNamespaceDeclHandler)
2975 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002976 tag->bindings = tag->bindings->nextTagBinding;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002977 b->nextTagBinding = parser->m_freeBindingList;
2978 parser->m_freeBindingList = b;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002979 b->prefix->binding = b->prevPrefixBinding;
2980 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002981 if (parser->m_tagLevel == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002982 return epilogProcessor(parser, next, end, nextPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002983 }
2984 break;
2985 case XML_TOK_CHAR_REF:
2986 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002987 int n = XmlCharRefNumber(enc, s);
2988 if (n < 0)
2989 return XML_ERROR_BAD_CHAR_REF;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002990 if (parser->m_characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002991 XML_Char buf[XML_ENCODE_MAX];
Benjamin Peterson4e211002018-06-26 19:25:45 -07002992 parser->m_characterDataHandler(parser->m_handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002993 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002994 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002995 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002996 }
2997 break;
2998 case XML_TOK_XML_DECL:
2999 return XML_ERROR_MISPLACED_XML_PI;
3000 case XML_TOK_DATA_NEWLINE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07003001 if (parser->m_characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003002 XML_Char c = 0xA;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003003 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003004 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003005 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003006 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003007 break;
3008 case XML_TOK_CDATA_SECT_OPEN:
3009 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003010 enum XML_Error result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003011 if (parser->m_startCdataSectionHandler)
3012 parser->m_startCdataSectionHandler(parser->m_handlerArg);
Benjamin Peterson3b03b092019-06-27 20:54:44 -07003013/* BEGIN disabled code */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003014 /* Suppose you doing a transformation on a document that involves
3015 changing only the character data. You set up a defaultHandler
3016 and a characterDataHandler. The defaultHandler simply copies
3017 characters through. The characterDataHandler does the
3018 transformation and writes the characters out escaping them as
3019 necessary. This case will fail to work if we leave out the
3020 following two lines (because & and < inside CDATA sections will
3021 be incorrectly escaped).
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003022
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003023 However, now we have a start/endCdataSectionHandler, so it seems
3024 easier to let the user deal with this.
3025 */
Benjamin Peterson3b03b092019-06-27 20:54:44 -07003026 else if (0 && parser->m_characterDataHandler)
Benjamin Peterson4e211002018-06-26 19:25:45 -07003027 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 0);
Benjamin Peterson3b03b092019-06-27 20:54:44 -07003028/* END disabled code */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003029 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003030 reportDefault(parser, enc, s, next);
Fred Drake31d485c2004-08-03 07:06:22 +00003031 result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore);
3032 if (result != XML_ERROR_NONE)
3033 return result;
3034 else if (!next) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003035 parser->m_processor = cdataSectionProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003036 return result;
3037 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003038 }
3039 break;
3040 case XML_TOK_TRAILING_RSQB:
Fred Drake31d485c2004-08-03 07:06:22 +00003041 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003042 *nextPtr = s;
3043 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003044 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003045 if (parser->m_characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003046 if (MUST_CONVERT(enc, s)) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003047 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3048 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3049 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3050 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003051 }
3052 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07003053 parser->m_characterDataHandler(parser->m_handlerArg,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003054 (XML_Char *)s,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003055 (int)((XML_Char *)end - (XML_Char *)s));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003056 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003057 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003058 reportDefault(parser, enc, s, end);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003059 /* We are at the end of the final buffer, should we check for
3060 XML_SUSPENDED, XML_FINISHED?
Fred Drake31d485c2004-08-03 07:06:22 +00003061 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003062 if (startTagLevel == 0) {
3063 *eventPP = end;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003064 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003065 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003066 if (parser->m_tagLevel != startTagLevel) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003067 *eventPP = end;
3068 return XML_ERROR_ASYNC_ENTITY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003069 }
Fred Drake31d485c2004-08-03 07:06:22 +00003070 *nextPtr = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003071 return XML_ERROR_NONE;
3072 case XML_TOK_DATA_CHARS:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003073 {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003074 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003075 if (charDataHandler) {
3076 if (MUST_CONVERT(enc, s)) {
3077 for (;;) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003078 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3079 const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003080 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003081 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3082 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
Victor Stinner23ec4b52017-06-15 00:54:36 +02003083 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003084 break;
3085 *eventPP = s;
3086 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003087 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003088 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07003089 charDataHandler(parser->m_handlerArg,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003090 (XML_Char *)s,
3091 (int)((XML_Char *)next - (XML_Char *)s));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003092 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003093 else if (parser->m_defaultHandler)
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003094 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003095 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003096 break;
3097 case XML_TOK_PI:
3098 if (!reportProcessingInstruction(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003099 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003100 break;
3101 case XML_TOK_COMMENT:
3102 if (!reportComment(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003103 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003104 break;
3105 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02003106 /* All of the tokens produced by XmlContentTok() have their own
3107 * explicit cases, so this default is not strictly necessary.
3108 * However it is a useful safety net, so we retain the code and
3109 * simply exclude it from the coverage tests.
3110 *
3111 * LCOV_EXCL_START
3112 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003113 if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003114 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003115 break;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003116 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003117 }
3118 *eventPP = s = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003119 switch (parser->m_parsingStatus.parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003120 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00003121 *nextPtr = next;
3122 return XML_ERROR_NONE;
3123 case XML_FINISHED:
3124 return XML_ERROR_ABORTED;
3125 default: ;
3126 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003127 }
3128 /* not reached */
3129}
3130
Victor Stinner5ff71322017-06-21 14:39:22 +02003131/* This function does not call free() on the allocated memory, merely
Benjamin Peterson4e211002018-06-26 19:25:45 -07003132 * moving it to the parser's m_freeBindingList where it can be freed or
Victor Stinner5ff71322017-06-21 14:39:22 +02003133 * reused as appropriate.
3134 */
3135static void
3136freeBindings(XML_Parser parser, BINDING *bindings)
3137{
3138 while (bindings) {
3139 BINDING *b = bindings;
3140
Benjamin Peterson4e211002018-06-26 19:25:45 -07003141 /* m_startNamespaceDeclHandler will have been called for this
Victor Stinner5ff71322017-06-21 14:39:22 +02003142 * binding in addBindings(), so call the end handler now.
3143 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003144 if (parser->m_endNamespaceDeclHandler)
3145 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
Victor Stinner5ff71322017-06-21 14:39:22 +02003146
3147 bindings = bindings->nextTagBinding;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003148 b->nextTagBinding = parser->m_freeBindingList;
3149 parser->m_freeBindingList = b;
Victor Stinner5ff71322017-06-21 14:39:22 +02003150 b->prefix->binding = b->prevPrefixBinding;
3151 }
3152}
3153
Fred Drake4faea012003-01-28 06:42:40 +00003154/* Precondition: all arguments must be non-NULL;
3155 Purpose:
3156 - normalize attributes
3157 - check attributes for well-formedness
3158 - generate namespace aware attribute names (URI, prefix)
3159 - build list of attributes for startElementHandler
3160 - default attributes
3161 - process namespace declarations (check and report them)
3162 - generate namespace aware element name (URI, prefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003163*/
3164static enum XML_Error
3165storeAtts(XML_Parser parser, const ENCODING *enc,
3166 const char *attStr, TAG_NAME *tagNamePtr,
3167 BINDING **bindingsPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003168{
Benjamin Peterson4e211002018-06-26 19:25:45 -07003169 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Fred Drake08317ae2003-10-21 15:38:55 +00003170 ELEMENT_TYPE *elementType;
3171 int nDefaultAtts;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003172 const XML_Char **appAtts; /* the attribute list for the application */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003173 int attIndex = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003174 int prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003175 int i;
3176 int n;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003177 XML_Char *uri;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003178 int nPrefixes = 0;
3179 BINDING *binding;
3180 const XML_Char *localPart;
3181
3182 /* lookup the element type name */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003183 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str,0);
Fred Drake4faea012003-01-28 06:42:40 +00003184 if (!elementType) {
3185 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3186 if (!name)
3187 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003188 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
Fred Drake4faea012003-01-28 06:42:40 +00003189 sizeof(ELEMENT_TYPE));
3190 if (!elementType)
3191 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003192 if (parser->m_ns && !setElementTypePrefix(parser, elementType))
Fred Drake4faea012003-01-28 06:42:40 +00003193 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003194 }
Fred Drake4faea012003-01-28 06:42:40 +00003195 nDefaultAtts = elementType->nDefaultAtts;
3196
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003197 /* get the attributes from the tokenizer */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003198 n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3199 if (n + nDefaultAtts > parser->m_attsSize) {
3200 int oldAttsSize = parser->m_attsSize;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003201 ATTRIBUTE *temp;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003202#ifdef XML_ATTR_INFO
3203 XML_AttrInfo *temp2;
3204#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07003205 parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3206 temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts, parser->m_attsSize * sizeof(ATTRIBUTE));
Victor Stinner93d0cb52017-08-18 23:43:54 +02003207 if (temp == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003208 parser->m_attsSize = oldAttsSize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003209 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003210 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003211 parser->m_atts = temp;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003212#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -07003213 temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo, parser->m_attsSize * sizeof(XML_AttrInfo));
Victor Stinner93d0cb52017-08-18 23:43:54 +02003214 if (temp2 == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003215 parser->m_attsSize = oldAttsSize;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003216 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003217 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003218 parser->m_attInfo = temp2;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003219#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003220 if (n > oldAttsSize)
Benjamin Peterson4e211002018-06-26 19:25:45 -07003221 XmlGetAttributes(enc, attStr, n, parser->m_atts);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003222 }
Fred Drake4faea012003-01-28 06:42:40 +00003223
Benjamin Peterson4e211002018-06-26 19:25:45 -07003224 appAtts = (const XML_Char **)parser->m_atts;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003225 for (i = 0; i < n; i++) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003226 ATTRIBUTE *currAtt = &parser->m_atts[i];
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003227#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -07003228 XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003229#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003230 /* add the name and value to the attribute list */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003231 ATTRIBUTE_ID *attId = getAttributeId(parser, enc, currAtt->name,
3232 currAtt->name
3233 + XmlNameLength(enc, currAtt->name));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003234 if (!attId)
3235 return XML_ERROR_NO_MEMORY;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003236#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -07003237 currAttInfo->nameStart = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003238 currAttInfo->nameEnd = currAttInfo->nameStart +
3239 XmlNameLength(enc, currAtt->name);
Benjamin Peterson4e211002018-06-26 19:25:45 -07003240 currAttInfo->valueStart = parser->m_parseEndByteIndex -
3241 (parser->m_parseEndPtr - currAtt->valuePtr);
3242 currAttInfo->valueEnd = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->valueEnd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003243#endif
Fred Drake08317ae2003-10-21 15:38:55 +00003244 /* Detect duplicate attributes by their QNames. This does not work when
3245 namespace processing is turned on and different prefixes for the same
3246 namespace are used. For this case we have a check further down.
3247 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003248 if ((attId->name)[-1]) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003249 if (enc == parser->m_encoding)
3250 parser->m_eventPtr = parser->m_atts[i].name;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003251 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3252 }
3253 (attId->name)[-1] = 1;
3254 appAtts[attIndex++] = attId->name;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003255 if (!parser->m_atts[i].normalized) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003256 enum XML_Error result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003257 XML_Bool isCdata = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003258
3259 /* figure out whether declared as other than CDATA */
3260 if (attId->maybeTokenized) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003261 int j;
3262 for (j = 0; j < nDefaultAtts; j++) {
3263 if (attId == elementType->defaultAtts[j].id) {
3264 isCdata = elementType->defaultAtts[j].isCdata;
3265 break;
3266 }
3267 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003268 }
3269
3270 /* normalize the attribute value */
3271 result = storeAttributeValue(parser, enc, isCdata,
Benjamin Peterson4e211002018-06-26 19:25:45 -07003272 parser->m_atts[i].valuePtr, parser->m_atts[i].valueEnd,
3273 &parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003274 if (result)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003275 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003276 appAtts[attIndex] = poolStart(&parser->m_tempPool);
3277 poolFinish(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003278 }
Fred Drake4faea012003-01-28 06:42:40 +00003279 else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003280 /* the value did not need normalizing */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003281 appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc, parser->m_atts[i].valuePtr,
3282 parser->m_atts[i].valueEnd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003283 if (appAtts[attIndex] == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003284 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003285 poolFinish(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003286 }
3287 /* handle prefixed attribute names */
Fred Drake4faea012003-01-28 06:42:40 +00003288 if (attId->prefix) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003289 if (attId->xmlns) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003290 /* deal with namespace declarations here */
3291 enum XML_Error result = addBinding(parser, attId->prefix, attId,
3292 appAtts[attIndex], bindingsPtr);
3293 if (result)
3294 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003295 --attIndex;
3296 }
3297 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003298 /* deal with other prefixed names later */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003299 attIndex++;
3300 nPrefixes++;
3301 (attId->name)[-1] = 2;
3302 }
3303 }
3304 else
3305 attIndex++;
3306 }
Fred Drake4faea012003-01-28 06:42:40 +00003307
3308 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003309 parser->m_nSpecifiedAtts = attIndex;
Fred Drake4faea012003-01-28 06:42:40 +00003310 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3311 for (i = 0; i < attIndex; i += 2)
3312 if (appAtts[i] == elementType->idAtt->name) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003313 parser->m_idAttIndex = i;
Fred Drake4faea012003-01-28 06:42:40 +00003314 break;
3315 }
3316 }
3317 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07003318 parser->m_idAttIndex = -1;
Fred Drake4faea012003-01-28 06:42:40 +00003319
3320 /* do attribute defaulting */
3321 for (i = 0; i < nDefaultAtts; i++) {
3322 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3323 if (!(da->id->name)[-1] && da->value) {
3324 if (da->id->prefix) {
3325 if (da->id->xmlns) {
3326 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3327 da->value, bindingsPtr);
3328 if (result)
3329 return result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003330 }
3331 else {
Fred Drake4faea012003-01-28 06:42:40 +00003332 (da->id->name)[-1] = 2;
3333 nPrefixes++;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003334 appAtts[attIndex++] = da->id->name;
3335 appAtts[attIndex++] = da->value;
3336 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003337 }
Fred Drake4faea012003-01-28 06:42:40 +00003338 else {
3339 (da->id->name)[-1] = 1;
3340 appAtts[attIndex++] = da->id->name;
3341 appAtts[attIndex++] = da->value;
3342 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003343 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003344 }
Fred Drake4faea012003-01-28 06:42:40 +00003345 appAtts[attIndex] = 0;
3346
Fred Drake08317ae2003-10-21 15:38:55 +00003347 /* expand prefixed attribute names, check for duplicates,
3348 and clear flags that say whether attributes were specified */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003349 i = 0;
3350 if (nPrefixes) {
Fred Drake08317ae2003-10-21 15:38:55 +00003351 int j; /* hash table index */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003352 unsigned long version = parser->m_nsAttsVersion;
3353 int nsAttsSize = (int)1 << parser->m_nsAttsPower;
3354 unsigned char oldNsAttsPower = parser->m_nsAttsPower;
Fred Drake08317ae2003-10-21 15:38:55 +00003355 /* size of hash table must be at least 2 * (# of prefixed attributes) */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003356 if ((nPrefixes << 1) >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
Fred Drake08317ae2003-10-21 15:38:55 +00003357 NS_ATT *temp;
3358 /* hash table size must also be a power of 2 and >= 8 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003359 while (nPrefixes >> parser->m_nsAttsPower++);
3360 if (parser->m_nsAttsPower < 3)
3361 parser->m_nsAttsPower = 3;
3362 nsAttsSize = (int)1 << parser->m_nsAttsPower;
3363 temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts, nsAttsSize * sizeof(NS_ATT));
Victor Stinner93d0cb52017-08-18 23:43:54 +02003364 if (!temp) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003365 /* Restore actual size of memory in m_nsAtts */
3366 parser->m_nsAttsPower = oldNsAttsPower;
Fred Drake08317ae2003-10-21 15:38:55 +00003367 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003368 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003369 parser->m_nsAtts = temp;
3370 version = 0; /* force re-initialization of m_nsAtts hash table */
Fred Drake08317ae2003-10-21 15:38:55 +00003371 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003372 /* using a version flag saves us from initializing m_nsAtts every time */
Fred Drake08317ae2003-10-21 15:38:55 +00003373 if (!version) { /* initialize version flags when version wraps around */
3374 version = INIT_ATTS_VERSION;
3375 for (j = nsAttsSize; j != 0; )
Benjamin Peterson4e211002018-06-26 19:25:45 -07003376 parser->m_nsAtts[--j].version = version;
Fred Drake08317ae2003-10-21 15:38:55 +00003377 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003378 parser->m_nsAttsVersion = --version;
Fred Drake08317ae2003-10-21 15:38:55 +00003379
3380 /* expand prefixed names and check for duplicates */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003381 for (; i < attIndex; i += 2) {
Fred Drake08317ae2003-10-21 15:38:55 +00003382 const XML_Char *s = appAtts[i];
3383 if (s[-1] == 2) { /* prefixed */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003384 ATTRIBUTE_ID *id;
Fred Drake08317ae2003-10-21 15:38:55 +00003385 const BINDING *b;
Victor Stinner5ff71322017-06-21 14:39:22 +02003386 unsigned long uriHash;
3387 struct siphash sip_state;
3388 struct sipkey sip_key;
3389
3390 copy_salt_to_sipkey(parser, &sip_key);
3391 sip24_init(&sip_state, &sip_key);
3392
Fred Drake08317ae2003-10-21 15:38:55 +00003393 ((XML_Char *)s)[-1] = 0; /* clear flag */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003394 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
Victor Stinner93d0cb52017-08-18 23:43:54 +02003395 if (!id || !id->prefix) {
3396 /* This code is walking through the appAtts array, dealing
3397 * with (in this case) a prefixed attribute name. To be in
3398 * the array, the attribute must have already been bound, so
3399 * has to have passed through the hash table lookup once
3400 * already. That implies that an entry for it already
3401 * exists, so the lookup above will return a pointer to
3402 * already allocated memory. There is no opportunaity for
3403 * the allocator to fail, so the condition above cannot be
3404 * fulfilled.
3405 *
3406 * Since it is difficult to be certain that the above
3407 * analysis is complete, we retain the test and merely
3408 * remove the code from coverage tests.
3409 */
3410 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3411 }
Fred Drake08317ae2003-10-21 15:38:55 +00003412 b = id->prefix->binding;
3413 if (!b)
3414 return XML_ERROR_UNBOUND_PREFIX;
3415
Fred Drake08317ae2003-10-21 15:38:55 +00003416 for (j = 0; j < b->uriLen; j++) {
3417 const XML_Char c = b->uri[j];
Benjamin Peterson4e211002018-06-26 19:25:45 -07003418 if (!poolAppendChar(&parser->m_tempPool, c))
Fred Drake08317ae2003-10-21 15:38:55 +00003419 return XML_ERROR_NO_MEMORY;
Fred Drake08317ae2003-10-21 15:38:55 +00003420 }
Victor Stinner5ff71322017-06-21 14:39:22 +02003421
3422 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3423
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003424 while (*s++ != XML_T(ASCII_COLON))
Fred Drake08317ae2003-10-21 15:38:55 +00003425 ;
Victor Stinner5ff71322017-06-21 14:39:22 +02003426
3427 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3428
Fred Drake08317ae2003-10-21 15:38:55 +00003429 do { /* copies null terminator */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003430 if (!poolAppendChar(&parser->m_tempPool, *s))
Fred Drake08317ae2003-10-21 15:38:55 +00003431 return XML_ERROR_NO_MEMORY;
Fred Drake08317ae2003-10-21 15:38:55 +00003432 } while (*s++);
3433
Victor Stinner5ff71322017-06-21 14:39:22 +02003434 uriHash = (unsigned long)sip24_final(&sip_state);
3435
Fred Drake08317ae2003-10-21 15:38:55 +00003436 { /* Check hash table for duplicate of expanded name (uriName).
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003437 Derived from code in lookup(parser, HASH_TABLE *table, ...).
Fred Drake08317ae2003-10-21 15:38:55 +00003438 */
3439 unsigned char step = 0;
3440 unsigned long mask = nsAttsSize - 1;
3441 j = uriHash & mask; /* index into hash table */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003442 while (parser->m_nsAtts[j].version == version) {
Fred Drake08317ae2003-10-21 15:38:55 +00003443 /* for speed we compare stored hash values first */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003444 if (uriHash == parser->m_nsAtts[j].hash) {
3445 const XML_Char *s1 = poolStart(&parser->m_tempPool);
3446 const XML_Char *s2 = parser->m_nsAtts[j].uriName;
Fred Drake08317ae2003-10-21 15:38:55 +00003447 /* s1 is null terminated, but not s2 */
3448 for (; *s1 == *s2 && *s1 != 0; s1++, s2++);
3449 if (*s1 == 0)
3450 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3451 }
3452 if (!step)
Benjamin Peterson4e211002018-06-26 19:25:45 -07003453 step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003454 j < step ? (j += nsAttsSize - step) : (j -= step);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003455 }
Fred Drake08317ae2003-10-21 15:38:55 +00003456 }
3457
Benjamin Peterson4e211002018-06-26 19:25:45 -07003458 if (parser->m_ns_triplets) { /* append namespace separator and prefix */
3459 parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
Fred Drake08317ae2003-10-21 15:38:55 +00003460 s = b->prefix->name;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003461 do {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003462 if (!poolAppendChar(&parser->m_tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003463 return XML_ERROR_NO_MEMORY;
3464 } while (*s++);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003465 }
Fred Drake08317ae2003-10-21 15:38:55 +00003466
3467 /* store expanded name in attribute list */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003468 s = poolStart(&parser->m_tempPool);
3469 poolFinish(&parser->m_tempPool);
Fred Drake08317ae2003-10-21 15:38:55 +00003470 appAtts[i] = s;
3471
3472 /* fill empty slot with new version, uriName and hash value */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003473 parser->m_nsAtts[j].version = version;
3474 parser->m_nsAtts[j].hash = uriHash;
3475 parser->m_nsAtts[j].uriName = s;
Fred Drake08317ae2003-10-21 15:38:55 +00003476
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003477 if (!--nPrefixes) {
3478 i += 2;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003479 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003480 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003481 }
Fred Drake08317ae2003-10-21 15:38:55 +00003482 else /* not prefixed */
3483 ((XML_Char *)s)[-1] = 0; /* clear flag */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003484 }
3485 }
Fred Drake08317ae2003-10-21 15:38:55 +00003486 /* clear flags for the remaining attributes */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003487 for (; i < attIndex; i += 2)
3488 ((XML_Char *)(appAtts[i]))[-1] = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003489 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3490 binding->attId->name[-1] = 0;
Fred Drake4faea012003-01-28 06:42:40 +00003491
Benjamin Peterson4e211002018-06-26 19:25:45 -07003492 if (!parser->m_ns)
Fred Drake08317ae2003-10-21 15:38:55 +00003493 return XML_ERROR_NONE;
3494
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003495 /* expand the element type name */
3496 if (elementType->prefix) {
3497 binding = elementType->prefix->binding;
3498 if (!binding)
Fred Drake08317ae2003-10-21 15:38:55 +00003499 return XML_ERROR_UNBOUND_PREFIX;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003500 localPart = tagNamePtr->str;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003501 while (*localPart++ != XML_T(ASCII_COLON))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003502 ;
3503 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003504 else if (dtd->defaultPrefix.binding) {
3505 binding = dtd->defaultPrefix.binding;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003506 localPart = tagNamePtr->str;
3507 }
3508 else
3509 return XML_ERROR_NONE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003510 prefixLen = 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003511 if (parser->m_ns_triplets && binding->prefix->name) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003512 for (; binding->prefix->name[prefixLen++];)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003513 ; /* prefixLen includes null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003514 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003515 tagNamePtr->localPart = localPart;
3516 tagNamePtr->uriLen = binding->uriLen;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003517 tagNamePtr->prefix = binding->prefix->name;
3518 tagNamePtr->prefixLen = prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003519 for (i = 0; localPart[i++];)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003520 ; /* i includes null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003521 n = i + binding->uriLen + prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003522 if (n > binding->uriAlloc) {
3523 TAG *p;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003524 uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003525 if (!uri)
3526 return XML_ERROR_NO_MEMORY;
3527 binding->uriAlloc = n + EXPAND_SPARE;
3528 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
Benjamin Peterson4e211002018-06-26 19:25:45 -07003529 for (p = parser->m_tagStack; p; p = p->parent)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003530 if (p->name.str == binding->uri)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003531 p->name.str = uri;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003532 FREE(parser, binding->uri);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003533 binding->uri = uri;
3534 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003535 /* if m_namespaceSeparator != '\0' then uri includes it already */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003536 uri = binding->uri + binding->uriLen;
3537 memcpy(uri, localPart, i * sizeof(XML_Char));
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003538 /* we always have a namespace separator between localPart and prefix */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003539 if (prefixLen) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003540 uri += i - 1;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003541 *uri = parser->m_namespaceSeparator; /* replace null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003542 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3543 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003544 tagNamePtr->str = binding->uri;
3545 return XML_ERROR_NONE;
3546}
3547
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003548/* addBinding() overwrites the value of prefix->binding without checking.
3549 Therefore one must keep track of the old value outside of addBinding().
3550*/
3551static enum XML_Error
3552addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3553 const XML_Char *uri, BINDING **bindingsPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003554{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003555 static const XML_Char xmlNamespace[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003556 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3557 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3558 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
3559 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, ASCII_SLASH,
3560 ASCII_n, ASCII_a, ASCII_m, ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3561 ASCII_e, '\0'
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003562 };
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003563 static const int xmlLen =
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003564 (int)sizeof(xmlNamespace)/sizeof(XML_Char) - 1;
3565 static const XML_Char xmlnsNamespace[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003566 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3567 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3568 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_2, ASCII_0, ASCII_0,
3569 ASCII_0, ASCII_SLASH, ASCII_x, ASCII_m, ASCII_l, ASCII_n, ASCII_s,
3570 ASCII_SLASH, '\0'
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003571 };
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003572 static const int xmlnsLen =
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003573 (int)sizeof(xmlnsNamespace)/sizeof(XML_Char) - 1;
3574
3575 XML_Bool mustBeXML = XML_FALSE;
3576 XML_Bool isXML = XML_TRUE;
3577 XML_Bool isXMLNS = XML_TRUE;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003578
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003579 BINDING *b;
3580 int len;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003581
Fred Drake31d485c2004-08-03 07:06:22 +00003582 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003583 if (*uri == XML_T('\0') && prefix->name)
Fred Drake31d485c2004-08-03 07:06:22 +00003584 return XML_ERROR_UNDECLARING_PREFIX;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003585
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003586 if (prefix->name
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003587 && prefix->name[0] == XML_T(ASCII_x)
3588 && prefix->name[1] == XML_T(ASCII_m)
3589 && prefix->name[2] == XML_T(ASCII_l)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003590
3591 /* Not allowed to bind xmlns */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003592 if (prefix->name[3] == XML_T(ASCII_n)
3593 && prefix->name[4] == XML_T(ASCII_s)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003594 && prefix->name[5] == XML_T('\0'))
3595 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3596
3597 if (prefix->name[3] == XML_T('\0'))
3598 mustBeXML = XML_TRUE;
3599 }
3600
3601 for (len = 0; uri[len]; len++) {
3602 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3603 isXML = XML_FALSE;
3604
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003605 if (!mustBeXML && isXMLNS
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003606 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3607 isXMLNS = XML_FALSE;
3608 }
3609 isXML = isXML && len == xmlLen;
3610 isXMLNS = isXMLNS && len == xmlnsLen;
3611
3612 if (mustBeXML != isXML)
3613 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3614 : XML_ERROR_RESERVED_NAMESPACE_URI;
3615
3616 if (isXMLNS)
3617 return XML_ERROR_RESERVED_NAMESPACE_URI;
3618
Benjamin Peterson4e211002018-06-26 19:25:45 -07003619 if (parser->m_namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003620 len++;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003621 if (parser->m_freeBindingList) {
3622 b = parser->m_freeBindingList;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003623 if (len > b->uriAlloc) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003624 XML_Char *temp = (XML_Char *)REALLOC(parser, b->uri,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003625 sizeof(XML_Char) * (len + EXPAND_SPARE));
3626 if (temp == NULL)
3627 return XML_ERROR_NO_MEMORY;
3628 b->uri = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003629 b->uriAlloc = len + EXPAND_SPARE;
3630 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003631 parser->m_freeBindingList = b->nextTagBinding;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003632 }
3633 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003634 b = (BINDING *)MALLOC(parser, sizeof(BINDING));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003635 if (!b)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003636 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003637 b->uri = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003638 if (!b->uri) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003639 FREE(parser, b);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003640 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003641 }
3642 b->uriAlloc = len + EXPAND_SPARE;
3643 }
3644 b->uriLen = len;
3645 memcpy(b->uri, uri, len * sizeof(XML_Char));
Benjamin Peterson4e211002018-06-26 19:25:45 -07003646 if (parser->m_namespaceSeparator)
3647 b->uri[len - 1] = parser->m_namespaceSeparator;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003648 b->prefix = prefix;
3649 b->attId = attId;
3650 b->prevPrefixBinding = prefix->binding;
Fred Drake08317ae2003-10-21 15:38:55 +00003651 /* NULL binding when default namespace undeclared */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003652 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003653 prefix->binding = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003654 else
3655 prefix->binding = b;
3656 b->nextTagBinding = *bindingsPtr;
3657 *bindingsPtr = b;
Fred Drake31d485c2004-08-03 07:06:22 +00003658 /* if attId == NULL then we are not starting a namespace scope */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003659 if (attId && parser->m_startNamespaceDeclHandler)
3660 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003661 prefix->binding ? uri : 0);
3662 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003663}
3664
3665/* The idea here is to avoid using stack for each CDATA section when
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003666 the whole file is parsed with one call.
3667*/
3668static enum XML_Error PTRCALL
3669cdataSectionProcessor(XML_Parser parser,
3670 const char *start,
3671 const char *end,
3672 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003673{
Benjamin Peterson4e211002018-06-26 19:25:45 -07003674 enum XML_Error result = doCdataSection(parser, parser->m_encoding, &start, end,
3675 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00003676 if (result != XML_ERROR_NONE)
3677 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003678 if (start) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003679 if (parser->m_parentParser) { /* we are parsing an external entity */
3680 parser->m_processor = externalEntityContentProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003681 return externalEntityContentProcessor(parser, start, end, endPtr);
3682 }
3683 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003684 parser->m_processor = contentProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003685 return contentProcessor(parser, start, end, endPtr);
3686 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003687 }
3688 return result;
3689}
3690
Fred Drake31d485c2004-08-03 07:06:22 +00003691/* startPtr gets set to non-null if the section is closed, and to null if
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003692 the section is not yet closed.
3693*/
3694static enum XML_Error
3695doCdataSection(XML_Parser parser,
3696 const ENCODING *enc,
3697 const char **startPtr,
3698 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00003699 const char **nextPtr,
3700 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003701{
3702 const char *s = *startPtr;
3703 const char **eventPP;
3704 const char **eventEndPP;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003705 if (enc == parser->m_encoding) {
3706 eventPP = &parser->m_eventPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003707 *eventPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003708 eventEndPP = &parser->m_eventEndPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003709 }
3710 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003711 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3712 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003713 }
3714 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003715 *startPtr = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00003716
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003717 for (;;) {
3718 const char *next;
3719 int tok = XmlCdataSectionTok(enc, s, end, &next);
3720 *eventEndPP = next;
3721 switch (tok) {
3722 case XML_TOK_CDATA_SECT_CLOSE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07003723 if (parser->m_endCdataSectionHandler)
3724 parser->m_endCdataSectionHandler(parser->m_handlerArg);
Benjamin Peterson3b03b092019-06-27 20:54:44 -07003725/* BEGIN disabled code */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003726 /* see comment under XML_TOK_CDATA_SECT_OPEN */
Benjamin Peterson3b03b092019-06-27 20:54:44 -07003727 else if (0 && parser->m_characterDataHandler)
Benjamin Peterson4e211002018-06-26 19:25:45 -07003728 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 0);
Benjamin Peterson3b03b092019-06-27 20:54:44 -07003729/* END disabled code */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003730 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003731 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003732 *startPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00003733 *nextPtr = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003734 if (parser->m_parsingStatus.parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00003735 return XML_ERROR_ABORTED;
3736 else
3737 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003738 case XML_TOK_DATA_NEWLINE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07003739 if (parser->m_characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003740 XML_Char c = 0xA;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003741 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003742 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003743 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003744 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003745 break;
3746 case XML_TOK_DATA_CHARS:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003747 {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003748 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003749 if (charDataHandler) {
3750 if (MUST_CONVERT(enc, s)) {
3751 for (;;) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003752 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3753 const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003754 *eventEndPP = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003755 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3756 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
Victor Stinner23ec4b52017-06-15 00:54:36 +02003757 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003758 break;
3759 *eventPP = s;
3760 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003761 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003762 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07003763 charDataHandler(parser->m_handlerArg,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003764 (XML_Char *)s,
3765 (int)((XML_Char *)next - (XML_Char *)s));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003766 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003767 else if (parser->m_defaultHandler)
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003768 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003769 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003770 break;
3771 case XML_TOK_INVALID:
3772 *eventPP = next;
3773 return XML_ERROR_INVALID_TOKEN;
3774 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003775 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003776 *nextPtr = s;
3777 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003778 }
3779 return XML_ERROR_PARTIAL_CHAR;
3780 case XML_TOK_PARTIAL:
3781 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00003782 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003783 *nextPtr = s;
3784 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003785 }
3786 return XML_ERROR_UNCLOSED_CDATA_SECTION;
3787 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02003788 /* Every token returned by XmlCdataSectionTok() has its own
3789 * explicit case, so this default case will never be executed.
3790 * We retain it as a safety net and exclude it from the coverage
3791 * statistics.
3792 *
3793 * LCOV_EXCL_START
3794 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003795 *eventPP = next;
3796 return XML_ERROR_UNEXPECTED_STATE;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003797 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003798 }
Fred Drake31d485c2004-08-03 07:06:22 +00003799
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003800 *eventPP = s = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003801 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00003802 case XML_SUSPENDED:
3803 *nextPtr = next;
3804 return XML_ERROR_NONE;
3805 case XML_FINISHED:
3806 return XML_ERROR_ABORTED;
3807 default: ;
3808 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003809 }
3810 /* not reached */
3811}
3812
3813#ifdef XML_DTD
3814
3815/* The idea here is to avoid using stack for each IGNORE section when
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003816 the whole file is parsed with one call.
3817*/
3818static enum XML_Error PTRCALL
3819ignoreSectionProcessor(XML_Parser parser,
3820 const char *start,
3821 const char *end,
3822 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003823{
Benjamin Peterson4e211002018-06-26 19:25:45 -07003824 enum XML_Error result = doIgnoreSection(parser, parser->m_encoding, &start, end,
3825 endPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00003826 if (result != XML_ERROR_NONE)
3827 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003828 if (start) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003829 parser->m_processor = prologProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003830 return prologProcessor(parser, start, end, endPtr);
3831 }
3832 return result;
3833}
3834
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003835/* startPtr gets set to non-null is the section is closed, and to null
3836 if the section is not yet closed.
3837*/
3838static enum XML_Error
3839doIgnoreSection(XML_Parser parser,
3840 const ENCODING *enc,
3841 const char **startPtr,
3842 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00003843 const char **nextPtr,
3844 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003845{
3846 const char *next;
3847 int tok;
3848 const char *s = *startPtr;
3849 const char **eventPP;
3850 const char **eventEndPP;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003851 if (enc == parser->m_encoding) {
3852 eventPP = &parser->m_eventPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003853 *eventPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003854 eventEndPP = &parser->m_eventEndPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003855 }
3856 else {
Victor Stinner93d0cb52017-08-18 23:43:54 +02003857 /* It's not entirely clear, but it seems the following two lines
3858 * of code cannot be executed. The only occasions on which 'enc'
Benjamin Peterson4e211002018-06-26 19:25:45 -07003859 * is not 'encoding' are when this function is called
Victor Stinner93d0cb52017-08-18 23:43:54 +02003860 * from the internal entity processing, and IGNORE sections are an
3861 * error in internal entities.
3862 *
3863 * Since it really isn't clear that this is true, we keep the code
3864 * and just remove it from our coverage tests.
3865 *
3866 * LCOV_EXCL_START
3867 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003868 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3869 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Victor Stinner93d0cb52017-08-18 23:43:54 +02003870 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003871 }
3872 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003873 *startPtr = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003874 tok = XmlIgnoreSectionTok(enc, s, end, &next);
3875 *eventEndPP = next;
3876 switch (tok) {
3877 case XML_TOK_IGNORE_SECT:
Benjamin Peterson4e211002018-06-26 19:25:45 -07003878 if (parser->m_defaultHandler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003879 reportDefault(parser, enc, s, next);
3880 *startPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00003881 *nextPtr = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003882 if (parser->m_parsingStatus.parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00003883 return XML_ERROR_ABORTED;
3884 else
3885 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003886 case XML_TOK_INVALID:
3887 *eventPP = next;
3888 return XML_ERROR_INVALID_TOKEN;
3889 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003890 if (haveMore) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003891 *nextPtr = s;
3892 return XML_ERROR_NONE;
3893 }
3894 return XML_ERROR_PARTIAL_CHAR;
3895 case XML_TOK_PARTIAL:
3896 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00003897 if (haveMore) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003898 *nextPtr = s;
3899 return XML_ERROR_NONE;
3900 }
3901 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
3902 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02003903 /* All of the tokens that XmlIgnoreSectionTok() returns have
3904 * explicit cases to handle them, so this default case is never
3905 * executed. We keep it as a safety net anyway, and remove it
3906 * from our test coverage statistics.
3907 *
3908 * LCOV_EXCL_START
3909 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003910 *eventPP = next;
3911 return XML_ERROR_UNEXPECTED_STATE;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003912 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003913 }
3914 /* not reached */
3915}
3916
3917#endif /* XML_DTD */
3918
3919static enum XML_Error
3920initializeEncoding(XML_Parser parser)
3921{
3922 const char *s;
3923#ifdef XML_UNICODE
3924 char encodingBuf[128];
Victor Stinner93d0cb52017-08-18 23:43:54 +02003925 /* See comments abount `protoclEncodingName` in parserInit() */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003926 if (!parser->m_protocolEncodingName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003927 s = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003928 else {
3929 int i;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003930 for (i = 0; parser->m_protocolEncodingName[i]; i++) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003931 if (i == sizeof(encodingBuf) - 1
Benjamin Peterson4e211002018-06-26 19:25:45 -07003932 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003933 encodingBuf[0] = '\0';
3934 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003935 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003936 encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003937 }
3938 encodingBuf[i] = '\0';
3939 s = encodingBuf;
3940 }
3941#else
Benjamin Peterson4e211002018-06-26 19:25:45 -07003942 s = parser->m_protocolEncodingName;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003943#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07003944 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(&parser->m_initEncoding, &parser->m_encoding, s))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003945 return XML_ERROR_NONE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003946 return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003947}
3948
3949static enum XML_Error
3950processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003951 const char *s, const char *next)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003952{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003953 const char *encodingName = NULL;
3954 const XML_Char *storedEncName = NULL;
3955 const ENCODING *newEncoding = NULL;
3956 const char *version = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003957 const char *versionend;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003958 const XML_Char *storedversion = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003959 int standalone = -1;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003960 if (!(parser->m_ns
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003961 ? XmlParseXmlDeclNS
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003962 : XmlParseXmlDecl)(isGeneralTextEntity,
Benjamin Peterson4e211002018-06-26 19:25:45 -07003963 parser->m_encoding,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003964 s,
3965 next,
Benjamin Peterson4e211002018-06-26 19:25:45 -07003966 &parser->m_eventPtr,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003967 &version,
3968 &versionend,
3969 &encodingName,
3970 &newEncoding,
Fred Drake31d485c2004-08-03 07:06:22 +00003971 &standalone)) {
3972 if (isGeneralTextEntity)
3973 return XML_ERROR_TEXT_DECL;
3974 else
3975 return XML_ERROR_XML_DECL;
3976 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003977 if (!isGeneralTextEntity && standalone == 1) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003978 parser->m_dtd->standalone = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003979#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07003980 if (parser->m_paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
3981 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003982#endif /* XML_DTD */
3983 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003984 if (parser->m_xmlDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003985 if (encodingName != NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003986 storedEncName = poolStoreString(&parser->m_temp2Pool,
3987 parser->m_encoding,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003988 encodingName,
3989 encodingName
Benjamin Peterson4e211002018-06-26 19:25:45 -07003990 + XmlNameLength(parser->m_encoding, encodingName));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003991 if (!storedEncName)
3992 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003993 poolFinish(&parser->m_temp2Pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003994 }
3995 if (version) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003996 storedversion = poolStoreString(&parser->m_temp2Pool,
3997 parser->m_encoding,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003998 version,
Benjamin Peterson4e211002018-06-26 19:25:45 -07003999 versionend - parser->m_encoding->minBytesPerChar);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004000 if (!storedversion)
4001 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004002 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004003 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName, standalone);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004004 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004005 else if (parser->m_defaultHandler)
4006 reportDefault(parser, parser->m_encoding, s, next);
4007 if (parser->m_protocolEncodingName == NULL) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004008 if (newEncoding) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02004009 /* Check that the specified encoding does not conflict with what
4010 * the parser has already deduced. Do we have the same number
4011 * of bytes in the smallest representation of a character? If
4012 * this is UTF-16, is it the same endianness?
4013 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004014 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
Victor Stinner93d0cb52017-08-18 23:43:54 +02004015 || (newEncoding->minBytesPerChar == 2 &&
Benjamin Peterson4e211002018-06-26 19:25:45 -07004016 newEncoding != parser->m_encoding)) {
4017 parser->m_eventPtr = encodingName;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004018 return XML_ERROR_INCORRECT_ENCODING;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004019 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004020 parser->m_encoding = newEncoding;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004021 }
4022 else if (encodingName) {
4023 enum XML_Error result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004024 if (!storedEncName) {
4025 storedEncName = poolStoreString(
Benjamin Peterson4e211002018-06-26 19:25:45 -07004026 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4027 encodingName + XmlNameLength(parser->m_encoding, encodingName));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004028 if (!storedEncName)
4029 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004030 }
4031 result = handleUnknownEncoding(parser, storedEncName);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004032 poolClear(&parser->m_temp2Pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004033 if (result == XML_ERROR_UNKNOWN_ENCODING)
Benjamin Peterson4e211002018-06-26 19:25:45 -07004034 parser->m_eventPtr = encodingName;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004035 return result;
4036 }
4037 }
4038
4039 if (storedEncName || storedversion)
Benjamin Peterson4e211002018-06-26 19:25:45 -07004040 poolClear(&parser->m_temp2Pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004041
4042 return XML_ERROR_NONE;
4043}
4044
4045static enum XML_Error
4046handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
4047{
Benjamin Peterson4e211002018-06-26 19:25:45 -07004048 if (parser->m_unknownEncodingHandler) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004049 XML_Encoding info;
4050 int i;
4051 for (i = 0; i < 256; i++)
4052 info.map[i] = -1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004053 info.convert = NULL;
4054 info.data = NULL;
4055 info.release = NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004056 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData, encodingName,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004057 &info)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004058 ENCODING *enc;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004059 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4060 if (!parser->m_unknownEncodingMem) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004061 if (info.release)
4062 info.release(info.data);
4063 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004064 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004065 enc = (parser->m_ns
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004066 ? XmlInitUnknownEncodingNS
Benjamin Peterson4e211002018-06-26 19:25:45 -07004067 : XmlInitUnknownEncoding)(parser->m_unknownEncodingMem,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004068 info.map,
4069 info.convert,
4070 info.data);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004071 if (enc) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004072 parser->m_unknownEncodingData = info.data;
4073 parser->m_unknownEncodingRelease = info.release;
4074 parser->m_encoding = enc;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004075 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004076 }
4077 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004078 if (info.release != NULL)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004079 info.release(info.data);
4080 }
4081 return XML_ERROR_UNKNOWN_ENCODING;
4082}
4083
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004084static enum XML_Error PTRCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004085prologInitProcessor(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004086 const char *s,
4087 const char *end,
4088 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004089{
4090 enum XML_Error result = initializeEncoding(parser);
4091 if (result != XML_ERROR_NONE)
4092 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004093 parser->m_processor = prologProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004094 return prologProcessor(parser, s, end, nextPtr);
4095}
4096
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004097#ifdef XML_DTD
4098
4099static enum XML_Error PTRCALL
4100externalParEntInitProcessor(XML_Parser parser,
4101 const char *s,
4102 const char *end,
4103 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004104{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004105 enum XML_Error result = initializeEncoding(parser);
4106 if (result != XML_ERROR_NONE)
4107 return result;
4108
4109 /* we know now that XML_Parse(Buffer) has been called,
4110 so we consider the external parameter entity read */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004111 parser->m_dtd->paramEntityRead = XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004112
Benjamin Peterson4e211002018-06-26 19:25:45 -07004113 if (parser->m_prologState.inEntityValue) {
4114 parser->m_processor = entityValueInitProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004115 return entityValueInitProcessor(parser, s, end, nextPtr);
4116 }
4117 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004118 parser->m_processor = externalParEntProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004119 return externalParEntProcessor(parser, s, end, nextPtr);
4120 }
4121}
4122
4123static enum XML_Error PTRCALL
4124entityValueInitProcessor(XML_Parser parser,
4125 const char *s,
4126 const char *end,
4127 const char **nextPtr)
4128{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004129 int tok;
Fred Drake31d485c2004-08-03 07:06:22 +00004130 const char *start = s;
4131 const char *next = start;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004132 parser->m_eventPtr = start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004133
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004134 for (;;) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004135 tok = XmlPrologTok(parser->m_encoding, start, end, &next);
4136 parser->m_eventEndPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004137 if (tok <= 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004138 if (!parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
Fred Drake31d485c2004-08-03 07:06:22 +00004139 *nextPtr = s;
4140 return XML_ERROR_NONE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004141 }
4142 switch (tok) {
4143 case XML_TOK_INVALID:
Fred Drake31d485c2004-08-03 07:06:22 +00004144 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004145 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00004146 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004147 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00004148 return XML_ERROR_PARTIAL_CHAR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004149 case XML_TOK_NONE: /* start == end */
4150 default:
4151 break;
4152 }
Fred Drake31d485c2004-08-03 07:06:22 +00004153 /* found end of entity value - can store it now */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004154 return storeEntityValue(parser, parser->m_encoding, s, end);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004155 }
4156 else if (tok == XML_TOK_XML_DECL) {
Fred Drake31d485c2004-08-03 07:06:22 +00004157 enum XML_Error result;
4158 result = processXmlDecl(parser, 0, start, next);
4159 if (result != XML_ERROR_NONE)
4160 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004161 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For that
Victor Stinner93d0cb52017-08-18 23:43:54 +02004162 * to happen, a parameter entity parsing handler must have
4163 * attempted to suspend the parser, which fails and raises an
4164 * error. The parser can be aborted, but can't be suspended.
4165 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004166 if (parser->m_parsingStatus.parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00004167 return XML_ERROR_ABORTED;
Victor Stinner93d0cb52017-08-18 23:43:54 +02004168 *nextPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004169 /* stop scanning for text declaration - we found one */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004170 parser->m_processor = entityValueProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004171 return entityValueProcessor(parser, next, end, nextPtr);
4172 }
4173 /* If we are at the end of the buffer, this would cause XmlPrologTok to
4174 return XML_TOK_NONE on the next call, which would then cause the
4175 function to exit with *nextPtr set to s - that is what we want for other
4176 tokens, but not for the BOM - we would rather like to skip it;
4177 then, when this routine is entered the next time, XmlPrologTok will
4178 return XML_TOK_INVALID, since the BOM is still in the buffer
4179 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004180 else if (tok == XML_TOK_BOM && next == end && !parser->m_parsingStatus.finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004181 *nextPtr = next;
4182 return XML_ERROR_NONE;
4183 }
Victor Stinner5ff71322017-06-21 14:39:22 +02004184 /* If we get this token, we have the start of what might be a
4185 normal tag, but not a declaration (i.e. it doesn't begin with
4186 "<!"). In a DTD context, that isn't legal.
4187 */
4188 else if (tok == XML_TOK_INSTANCE_START) {
4189 *nextPtr = next;
4190 return XML_ERROR_SYNTAX;
4191 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004192 start = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004193 parser->m_eventPtr = start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004194 }
4195}
4196
4197static enum XML_Error PTRCALL
4198externalParEntProcessor(XML_Parser parser,
4199 const char *s,
4200 const char *end,
4201 const char **nextPtr)
4202{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004203 const char *next = s;
4204 int tok;
4205
Benjamin Peterson4e211002018-06-26 19:25:45 -07004206 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004207 if (tok <= 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004208 if (!parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004209 *nextPtr = s;
4210 return XML_ERROR_NONE;
4211 }
4212 switch (tok) {
4213 case XML_TOK_INVALID:
4214 return XML_ERROR_INVALID_TOKEN;
4215 case XML_TOK_PARTIAL:
4216 return XML_ERROR_UNCLOSED_TOKEN;
4217 case XML_TOK_PARTIAL_CHAR:
4218 return XML_ERROR_PARTIAL_CHAR;
4219 case XML_TOK_NONE: /* start == end */
4220 default:
4221 break;
4222 }
4223 }
4224 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4225 However, when parsing an external subset, doProlog will not accept a BOM
4226 as valid, and report a syntax error, so we have to skip the BOM
4227 */
4228 else if (tok == XML_TOK_BOM) {
4229 s = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004230 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004231 }
4232
Benjamin Peterson4e211002018-06-26 19:25:45 -07004233 parser->m_processor = prologProcessor;
4234 return doProlog(parser, parser->m_encoding, s, end, tok, next,
4235 nextPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004236}
4237
4238static enum XML_Error PTRCALL
4239entityValueProcessor(XML_Parser parser,
4240 const char *s,
4241 const char *end,
4242 const char **nextPtr)
4243{
4244 const char *start = s;
4245 const char *next = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004246 const ENCODING *enc = parser->m_encoding;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004247 int tok;
4248
4249 for (;;) {
4250 tok = XmlPrologTok(enc, start, end, &next);
4251 if (tok <= 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004252 if (!parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004253 *nextPtr = s;
4254 return XML_ERROR_NONE;
4255 }
4256 switch (tok) {
4257 case XML_TOK_INVALID:
Fred Drake31d485c2004-08-03 07:06:22 +00004258 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004259 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00004260 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004261 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00004262 return XML_ERROR_PARTIAL_CHAR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004263 case XML_TOK_NONE: /* start == end */
4264 default:
4265 break;
4266 }
Fred Drake31d485c2004-08-03 07:06:22 +00004267 /* found end of entity value - can store it now */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004268 return storeEntityValue(parser, enc, s, end);
4269 }
4270 start = next;
4271 }
4272}
4273
4274#endif /* XML_DTD */
4275
4276static enum XML_Error PTRCALL
4277prologProcessor(XML_Parser parser,
4278 const char *s,
4279 const char *end,
4280 const char **nextPtr)
4281{
4282 const char *next = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004283 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4284 return doProlog(parser, parser->m_encoding, s, end, tok, next,
4285 nextPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004286}
4287
4288static enum XML_Error
4289doProlog(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004290 const ENCODING *enc,
4291 const char *s,
4292 const char *end,
4293 int tok,
4294 const char *next,
Fred Drake31d485c2004-08-03 07:06:22 +00004295 const char **nextPtr,
4296 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004297{
4298#ifdef XML_DTD
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004299 static const XML_Char externalSubsetName[] = { ASCII_HASH , '\0' };
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004300#endif /* XML_DTD */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004301 static const XML_Char atypeCDATA[] =
4302 { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
4303 static const XML_Char atypeID[] = { ASCII_I, ASCII_D, '\0' };
4304 static const XML_Char atypeIDREF[] =
4305 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
4306 static const XML_Char atypeIDREFS[] =
4307 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
4308 static const XML_Char atypeENTITY[] =
4309 { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
4310 static const XML_Char atypeENTITIES[] = { ASCII_E, ASCII_N,
4311 ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' };
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004312 static const XML_Char atypeNMTOKEN[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004313 ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
4314 static const XML_Char atypeNMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T,
4315 ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' };
4316 static const XML_Char notationPrefix[] = { ASCII_N, ASCII_O, ASCII_T,
4317 ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0' };
4318 static const XML_Char enumValueSep[] = { ASCII_PIPE, '\0' };
4319 static const XML_Char enumValueStart[] = { ASCII_LPAREN, '\0' };
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004320
Fred Drake31d485c2004-08-03 07:06:22 +00004321 /* save one level of indirection */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004322 DTD * const dtd = parser->m_dtd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004323
4324 const char **eventPP;
4325 const char **eventEndPP;
4326 enum XML_Content_Quant quant;
4327
Benjamin Peterson4e211002018-06-26 19:25:45 -07004328 if (enc == parser->m_encoding) {
4329 eventPP = &parser->m_eventPtr;
4330 eventEndPP = &parser->m_eventEndPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004331 }
4332 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004333 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4334 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004335 }
Fred Drake31d485c2004-08-03 07:06:22 +00004336
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004337 for (;;) {
4338 int role;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004339 XML_Bool handleDefault = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004340 *eventPP = s;
4341 *eventEndPP = next;
4342 if (tok <= 0) {
Fred Drake31d485c2004-08-03 07:06:22 +00004343 if (haveMore && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004344 *nextPtr = s;
4345 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004346 }
4347 switch (tok) {
4348 case XML_TOK_INVALID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004349 *eventPP = next;
4350 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004351 case XML_TOK_PARTIAL:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004352 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004353 case XML_TOK_PARTIAL_CHAR:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004354 return XML_ERROR_PARTIAL_CHAR;
Matthias Klose865e33b2010-01-22 01:13:15 +00004355 case -XML_TOK_PROLOG_S:
4356 tok = -tok;
4357 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004358 case XML_TOK_NONE:
4359#ifdef XML_DTD
Fred Drake31d485c2004-08-03 07:06:22 +00004360 /* for internal PE NOT referenced between declarations */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004361 if (enc != parser->m_encoding && !parser->m_openInternalEntities->betweenDecl) {
Fred Drake31d485c2004-08-03 07:06:22 +00004362 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004363 return XML_ERROR_NONE;
Fred Drake31d485c2004-08-03 07:06:22 +00004364 }
4365 /* WFC: PE Between Declarations - must check that PE contains
4366 complete markup, not only for external PEs, but also for
4367 internal PEs if the reference occurs between declarations.
4368 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004369 if (parser->m_isParamEntity || enc != parser->m_encoding) {
4370 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004371 == XML_ROLE_ERROR)
Fred Drake31d485c2004-08-03 07:06:22 +00004372 return XML_ERROR_INCOMPLETE_PE;
4373 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004374 return XML_ERROR_NONE;
4375 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004376#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004377 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004378 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004379 tok = -tok;
4380 next = end;
4381 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004382 }
4383 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004384 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004385 switch (role) {
4386 case XML_ROLE_XML_DECL:
4387 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004388 enum XML_Error result = processXmlDecl(parser, 0, s, next);
4389 if (result != XML_ERROR_NONE)
4390 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004391 enc = parser->m_encoding;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004392 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004393 }
4394 break;
4395 case XML_ROLE_DOCTYPE_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004396 if (parser->m_startDoctypeDeclHandler) {
4397 parser->m_doctypeName = poolStoreString(&parser->m_tempPool, enc, s, next);
4398 if (!parser->m_doctypeName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004399 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004400 poolFinish(&parser->m_tempPool);
4401 parser->m_doctypePubid = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004402 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004403 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004404 parser->m_doctypeSysid = NULL; /* always initialize to NULL */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004405 break;
4406 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004407 if (parser->m_startDoctypeDeclHandler) {
4408 parser->m_startDoctypeDeclHandler(parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4409 parser->m_doctypePubid, 1);
4410 parser->m_doctypeName = NULL;
4411 poolClear(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004412 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004413 }
4414 break;
4415#ifdef XML_DTD
4416 case XML_ROLE_TEXT_DECL:
4417 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004418 enum XML_Error result = processXmlDecl(parser, 1, s, next);
4419 if (result != XML_ERROR_NONE)
4420 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004421 enc = parser->m_encoding;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004422 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004423 }
4424 break;
4425#endif /* XML_DTD */
4426 case XML_ROLE_DOCTYPE_PUBLIC_ID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004427#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07004428 parser->m_useForeignDTD = XML_FALSE;
4429 parser->m_declEntity = (ENTITY *)lookup(parser,
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004430 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004431 externalSubsetName,
4432 sizeof(ENTITY));
Benjamin Peterson4e211002018-06-26 19:25:45 -07004433 if (!parser->m_declEntity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004434 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004435#endif /* XML_DTD */
Fred Drake31d485c2004-08-03 07:06:22 +00004436 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004437 if (parser->m_startDoctypeDeclHandler) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004438 XML_Char *pubId;
Fred Drake31d485c2004-08-03 07:06:22 +00004439 if (!XmlIsPublicId(enc, s, next, eventPP))
4440 return XML_ERROR_PUBLICID;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004441 pubId = poolStoreString(&parser->m_tempPool, enc,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004442 s + enc->minBytesPerChar,
4443 next - enc->minBytesPerChar);
4444 if (!pubId)
Fred Drake31d485c2004-08-03 07:06:22 +00004445 return XML_ERROR_NO_MEMORY;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004446 normalizePublicId(pubId);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004447 poolFinish(&parser->m_tempPool);
4448 parser->m_doctypePubid = pubId;
Fred Drake31d485c2004-08-03 07:06:22 +00004449 handleDefault = XML_FALSE;
4450 goto alreadyChecked;
4451 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004452 /* fall through */
4453 case XML_ROLE_ENTITY_PUBLIC_ID:
4454 if (!XmlIsPublicId(enc, s, next, eventPP))
Fred Drake31d485c2004-08-03 07:06:22 +00004455 return XML_ERROR_PUBLICID;
4456 alreadyChecked:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004457 if (dtd->keepProcessing && parser->m_declEntity) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004458 XML_Char *tem = poolStoreString(&dtd->pool,
4459 enc,
4460 s + enc->minBytesPerChar,
4461 next - enc->minBytesPerChar);
4462 if (!tem)
4463 return XML_ERROR_NO_MEMORY;
4464 normalizePublicId(tem);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004465 parser->m_declEntity->publicId = tem;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004466 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004467 /* Don't suppress the default handler if we fell through from
4468 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4469 */
4470 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004471 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004472 }
4473 break;
4474 case XML_ROLE_DOCTYPE_CLOSE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004475 if (parser->m_doctypeName) {
4476 parser->m_startDoctypeDeclHandler(parser->m_handlerArg, parser->m_doctypeName,
4477 parser->m_doctypeSysid, parser->m_doctypePubid, 0);
4478 poolClear(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004479 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004480 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004481 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
4482 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004483 was not set, indicating an external subset
4484 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004485#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07004486 if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004487 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4488 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004489 if (parser->m_paramEntityParsing && parser->m_externalEntityRefHandler) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004490 ENTITY *entity = (ENTITY *)lookup(parser,
4491 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004492 externalSubsetName,
4493 sizeof(ENTITY));
Victor Stinner93d0cb52017-08-18 23:43:54 +02004494 if (!entity) {
4495 /* The external subset name "#" will have already been
4496 * inserted into the hash table at the start of the
4497 * external entity parsing, so no allocation will happen
4498 * and lookup() cannot fail.
4499 */
4500 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4501 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004502 if (parser->m_useForeignDTD)
4503 entity->base = parser->m_curBase;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004504 dtd->paramEntityRead = XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004505 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004506 0,
4507 entity->base,
4508 entity->systemId,
4509 entity->publicId))
4510 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004511 if (dtd->paramEntityRead) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004512 if (!dtd->standalone &&
Benjamin Peterson4e211002018-06-26 19:25:45 -07004513 parser->m_notStandaloneHandler &&
4514 !parser->m_notStandaloneHandler(parser->m_handlerArg))
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004515 return XML_ERROR_NOT_STANDALONE;
4516 }
4517 /* if we didn't read the foreign DTD then this means that there
4518 is no external subset and we must reset dtd->hasParamEntityRefs
4519 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004520 else if (!parser->m_doctypeSysid)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004521 dtd->hasParamEntityRefs = hadParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004522 /* end of DTD - no need to update dtd->keepProcessing */
4523 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004524 parser->m_useForeignDTD = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004525 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004526#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004527 if (parser->m_endDoctypeDeclHandler) {
4528 parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004529 handleDefault = XML_FALSE;
4530 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004531 break;
4532 case XML_ROLE_INSTANCE_START:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004533#ifdef XML_DTD
4534 /* if there is no DOCTYPE declaration then now is the
4535 last chance to read the foreign DTD
4536 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004537 if (parser->m_useForeignDTD) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004538 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004539 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004540 if (parser->m_paramEntityParsing && parser->m_externalEntityRefHandler) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004541 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004542 externalSubsetName,
4543 sizeof(ENTITY));
4544 if (!entity)
4545 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004546 entity->base = parser->m_curBase;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004547 dtd->paramEntityRead = XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004548 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004549 0,
4550 entity->base,
4551 entity->systemId,
4552 entity->publicId))
4553 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004554 if (dtd->paramEntityRead) {
4555 if (!dtd->standalone &&
Benjamin Peterson4e211002018-06-26 19:25:45 -07004556 parser->m_notStandaloneHandler &&
4557 !parser->m_notStandaloneHandler(parser->m_handlerArg))
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004558 return XML_ERROR_NOT_STANDALONE;
4559 }
4560 /* if we didn't read the foreign DTD then this means that there
4561 is no external subset and we must reset dtd->hasParamEntityRefs
4562 */
4563 else
4564 dtd->hasParamEntityRefs = hadParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004565 /* end of DTD - no need to update dtd->keepProcessing */
4566 }
4567 }
4568#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004569 parser->m_processor = contentProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004570 return contentProcessor(parser, s, end, nextPtr);
4571 case XML_ROLE_ATTLIST_ELEMENT_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004572 parser->m_declElementType = getElementType(parser, enc, s, next);
4573 if (!parser->m_declElementType)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004574 return XML_ERROR_NO_MEMORY;
4575 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004576 case XML_ROLE_ATTRIBUTE_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004577 parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
4578 if (!parser->m_declAttributeId)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004579 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004580 parser->m_declAttributeIsCdata = XML_FALSE;
4581 parser->m_declAttributeType = NULL;
4582 parser->m_declAttributeIsId = XML_FALSE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004583 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004584 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004585 parser->m_declAttributeIsCdata = XML_TRUE;
4586 parser->m_declAttributeType = atypeCDATA;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004587 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004588 case XML_ROLE_ATTRIBUTE_TYPE_ID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004589 parser->m_declAttributeIsId = XML_TRUE;
4590 parser->m_declAttributeType = atypeID;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004591 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004592 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004593 parser->m_declAttributeType = atypeIDREF;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004594 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004595 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004596 parser->m_declAttributeType = atypeIDREFS;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004597 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004598 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004599 parser->m_declAttributeType = atypeENTITY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004600 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004601 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004602 parser->m_declAttributeType = atypeENTITIES;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004603 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004604 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004605 parser->m_declAttributeType = atypeNMTOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004606 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004607 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004608 parser->m_declAttributeType = atypeNMTOKENS;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004609 checkAttListDeclHandler:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004610 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004611 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004612 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004613 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
4614 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004615 if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004616 const XML_Char *prefix;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004617 if (parser->m_declAttributeType) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004618 prefix = enumValueSep;
4619 }
4620 else {
4621 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE
4622 ? notationPrefix
4623 : enumValueStart);
4624 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004625 if (!poolAppendString(&parser->m_tempPool, prefix))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004626 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004627 if (!poolAppend(&parser->m_tempPool, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004628 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004629 parser->m_declAttributeType = parser->m_tempPool.start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004630 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004631 }
4632 break;
4633 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
4634 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004635 if (dtd->keepProcessing) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004636 if (!defineAttribute(parser->m_declElementType, parser->m_declAttributeId,
4637 parser->m_declAttributeIsCdata, parser->m_declAttributeIsId,
Fred Drake08317ae2003-10-21 15:38:55 +00004638 0, parser))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004639 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004640 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4641 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4642 || (*parser->m_declAttributeType == XML_T(ASCII_N)
4643 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004644 /* Enumerated or Notation type */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004645 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4646 || !poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004647 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004648 parser->m_declAttributeType = parser->m_tempPool.start;
4649 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004650 }
4651 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004652 parser->m_attlistDeclHandler(parser->m_handlerArg, parser->m_declElementType->name,
4653 parser->m_declAttributeId->name, parser->m_declAttributeType,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004654 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004655 poolClear(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004656 handleDefault = XML_FALSE;
4657 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004658 }
4659 break;
4660 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
4661 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004662 if (dtd->keepProcessing) {
4663 const XML_Char *attVal;
Fred Drake08317ae2003-10-21 15:38:55 +00004664 enum XML_Error result =
Benjamin Peterson4e211002018-06-26 19:25:45 -07004665 storeAttributeValue(parser, enc, parser->m_declAttributeIsCdata,
Fred Drake08317ae2003-10-21 15:38:55 +00004666 s + enc->minBytesPerChar,
4667 next - enc->minBytesPerChar,
4668 &dtd->pool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004669 if (result)
4670 return result;
4671 attVal = poolStart(&dtd->pool);
4672 poolFinish(&dtd->pool);
4673 /* ID attributes aren't allowed to have a default */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004674 if (!defineAttribute(parser->m_declElementType, parser->m_declAttributeId,
4675 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004676 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004677 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4678 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4679 || (*parser->m_declAttributeType == XML_T(ASCII_N)
4680 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004681 /* Enumerated or Notation type */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004682 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4683 || !poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004684 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004685 parser->m_declAttributeType = parser->m_tempPool.start;
4686 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004687 }
4688 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004689 parser->m_attlistDeclHandler(parser->m_handlerArg, parser->m_declElementType->name,
4690 parser->m_declAttributeId->name, parser->m_declAttributeType,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004691 attVal,
4692 role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004693 poolClear(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004694 handleDefault = XML_FALSE;
4695 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004696 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004697 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004698 case XML_ROLE_ENTITY_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004699 if (dtd->keepProcessing) {
4700 enum XML_Error result = storeEntityValue(parser, enc,
4701 s + enc->minBytesPerChar,
4702 next - enc->minBytesPerChar);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004703 if (parser->m_declEntity) {
4704 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
4705 parser->m_declEntity->textLen = (int)(poolLength(&dtd->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004706 poolFinish(&dtd->entityValuePool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004707 if (parser->m_entityDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004708 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004709 parser->m_entityDeclHandler(parser->m_handlerArg,
4710 parser->m_declEntity->name,
4711 parser->m_declEntity->is_param,
4712 parser->m_declEntity->textPtr,
4713 parser->m_declEntity->textLen,
4714 parser->m_curBase, 0, 0, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004715 handleDefault = XML_FALSE;
4716 }
4717 }
4718 else
4719 poolDiscard(&dtd->entityValuePool);
4720 if (result != XML_ERROR_NONE)
4721 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004722 }
4723 break;
4724 case XML_ROLE_DOCTYPE_SYSTEM_ID:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004725#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07004726 parser->m_useForeignDTD = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004727#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004728 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004729 if (parser->m_startDoctypeDeclHandler) {
4730 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004731 s + enc->minBytesPerChar,
4732 next - enc->minBytesPerChar);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004733 if (parser->m_doctypeSysid == NULL)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004734 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004735 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004736 handleDefault = XML_FALSE;
4737 }
4738#ifdef XML_DTD
4739 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07004740 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
4741 for the case where no parser->m_startDoctypeDeclHandler is set */
4742 parser->m_doctypeSysid = externalSubsetName;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004743#endif /* XML_DTD */
4744 if (!dtd->standalone
4745#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07004746 && !parser->m_paramEntityParsing
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004747#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004748 && parser->m_notStandaloneHandler
4749 && !parser->m_notStandaloneHandler(parser->m_handlerArg))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004750 return XML_ERROR_NOT_STANDALONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004751#ifndef XML_DTD
4752 break;
4753#else /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004754 if (!parser->m_declEntity) {
4755 parser->m_declEntity = (ENTITY *)lookup(parser,
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004756 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004757 externalSubsetName,
4758 sizeof(ENTITY));
Benjamin Peterson4e211002018-06-26 19:25:45 -07004759 if (!parser->m_declEntity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004760 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004761 parser->m_declEntity->publicId = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004762 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004763#endif /* XML_DTD */
Benjamin Peterson5033aa72018-09-10 21:04:00 -07004764 /* fall through */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004765 case XML_ROLE_ENTITY_SYSTEM_ID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004766 if (dtd->keepProcessing && parser->m_declEntity) {
4767 parser->m_declEntity->systemId = poolStoreString(&dtd->pool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004768 s + enc->minBytesPerChar,
4769 next - enc->minBytesPerChar);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004770 if (!parser->m_declEntity->systemId)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004771 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004772 parser->m_declEntity->base = parser->m_curBase;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004773 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004774 /* Don't suppress the default handler if we fell through from
4775 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
4776 */
4777 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004778 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004779 }
4780 break;
4781 case XML_ROLE_ENTITY_COMPLETE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004782 if (dtd->keepProcessing && parser->m_declEntity && parser->m_entityDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004783 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004784 parser->m_entityDeclHandler(parser->m_handlerArg,
4785 parser->m_declEntity->name,
4786 parser->m_declEntity->is_param,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004787 0,0,
Benjamin Peterson4e211002018-06-26 19:25:45 -07004788 parser->m_declEntity->base,
4789 parser->m_declEntity->systemId,
4790 parser->m_declEntity->publicId,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004791 0);
4792 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004793 }
4794 break;
4795 case XML_ROLE_ENTITY_NOTATION_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004796 if (dtd->keepProcessing && parser->m_declEntity) {
4797 parser->m_declEntity->notation = poolStoreString(&dtd->pool, enc, s, next);
4798 if (!parser->m_declEntity->notation)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004799 return XML_ERROR_NO_MEMORY;
4800 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004801 if (parser->m_unparsedEntityDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004802 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004803 parser->m_unparsedEntityDeclHandler(parser->m_handlerArg,
4804 parser->m_declEntity->name,
4805 parser->m_declEntity->base,
4806 parser->m_declEntity->systemId,
4807 parser->m_declEntity->publicId,
4808 parser->m_declEntity->notation);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004809 handleDefault = XML_FALSE;
4810 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004811 else if (parser->m_entityDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004812 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004813 parser->m_entityDeclHandler(parser->m_handlerArg,
4814 parser->m_declEntity->name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004815 0,0,0,
Benjamin Peterson4e211002018-06-26 19:25:45 -07004816 parser->m_declEntity->base,
4817 parser->m_declEntity->systemId,
4818 parser->m_declEntity->publicId,
4819 parser->m_declEntity->notation);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004820 handleDefault = XML_FALSE;
4821 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004822 }
4823 break;
4824 case XML_ROLE_GENERAL_ENTITY_NAME:
4825 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004826 if (XmlPredefinedEntityName(enc, s, next)) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004827 parser->m_declEntity = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004828 break;
4829 }
4830 if (dtd->keepProcessing) {
4831 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4832 if (!name)
4833 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004834 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004835 sizeof(ENTITY));
Benjamin Peterson4e211002018-06-26 19:25:45 -07004836 if (!parser->m_declEntity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004837 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004838 if (parser->m_declEntity->name != name) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004839 poolDiscard(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004840 parser->m_declEntity = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004841 }
4842 else {
4843 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004844 parser->m_declEntity->publicId = NULL;
4845 parser->m_declEntity->is_param = XML_FALSE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004846 /* if we have a parent parser or are reading an internal parameter
4847 entity, then the entity declaration is not considered "internal"
4848 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004849 parser->m_declEntity->is_internal = !(parser->m_parentParser || parser->m_openInternalEntities);
4850 if (parser->m_entityDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004851 handleDefault = XML_FALSE;
4852 }
4853 }
4854 else {
4855 poolDiscard(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004856 parser->m_declEntity = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004857 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004858 }
4859 break;
4860 case XML_ROLE_PARAM_ENTITY_NAME:
4861#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004862 if (dtd->keepProcessing) {
4863 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4864 if (!name)
4865 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004866 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004867 name, sizeof(ENTITY));
Benjamin Peterson4e211002018-06-26 19:25:45 -07004868 if (!parser->m_declEntity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004869 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004870 if (parser->m_declEntity->name != name) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004871 poolDiscard(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004872 parser->m_declEntity = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004873 }
4874 else {
4875 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004876 parser->m_declEntity->publicId = NULL;
4877 parser->m_declEntity->is_param = XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004878 /* if we have a parent parser or are reading an internal parameter
4879 entity, then the entity declaration is not considered "internal"
4880 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004881 parser->m_declEntity->is_internal = !(parser->m_parentParser || parser->m_openInternalEntities);
4882 if (parser->m_entityDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004883 handleDefault = XML_FALSE;
4884 }
4885 }
4886 else {
4887 poolDiscard(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004888 parser->m_declEntity = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004889 }
4890#else /* not XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004891 parser->m_declEntity = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004892#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004893 break;
4894 case XML_ROLE_NOTATION_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004895 parser->m_declNotationPublicId = NULL;
4896 parser->m_declNotationName = NULL;
4897 if (parser->m_notationDeclHandler) {
4898 parser->m_declNotationName = poolStoreString(&parser->m_tempPool, enc, s, next);
4899 if (!parser->m_declNotationName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004900 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004901 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004902 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004903 }
4904 break;
4905 case XML_ROLE_NOTATION_PUBLIC_ID:
4906 if (!XmlIsPublicId(enc, s, next, eventPP))
Fred Drake31d485c2004-08-03 07:06:22 +00004907 return XML_ERROR_PUBLICID;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004908 if (parser->m_declNotationName) { /* means m_notationDeclHandler != NULL */
4909 XML_Char *tem = poolStoreString(&parser->m_tempPool,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004910 enc,
4911 s + enc->minBytesPerChar,
4912 next - enc->minBytesPerChar);
4913 if (!tem)
4914 return XML_ERROR_NO_MEMORY;
4915 normalizePublicId(tem);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004916 parser->m_declNotationPublicId = tem;
4917 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004918 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004919 }
4920 break;
4921 case XML_ROLE_NOTATION_SYSTEM_ID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004922 if (parser->m_declNotationName && parser->m_notationDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004923 const XML_Char *systemId
Benjamin Peterson4e211002018-06-26 19:25:45 -07004924 = poolStoreString(&parser->m_tempPool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004925 s + enc->minBytesPerChar,
4926 next - enc->minBytesPerChar);
4927 if (!systemId)
4928 return XML_ERROR_NO_MEMORY;
4929 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004930 parser->m_notationDeclHandler(parser->m_handlerArg,
4931 parser->m_declNotationName,
4932 parser->m_curBase,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004933 systemId,
Benjamin Peterson4e211002018-06-26 19:25:45 -07004934 parser->m_declNotationPublicId);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004935 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004936 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004937 poolClear(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004938 break;
4939 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004940 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004941 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004942 parser->m_notationDeclHandler(parser->m_handlerArg,
4943 parser->m_declNotationName,
4944 parser->m_curBase,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004945 0,
Benjamin Peterson4e211002018-06-26 19:25:45 -07004946 parser->m_declNotationPublicId);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004947 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004948 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004949 poolClear(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004950 break;
4951 case XML_ROLE_ERROR:
4952 switch (tok) {
4953 case XML_TOK_PARAM_ENTITY_REF:
Fred Drake31d485c2004-08-03 07:06:22 +00004954 /* PE references in internal subset are
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004955 not allowed within declarations. */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004956 return XML_ERROR_PARAM_ENTITY_REF;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004957 case XML_TOK_XML_DECL:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004958 return XML_ERROR_MISPLACED_XML_PI;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004959 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004960 return XML_ERROR_SYNTAX;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004961 }
4962#ifdef XML_DTD
4963 case XML_ROLE_IGNORE_SECT:
4964 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004965 enum XML_Error result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004966 if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004967 reportDefault(parser, enc, s, next);
4968 handleDefault = XML_FALSE;
Fred Drake31d485c2004-08-03 07:06:22 +00004969 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
4970 if (result != XML_ERROR_NONE)
4971 return result;
4972 else if (!next) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004973 parser->m_processor = ignoreSectionProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004974 return result;
4975 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004976 }
4977 break;
4978#endif /* XML_DTD */
4979 case XML_ROLE_GROUP_OPEN:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004980 if (parser->m_prologState.level >= parser->m_groupSize) {
4981 if (parser->m_groupSize) {
4982 char *temp = (char *)REALLOC(parser, parser->m_groupConnector, parser->m_groupSize *= 2);
Victor Stinner93d0cb52017-08-18 23:43:54 +02004983 if (temp == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004984 parser->m_groupSize /= 2;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004985 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02004986 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004987 parser->m_groupConnector = temp;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004988 if (dtd->scaffIndex) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004989 int *temp = (int *)REALLOC(parser, dtd->scaffIndex,
4990 parser->m_groupSize * sizeof(int));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004991 if (temp == NULL)
4992 return XML_ERROR_NO_MEMORY;
4993 dtd->scaffIndex = temp;
4994 }
4995 }
4996 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004997 parser->m_groupConnector = (char *)MALLOC(parser, parser->m_groupSize = 32);
4998 if (!parser->m_groupConnector) {
4999 parser->m_groupSize = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005000 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005001 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005002 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005003 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07005004 parser->m_groupConnector[parser->m_prologState.level] = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005005 if (dtd->in_eldecl) {
5006 int myindex = nextScaffoldPart(parser);
5007 if (myindex < 0)
5008 return XML_ERROR_NO_MEMORY;
5009 dtd->scaffIndex[dtd->scaffLevel] = myindex;
5010 dtd->scaffLevel++;
5011 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005012 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005013 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005014 }
5015 break;
5016 case XML_ROLE_GROUP_SEQUENCE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005017 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005018 return XML_ERROR_SYNTAX;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005019 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5020 if (dtd->in_eldecl && parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005021 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005022 break;
5023 case XML_ROLE_GROUP_CHOICE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005024 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005025 return XML_ERROR_SYNTAX;
5026 if (dtd->in_eldecl
Benjamin Peterson4e211002018-06-26 19:25:45 -07005027 && !parser->m_groupConnector[parser->m_prologState.level]
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005028 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5029 != XML_CTYPE_MIXED)
5030 ) {
5031 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5032 = XML_CTYPE_CHOICE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005033 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005034 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005035 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07005036 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005037 break;
5038 case XML_ROLE_PARAM_ENTITY_REF:
5039#ifdef XML_DTD
5040 case XML_ROLE_INNER_PARAM_ENTITY_REF:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005041 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005042 if (!parser->m_paramEntityParsing)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005043 dtd->keepProcessing = dtd->standalone;
5044 else {
5045 const XML_Char *name;
5046 ENTITY *entity;
5047 name = poolStoreString(&dtd->pool, enc,
5048 s + enc->minBytesPerChar,
5049 next - enc->minBytesPerChar);
5050 if (!name)
5051 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005052 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005053 poolDiscard(&dtd->pool);
5054 /* first, determine if a check for an existing declaration is needed;
5055 if yes, check that the entity exists, and that it is internal,
5056 otherwise call the skipped entity handler
5057 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005058 if (parser->m_prologState.documentEntity &&
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005059 (dtd->standalone
Benjamin Peterson4e211002018-06-26 19:25:45 -07005060 ? !parser->m_openInternalEntities
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005061 : !dtd->hasParamEntityRefs)) {
5062 if (!entity)
5063 return XML_ERROR_UNDEFINED_ENTITY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005064 else if (!entity->is_internal) {
5065 /* It's hard to exhaustively search the code to be sure,
5066 * but there doesn't seem to be a way of executing the
5067 * following line. There are two cases:
5068 *
5069 * If 'standalone' is false, the DTD must have no
5070 * parameter entities or we wouldn't have passed the outer
5071 * 'if' statement. That means the only entity in the hash
5072 * table is the external subset name "#" which cannot be
5073 * given as a parameter entity name in XML syntax, so the
5074 * lookup must have returned NULL and we don't even reach
5075 * the test for an internal entity.
5076 *
5077 * If 'standalone' is true, it does not seem to be
5078 * possible to create entities taking this code path that
5079 * are not internal entities, so fail the test above.
5080 *
5081 * Because this analysis is very uncertain, the code is
5082 * being left in place and merely removed from the
5083 * coverage test statistics.
5084 */
5085 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5086 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005087 }
5088 else if (!entity) {
5089 dtd->keepProcessing = dtd->standalone;
5090 /* cannot report skipped entities in declarations */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005091 if ((role == XML_ROLE_PARAM_ENTITY_REF) && parser->m_skippedEntityHandler) {
5092 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005093 handleDefault = XML_FALSE;
5094 }
5095 break;
5096 }
5097 if (entity->open)
5098 return XML_ERROR_RECURSIVE_ENTITY_REF;
5099 if (entity->textPtr) {
5100 enum XML_Error result;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005101 XML_Bool betweenDecl =
Fred Drake31d485c2004-08-03 07:06:22 +00005102 (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5103 result = processInternalEntity(parser, entity, betweenDecl);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005104 if (result != XML_ERROR_NONE)
5105 return result;
5106 handleDefault = XML_FALSE;
5107 break;
5108 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07005109 if (parser->m_externalEntityRefHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005110 dtd->paramEntityRead = XML_FALSE;
5111 entity->open = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005112 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005113 0,
5114 entity->base,
5115 entity->systemId,
5116 entity->publicId)) {
5117 entity->open = XML_FALSE;
5118 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5119 }
5120 entity->open = XML_FALSE;
5121 handleDefault = XML_FALSE;
5122 if (!dtd->paramEntityRead) {
5123 dtd->keepProcessing = dtd->standalone;
5124 break;
5125 }
5126 }
5127 else {
5128 dtd->keepProcessing = dtd->standalone;
5129 break;
5130 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005131 }
5132#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005133 if (!dtd->standalone &&
Benjamin Peterson4e211002018-06-26 19:25:45 -07005134 parser->m_notStandaloneHandler &&
5135 !parser->m_notStandaloneHandler(parser->m_handlerArg))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005136 return XML_ERROR_NOT_STANDALONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005137 break;
5138
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005139 /* Element declaration stuff */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005140
5141 case XML_ROLE_ELEMENT_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005142 if (parser->m_elementDeclHandler) {
5143 parser->m_declElementType = getElementType(parser, enc, s, next);
5144 if (!parser->m_declElementType)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005145 return XML_ERROR_NO_MEMORY;
5146 dtd->scaffLevel = 0;
5147 dtd->scaffCount = 0;
5148 dtd->in_eldecl = XML_TRUE;
5149 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005150 }
5151 break;
5152
5153 case XML_ROLE_CONTENT_ANY:
5154 case XML_ROLE_CONTENT_EMPTY:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005155 if (dtd->in_eldecl) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005156 if (parser->m_elementDeclHandler) {
5157 XML_Content * content = (XML_Content *) MALLOC(parser, sizeof(XML_Content));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005158 if (!content)
5159 return XML_ERROR_NO_MEMORY;
5160 content->quant = XML_CQUANT_NONE;
5161 content->name = NULL;
5162 content->numchildren = 0;
5163 content->children = NULL;
5164 content->type = ((role == XML_ROLE_CONTENT_ANY) ?
5165 XML_CTYPE_ANY :
5166 XML_CTYPE_EMPTY);
5167 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005168 parser->m_elementDeclHandler(parser->m_handlerArg, parser->m_declElementType->name, content);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005169 handleDefault = XML_FALSE;
5170 }
5171 dtd->in_eldecl = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005172 }
5173 break;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005174
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005175 case XML_ROLE_CONTENT_PCDATA:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005176 if (dtd->in_eldecl) {
5177 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5178 = XML_CTYPE_MIXED;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005179 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005180 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005181 }
5182 break;
5183
5184 case XML_ROLE_CONTENT_ELEMENT:
5185 quant = XML_CQUANT_NONE;
5186 goto elementContent;
5187 case XML_ROLE_CONTENT_ELEMENT_OPT:
5188 quant = XML_CQUANT_OPT;
5189 goto elementContent;
5190 case XML_ROLE_CONTENT_ELEMENT_REP:
5191 quant = XML_CQUANT_REP;
5192 goto elementContent;
5193 case XML_ROLE_CONTENT_ELEMENT_PLUS:
5194 quant = XML_CQUANT_PLUS;
5195 elementContent:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005196 if (dtd->in_eldecl) {
5197 ELEMENT_TYPE *el;
5198 const XML_Char *name;
5199 int nameLen;
5200 const char *nxt = (quant == XML_CQUANT_NONE
5201 ? next
5202 : next - enc->minBytesPerChar);
5203 int myindex = nextScaffoldPart(parser);
5204 if (myindex < 0)
5205 return XML_ERROR_NO_MEMORY;
5206 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5207 dtd->scaffold[myindex].quant = quant;
5208 el = getElementType(parser, enc, s, nxt);
5209 if (!el)
5210 return XML_ERROR_NO_MEMORY;
5211 name = el->name;
5212 dtd->scaffold[myindex].name = name;
5213 nameLen = 0;
5214 for (; name[nameLen++]; );
5215 dtd->contentStringLen += nameLen;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005216 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005217 handleDefault = XML_FALSE;
5218 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005219 break;
5220
5221 case XML_ROLE_GROUP_CLOSE:
5222 quant = XML_CQUANT_NONE;
5223 goto closeGroup;
5224 case XML_ROLE_GROUP_CLOSE_OPT:
5225 quant = XML_CQUANT_OPT;
5226 goto closeGroup;
5227 case XML_ROLE_GROUP_CLOSE_REP:
5228 quant = XML_CQUANT_REP;
5229 goto closeGroup;
5230 case XML_ROLE_GROUP_CLOSE_PLUS:
5231 quant = XML_CQUANT_PLUS;
5232 closeGroup:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005233 if (dtd->in_eldecl) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005234 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005235 handleDefault = XML_FALSE;
5236 dtd->scaffLevel--;
5237 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5238 if (dtd->scaffLevel == 0) {
5239 if (!handleDefault) {
5240 XML_Content *model = build_model(parser);
5241 if (!model)
5242 return XML_ERROR_NO_MEMORY;
5243 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005244 parser->m_elementDeclHandler(parser->m_handlerArg, parser->m_declElementType->name, model);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005245 }
5246 dtd->in_eldecl = XML_FALSE;
5247 dtd->contentStringLen = 0;
5248 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005249 }
5250 break;
5251 /* End element declaration stuff */
5252
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005253 case XML_ROLE_PI:
5254 if (!reportProcessingInstruction(parser, enc, s, next))
5255 return XML_ERROR_NO_MEMORY;
5256 handleDefault = XML_FALSE;
5257 break;
5258 case XML_ROLE_COMMENT:
5259 if (!reportComment(parser, enc, s, next))
5260 return XML_ERROR_NO_MEMORY;
5261 handleDefault = XML_FALSE;
5262 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005263 case XML_ROLE_NONE:
5264 switch (tok) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005265 case XML_TOK_BOM:
5266 handleDefault = XML_FALSE;
5267 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005268 }
5269 break;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005270 case XML_ROLE_DOCTYPE_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005271 if (parser->m_startDoctypeDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005272 handleDefault = XML_FALSE;
5273 break;
5274 case XML_ROLE_ENTITY_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005275 if (dtd->keepProcessing && parser->m_entityDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005276 handleDefault = XML_FALSE;
5277 break;
5278 case XML_ROLE_NOTATION_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005279 if (parser->m_notationDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005280 handleDefault = XML_FALSE;
5281 break;
5282 case XML_ROLE_ATTLIST_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005283 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005284 handleDefault = XML_FALSE;
5285 break;
5286 case XML_ROLE_ELEMENT_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005287 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005288 handleDefault = XML_FALSE;
5289 break;
5290 } /* end of big switch */
5291
Benjamin Peterson4e211002018-06-26 19:25:45 -07005292 if (handleDefault && parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005293 reportDefault(parser, enc, s, next);
5294
Benjamin Peterson4e211002018-06-26 19:25:45 -07005295 switch (parser->m_parsingStatus.parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005296 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00005297 *nextPtr = next;
5298 return XML_ERROR_NONE;
5299 case XML_FINISHED:
5300 return XML_ERROR_ABORTED;
5301 default:
5302 s = next;
5303 tok = XmlPrologTok(enc, s, end, &next);
5304 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005305 }
5306 /* not reached */
5307}
5308
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005309static enum XML_Error PTRCALL
5310epilogProcessor(XML_Parser parser,
5311 const char *s,
5312 const char *end,
5313 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005314{
Benjamin Peterson4e211002018-06-26 19:25:45 -07005315 parser->m_processor = epilogProcessor;
5316 parser->m_eventPtr = s;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005317 for (;;) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005318 const char *next = NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005319 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5320 parser->m_eventEndPtr = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005321 switch (tok) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005322 /* report partial linebreak - it might be the last token */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005323 case -XML_TOK_PROLOG_S:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005324 if (parser->m_defaultHandler) {
5325 reportDefault(parser, parser->m_encoding, s, next);
5326 if (parser->m_parsingStatus.parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00005327 return XML_ERROR_ABORTED;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005328 }
Fred Drake31d485c2004-08-03 07:06:22 +00005329 *nextPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005330 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005331 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00005332 *nextPtr = s;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005333 return XML_ERROR_NONE;
5334 case XML_TOK_PROLOG_S:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005335 if (parser->m_defaultHandler)
5336 reportDefault(parser, parser->m_encoding, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005337 break;
5338 case XML_TOK_PI:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005339 if (!reportProcessingInstruction(parser, parser->m_encoding, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005340 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005341 break;
5342 case XML_TOK_COMMENT:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005343 if (!reportComment(parser, parser->m_encoding, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005344 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005345 break;
5346 case XML_TOK_INVALID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005347 parser->m_eventPtr = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005348 return XML_ERROR_INVALID_TOKEN;
5349 case XML_TOK_PARTIAL:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005350 if (!parser->m_parsingStatus.finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005351 *nextPtr = s;
5352 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005353 }
5354 return XML_ERROR_UNCLOSED_TOKEN;
5355 case XML_TOK_PARTIAL_CHAR:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005356 if (!parser->m_parsingStatus.finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005357 *nextPtr = s;
5358 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005359 }
5360 return XML_ERROR_PARTIAL_CHAR;
5361 default:
5362 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5363 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07005364 parser->m_eventPtr = s = next;
5365 switch (parser->m_parsingStatus.parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005366 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00005367 *nextPtr = next;
5368 return XML_ERROR_NONE;
5369 case XML_FINISHED:
5370 return XML_ERROR_ABORTED;
5371 default: ;
5372 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005373 }
5374}
5375
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005376static enum XML_Error
Fred Drake31d485c2004-08-03 07:06:22 +00005377processInternalEntity(XML_Parser parser, ENTITY *entity,
5378 XML_Bool betweenDecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005379{
Fred Drake31d485c2004-08-03 07:06:22 +00005380 const char *textStart, *textEnd;
5381 const char *next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005382 enum XML_Error result;
Fred Drake31d485c2004-08-03 07:06:22 +00005383 OPEN_INTERNAL_ENTITY *openEntity;
5384
Benjamin Peterson4e211002018-06-26 19:25:45 -07005385 if (parser->m_freeInternalEntities) {
5386 openEntity = parser->m_freeInternalEntities;
5387 parser->m_freeInternalEntities = openEntity->next;
Fred Drake31d485c2004-08-03 07:06:22 +00005388 }
5389 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005390 openEntity = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
Fred Drake31d485c2004-08-03 07:06:22 +00005391 if (!openEntity)
5392 return XML_ERROR_NO_MEMORY;
5393 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005394 entity->open = XML_TRUE;
Fred Drake31d485c2004-08-03 07:06:22 +00005395 entity->processed = 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005396 openEntity->next = parser->m_openInternalEntities;
5397 parser->m_openInternalEntities = openEntity;
Fred Drake31d485c2004-08-03 07:06:22 +00005398 openEntity->entity = entity;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005399 openEntity->startTagLevel = parser->m_tagLevel;
Fred Drake31d485c2004-08-03 07:06:22 +00005400 openEntity->betweenDecl = betweenDecl;
5401 openEntity->internalEventPtr = NULL;
5402 openEntity->internalEventEndPtr = NULL;
5403 textStart = (char *)entity->textPtr;
5404 textEnd = (char *)(entity->textPtr + entity->textLen);
Victor Stinner5ff71322017-06-21 14:39:22 +02005405 /* Set a safe default value in case 'next' does not get set */
5406 next = textStart;
Fred Drake31d485c2004-08-03 07:06:22 +00005407
5408#ifdef XML_DTD
5409 if (entity->is_param) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005410 int tok = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5411 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, tok,
Fred Drake31d485c2004-08-03 07:06:22 +00005412 next, &next, XML_FALSE);
5413 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005414 else
Fred Drake31d485c2004-08-03 07:06:22 +00005415#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005416 result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding, textStart,
Fred Drake31d485c2004-08-03 07:06:22 +00005417 textEnd, &next, XML_FALSE);
5418
5419 if (result == XML_ERROR_NONE) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005420 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005421 entity->processed = (int)(next - textStart);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005422 parser->m_processor = internalEntityProcessor;
Fred Drake31d485c2004-08-03 07:06:22 +00005423 }
5424 else {
5425 entity->open = XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005426 parser->m_openInternalEntities = openEntity->next;
Fred Drake31d485c2004-08-03 07:06:22 +00005427 /* put openEntity back in list of free instances */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005428 openEntity->next = parser->m_freeInternalEntities;
5429 parser->m_freeInternalEntities = openEntity;
Fred Drake31d485c2004-08-03 07:06:22 +00005430 }
5431 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005432 return result;
5433}
5434
Fred Drake31d485c2004-08-03 07:06:22 +00005435static enum XML_Error PTRCALL
5436internalEntityProcessor(XML_Parser parser,
5437 const char *s,
5438 const char *end,
5439 const char **nextPtr)
5440{
5441 ENTITY *entity;
5442 const char *textStart, *textEnd;
5443 const char *next;
5444 enum XML_Error result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005445 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
Fred Drake31d485c2004-08-03 07:06:22 +00005446 if (!openEntity)
5447 return XML_ERROR_UNEXPECTED_STATE;
5448
5449 entity = openEntity->entity;
5450 textStart = ((char *)entity->textPtr) + entity->processed;
5451 textEnd = (char *)(entity->textPtr + entity->textLen);
Victor Stinner5ff71322017-06-21 14:39:22 +02005452 /* Set a safe default value in case 'next' does not get set */
5453 next = textStart;
Fred Drake31d485c2004-08-03 07:06:22 +00005454
5455#ifdef XML_DTD
5456 if (entity->is_param) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005457 int tok = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5458 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, tok,
Fred Drake31d485c2004-08-03 07:06:22 +00005459 next, &next, XML_FALSE);
5460 }
5461 else
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005462#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005463 result = doContent(parser, openEntity->startTagLevel, parser->m_internalEncoding,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005464 textStart, textEnd, &next, XML_FALSE);
Fred Drake31d485c2004-08-03 07:06:22 +00005465
5466 if (result != XML_ERROR_NONE)
5467 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005468 else if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005469 entity->processed = (int)(next - (char *)entity->textPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00005470 return result;
5471 }
5472 else {
5473 entity->open = XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005474 parser->m_openInternalEntities = openEntity->next;
Fred Drake31d485c2004-08-03 07:06:22 +00005475 /* put openEntity back in list of free instances */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005476 openEntity->next = parser->m_freeInternalEntities;
5477 parser->m_freeInternalEntities = openEntity;
Fred Drake31d485c2004-08-03 07:06:22 +00005478 }
5479
5480#ifdef XML_DTD
5481 if (entity->is_param) {
5482 int tok;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005483 parser->m_processor = prologProcessor;
5484 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5485 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
5486 (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00005487 }
5488 else
5489#endif /* XML_DTD */
5490 {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005491 parser->m_processor = contentProcessor;
Fred Drake31d485c2004-08-03 07:06:22 +00005492 /* see externalEntityContentProcessor vs contentProcessor */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005493 return doContent(parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, s, end,
5494 nextPtr, (XML_Bool)!parser->m_parsingStatus.finalBuffer);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005495 }
Fred Drake31d485c2004-08-03 07:06:22 +00005496}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005497
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005498static enum XML_Error PTRCALL
5499errorProcessor(XML_Parser parser,
Victor Stinner23ec4b52017-06-15 00:54:36 +02005500 const char *UNUSED_P(s),
5501 const char *UNUSED_P(end),
5502 const char **UNUSED_P(nextPtr))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005503{
Benjamin Peterson4e211002018-06-26 19:25:45 -07005504 return parser->m_errorCode;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005505}
5506
5507static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005508storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5509 const char *ptr, const char *end,
5510 STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005511{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005512 enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr,
5513 end, pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005514 if (result)
5515 return result;
5516 if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5517 poolChop(pool);
5518 if (!poolAppendChar(pool, XML_T('\0')))
5519 return XML_ERROR_NO_MEMORY;
5520 return XML_ERROR_NONE;
5521}
5522
5523static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005524appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5525 const char *ptr, const char *end,
5526 STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005527{
Benjamin Peterson4e211002018-06-26 19:25:45 -07005528 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005529 for (;;) {
5530 const char *next;
5531 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
5532 switch (tok) {
5533 case XML_TOK_NONE:
5534 return XML_ERROR_NONE;
5535 case XML_TOK_INVALID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005536 if (enc == parser->m_encoding)
5537 parser->m_eventPtr = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005538 return XML_ERROR_INVALID_TOKEN;
5539 case XML_TOK_PARTIAL:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005540 if (enc == parser->m_encoding)
5541 parser->m_eventPtr = ptr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005542 return XML_ERROR_INVALID_TOKEN;
5543 case XML_TOK_CHAR_REF:
5544 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005545 XML_Char buf[XML_ENCODE_MAX];
5546 int i;
5547 int n = XmlCharRefNumber(enc, ptr);
5548 if (n < 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005549 if (enc == parser->m_encoding)
5550 parser->m_eventPtr = ptr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005551 return XML_ERROR_BAD_CHAR_REF;
5552 }
5553 if (!isCdata
5554 && n == 0x20 /* space */
5555 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5556 break;
5557 n = XmlEncode(n, (ICHAR *)buf);
Victor Stinner93d0cb52017-08-18 23:43:54 +02005558 /* The XmlEncode() functions can never return 0 here. That
5559 * error return happens if the code point passed in is either
5560 * negative or greater than or equal to 0x110000. The
5561 * XmlCharRefNumber() functions will all return a number
5562 * strictly less than 0x110000 or a negative value if an error
5563 * occurred. The negative value is intercepted above, so
5564 * XmlEncode() is never passed a value it might return an
5565 * error for.
5566 */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005567 for (i = 0; i < n; i++) {
5568 if (!poolAppendChar(pool, buf[i]))
5569 return XML_ERROR_NO_MEMORY;
5570 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005571 }
5572 break;
5573 case XML_TOK_DATA_CHARS:
5574 if (!poolAppend(pool, enc, ptr, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005575 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005576 break;
5577 case XML_TOK_TRAILING_CR:
5578 next = ptr + enc->minBytesPerChar;
5579 /* fall through */
5580 case XML_TOK_ATTRIBUTE_VALUE_S:
5581 case XML_TOK_DATA_NEWLINE:
5582 if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005583 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005584 if (!poolAppendChar(pool, 0x20))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005585 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005586 break;
5587 case XML_TOK_ENTITY_REF:
5588 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005589 const XML_Char *name;
5590 ENTITY *entity;
5591 char checkEntityDecl;
5592 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
5593 ptr + enc->minBytesPerChar,
5594 next - enc->minBytesPerChar);
5595 if (ch) {
5596 if (!poolAppendChar(pool, ch))
5597 return XML_ERROR_NO_MEMORY;
5598 break;
5599 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07005600 name = poolStoreString(&parser->m_temp2Pool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005601 ptr + enc->minBytesPerChar,
5602 next - enc->minBytesPerChar);
5603 if (!name)
5604 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005605 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005606 poolDiscard(&parser->m_temp2Pool);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005607 /* First, determine if a check for an existing declaration is needed;
5608 if yes, check that the entity exists, and that it is internal.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005609 */
5610 if (pool == &dtd->pool) /* are we called from prolog? */
5611 checkEntityDecl =
5612#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07005613 parser->m_prologState.documentEntity &&
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005614#endif /* XML_DTD */
5615 (dtd->standalone
Benjamin Peterson4e211002018-06-26 19:25:45 -07005616 ? !parser->m_openInternalEntities
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005617 : !dtd->hasParamEntityRefs);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005618 else /* if (pool == &parser->m_tempPool): we are called from content */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005619 checkEntityDecl = !dtd->hasParamEntityRefs || dtd->standalone;
5620 if (checkEntityDecl) {
5621 if (!entity)
5622 return XML_ERROR_UNDEFINED_ENTITY;
5623 else if (!entity->is_internal)
5624 return XML_ERROR_ENTITY_DECLARED_IN_PE;
5625 }
5626 else if (!entity) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005627 /* Cannot report skipped entity here - see comments on
Benjamin Peterson4e211002018-06-26 19:25:45 -07005628 parser->m_skippedEntityHandler.
5629 if (parser->m_skippedEntityHandler)
5630 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005631 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005632 /* Cannot call the default handler because this would be
5633 out of sync with the call to the startElementHandler.
Benjamin Peterson4e211002018-06-26 19:25:45 -07005634 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005635 reportDefault(parser, enc, ptr, next);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005636 */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005637 break;
5638 }
5639 if (entity->open) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005640 if (enc == parser->m_encoding) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02005641 /* It does not appear that this line can be executed.
5642 *
5643 * The "if (entity->open)" check catches recursive entity
5644 * definitions. In order to be called with an open
5645 * entity, it must have gone through this code before and
5646 * been through the recursive call to
5647 * appendAttributeValue() some lines below. That call
5648 * sets the local encoding ("enc") to the parser's
5649 * internal encoding (internal_utf8 or internal_utf16),
5650 * which can never be the same as the principle encoding.
5651 * It doesn't appear there is another code path that gets
5652 * here with entity->open being TRUE.
5653 *
5654 * Since it is not certain that this logic is watertight,
5655 * we keep the line and merely exclude it from coverage
5656 * tests.
5657 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005658 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
Victor Stinner93d0cb52017-08-18 23:43:54 +02005659 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005660 return XML_ERROR_RECURSIVE_ENTITY_REF;
5661 }
5662 if (entity->notation) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005663 if (enc == parser->m_encoding)
5664 parser->m_eventPtr = ptr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005665 return XML_ERROR_BINARY_ENTITY_REF;
5666 }
5667 if (!entity->textPtr) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005668 if (enc == parser->m_encoding)
5669 parser->m_eventPtr = ptr;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005670 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005671 }
5672 else {
5673 enum XML_Error result;
5674 const XML_Char *textEnd = entity->textPtr + entity->textLen;
5675 entity->open = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005676 result = appendAttributeValue(parser, parser->m_internalEncoding, isCdata,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005677 (char *)entity->textPtr,
5678 (char *)textEnd, pool);
5679 entity->open = XML_FALSE;
5680 if (result)
5681 return result;
5682 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005683 }
5684 break;
5685 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02005686 /* The only token returned by XmlAttributeValueTok() that does
5687 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
5688 * Getting that would require an entity name to contain an
5689 * incomplete XML character (e.g. \xE2\x82); however previous
5690 * tokenisers will have already recognised and rejected such
5691 * names before XmlAttributeValueTok() gets a look-in. This
5692 * default case should be retained as a safety net, but the code
5693 * excluded from coverage tests.
5694 *
5695 * LCOV_EXCL_START
5696 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005697 if (enc == parser->m_encoding)
5698 parser->m_eventPtr = ptr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005699 return XML_ERROR_UNEXPECTED_STATE;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005700 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005701 }
5702 ptr = next;
5703 }
5704 /* not reached */
5705}
5706
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005707static enum XML_Error
5708storeEntityValue(XML_Parser parser,
5709 const ENCODING *enc,
5710 const char *entityTextPtr,
5711 const char *entityTextEnd)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005712{
Benjamin Peterson4e211002018-06-26 19:25:45 -07005713 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005714 STRING_POOL *pool = &(dtd->entityValuePool);
5715 enum XML_Error result = XML_ERROR_NONE;
5716#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07005717 int oldInEntityValue = parser->m_prologState.inEntityValue;
5718 parser->m_prologState.inEntityValue = 1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005719#endif /* XML_DTD */
5720 /* never return Null for the value argument in EntityDeclHandler,
5721 since this would indicate an external entity; therefore we
5722 have to make sure that entityValuePool.start is not null */
5723 if (!pool->blocks) {
5724 if (!poolGrow(pool))
5725 return XML_ERROR_NO_MEMORY;
5726 }
5727
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005728 for (;;) {
5729 const char *next;
5730 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
5731 switch (tok) {
5732 case XML_TOK_PARAM_ENTITY_REF:
5733#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07005734 if (parser->m_isParamEntity || enc != parser->m_encoding) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005735 const XML_Char *name;
5736 ENTITY *entity;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005737 name = poolStoreString(&parser->m_tempPool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005738 entityTextPtr + enc->minBytesPerChar,
5739 next - enc->minBytesPerChar);
5740 if (!name) {
5741 result = XML_ERROR_NO_MEMORY;
5742 goto endEntityValue;
5743 }
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005744 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005745 poolDiscard(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005746 if (!entity) {
5747 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
5748 /* cannot report skipped entity here - see comments on
Benjamin Peterson4e211002018-06-26 19:25:45 -07005749 parser->m_skippedEntityHandler
5750 if (parser->m_skippedEntityHandler)
5751 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005752 */
5753 dtd->keepProcessing = dtd->standalone;
5754 goto endEntityValue;
5755 }
5756 if (entity->open) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005757 if (enc == parser->m_encoding)
5758 parser->m_eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005759 result = XML_ERROR_RECURSIVE_ENTITY_REF;
5760 goto endEntityValue;
5761 }
5762 if (entity->systemId) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005763 if (parser->m_externalEntityRefHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005764 dtd->paramEntityRead = XML_FALSE;
5765 entity->open = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005766 if (!parser->m_externalEntityRefHandler(parser->m_externalEntityRefHandlerArg,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005767 0,
5768 entity->base,
5769 entity->systemId,
5770 entity->publicId)) {
5771 entity->open = XML_FALSE;
5772 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5773 goto endEntityValue;
5774 }
5775 entity->open = XML_FALSE;
5776 if (!dtd->paramEntityRead)
5777 dtd->keepProcessing = dtd->standalone;
5778 }
5779 else
5780 dtd->keepProcessing = dtd->standalone;
5781 }
5782 else {
5783 entity->open = XML_TRUE;
5784 result = storeEntityValue(parser,
Benjamin Peterson4e211002018-06-26 19:25:45 -07005785 parser->m_internalEncoding,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005786 (char *)entity->textPtr,
5787 (char *)(entity->textPtr
5788 + entity->textLen));
5789 entity->open = XML_FALSE;
5790 if (result)
5791 goto endEntityValue;
5792 }
5793 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005794 }
5795#endif /* XML_DTD */
Fred Drake31d485c2004-08-03 07:06:22 +00005796 /* In the internal subset, PE references are not legal
5797 within markup declarations, e.g entity values in this case. */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005798 parser->m_eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005799 result = XML_ERROR_PARAM_ENTITY_REF;
5800 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005801 case XML_TOK_NONE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005802 result = XML_ERROR_NONE;
5803 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005804 case XML_TOK_ENTITY_REF:
5805 case XML_TOK_DATA_CHARS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005806 if (!poolAppend(pool, enc, entityTextPtr, next)) {
5807 result = XML_ERROR_NO_MEMORY;
5808 goto endEntityValue;
5809 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005810 break;
5811 case XML_TOK_TRAILING_CR:
5812 next = entityTextPtr + enc->minBytesPerChar;
5813 /* fall through */
5814 case XML_TOK_DATA_NEWLINE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005815 if (pool->end == pool->ptr && !poolGrow(pool)) {
5816 result = XML_ERROR_NO_MEMORY;
5817 goto endEntityValue;
5818 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005819 *(pool->ptr)++ = 0xA;
5820 break;
5821 case XML_TOK_CHAR_REF:
5822 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005823 XML_Char buf[XML_ENCODE_MAX];
5824 int i;
5825 int n = XmlCharRefNumber(enc, entityTextPtr);
5826 if (n < 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005827 if (enc == parser->m_encoding)
5828 parser->m_eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005829 result = XML_ERROR_BAD_CHAR_REF;
5830 goto endEntityValue;
5831 }
5832 n = XmlEncode(n, (ICHAR *)buf);
Victor Stinner93d0cb52017-08-18 23:43:54 +02005833 /* The XmlEncode() functions can never return 0 here. That
5834 * error return happens if the code point passed in is either
5835 * negative or greater than or equal to 0x110000. The
5836 * XmlCharRefNumber() functions will all return a number
5837 * strictly less than 0x110000 or a negative value if an error
5838 * occurred. The negative value is intercepted above, so
5839 * XmlEncode() is never passed a value it might return an
5840 * error for.
5841 */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005842 for (i = 0; i < n; i++) {
5843 if (pool->end == pool->ptr && !poolGrow(pool)) {
5844 result = XML_ERROR_NO_MEMORY;
5845 goto endEntityValue;
5846 }
5847 *(pool->ptr)++ = buf[i];
5848 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005849 }
5850 break;
5851 case XML_TOK_PARTIAL:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005852 if (enc == parser->m_encoding)
5853 parser->m_eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005854 result = XML_ERROR_INVALID_TOKEN;
5855 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005856 case XML_TOK_INVALID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005857 if (enc == parser->m_encoding)
5858 parser->m_eventPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005859 result = XML_ERROR_INVALID_TOKEN;
5860 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005861 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02005862 /* This default case should be unnecessary -- all the tokens
5863 * that XmlEntityValueTok() can return have their own explicit
5864 * cases -- but should be retained for safety. We do however
5865 * exclude it from the coverage statistics.
5866 *
5867 * LCOV_EXCL_START
5868 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005869 if (enc == parser->m_encoding)
5870 parser->m_eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005871 result = XML_ERROR_UNEXPECTED_STATE;
5872 goto endEntityValue;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005873 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005874 }
5875 entityTextPtr = next;
5876 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005877endEntityValue:
5878#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07005879 parser->m_prologState.inEntityValue = oldInEntityValue;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005880#endif /* XML_DTD */
5881 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005882}
5883
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005884static void FASTCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005885normalizeLines(XML_Char *s)
5886{
5887 XML_Char *p;
5888 for (;; s++) {
5889 if (*s == XML_T('\0'))
5890 return;
5891 if (*s == 0xD)
5892 break;
5893 }
5894 p = s;
5895 do {
5896 if (*s == 0xD) {
5897 *p++ = 0xA;
5898 if (*++s == 0xA)
5899 s++;
5900 }
5901 else
5902 *p++ = *s++;
5903 } while (*s);
5904 *p = XML_T('\0');
5905}
5906
5907static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005908reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
5909 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005910{
5911 const XML_Char *target;
5912 XML_Char *data;
5913 const char *tem;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005914 if (!parser->m_processingInstructionHandler) {
5915 if (parser->m_defaultHandler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005916 reportDefault(parser, enc, start, end);
5917 return 1;
5918 }
5919 start += enc->minBytesPerChar * 2;
5920 tem = start + XmlNameLength(enc, start);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005921 target = poolStoreString(&parser->m_tempPool, enc, start, tem);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005922 if (!target)
5923 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005924 poolFinish(&parser->m_tempPool);
5925 data = poolStoreString(&parser->m_tempPool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005926 XmlSkipS(enc, tem),
5927 end - enc->minBytesPerChar*2);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005928 if (!data)
5929 return 0;
5930 normalizeLines(data);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005931 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
5932 poolClear(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005933 return 1;
5934}
5935
5936static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005937reportComment(XML_Parser parser, const ENCODING *enc,
5938 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005939{
5940 XML_Char *data;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005941 if (!parser->m_commentHandler) {
5942 if (parser->m_defaultHandler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005943 reportDefault(parser, enc, start, end);
5944 return 1;
5945 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07005946 data = poolStoreString(&parser->m_tempPool,
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005947 enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005948 start + enc->minBytesPerChar * 4,
5949 end - enc->minBytesPerChar * 3);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005950 if (!data)
5951 return 0;
5952 normalizeLines(data);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005953 parser->m_commentHandler(parser->m_handlerArg, data);
5954 poolClear(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005955 return 1;
5956}
5957
5958static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005959reportDefault(XML_Parser parser, const ENCODING *enc,
5960 const char *s, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005961{
5962 if (MUST_CONVERT(enc, s)) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02005963 enum XML_Convert_Result convert_res;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005964 const char **eventPP;
5965 const char **eventEndPP;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005966 if (enc == parser->m_encoding) {
5967 eventPP = &parser->m_eventPtr;
5968 eventEndPP = &parser->m_eventEndPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005969 }
5970 else {
Victor Stinner93d0cb52017-08-18 23:43:54 +02005971 /* To get here, two things must be true; the parser must be
5972 * using a character encoding that is not the same as the
5973 * encoding passed in, and the encoding passed in must need
5974 * conversion to the internal format (UTF-8 unless XML_UNICODE
5975 * is defined). The only occasions on which the encoding passed
5976 * in is not the same as the parser's encoding are when it is
5977 * the internal encoding (e.g. a previously defined parameter
5978 * entity, already converted to internal format). This by
5979 * definition doesn't need conversion, so the whole branch never
5980 * gets executed.
5981 *
5982 * For safety's sake we don't delete these lines and merely
5983 * exclude them from coverage statistics.
5984 *
5985 * LCOV_EXCL_START
5986 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005987 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
5988 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Victor Stinner93d0cb52017-08-18 23:43:54 +02005989 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005990 }
5991 do {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005992 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
5993 convert_res = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005994 *eventEndPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005995 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf, (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005996 *eventPP = s;
Victor Stinner23ec4b52017-06-15 00:54:36 +02005997 } while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005998 }
5999 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07006000 parser->m_defaultHandler(parser->m_handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006001}
6002
6003
6004static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006005defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
6006 XML_Bool isId, const XML_Char *value, XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006007{
6008 DEFAULT_ATTRIBUTE *att;
6009 if (value || isId) {
6010 /* The handling of default attributes gets messed up if we have
6011 a default which duplicates a non-default. */
6012 int i;
6013 for (i = 0; i < type->nDefaultAtts; i++)
6014 if (attId == type->defaultAtts[i].id)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006015 return 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006016 if (isId && !type->idAtt && !attId->xmlns)
6017 type->idAtt = attId;
6018 }
6019 if (type->nDefaultAtts == type->allocDefaultAtts) {
6020 if (type->allocDefaultAtts == 0) {
6021 type->allocDefaultAtts = 8;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006022 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(parser, type->allocDefaultAtts
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006023 * sizeof(DEFAULT_ATTRIBUTE));
Benjamin Peterson4e211002018-06-26 19:25:45 -07006024 if (!type->defaultAtts) {
6025 type->allocDefaultAtts = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006026 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006027 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006028 }
6029 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006030 DEFAULT_ATTRIBUTE *temp;
6031 int count = type->allocDefaultAtts * 2;
6032 temp = (DEFAULT_ATTRIBUTE *)
Benjamin Peterson4e211002018-06-26 19:25:45 -07006033 REALLOC(parser, type->defaultAtts, (count * sizeof(DEFAULT_ATTRIBUTE)));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006034 if (temp == NULL)
6035 return 0;
6036 type->allocDefaultAtts = count;
6037 type->defaultAtts = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006038 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006039 }
6040 att = type->defaultAtts + type->nDefaultAtts;
6041 att->id = attId;
6042 att->value = value;
6043 att->isCdata = isCdata;
6044 if (!isCdata)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006045 attId->maybeTokenized = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006046 type->nDefaultAtts += 1;
6047 return 1;
6048}
6049
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006050static int
6051setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006052{
Benjamin Peterson4e211002018-06-26 19:25:45 -07006053 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006054 const XML_Char *name;
6055 for (name = elementType->name; *name; name++) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006056 if (*name == XML_T(ASCII_COLON)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006057 PREFIX *prefix;
6058 const XML_Char *s;
6059 for (s = elementType->name; s != name; s++) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006060 if (!poolAppendChar(&dtd->pool, *s))
6061 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006062 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006063 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6064 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006065 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006066 sizeof(PREFIX));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006067 if (!prefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006068 return 0;
6069 if (prefix->name == poolStart(&dtd->pool))
6070 poolFinish(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006071 else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006072 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006073 elementType->prefix = prefix;
Benjamin Peterson3b03b092019-06-27 20:54:44 -07006074 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006075 }
6076 }
6077 return 1;
6078}
6079
6080static ATTRIBUTE_ID *
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006081getAttributeId(XML_Parser parser, const ENCODING *enc,
6082 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006083{
Benjamin Peterson4e211002018-06-26 19:25:45 -07006084 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006085 ATTRIBUTE_ID *id;
6086 const XML_Char *name;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006087 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6088 return NULL;
6089 name = poolStoreString(&dtd->pool, enc, start, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006090 if (!name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006091 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006092 /* skip quotation mark - its storage will be re-used (like in name[-1]) */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006093 ++name;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006094 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006095 if (!id)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006096 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006097 if (id->name != name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006098 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006099 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006100 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07006101 if (!parser->m_ns)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006102 ;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006103 else if (name[0] == XML_T(ASCII_x)
6104 && name[1] == XML_T(ASCII_m)
6105 && name[2] == XML_T(ASCII_l)
6106 && name[3] == XML_T(ASCII_n)
6107 && name[4] == XML_T(ASCII_s)
6108 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006109 if (name[5] == XML_T('\0'))
6110 id->prefix = &dtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006111 else
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006112 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, sizeof(PREFIX));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006113 id->xmlns = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006114 }
6115 else {
6116 int i;
6117 for (i = 0; name[i]; i++) {
Fred Drake08317ae2003-10-21 15:38:55 +00006118 /* attributes without prefix are *not* in the default namespace */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006119 if (name[i] == XML_T(ASCII_COLON)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006120 int j;
6121 for (j = 0; j < i; j++) {
6122 if (!poolAppendChar(&dtd->pool, name[j]))
6123 return NULL;
6124 }
6125 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6126 return NULL;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006127 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006128 sizeof(PREFIX));
Benjamin Peterson196d7db2016-06-11 13:28:56 -07006129 if (!id->prefix)
6130 return NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006131 if (id->prefix->name == poolStart(&dtd->pool))
6132 poolFinish(&dtd->pool);
6133 else
6134 poolDiscard(&dtd->pool);
6135 break;
6136 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006137 }
6138 }
6139 }
6140 return id;
6141}
6142
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006143#define CONTEXT_SEP XML_T(ASCII_FF)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006144
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006145static const XML_Char *
6146getContext(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006147{
Benjamin Peterson4e211002018-06-26 19:25:45 -07006148 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006149 HASH_TABLE_ITER iter;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006150 XML_Bool needSep = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006151
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006152 if (dtd->defaultPrefix.binding) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006153 int i;
6154 int len;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006155 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006156 return NULL;
6157 len = dtd->defaultPrefix.binding->uriLen;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006158 if (parser->m_namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006159 len--;
Victor Stinner93d0cb52017-08-18 23:43:54 +02006160 for (i = 0; i < len; i++) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07006161 if (!poolAppendChar(&parser->m_tempPool, dtd->defaultPrefix.binding->uri[i])) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02006162 /* Because of memory caching, I don't believe this line can be
6163 * executed.
6164 *
6165 * This is part of a loop copying the default prefix binding
6166 * URI into the parser's temporary string pool. Previously,
6167 * that URI was copied into the same string pool, with a
6168 * terminating NUL character, as part of setContext(). When
6169 * the pool was cleared, that leaves a block definitely big
6170 * enough to hold the URI on the free block list of the pool.
6171 * The URI copy in getContext() therefore cannot run out of
6172 * memory.
6173 *
6174 * If the pool is used between the setContext() and
6175 * getContext() calls, the worst it can do is leave a bigger
6176 * block on the front of the free list. Given that this is
6177 * all somewhat inobvious and program logic can be changed, we
6178 * don't delete the line but we do exclude it from the test
6179 * coverage statistics.
6180 */
6181 return NULL; /* LCOV_EXCL_LINE */
6182 }
6183 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006184 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006185 }
6186
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006187 hashTableIterInit(&iter, &(dtd->prefixes));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006188 for (;;) {
6189 int i;
6190 int len;
6191 const XML_Char *s;
6192 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6193 if (!prefix)
6194 break;
Victor Stinner93d0cb52017-08-18 23:43:54 +02006195 if (!prefix->binding) {
6196 /* This test appears to be (justifiable) paranoia. There does
6197 * not seem to be a way of injecting a prefix without a binding
6198 * that doesn't get errored long before this function is called.
6199 * The test should remain for safety's sake, so we instead
6200 * exclude the following line from the coverage statistics.
6201 */
6202 continue; /* LCOV_EXCL_LINE */
6203 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07006204 if (needSep && !poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006205 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006206 for (s = prefix->name; *s; s++)
Benjamin Peterson4e211002018-06-26 19:25:45 -07006207 if (!poolAppendChar(&parser->m_tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006208 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006209 if (!poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006210 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006211 len = prefix->binding->uriLen;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006212 if (parser->m_namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006213 len--;
6214 for (i = 0; i < len; i++)
Benjamin Peterson4e211002018-06-26 19:25:45 -07006215 if (!poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006216 return NULL;
6217 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006218 }
6219
6220
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006221 hashTableIterInit(&iter, &(dtd->generalEntities));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006222 for (;;) {
6223 const XML_Char *s;
6224 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
6225 if (!e)
6226 break;
6227 if (!e->open)
6228 continue;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006229 if (needSep && !poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006230 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006231 for (s = e->name; *s; s++)
Benjamin Peterson4e211002018-06-26 19:25:45 -07006232 if (!poolAppendChar(&parser->m_tempPool, *s))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006233 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006234 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006235 }
6236
Benjamin Peterson4e211002018-06-26 19:25:45 -07006237 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006238 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006239 return parser->m_tempPool.start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006240}
6241
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006242static XML_Bool
6243setContext(XML_Parser parser, const XML_Char *context)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006244{
Benjamin Peterson4e211002018-06-26 19:25:45 -07006245 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006246 const XML_Char *s = context;
6247
6248 while (*context != XML_T('\0')) {
6249 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
6250 ENTITY *e;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006251 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006252 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006253 e = (ENTITY *)lookup(parser, &dtd->generalEntities, poolStart(&parser->m_tempPool), 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006254 if (e)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006255 e->open = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006256 if (*s != XML_T('\0'))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006257 s++;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006258 context = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006259 poolDiscard(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006260 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006261 else if (*s == XML_T(ASCII_EQUALS)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006262 PREFIX *prefix;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006263 if (poolLength(&parser->m_tempPool) == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006264 prefix = &dtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006265 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07006266 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006267 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006268 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&parser->m_tempPool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006269 sizeof(PREFIX));
6270 if (!prefix)
6271 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006272 if (prefix->name == poolStart(&parser->m_tempPool)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006273 prefix->name = poolCopyString(&dtd->pool, prefix->name);
6274 if (!prefix->name)
6275 return XML_FALSE;
6276 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07006277 poolDiscard(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006278 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006279 for (context = s + 1;
6280 *context != CONTEXT_SEP && *context != XML_T('\0');
6281 context++)
Benjamin Peterson4e211002018-06-26 19:25:45 -07006282 if (!poolAppendChar(&parser->m_tempPool, *context))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006283 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006284 if (!poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006285 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006286 if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
6287 &parser->m_inheritedBindings) != XML_ERROR_NONE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006288 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006289 poolDiscard(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006290 if (*context != XML_T('\0'))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006291 ++context;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006292 s = context;
6293 }
6294 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07006295 if (!poolAppendChar(&parser->m_tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006296 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006297 s++;
6298 }
6299 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006300 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006301}
6302
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006303static void FASTCALL
6304normalizePublicId(XML_Char *publicId)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006305{
6306 XML_Char *p = publicId;
6307 XML_Char *s;
6308 for (s = publicId; *s; s++) {
6309 switch (*s) {
6310 case 0x20:
6311 case 0xD:
6312 case 0xA:
6313 if (p != publicId && p[-1] != 0x20)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006314 *p++ = 0x20;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006315 break;
6316 default:
6317 *p++ = *s;
6318 }
6319 }
6320 if (p != publicId && p[-1] == 0x20)
6321 --p;
6322 *p = XML_T('\0');
6323}
6324
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006325static DTD *
6326dtdCreate(const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006327{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006328 DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD));
6329 if (p == NULL)
6330 return p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006331 poolInit(&(p->pool), ms);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006332 poolInit(&(p->entityValuePool), ms);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006333 hashTableInit(&(p->generalEntities), ms);
6334 hashTableInit(&(p->elementTypes), ms);
6335 hashTableInit(&(p->attributeIds), ms);
6336 hashTableInit(&(p->prefixes), ms);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006337#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006338 p->paramEntityRead = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006339 hashTableInit(&(p->paramEntities), ms);
6340#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006341 p->defaultPrefix.name = NULL;
6342 p->defaultPrefix.binding = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006343
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006344 p->in_eldecl = XML_FALSE;
6345 p->scaffIndex = NULL;
6346 p->scaffold = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006347 p->scaffLevel = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006348 p->scaffSize = 0;
6349 p->scaffCount = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006350 p->contentStringLen = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006351
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006352 p->keepProcessing = XML_TRUE;
6353 p->hasParamEntityRefs = XML_FALSE;
6354 p->standalone = XML_FALSE;
6355 return p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006356}
6357
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006358static void
6359dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006360{
6361 HASH_TABLE_ITER iter;
6362 hashTableIterInit(&iter, &(p->elementTypes));
6363 for (;;) {
6364 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6365 if (!e)
6366 break;
6367 if (e->allocDefaultAtts != 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006368 ms->free_fcn(e->defaultAtts);
6369 }
6370 hashTableClear(&(p->generalEntities));
6371#ifdef XML_DTD
6372 p->paramEntityRead = XML_FALSE;
6373 hashTableClear(&(p->paramEntities));
6374#endif /* XML_DTD */
6375 hashTableClear(&(p->elementTypes));
6376 hashTableClear(&(p->attributeIds));
6377 hashTableClear(&(p->prefixes));
6378 poolClear(&(p->pool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006379 poolClear(&(p->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006380 p->defaultPrefix.name = NULL;
6381 p->defaultPrefix.binding = NULL;
6382
6383 p->in_eldecl = XML_FALSE;
Fred Drake08317ae2003-10-21 15:38:55 +00006384
6385 ms->free_fcn(p->scaffIndex);
6386 p->scaffIndex = NULL;
6387 ms->free_fcn(p->scaffold);
6388 p->scaffold = NULL;
6389
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006390 p->scaffLevel = 0;
6391 p->scaffSize = 0;
6392 p->scaffCount = 0;
6393 p->contentStringLen = 0;
6394
6395 p->keepProcessing = XML_TRUE;
6396 p->hasParamEntityRefs = XML_FALSE;
6397 p->standalone = XML_FALSE;
6398}
6399
6400static void
6401dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms)
6402{
6403 HASH_TABLE_ITER iter;
6404 hashTableIterInit(&iter, &(p->elementTypes));
6405 for (;;) {
6406 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6407 if (!e)
6408 break;
6409 if (e->allocDefaultAtts != 0)
6410 ms->free_fcn(e->defaultAtts);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006411 }
6412 hashTableDestroy(&(p->generalEntities));
6413#ifdef XML_DTD
6414 hashTableDestroy(&(p->paramEntities));
6415#endif /* XML_DTD */
6416 hashTableDestroy(&(p->elementTypes));
6417 hashTableDestroy(&(p->attributeIds));
6418 hashTableDestroy(&(p->prefixes));
6419 poolDestroy(&(p->pool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006420 poolDestroy(&(p->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006421 if (isDocEntity) {
Fred Drake08317ae2003-10-21 15:38:55 +00006422 ms->free_fcn(p->scaffIndex);
6423 ms->free_fcn(p->scaffold);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006424 }
6425 ms->free_fcn(p);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006426}
6427
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006428/* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6429 The new DTD has already been initialized.
6430*/
6431static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006432dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006433{
6434 HASH_TABLE_ITER iter;
6435
6436 /* Copy the prefix table. */
6437
6438 hashTableIterInit(&iter, &(oldDtd->prefixes));
6439 for (;;) {
6440 const XML_Char *name;
6441 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
6442 if (!oldP)
6443 break;
6444 name = poolCopyString(&(newDtd->pool), oldP->name);
6445 if (!name)
6446 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006447 if (!lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006448 return 0;
6449 }
6450
6451 hashTableIterInit(&iter, &(oldDtd->attributeIds));
6452
6453 /* Copy the attribute id table. */
6454
6455 for (;;) {
6456 ATTRIBUTE_ID *newA;
6457 const XML_Char *name;
6458 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
6459
6460 if (!oldA)
6461 break;
6462 /* Remember to allocate the scratch byte before the name. */
6463 if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
6464 return 0;
6465 name = poolCopyString(&(newDtd->pool), oldA->name);
6466 if (!name)
6467 return 0;
6468 ++name;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006469 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006470 sizeof(ATTRIBUTE_ID));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006471 if (!newA)
6472 return 0;
6473 newA->maybeTokenized = oldA->maybeTokenized;
6474 if (oldA->prefix) {
6475 newA->xmlns = oldA->xmlns;
6476 if (oldA->prefix == &oldDtd->defaultPrefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006477 newA->prefix = &newDtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006478 else
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006479 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006480 oldA->prefix->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006481 }
6482 }
6483
6484 /* Copy the element type table. */
6485
6486 hashTableIterInit(&iter, &(oldDtd->elementTypes));
6487
6488 for (;;) {
6489 int i;
6490 ELEMENT_TYPE *newE;
6491 const XML_Char *name;
6492 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6493 if (!oldE)
6494 break;
6495 name = poolCopyString(&(newDtd->pool), oldE->name);
6496 if (!name)
6497 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006498 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006499 sizeof(ELEMENT_TYPE));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006500 if (!newE)
6501 return 0;
6502 if (oldE->nDefaultAtts) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006503 newE->defaultAtts = (DEFAULT_ATTRIBUTE *)
6504 ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6505 if (!newE->defaultAtts) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006506 return 0;
6507 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006508 }
6509 if (oldE->idAtt)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006510 newE->idAtt = (ATTRIBUTE_ID *)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006511 lookup(oldParser, &(newDtd->attributeIds), oldE->idAtt->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006512 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
6513 if (oldE->prefix)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006514 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006515 oldE->prefix->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006516 for (i = 0; i < newE->nDefaultAtts; i++) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006517 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006518 lookup(oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006519 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
6520 if (oldE->defaultAtts[i].value) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006521 newE->defaultAtts[i].value
6522 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
6523 if (!newE->defaultAtts[i].value)
6524 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006525 }
6526 else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006527 newE->defaultAtts[i].value = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006528 }
6529 }
6530
6531 /* Copy the entity tables. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006532 if (!copyEntityTable(oldParser,
6533 &(newDtd->generalEntities),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006534 &(newDtd->pool),
6535 &(oldDtd->generalEntities)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006536 return 0;
6537
6538#ifdef XML_DTD
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006539 if (!copyEntityTable(oldParser,
6540 &(newDtd->paramEntities),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006541 &(newDtd->pool),
6542 &(oldDtd->paramEntities)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006543 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006544 newDtd->paramEntityRead = oldDtd->paramEntityRead;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006545#endif /* XML_DTD */
6546
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006547 newDtd->keepProcessing = oldDtd->keepProcessing;
6548 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006549 newDtd->standalone = oldDtd->standalone;
6550
6551 /* Don't want deep copying for scaffolding */
6552 newDtd->in_eldecl = oldDtd->in_eldecl;
6553 newDtd->scaffold = oldDtd->scaffold;
6554 newDtd->contentStringLen = oldDtd->contentStringLen;
6555 newDtd->scaffSize = oldDtd->scaffSize;
6556 newDtd->scaffLevel = oldDtd->scaffLevel;
6557 newDtd->scaffIndex = oldDtd->scaffIndex;
6558
6559 return 1;
6560} /* End dtdCopy */
6561
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006562static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006563copyEntityTable(XML_Parser oldParser,
6564 HASH_TABLE *newTable,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006565 STRING_POOL *newPool,
6566 const HASH_TABLE *oldTable)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006567{
6568 HASH_TABLE_ITER iter;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006569 const XML_Char *cachedOldBase = NULL;
6570 const XML_Char *cachedNewBase = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006571
6572 hashTableIterInit(&iter, oldTable);
6573
6574 for (;;) {
6575 ENTITY *newE;
6576 const XML_Char *name;
6577 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
6578 if (!oldE)
6579 break;
6580 name = poolCopyString(newPool, oldE->name);
6581 if (!name)
6582 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006583 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006584 if (!newE)
6585 return 0;
6586 if (oldE->systemId) {
6587 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
6588 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006589 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006590 newE->systemId = tem;
6591 if (oldE->base) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006592 if (oldE->base == cachedOldBase)
6593 newE->base = cachedNewBase;
6594 else {
6595 cachedOldBase = oldE->base;
6596 tem = poolCopyString(newPool, cachedOldBase);
6597 if (!tem)
6598 return 0;
6599 cachedNewBase = newE->base = tem;
6600 }
6601 }
6602 if (oldE->publicId) {
6603 tem = poolCopyString(newPool, oldE->publicId);
6604 if (!tem)
6605 return 0;
6606 newE->publicId = tem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006607 }
6608 }
6609 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006610 const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr,
6611 oldE->textLen);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006612 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006613 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006614 newE->textPtr = tem;
6615 newE->textLen = oldE->textLen;
6616 }
6617 if (oldE->notation) {
6618 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
6619 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006620 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006621 newE->notation = tem;
6622 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006623 newE->is_param = oldE->is_param;
6624 newE->is_internal = oldE->is_internal;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006625 }
6626 return 1;
6627}
6628
Fred Drake08317ae2003-10-21 15:38:55 +00006629#define INIT_POWER 6
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006630
Fred Drake08317ae2003-10-21 15:38:55 +00006631static XML_Bool FASTCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006632keyeq(KEY s1, KEY s2)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006633{
6634 for (; *s1 == *s2; s1++, s2++)
6635 if (*s1 == 0)
Fred Drake08317ae2003-10-21 15:38:55 +00006636 return XML_TRUE;
6637 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006638}
6639
Victor Stinner5ff71322017-06-21 14:39:22 +02006640static size_t
6641keylen(KEY s)
6642{
6643 size_t len = 0;
6644 for (; *s; s++, len++);
6645 return len;
6646}
6647
6648static void
6649copy_salt_to_sipkey(XML_Parser parser, struct sipkey * key)
6650{
6651 key->k[0] = 0;
6652 key->k[1] = get_hash_secret_salt(parser);
6653}
6654
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006655static unsigned long FASTCALL
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006656hash(XML_Parser parser, KEY s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006657{
Victor Stinner5ff71322017-06-21 14:39:22 +02006658 struct siphash state;
6659 struct sipkey key;
Victor Stinner5ff71322017-06-21 14:39:22 +02006660 (void)sip24_valid;
6661 copy_salt_to_sipkey(parser, &key);
6662 sip24_init(&state, &key);
6663 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
6664 return (unsigned long)sip24_final(&state);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006665}
6666
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006667static NAMED *
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006668lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006669{
6670 size_t i;
6671 if (table->size == 0) {
6672 size_t tsize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006673 if (!createSize)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006674 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006675 table->power = INIT_POWER;
6676 /* table->size is a power of 2 */
6677 table->size = (size_t)1 << INIT_POWER;
6678 tsize = table->size * sizeof(NAMED *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006679 table->v = (NAMED **)table->mem->malloc_fcn(tsize);
Fred Drake31d485c2004-08-03 07:06:22 +00006680 if (!table->v) {
6681 table->size = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006682 return NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00006683 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006684 memset(table->v, 0, tsize);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006685 i = hash(parser, name) & ((unsigned long)table->size - 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006686 }
6687 else {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006688 unsigned long h = hash(parser, name);
Fred Drake08317ae2003-10-21 15:38:55 +00006689 unsigned long mask = (unsigned long)table->size - 1;
6690 unsigned char step = 0;
6691 i = h & mask;
6692 while (table->v[i]) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006693 if (keyeq(name, table->v[i]->name))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006694 return table->v[i];
Fred Drake08317ae2003-10-21 15:38:55 +00006695 if (!step)
6696 step = PROBE_STEP(h, mask, table->power);
6697 i < step ? (i += table->size - step) : (i -= step);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006698 }
6699 if (!createSize)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006700 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006701
6702 /* check for overflow (table is half full) */
6703 if (table->used >> (table->power - 1)) {
6704 unsigned char newPower = table->power + 1;
6705 size_t newSize = (size_t)1 << newPower;
6706 unsigned long newMask = (unsigned long)newSize - 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006707 size_t tsize = newSize * sizeof(NAMED *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006708 NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006709 if (!newV)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006710 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006711 memset(newV, 0, tsize);
6712 for (i = 0; i < table->size; i++)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006713 if (table->v[i]) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006714 unsigned long newHash = hash(parser, table->v[i]->name);
Fred Drake08317ae2003-10-21 15:38:55 +00006715 size_t j = newHash & newMask;
6716 step = 0;
6717 while (newV[j]) {
6718 if (!step)
6719 step = PROBE_STEP(newHash, newMask, newPower);
6720 j < step ? (j += newSize - step) : (j -= step);
6721 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006722 newV[j] = table->v[i];
6723 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006724 table->mem->free_fcn(table->v);
6725 table->v = newV;
Fred Drake08317ae2003-10-21 15:38:55 +00006726 table->power = newPower;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006727 table->size = newSize;
Fred Drake08317ae2003-10-21 15:38:55 +00006728 i = h & newMask;
6729 step = 0;
6730 while (table->v[i]) {
6731 if (!step)
6732 step = PROBE_STEP(h, newMask, newPower);
6733 i < step ? (i += newSize - step) : (i -= step);
6734 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006735 }
6736 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006737 table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006738 if (!table->v[i])
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006739 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006740 memset(table->v[i], 0, createSize);
6741 table->v[i]->name = name;
6742 (table->used)++;
6743 return table->v[i];
6744}
6745
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006746static void FASTCALL
6747hashTableClear(HASH_TABLE *table)
6748{
6749 size_t i;
6750 for (i = 0; i < table->size; i++) {
Fred Drake08317ae2003-10-21 15:38:55 +00006751 table->mem->free_fcn(table->v[i]);
6752 table->v[i] = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006753 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006754 table->used = 0;
6755}
6756
6757static void FASTCALL
6758hashTableDestroy(HASH_TABLE *table)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006759{
6760 size_t i;
Fred Drake08317ae2003-10-21 15:38:55 +00006761 for (i = 0; i < table->size; i++)
6762 table->mem->free_fcn(table->v[i]);
6763 table->mem->free_fcn(table->v);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006764}
6765
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006766static void FASTCALL
6767hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006768{
Fred Drake08317ae2003-10-21 15:38:55 +00006769 p->power = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006770 p->size = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006771 p->used = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006772 p->v = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006773 p->mem = ms;
6774}
6775
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006776static void FASTCALL
6777hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006778{
6779 iter->p = table->v;
6780 iter->end = iter->p + table->size;
6781}
6782
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006783static NAMED * FASTCALL
6784hashTableIterNext(HASH_TABLE_ITER *iter)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006785{
6786 while (iter->p != iter->end) {
6787 NAMED *tem = *(iter->p)++;
6788 if (tem)
6789 return tem;
6790 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006791 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006792}
6793
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006794static void FASTCALL
6795poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006796{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006797 pool->blocks = NULL;
6798 pool->freeBlocks = NULL;
6799 pool->start = NULL;
6800 pool->ptr = NULL;
6801 pool->end = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006802 pool->mem = ms;
6803}
6804
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006805static void FASTCALL
6806poolClear(STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006807{
6808 if (!pool->freeBlocks)
6809 pool->freeBlocks = pool->blocks;
6810 else {
6811 BLOCK *p = pool->blocks;
6812 while (p) {
6813 BLOCK *tem = p->next;
6814 p->next = pool->freeBlocks;
6815 pool->freeBlocks = p;
6816 p = tem;
6817 }
6818 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006819 pool->blocks = NULL;
6820 pool->start = NULL;
6821 pool->ptr = NULL;
6822 pool->end = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006823}
6824
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006825static void FASTCALL
6826poolDestroy(STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006827{
6828 BLOCK *p = pool->blocks;
6829 while (p) {
6830 BLOCK *tem = p->next;
6831 pool->mem->free_fcn(p);
6832 p = tem;
6833 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006834 p = pool->freeBlocks;
6835 while (p) {
6836 BLOCK *tem = p->next;
6837 pool->mem->free_fcn(p);
6838 p = tem;
6839 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006840}
6841
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006842static XML_Char *
6843poolAppend(STRING_POOL *pool, const ENCODING *enc,
6844 const char *ptr, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006845{
6846 if (!pool->ptr && !poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006847 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006848 for (;;) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02006849 const enum XML_Convert_Result convert_res = XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
6850 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006851 break;
6852 if (!poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006853 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006854 }
6855 return pool->start;
6856}
6857
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006858static const XML_Char * FASTCALL
6859poolCopyString(STRING_POOL *pool, const XML_Char *s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006860{
6861 do {
6862 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006863 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006864 } while (*s++);
6865 s = pool->start;
6866 poolFinish(pool);
6867 return s;
6868}
6869
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006870static const XML_Char *
6871poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006872{
Victor Stinner93d0cb52017-08-18 23:43:54 +02006873 if (!pool->ptr && !poolGrow(pool)) {
6874 /* The following line is unreachable given the current usage of
6875 * poolCopyStringN(). Currently it is called from exactly one
6876 * place to copy the text of a simple general entity. By that
6877 * point, the name of the entity is already stored in the pool, so
6878 * pool->ptr cannot be NULL.
6879 *
6880 * If poolCopyStringN() is used elsewhere as it well might be,
6881 * this line may well become executable again. Regardless, this
6882 * sort of check shouldn't be removed lightly, so we just exclude
6883 * it from the coverage statistics.
6884 */
6885 return NULL; /* LCOV_EXCL_LINE */
6886 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006887 for (; n > 0; --n, s++) {
6888 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006889 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006890 }
6891 s = pool->start;
6892 poolFinish(pool);
6893 return s;
6894}
6895
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006896static const XML_Char * FASTCALL
6897poolAppendString(STRING_POOL *pool, const XML_Char *s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006898{
6899 while (*s) {
6900 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006901 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006902 s++;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006903 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006904 return pool->start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006905}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006906
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006907static XML_Char *
6908poolStoreString(STRING_POOL *pool, const ENCODING *enc,
6909 const char *ptr, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006910{
6911 if (!poolAppend(pool, enc, ptr, end))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006912 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006913 if (pool->ptr == pool->end && !poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006914 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006915 *(pool->ptr)++ = 0;
6916 return pool->start;
6917}
6918
Victor Stinner5ff71322017-06-21 14:39:22 +02006919static size_t
6920poolBytesToAllocateFor(int blockSize)
6921{
6922 /* Unprotected math would be:
6923 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
6924 **
6925 ** Detect overflow, avoiding _signed_ overflow undefined behavior
6926 ** For a + b * c we check b * c in isolation first, so that addition of a
6927 ** on top has no chance of making us accept a small non-negative number
6928 */
6929 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
6930
6931 if (blockSize <= 0)
6932 return 0;
6933
6934 if (blockSize > (int)(INT_MAX / stretch))
6935 return 0;
6936
6937 {
6938 const int stretchedBlockSize = blockSize * (int)stretch;
6939 const int bytesToAllocate = (int)(
6940 offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
6941 if (bytesToAllocate < 0)
6942 return 0;
6943
6944 return (size_t)bytesToAllocate;
6945 }
6946}
6947
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006948static XML_Bool FASTCALL
6949poolGrow(STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006950{
6951 if (pool->freeBlocks) {
6952 if (pool->start == 0) {
6953 pool->blocks = pool->freeBlocks;
6954 pool->freeBlocks = pool->freeBlocks->next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006955 pool->blocks->next = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006956 pool->start = pool->blocks->s;
6957 pool->end = pool->start + pool->blocks->size;
6958 pool->ptr = pool->start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006959 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006960 }
6961 if (pool->end - pool->start < pool->freeBlocks->size) {
6962 BLOCK *tem = pool->freeBlocks->next;
6963 pool->freeBlocks->next = pool->blocks;
6964 pool->blocks = pool->freeBlocks;
6965 pool->freeBlocks = tem;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006966 memcpy(pool->blocks->s, pool->start,
6967 (pool->end - pool->start) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006968 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
6969 pool->start = pool->blocks->s;
6970 pool->end = pool->start + pool->blocks->size;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006971 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006972 }
6973 }
6974 if (pool->blocks && pool->start == pool->blocks->s) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02006975 BLOCK *temp;
6976 int blockSize = (int)((unsigned)(pool->end - pool->start)*2U);
Victor Stinner5ff71322017-06-21 14:39:22 +02006977 size_t bytesToAllocate;
Victor Stinner23ec4b52017-06-15 00:54:36 +02006978
Benjamin Peterson4e211002018-06-26 19:25:45 -07006979 /* NOTE: Needs to be calculated prior to calling `realloc`
6980 to avoid dangling pointers: */
Victor Stinner93d0cb52017-08-18 23:43:54 +02006981 const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
6982
6983 if (blockSize < 0) {
6984 /* This condition traps a situation where either more than
6985 * INT_MAX/2 bytes have already been allocated. This isn't
6986 * readily testable, since it is unlikely that an average
6987 * machine will have that much memory, so we exclude it from the
6988 * coverage statistics.
6989 */
6990 return XML_FALSE; /* LCOV_EXCL_LINE */
6991 }
Victor Stinner23ec4b52017-06-15 00:54:36 +02006992
Victor Stinner5ff71322017-06-21 14:39:22 +02006993 bytesToAllocate = poolBytesToAllocateFor(blockSize);
6994 if (bytesToAllocate == 0)
6995 return XML_FALSE;
6996
Victor Stinner23ec4b52017-06-15 00:54:36 +02006997 temp = (BLOCK *)
Victor Stinner5ff71322017-06-21 14:39:22 +02006998 pool->mem->realloc_fcn(pool->blocks, (unsigned)bytesToAllocate);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006999 if (temp == NULL)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007000 return XML_FALSE;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07007001 pool->blocks = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007002 pool->blocks->size = blockSize;
Victor Stinner93d0cb52017-08-18 23:43:54 +02007003 pool->ptr = pool->blocks->s + offsetInsideBlock;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007004 pool->start = pool->blocks->s;
7005 pool->end = pool->start + blockSize;
7006 }
7007 else {
7008 BLOCK *tem;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00007009 int blockSize = (int)(pool->end - pool->start);
Victor Stinner5ff71322017-06-21 14:39:22 +02007010 size_t bytesToAllocate;
Victor Stinner23ec4b52017-06-15 00:54:36 +02007011
Victor Stinner93d0cb52017-08-18 23:43:54 +02007012 if (blockSize < 0) {
7013 /* This condition traps a situation where either more than
7014 * INT_MAX bytes have already been allocated (which is prevented
7015 * by various pieces of program logic, not least this one, never
7016 * mind the unlikelihood of actually having that much memory) or
7017 * the pool control fields have been corrupted (which could
7018 * conceivably happen in an extremely buggy user handler
7019 * function). Either way it isn't readily testable, so we
7020 * exclude it from the coverage statistics.
7021 */
7022 return XML_FALSE; /* LCOV_EXCL_LINE */
7023 }
Victor Stinner23ec4b52017-06-15 00:54:36 +02007024
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007025 if (blockSize < INIT_BLOCK_SIZE)
7026 blockSize = INIT_BLOCK_SIZE;
Victor Stinner5ff71322017-06-21 14:39:22 +02007027 else {
7028 /* Detect overflow, avoiding _signed_ overflow undefined behavior */
7029 if ((int)((unsigned)blockSize * 2U) < 0) {
7030 return XML_FALSE;
7031 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007032 blockSize *= 2;
Victor Stinner5ff71322017-06-21 14:39:22 +02007033 }
7034
7035 bytesToAllocate = poolBytesToAllocateFor(blockSize);
7036 if (bytesToAllocate == 0)
7037 return XML_FALSE;
7038
7039 tem = (BLOCK *)pool->mem->malloc_fcn(bytesToAllocate);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007040 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007041 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007042 tem->size = blockSize;
7043 tem->next = pool->blocks;
7044 pool->blocks = tem;
7045 if (pool->ptr != pool->start)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007046 memcpy(tem->s, pool->start,
7047 (pool->ptr - pool->start) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007048 pool->ptr = tem->s + (pool->ptr - pool->start);
7049 pool->start = tem->s;
7050 pool->end = tem->s + blockSize;
7051 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007052 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007053}
7054
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007055static int FASTCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007056nextScaffoldPart(XML_Parser parser)
7057{
Benjamin Peterson4e211002018-06-26 19:25:45 -07007058 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007059 CONTENT_SCAFFOLD * me;
7060 int next;
7061
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007062 if (!dtd->scaffIndex) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07007063 dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007064 if (!dtd->scaffIndex)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007065 return -1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007066 dtd->scaffIndex[0] = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007067 }
7068
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007069 if (dtd->scaffCount >= dtd->scaffSize) {
7070 CONTENT_SCAFFOLD *temp;
7071 if (dtd->scaffold) {
7072 temp = (CONTENT_SCAFFOLD *)
Benjamin Peterson4e211002018-06-26 19:25:45 -07007073 REALLOC(parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007074 if (temp == NULL)
7075 return -1;
7076 dtd->scaffSize *= 2;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007077 }
7078 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07007079 temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007080 * sizeof(CONTENT_SCAFFOLD));
7081 if (temp == NULL)
7082 return -1;
7083 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007084 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007085 dtd->scaffold = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007086 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007087 next = dtd->scaffCount++;
7088 me = &dtd->scaffold[next];
7089 if (dtd->scaffLevel) {
7090 CONTENT_SCAFFOLD *parent = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel-1]];
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007091 if (parent->lastchild) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007092 dtd->scaffold[parent->lastchild].nextsib = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007093 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007094 if (!parent->childcnt)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007095 parent->firstchild = next;
7096 parent->lastchild = next;
7097 parent->childcnt++;
7098 }
7099 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
7100 return next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007101}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007102
7103static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007104build_node(XML_Parser parser,
7105 int src_node,
7106 XML_Content *dest,
7107 XML_Content **contpos,
7108 XML_Char **strpos)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007109{
Benjamin Peterson4e211002018-06-26 19:25:45 -07007110 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007111 dest->type = dtd->scaffold[src_node].type;
7112 dest->quant = dtd->scaffold[src_node].quant;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007113 if (dest->type == XML_CTYPE_NAME) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007114 const XML_Char *src;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007115 dest->name = *strpos;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007116 src = dtd->scaffold[src_node].name;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007117 for (;;) {
7118 *(*strpos)++ = *src;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007119 if (!*src)
7120 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007121 src++;
7122 }
7123 dest->numchildren = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007124 dest->children = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007125 }
7126 else {
7127 unsigned int i;
7128 int cn;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007129 dest->numchildren = dtd->scaffold[src_node].childcnt;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007130 dest->children = *contpos;
7131 *contpos += dest->numchildren;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007132 for (i = 0, cn = dtd->scaffold[src_node].firstchild;
7133 i < dest->numchildren;
7134 i++, cn = dtd->scaffold[cn].nextsib) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007135 build_node(parser, cn, &(dest->children[i]), contpos, strpos);
7136 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007137 dest->name = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007138 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007139}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007140
7141static XML_Content *
7142build_model (XML_Parser parser)
7143{
Benjamin Peterson4e211002018-06-26 19:25:45 -07007144 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007145 XML_Content *ret;
7146 XML_Content *cpos;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007147 XML_Char * str;
7148 int allocsize = (dtd->scaffCount * sizeof(XML_Content)
7149 + (dtd->contentStringLen * sizeof(XML_Char)));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007150
Benjamin Peterson4e211002018-06-26 19:25:45 -07007151 ret = (XML_Content *)MALLOC(parser, allocsize);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007152 if (!ret)
7153 return NULL;
7154
7155 str = (XML_Char *) (&ret[dtd->scaffCount]);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007156 cpos = &ret[1];
7157
7158 build_node(parser, 0, ret, &cpos, &str);
7159 return ret;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007160}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007161
7162static ELEMENT_TYPE *
7163getElementType(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007164 const ENCODING *enc,
7165 const char *ptr,
7166 const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007167{
Benjamin Peterson4e211002018-06-26 19:25:45 -07007168 DTD * const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007169 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007170 ELEMENT_TYPE *ret;
7171
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007172 if (!name)
7173 return NULL;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07007174 ret = (ELEMENT_TYPE *) lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007175 if (!ret)
7176 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007177 if (ret->name != name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007178 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007179 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007180 poolFinish(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007181 if (!setElementTypePrefix(parser, ret))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007182 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007183 }
7184 return ret;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007185}
Victor Stinner93d0cb52017-08-18 23:43:54 +02007186
7187static XML_Char *
7188copyString(const XML_Char *s,
7189 const XML_Memory_Handling_Suite *memsuite)
7190{
7191 int charsRequired = 0;
7192 XML_Char *result;
7193
7194 /* First determine how long the string is */
7195 while (s[charsRequired] != 0) {
7196 charsRequired++;
7197 }
7198 /* Include the terminator */
7199 charsRequired++;
7200
7201 /* Now allocate space for the copy */
7202 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
7203 if (result == NULL)
7204 return NULL;
7205 /* Copy the original into place */
7206 memcpy(result, s, charsRequired * sizeof(XML_Char));
7207 return result;
7208}