blob: e740f0e19c7d4a1b7a36b46491a2157da334f408 [file] [log] [blame]
/* f2d0ab6d1d4422a08cf1cf3bbdfba96b49dea42fb5ff4615e03a2a25c306e769 (2.2.8+)
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
   Copyright (c) 2000-2017 Expat development team
   Licensed under the MIT license:

   Permission is  hereby granted,  free of charge,  to any  person obtaining
   a  copy  of  this  software   and  associated  documentation  files  (the
   "Software"),  to  deal in  the  Software  without restriction,  including
   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit
   persons  to whom  the Software  is  furnished to  do so,  subject to  the
   following conditions:

   The above copyright  notice and this permission notice  shall be included
   in all copies or substantial portions of the Software.

   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
32
Benjamin Peterson52b94082019-09-25 21:33:58 -070033#if ! defined(_GNU_SOURCE)
34# define _GNU_SOURCE 1 /* syscall prototype */
35#endif
36
37#ifdef _WIN32
38/* force stdlib to define rand_s() */
39# define _CRT_RAND_S
Victor Stinner93d0cb52017-08-18 23:43:54 +020040#endif
Victor Stinner5ff71322017-06-21 14:39:22 +020041
#include <stddef.h>
#include <stdint.h> /* uintptr_t */
#include <string.h> /* memset(), memcpy() */
#include <assert.h>
#include <limits.h> /* UINT_MAX */
#include <stdio.h>  /* fprintf */
#include <stdlib.h> /* getenv, rand_s */
Victor Stinner23ec4b52017-06-15 00:54:36 +020048
Victor Stinner5ff71322017-06-21 14:39:22 +020049#ifdef _WIN32
Benjamin Peterson52b94082019-09-25 21:33:58 -070050# define getpid GetCurrentProcessId
Victor Stinner23ec4b52017-06-15 00:54:36 +020051#else
Benjamin Peterson52b94082019-09-25 21:33:58 -070052# include <sys/time.h> /* gettimeofday() */
53# include <sys/types.h> /* getpid() */
54# include <unistd.h> /* getpid() */
55# include <fcntl.h> /* O_RDONLY */
56# include <errno.h>
Victor Stinner23ec4b52017-06-15 00:54:36 +020057#endif
58
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070059#define XML_BUILDING_EXPAT 1
60
Victor Stinner5ff71322017-06-21 14:39:22 +020061#ifdef _WIN32
Benjamin Peterson52b94082019-09-25 21:33:58 -070062# include "winconfig.h"
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070063#elif defined(HAVE_EXPAT_CONFIG_H)
Benjamin Peterson52b94082019-09-25 21:33:58 -070064# include <expat_config.h>
Victor Stinner5ff71322017-06-21 14:39:22 +020065#endif /* ndef _WIN32 */
Christian Heimesaa152762013-12-06 23:43:50 +010066
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070067#include "ascii.h"
Fred Drake08317ae2003-10-21 15:38:55 +000068#include "expat.h"
Victor Stinner5ff71322017-06-21 14:39:22 +020069#include "siphash.h"
Fred Drake08317ae2003-10-21 15:38:55 +000070
Victor Stinner93d0cb52017-08-18 23:43:54 +020071#if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
Benjamin Peterson52b94082019-09-25 21:33:58 -070072# if defined(HAVE_GETRANDOM)
73# include <sys/random.h> /* getrandom */
74# else
75# include <unistd.h> /* syscall */
76# include <sys/syscall.h> /* SYS_getrandom */
77# endif
78# if ! defined(GRND_NONBLOCK)
79# define GRND_NONBLOCK 0x0001
80# endif /* defined(GRND_NONBLOCK) */
81#endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
Victor Stinner93d0cb52017-08-18 23:43:54 +020082
Benjamin Peterson52b94082019-09-25 21:33:58 -070083#if defined(HAVE_LIBBSD) \
Victor Stinner93d0cb52017-08-18 23:43:54 +020084 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
Benjamin Peterson52b94082019-09-25 21:33:58 -070085# include <bsd/stdlib.h>
Victor Stinner93d0cb52017-08-18 23:43:54 +020086#endif
87
Benjamin Peterson52b94082019-09-25 21:33:58 -070088#if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
89# define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
Victor Stinner93d0cb52017-08-18 23:43:54 +020090#endif
91
/* Compile-time guard: refuse to build when none of the entropy-source
   macros tested below is defined.  Defining XML_POOR_ENTROPY bypasses the
   check. */
#if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM)            \
    && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)            \
    && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32)                         \
    && ! defined(XML_POOR_ENTROPY)
#  error You do not have support for any sources of high quality entropy \
    enabled.  For end user security, that is probably not what you want. \
    \
    Your options include: \
      * Linux + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
      * Linux + glibc <2.25 (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
      * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
      * BSD / macOS <10.7 (arc4random): HAVE_ARC4RANDOM, \
      * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
      * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
      * Linux / BSD / macOS (/dev/urandom): XML_DEV_URANDOM, \
      * Windows (rand_s): _WIN32. \
    \
    If you insist on not using any of these, bypass this error by defining \
    XML_POOR_ENTROPY; you have been warned. \
    \
    If you have reasons to patch this detection code away or need changes \
    to the build system, please open a bug.  Thank you!
#endif
115
/* Bind the generic Xml* names, XML_ENCODE_MAX, MUST_CONVERT and ICHAR to
   their UTF-16 variants when XML_UNICODE is defined, otherwise to the UTF-8
   variants. */
#ifdef XML_UNICODE
#  define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#  define XmlConvert XmlUtf16Convert
#  define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#  define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
#  define XmlEncode XmlUtf16Encode
/* Conversion is required unless the encoding is flagged isUtf16 and the
   address of s is even.  The address is inspected through an integer cast;
   subtracting a null pointer from s would be undefined behaviour. */
#  define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
typedef unsigned short ICHAR;
#else
#  define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#  define XmlConvert XmlUtf8Convert
#  define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#  define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
#  define XmlEncode XmlUtf8Encode
/* Conversion is required unless the encoding is flagged isUtf8. */
#  define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
typedef char ICHAR;
#endif
135
/* Without XML_NS, map every *NS entry point onto its non-namespace
   counterpart. */
#ifndef XML_NS

#  define XmlInitEncodingNS XmlInitEncoding
#  define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
#  undef XmlGetInternalEncodingNS
#  define XmlGetInternalEncodingNS XmlGetInternalEncoding
#  define XmlParseXmlDeclNS XmlParseXmlDecl

#endif
145
/* XML_T(x) casts a character constant to the wide character type selected
   by XML_UNICODE / XML_UNICODE_WCHAR_T; XML_L(x) adds the L prefix only in
   the wchar_t configuration.  Both are the identity when XML_UNICODE is not
   defined. */
#ifdef XML_UNICODE

#  ifdef XML_UNICODE_WCHAR_T
#    define XML_T(x) (const wchar_t) x
#    define XML_L(x) L##x
#  else
#    define XML_T(x) (const unsigned short)x
#    define XML_L(x) x
#  endif

#else

#  define XML_T(x) x
#  define XML_L(x) x

#endif
162
/* Round up n to be a multiple of sz, where sz is a power of 2. */
#define ROUND_UP(n, sz) (((n) + ((sz)-1)) & ~((sz)-1))

/* Do safe (NULL-aware) pointer arithmetic: yields p - q when both pointers
   are non-null, and 0 otherwise. */
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
168
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000169#include "internal.h"
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000170#include "xmltok.h"
171#include "xmlrole.h"
172
173typedef const XML_Char *KEY;
174
175typedef struct {
176 KEY name;
177} NAMED;
178
179typedef struct {
180 NAMED **v;
Fred Drake08317ae2003-10-21 15:38:55 +0000181 unsigned char power;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000182 size_t size;
183 size_t used;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000184 const XML_Memory_Handling_Suite *mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000185} HASH_TABLE;
186
Benjamin Peterson52b94082019-09-25 21:33:58 -0700187static size_t keylen(KEY s);
Fred Drake08317ae2003-10-21 15:38:55 +0000188
Benjamin Peterson52b94082019-09-25 21:33:58 -0700189static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
Fred Drake08317ae2003-10-21 15:38:55 +0000190
/* For probing (after a collision) we need a step size relatively prime
   to the hash table size, which is a power of 2. We use double-hashing,
   since we can calculate a second hash value cheaply by taking those bits
   of the first hash value that were discarded (masked out) when the table
   index was calculated: index = hash & mask, where mask = table->size - 1.
   We limit the maximum step size to table->size / 4 (mask >> 2) and make
   it odd, since odd numbers are always relatively prime to a power of 2.
*/
#define SECOND_HASH(hash, mask, power)                                         \
  ((((hash) & ~(mask)) >> ((power)-1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power)                                          \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
203
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000204typedef struct {
205 NAMED **p;
206 NAMED **end;
207} HASH_TABLE_ITER;
208
/* Initial sizes for the parser's buffers and tables. */
#define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE 1024
#define INIT_ATTS_SIZE 16
#define INIT_ATTS_VERSION 0xFFFFFFFF
#define INIT_BLOCK_SIZE 1024
#define INIT_BUFFER_SIZE 1024

#define EXPAND_SPARE 24
217
218typedef struct binding {
219 struct prefix *prefix;
220 struct binding *nextTagBinding;
221 struct binding *prevPrefixBinding;
222 const struct attribute_id *attId;
223 XML_Char *uri;
224 int uriLen;
225 int uriAlloc;
226} BINDING;
227
228typedef struct prefix {
229 const XML_Char *name;
230 BINDING *binding;
231} PREFIX;
232
233typedef struct {
234 const XML_Char *str;
235 const XML_Char *localPart;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000236 const XML_Char *prefix;
237 int strLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000238 int uriLen;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000239 int prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000240} TAG_NAME;
241
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000242/* TAG represents an open element.
243 The name of the element is stored in both the document and API
244 encodings. The memory buffer 'buf' is a separately-allocated
245 memory area which stores the name. During the XML_Parse()/
246 XMLParseBuffer() when the element is open, the memory for the 'raw'
247 version of the name (in the document encoding) is shared with the
248 document buffer. If the element is open across calls to
249 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
250 contain the 'raw' name as well.
251
252 A parser re-uses these structures, maintaining a list of allocated
253 TAG objects in a free list.
254*/
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000255typedef struct tag {
Benjamin Peterson52b94082019-09-25 21:33:58 -0700256 struct tag *parent; /* parent of this element */
257 const char *rawName; /* tagName in the original encoding */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000258 int rawNameLength;
Benjamin Peterson52b94082019-09-25 21:33:58 -0700259 TAG_NAME name; /* tagName in the API encoding */
260 char *buf; /* buffer for name components */
261 char *bufEnd; /* end of the buffer */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000262 BINDING *bindings;
263} TAG;
264
265typedef struct {
266 const XML_Char *name;
267 const XML_Char *textPtr;
Benjamin Peterson52b94082019-09-25 21:33:58 -0700268 int textLen; /* length in XML_Chars */
269 int processed; /* # of processed bytes - when suspended */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000270 const XML_Char *systemId;
271 const XML_Char *base;
272 const XML_Char *publicId;
273 const XML_Char *notation;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000274 XML_Bool open;
275 XML_Bool is_param;
276 XML_Bool is_internal; /* true if declared in internal subset outside PE */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000277} ENTITY;
278
279typedef struct {
Benjamin Peterson52b94082019-09-25 21:33:58 -0700280 enum XML_Content_Type type;
281 enum XML_Content_Quant quant;
282 const XML_Char *name;
283 int firstchild;
284 int lastchild;
285 int childcnt;
286 int nextsib;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000287} CONTENT_SCAFFOLD;
288
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000289#define INIT_SCAFFOLD_ELEMENTS 32
290
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000291typedef struct block {
292 struct block *next;
293 int size;
294 XML_Char s[1];
295} BLOCK;
296
297typedef struct {
298 BLOCK *blocks;
299 BLOCK *freeBlocks;
300 const XML_Char *end;
301 XML_Char *ptr;
302 XML_Char *start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000303 const XML_Memory_Handling_Suite *mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000304} STRING_POOL;
305
306/* The XML_Char before the name is used to determine whether
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000307 an attribute has been specified. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000308typedef struct attribute_id {
309 XML_Char *name;
310 PREFIX *prefix;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000311 XML_Bool maybeTokenized;
312 XML_Bool xmlns;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000313} ATTRIBUTE_ID;
314
315typedef struct {
316 const ATTRIBUTE_ID *id;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000317 XML_Bool isCdata;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000318 const XML_Char *value;
319} DEFAULT_ATTRIBUTE;
320
321typedef struct {
Fred Drake08317ae2003-10-21 15:38:55 +0000322 unsigned long version;
323 unsigned long hash;
324 const XML_Char *uriName;
325} NS_ATT;
326
327typedef struct {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000328 const XML_Char *name;
329 PREFIX *prefix;
330 const ATTRIBUTE_ID *idAtt;
331 int nDefaultAtts;
332 int allocDefaultAtts;
333 DEFAULT_ATTRIBUTE *defaultAtts;
334} ELEMENT_TYPE;
335
336typedef struct {
337 HASH_TABLE generalEntities;
338 HASH_TABLE elementTypes;
339 HASH_TABLE attributeIds;
340 HASH_TABLE prefixes;
341 STRING_POOL pool;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000342 STRING_POOL entityValuePool;
343 /* false once a parameter entity reference has been skipped */
344 XML_Bool keepProcessing;
345 /* true once an internal or external PE reference has been encountered;
346 this includes the reference to an external subset */
347 XML_Bool hasParamEntityRefs;
348 XML_Bool standalone;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000349#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000350 /* indicates if external PE has been read */
351 XML_Bool paramEntityRead;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000352 HASH_TABLE paramEntities;
353#endif /* XML_DTD */
354 PREFIX defaultPrefix;
355 /* === scaffolding for building content model === */
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000356 XML_Bool in_eldecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000357 CONTENT_SCAFFOLD *scaffold;
358 unsigned contentStringLen;
359 unsigned scaffSize;
360 unsigned scaffCount;
361 int scaffLevel;
362 int *scaffIndex;
363} DTD;
364
365typedef struct open_internal_entity {
366 const char *internalEventPtr;
367 const char *internalEventEndPtr;
368 struct open_internal_entity *next;
369 ENTITY *entity;
Fred Drake31d485c2004-08-03 07:06:22 +0000370 int startTagLevel;
371 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000372} OPEN_INTERNAL_ENTITY;
373
Benjamin Peterson52b94082019-09-25 21:33:58 -0700374typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
375 const char *end, const char **endPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000376
377static Processor prologProcessor;
378static Processor prologInitProcessor;
379static Processor contentProcessor;
380static Processor cdataSectionProcessor;
381#ifdef XML_DTD
382static Processor ignoreSectionProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000383static Processor externalParEntProcessor;
384static Processor externalParEntInitProcessor;
385static Processor entityValueProcessor;
386static Processor entityValueInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000387#endif /* XML_DTD */
388static Processor epilogProcessor;
389static Processor errorProcessor;
390static Processor externalEntityInitProcessor;
391static Processor externalEntityInitProcessor2;
392static Processor externalEntityInitProcessor3;
393static Processor externalEntityContentProcessor;
Fred Drake31d485c2004-08-03 07:06:22 +0000394static Processor internalEntityProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000395
Benjamin Peterson52b94082019-09-25 21:33:58 -0700396static enum XML_Error handleUnknownEncoding(XML_Parser parser,
397 const XML_Char *encodingName);
398static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
399 const char *s, const char *next);
400static enum XML_Error initializeEncoding(XML_Parser parser);
401static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
402 const char *s, const char *end, int tok,
403 const char *next, const char **nextPtr,
404 XML_Bool haveMore, XML_Bool allowClosingDoctype);
405static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
406 XML_Bool betweenDecl);
407static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
408 const ENCODING *enc, const char *start,
409 const char *end, const char **endPtr,
410 XML_Bool haveMore);
411static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *,
412 const char **startPtr, const char *end,
413 const char **nextPtr, XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000414#ifdef XML_DTD
Benjamin Peterson52b94082019-09-25 21:33:58 -0700415static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *,
416 const char **startPtr, const char *end,
417 const char **nextPtr, XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000418#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000419
Benjamin Peterson52b94082019-09-25 21:33:58 -0700420static void freeBindings(XML_Parser parser, BINDING *bindings);
421static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *,
422 const char *s, TAG_NAME *tagNamePtr,
423 BINDING **bindingsPtr);
424static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
425 const ATTRIBUTE_ID *attId, const XML_Char *uri,
426 BINDING **bindingsPtr);
427static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
428 XML_Bool isId, const XML_Char *dfltValue,
429 XML_Parser parser);
430static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *,
431 XML_Bool isCdata, const char *,
432 const char *, STRING_POOL *);
433static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *,
434 XML_Bool isCdata, const char *,
435 const char *, STRING_POOL *);
436static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
437 const char *start, const char *end);
438static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
439static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
440 const char *start, const char *end);
441static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
442 const char *start, const char *end);
443static int reportComment(XML_Parser parser, const ENCODING *enc,
444 const char *start, const char *end);
445static void reportDefault(XML_Parser parser, const ENCODING *enc,
446 const char *start, const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000447
Benjamin Peterson52b94082019-09-25 21:33:58 -0700448static const XML_Char *getContext(XML_Parser parser);
449static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000450
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000451static void FASTCALL normalizePublicId(XML_Char *s);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000452
Benjamin Peterson52b94082019-09-25 21:33:58 -0700453static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms);
Benjamin Peterson4e211002018-06-26 19:25:45 -0700454/* do not call if m_parentParser != NULL */
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000455static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
Benjamin Peterson52b94082019-09-25 21:33:58 -0700456static void dtdDestroy(DTD *p, XML_Bool isDocEntity,
457 const XML_Memory_Handling_Suite *ms);
458static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
459 const XML_Memory_Handling_Suite *ms);
460static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *, STRING_POOL *,
461 const HASH_TABLE *);
462static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
463 size_t createSize);
464static void FASTCALL hashTableInit(HASH_TABLE *,
465 const XML_Memory_Handling_Suite *ms);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000466static void FASTCALL hashTableClear(HASH_TABLE *);
467static void FASTCALL hashTableDestroy(HASH_TABLE *);
Benjamin Peterson52b94082019-09-25 21:33:58 -0700468static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
469static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000470
Benjamin Peterson52b94082019-09-25 21:33:58 -0700471static void FASTCALL poolInit(STRING_POOL *,
472 const XML_Memory_Handling_Suite *ms);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000473static void FASTCALL poolClear(STRING_POOL *);
474static void FASTCALL poolDestroy(STRING_POOL *);
Benjamin Peterson52b94082019-09-25 21:33:58 -0700475static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
476 const char *ptr, const char *end);
477static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
478 const char *ptr, const char *end);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000479static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
Benjamin Peterson52b94082019-09-25 21:33:58 -0700480static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
481 const XML_Char *s);
482static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
483 int n);
484static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
485 const XML_Char *s);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000486
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000487static int FASTCALL nextScaffoldPart(XML_Parser parser);
Benjamin Peterson52b94082019-09-25 21:33:58 -0700488static XML_Content *build_model(XML_Parser parser);
489static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
490 const char *ptr, const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000491
Victor Stinner93d0cb52017-08-18 23:43:54 +0200492static XML_Char *copyString(const XML_Char *s,
493 const XML_Memory_Handling_Suite *memsuite);
494
Victor Stinner23ec4b52017-06-15 00:54:36 +0200495static unsigned long generate_hash_secret_salt(XML_Parser parser);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700496static XML_Bool startParsing(XML_Parser parser);
497
Benjamin Peterson52b94082019-09-25 21:33:58 -0700498static XML_Parser parserCreate(const XML_Char *encodingName,
499 const XML_Memory_Handling_Suite *memsuite,
500 const XML_Char *nameSep, DTD *dtd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700501
Benjamin Peterson52b94082019-09-25 21:33:58 -0700502static void parserInit(XML_Parser parser, const XML_Char *encodingName);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000503
/* Accessors for the string under construction in a STRING_POOL, which
   spans [start, ptr).  Every macro takes a pointer expression; the argument
   is parenthesised at each use so that expressions such as &obj work. */
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Drop the last character. */
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
/* Abandon the string under construction. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Commit the string under construction and begin a new one after it. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Append c, calling poolGrow() first when the pool is full; evaluates to 0
   if growing fails and to 1 otherwise. */
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000515
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000516struct XML_ParserStruct {
Benjamin Peterson4e211002018-06-26 19:25:45 -0700517 /* The first member must be m_userData so that the XML_GetUserData
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000518 macro works. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000519 void *m_userData;
520 void *m_handlerArg;
521 char *m_buffer;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000522 const XML_Memory_Handling_Suite m_mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000523 /* first character to be parsed */
524 const char *m_bufferPtr;
525 /* past last character to be parsed */
526 char *m_bufferEnd;
Benjamin Peterson4e211002018-06-26 19:25:45 -0700527 /* allocated end of m_buffer */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000528 const char *m_bufferLim;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000529 XML_Index m_parseEndByteIndex;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000530 const char *m_parseEndPtr;
531 XML_Char *m_dataBuf;
532 XML_Char *m_dataBufEnd;
533 XML_StartElementHandler m_startElementHandler;
534 XML_EndElementHandler m_endElementHandler;
535 XML_CharacterDataHandler m_characterDataHandler;
536 XML_ProcessingInstructionHandler m_processingInstructionHandler;
537 XML_CommentHandler m_commentHandler;
538 XML_StartCdataSectionHandler m_startCdataSectionHandler;
539 XML_EndCdataSectionHandler m_endCdataSectionHandler;
540 XML_DefaultHandler m_defaultHandler;
541 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
542 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
543 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
544 XML_NotationDeclHandler m_notationDeclHandler;
545 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
546 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
547 XML_NotStandaloneHandler m_notStandaloneHandler;
548 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000549 XML_Parser m_externalEntityRefHandlerArg;
550 XML_SkippedEntityHandler m_skippedEntityHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000551 XML_UnknownEncodingHandler m_unknownEncodingHandler;
552 XML_ElementDeclHandler m_elementDeclHandler;
553 XML_AttlistDeclHandler m_attlistDeclHandler;
554 XML_EntityDeclHandler m_entityDeclHandler;
555 XML_XmlDeclHandler m_xmlDeclHandler;
556 const ENCODING *m_encoding;
557 INIT_ENCODING m_initEncoding;
558 const ENCODING *m_internalEncoding;
559 const XML_Char *m_protocolEncodingName;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000560 XML_Bool m_ns;
561 XML_Bool m_ns_triplets;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000562 void *m_unknownEncodingMem;
563 void *m_unknownEncodingData;
564 void *m_unknownEncodingHandlerData;
Benjamin Peterson52b94082019-09-25 21:33:58 -0700565 void(XMLCALL *m_unknownEncodingRelease)(void *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000566 PROLOG_STATE m_prologState;
567 Processor *m_processor;
568 enum XML_Error m_errorCode;
569 const char *m_eventPtr;
570 const char *m_eventEndPtr;
571 const char *m_positionPtr;
572 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
Fred Drake31d485c2004-08-03 07:06:22 +0000573 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000574 XML_Bool m_defaultExpandInternalEntities;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000575 int m_tagLevel;
576 ENTITY *m_declEntity;
577 const XML_Char *m_doctypeName;
578 const XML_Char *m_doctypeSysid;
579 const XML_Char *m_doctypePubid;
580 const XML_Char *m_declAttributeType;
581 const XML_Char *m_declNotationName;
582 const XML_Char *m_declNotationPublicId;
583 ELEMENT_TYPE *m_declElementType;
584 ATTRIBUTE_ID *m_declAttributeId;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000585 XML_Bool m_declAttributeIsCdata;
586 XML_Bool m_declAttributeIsId;
587 DTD *m_dtd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000588 const XML_Char *m_curBase;
589 TAG *m_tagStack;
590 TAG *m_freeTagList;
591 BINDING *m_inheritedBindings;
592 BINDING *m_freeBindingList;
593 int m_attsSize;
594 int m_nSpecifiedAtts;
595 int m_idAttIndex;
596 ATTRIBUTE *m_atts;
Fred Drake08317ae2003-10-21 15:38:55 +0000597 NS_ATT *m_nsAtts;
598 unsigned long m_nsAttsVersion;
599 unsigned char m_nsAttsPower;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700600#ifdef XML_ATTR_INFO
601 XML_AttrInfo *m_attInfo;
602#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000603 POSITION m_position;
604 STRING_POOL m_tempPool;
605 STRING_POOL m_temp2Pool;
606 char *m_groupConnector;
Fred Drake08317ae2003-10-21 15:38:55 +0000607 unsigned int m_groupSize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000608 XML_Char m_namespaceSeparator;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000609 XML_Parser m_parentParser;
Fred Drake31d485c2004-08-03 07:06:22 +0000610 XML_ParsingStatus m_parsingStatus;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000611#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000612 XML_Bool m_isParamEntity;
613 XML_Bool m_useForeignDTD;
614 enum XML_ParamEntityParsing m_paramEntityParsing;
615#endif
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700616 unsigned long m_hash_secret_salt;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000617};
618
/* Allocate, resize and release memory through the memory handling suite
   stored in the parser (m_mem).  `parser` is parenthesised so that any
   pointer expression may be passed. */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000622
Fred Drake08317ae2003-10-21 15:38:55 +0000623XML_Parser XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -0700624XML_ParserCreate(const XML_Char *encodingName) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000625 return XML_ParserCreate_MM(encodingName, NULL, NULL);
626}
627
Fred Drake08317ae2003-10-21 15:38:55 +0000628XML_Parser XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -0700629XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000630 XML_Char tmp[2];
631 *tmp = nsSep;
632 return XML_ParserCreate_MM(encodingName, NULL, tmp);
633}
634
Benjamin Peterson52b94082019-09-25 21:33:58 -0700635static const XML_Char implicitContext[]
636 = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h,
637 ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
638 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD,
639 ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r,
640 ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
641 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8,
642 ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
643 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e,
644 '\0'};
Victor Stinner5ff71322017-06-21 14:39:22 +0200645
Benjamin Peterson4e211002018-06-26 19:25:45 -0700646/* To avoid warnings about unused functions: */
647#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
648
Benjamin Peterson52b94082019-09-25 21:33:58 -0700649# if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
Victor Stinner5ff71322017-06-21 14:39:22 +0200650
/* Obtain entropy on Linux 3.17+
 *
 * Fills the count bytes at target using getrandom() (or the raw
 * SYS_getrandom syscall) with GRND_NONBLOCK.
 *
 * Returns 1 once all count bytes have been written (trivially so for
 * count == 0) and 0 if the kernel reports any error other than EINTR, or
 * returns no data.
 *
 * errno is only consulted when the call did not deliver any bytes: after a
 * successful (possibly short) read errno is stale and must not influence
 * whether the loop continues.
 */
static int
writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = GRND_NONBLOCK;

  while (bytesWrittenTotal < count) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const int bytesWrittenMore =
#  if defined(HAVE_GETRANDOM)
        getrandom(currentTarget, bytesToWrite, getrandomFlags);
#  else
        syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
#  endif

    if (bytesWrittenMore > 0) {
      /* Partial progress: keep going until the buffer is full. */
      bytesWrittenTotal += (size_t)bytesWrittenMore;
    } else if (bytesWrittenMore == 0 || errno != EINTR) {
      /* No data, or a real error (e.g. EAGAIN because of GRND_NONBLOCK):
         give up so the caller can fall back to another provider. */
      return 0;
    }
    /* else: interrupted by a signal before any byte was read -> retry */
  }

  return 1;
}
678
Benjamin Peterson52b94082019-09-25 21:33:58 -0700679# endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
Victor Stinner5ff71322017-06-21 14:39:22 +0200680
Benjamin Peterson52b94082019-09-25 21:33:58 -0700681# if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
Victor Stinner93d0cb52017-08-18 23:43:54 +0200682
/* Extract entropy from /dev/urandom
 *
 * Opens /dev/urandom read-only and reads until count bytes have been
 * stored at target.  The descriptor is closed on every path after a
 * successful open.
 *
 * Returns 1 once all count bytes have been written (trivially so for
 * count == 0) and 0 if the device cannot be opened, read() hits end of
 * file, or read() fails with anything other than EINTR.
 *
 * errno is only consulted when read() returned -1; after a successful
 * short read it is stale and must not decide whether the loop continues.
 */
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  int success = 1; /* cleared as soon as a read fails for good */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  while (bytesWrittenTotal < count) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      /* Partial progress: keep going until the buffer is full. */
      bytesWrittenTotal += (size_t)bytesWrittenMore;
    } else if (bytesWrittenMore == 0 || errno != EINTR) {
      /* Unexpected end of file or a real error. */
      success = 0;
      break;
    }
    /* else: interrupted by a signal before any byte was read -> retry */
  }

  close(fd);
  return success;
}
710
Benjamin Peterson52b94082019-09-25 21:33:58 -0700711# endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
Victor Stinner93d0cb52017-08-18 23:43:54 +0200712
Benjamin Peterson52b94082019-09-25 21:33:58 -0700713#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
Victor Stinner93d0cb52017-08-18 23:43:54 +0200714
Benjamin Peterson3b03b092019-06-27 20:54:44 -0700715#if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
Victor Stinner93d0cb52017-08-18 23:43:54 +0200716
/* Fills the count bytes at target with output of arc4random().
 *
 * Each arc4random() call yields one 32-bit word; its bytes are stored
 * least significant first, and a final partial word supplies only as many
 * bytes as are still missing.
 */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    const uint32_t word = arc4random();
    const size_t missing = count - filled;
    const size_t chunk = (missing < sizeof(word)) ? missing : sizeof(word);
    size_t k;

    for (k = 0; k < chunk; k++) {
      out[filled + k] = (uint8_t)(word >> (k * 8));
    }
    filled += chunk;
  }
}
732
Benjamin Peterson52b94082019-09-25 21:33:58 -0700733#endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
Victor Stinner93d0cb52017-08-18 23:43:54 +0200734
Victor Stinner5ff71322017-06-21 14:39:22 +0200735#ifdef _WIN32
736
/* Obtain entropy on Windows using the rand_s() function which
 * generates cryptographically secure random numbers.  Internally it
 * uses RtlGenRandom API which is present in Windows XP and later.
 *
 * Fills the count bytes at target, taking one unsigned int from rand_s()
 * at a time and storing its bytes least significant first.
 *
 * Returns 1 on success and 0 as soon as rand_s() reports a failure.
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    unsigned int word = 0;
    size_t missing;
    size_t chunk;
    size_t k;

    if (rand_s(&word) != 0) {
      return 0; /* failure */
    }

    missing = count - filled;
    chunk = (missing < sizeof(word)) ? missing : sizeof(word);
    for (k = 0; k < chunk; k++) {
      out[filled + k] = (uint8_t)(word >> (k * 8));
    }
    filled += chunk;
  }

  return 1; /* success */
}
760
761#endif /* _WIN32 */
762
Victor Stinner93d0cb52017-08-18 23:43:54 +0200763#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
764
/* Low quality, time based entropy used only by the fallback path of
 * generate_hash_secret_salt().
 *
 * Windows:   XOR of the two halves of the current system FILETIME.
 * Elsewhere: the microsecond part of gettimeofday().
 */
static unsigned long
gather_time_entropy(void) {
#  ifdef _WIN32
  FILETIME now;
  GetSystemTimeAsFileTime(&now); /* never fails */
  return now.dwHighDateTime ^ now.dwLowDateTime;
#  else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  /* Checked in debug builds only; the cast keeps NDEBUG builds free of
     "unused variable" warnings. */
  assert(rc == 0);
  (void)rc;

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#  endif
}
787
Benjamin Peterson52b94082019-09-25 21:33:58 -0700788#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
Victor Stinner5ff71322017-06-21 14:39:22 +0200789
/* Pass-through helper: returns entropy unchanged.
 *
 * When the environment variable EXPAT_ENTROPY_DEBUG is set to exactly "1",
 * the provider label, the value as zero padded hex and its size in bytes
 * are additionally printed to stderr.
 */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  const char *const setting = getenv("EXPAT_ENTROPY_DEBUG");
  const int enabled = (setting != NULL) && (strcmp(setting, "1") == 0);

  if (enabled) {
    const int hexDigits = (int)sizeof(entropy) * 2;
    const unsigned long byteCount = (unsigned long)sizeof(entropy);
    fprintf(stderr, "Entropy: %s --> 0x%0*lx (%lu bytes)\n", label, hexDigits,
            entropy, byteCount);
  }
  return entropy;
}
799
Victor Stinner23ec4b52017-06-15 00:54:36 +0200800static unsigned long
Benjamin Peterson52b94082019-09-25 21:33:58 -0700801generate_hash_secret_salt(XML_Parser parser) {
Victor Stinner5ff71322017-06-21 14:39:22 +0200802 unsigned long entropy;
803 (void)parser;
Benjamin Peterson4e211002018-06-26 19:25:45 -0700804
805 /* "Failproof" high quality providers: */
Victor Stinner93d0cb52017-08-18 23:43:54 +0200806#if defined(HAVE_ARC4RANDOM_BUF)
Victor Stinner5ff71322017-06-21 14:39:22 +0200807 arc4random_buf(&entropy, sizeof(entropy));
808 return ENTROPY_DEBUG("arc4random_buf", entropy);
Victor Stinner93d0cb52017-08-18 23:43:54 +0200809#elif defined(HAVE_ARC4RANDOM)
810 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
811 return ENTROPY_DEBUG("arc4random", entropy);
Victor Stinner5ff71322017-06-21 14:39:22 +0200812#else
813 /* Try high quality providers first .. */
Benjamin Peterson52b94082019-09-25 21:33:58 -0700814# ifdef _WIN32
815 if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
816 return ENTROPY_DEBUG("rand_s", entropy);
Victor Stinner5ff71322017-06-21 14:39:22 +0200817 }
Benjamin Peterson52b94082019-09-25 21:33:58 -0700818# elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
Victor Stinner93d0cb52017-08-18 23:43:54 +0200819 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
Victor Stinner5ff71322017-06-21 14:39:22 +0200820 return ENTROPY_DEBUG("getrandom", entropy);
821 }
Benjamin Peterson52b94082019-09-25 21:33:58 -0700822# endif
823# if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
Victor Stinner93d0cb52017-08-18 23:43:54 +0200824 if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
825 return ENTROPY_DEBUG("/dev/urandom", entropy);
826 }
Benjamin Peterson52b94082019-09-25 21:33:58 -0700827# endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
Victor Stinner5ff71322017-06-21 14:39:22 +0200828 /* .. and self-made low quality for backup: */
829
830 /* Process ID is 0 bits entropy if attacker has local access */
831 entropy = gather_time_entropy() ^ getpid();
Victor Stinner23ec4b52017-06-15 00:54:36 +0200832
833 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
834 if (sizeof(unsigned long) == 4) {
Victor Stinner5ff71322017-06-21 14:39:22 +0200835 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200836 } else {
Victor Stinner5ff71322017-06-21 14:39:22 +0200837 return ENTROPY_DEBUG("fallback(8)",
Benjamin Peterson52b94082019-09-25 21:33:58 -0700838 entropy * (unsigned long)2305843009213693951ULL);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200839 }
Victor Stinner5ff71322017-06-21 14:39:22 +0200840#endif
841}
842
843static unsigned long
844get_hash_secret_salt(XML_Parser parser) {
845 if (parser->m_parentParser != NULL)
846 return get_hash_secret_salt(parser->m_parentParser);
847 return parser->m_hash_secret_salt;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700848}
849
Benjamin Peterson52b94082019-09-25 21:33:58 -0700850static XML_Bool /* only valid for root parser */
851startParsing(XML_Parser parser) {
852 /* hash functions must be initialized before setContext() is called */
853 if (parser->m_hash_secret_salt == 0)
854 parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
855 if (parser->m_ns) {
856 /* implicit context only set for root parser, since child
857 parsers (i.e. external entity parsers) will inherit it
858 */
859 return setContext(parser, implicitContext);
860 }
861 return XML_TRUE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700862}
863
864XML_Parser XMLCALL
865XML_ParserCreate_MM(const XML_Char *encodingName,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700866 const XML_Memory_Handling_Suite *memsuite,
Benjamin Peterson52b94082019-09-25 21:33:58 -0700867 const XML_Char *nameSep) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700868 return parserCreate(encodingName, memsuite, nameSep, NULL);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000869}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000870
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000871static XML_Parser
872parserCreate(const XML_Char *encodingName,
Benjamin Peterson52b94082019-09-25 21:33:58 -0700873 const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
874 DTD *dtd) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000875 XML_Parser parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000876
877 if (memsuite) {
878 XML_Memory_Handling_Suite *mtemp;
Benjamin Peterson52b94082019-09-25 21:33:58 -0700879 parser = (XML_Parser)memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000880 if (parser != NULL) {
881 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
882 mtemp->malloc_fcn = memsuite->malloc_fcn;
883 mtemp->realloc_fcn = memsuite->realloc_fcn;
884 mtemp->free_fcn = memsuite->free_fcn;
885 }
Benjamin Peterson52b94082019-09-25 21:33:58 -0700886 } else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000887 XML_Memory_Handling_Suite *mtemp;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000888 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
889 if (parser != NULL) {
890 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
891 mtemp->malloc_fcn = malloc;
892 mtemp->realloc_fcn = realloc;
893 mtemp->free_fcn = free;
894 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000895 }
896
Benjamin Peterson52b94082019-09-25 21:33:58 -0700897 if (! parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000898 return parser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000899
Benjamin Peterson4e211002018-06-26 19:25:45 -0700900 parser->m_buffer = NULL;
901 parser->m_bufferLim = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000902
Benjamin Peterson4e211002018-06-26 19:25:45 -0700903 parser->m_attsSize = INIT_ATTS_SIZE;
Benjamin Peterson52b94082019-09-25 21:33:58 -0700904 parser->m_atts
905 = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
Benjamin Peterson4e211002018-06-26 19:25:45 -0700906 if (parser->m_atts == NULL) {
907 FREE(parser, parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000908 return NULL;
909 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700910#ifdef XML_ATTR_INFO
Benjamin Peterson52b94082019-09-25 21:33:58 -0700911 parser->m_attInfo = (XML_AttrInfo *)MALLOC(
912 parser, parser->m_attsSize * sizeof(XML_AttrInfo));
Benjamin Peterson4e211002018-06-26 19:25:45 -0700913 if (parser->m_attInfo == NULL) {
914 FREE(parser, parser->m_atts);
915 FREE(parser, parser);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700916 return NULL;
917 }
918#endif
Benjamin Peterson52b94082019-09-25 21:33:58 -0700919 parser->m_dataBuf
920 = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
Benjamin Peterson4e211002018-06-26 19:25:45 -0700921 if (parser->m_dataBuf == NULL) {
922 FREE(parser, parser->m_atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700923#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -0700924 FREE(parser, parser->m_attInfo);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700925#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -0700926 FREE(parser, parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000927 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000928 }
Benjamin Peterson4e211002018-06-26 19:25:45 -0700929 parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000930
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000931 if (dtd)
Benjamin Peterson4e211002018-06-26 19:25:45 -0700932 parser->m_dtd = dtd;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000933 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -0700934 parser->m_dtd = dtdCreate(&parser->m_mem);
935 if (parser->m_dtd == NULL) {
936 FREE(parser, parser->m_dataBuf);
937 FREE(parser, parser->m_atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700938#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -0700939 FREE(parser, parser->m_attInfo);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700940#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -0700941 FREE(parser, parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000942 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000943 }
944 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000945
Benjamin Peterson4e211002018-06-26 19:25:45 -0700946 parser->m_freeBindingList = NULL;
947 parser->m_freeTagList = NULL;
948 parser->m_freeInternalEntities = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000949
Benjamin Peterson4e211002018-06-26 19:25:45 -0700950 parser->m_groupSize = 0;
951 parser->m_groupConnector = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000952
Benjamin Peterson4e211002018-06-26 19:25:45 -0700953 parser->m_unknownEncodingHandler = NULL;
954 parser->m_unknownEncodingHandlerData = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000955
Benjamin Peterson4e211002018-06-26 19:25:45 -0700956 parser->m_namespaceSeparator = ASCII_EXCL;
957 parser->m_ns = XML_FALSE;
958 parser->m_ns_triplets = XML_FALSE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000959
Benjamin Peterson4e211002018-06-26 19:25:45 -0700960 parser->m_nsAtts = NULL;
961 parser->m_nsAttsVersion = 0;
962 parser->m_nsAttsPower = 0;
Fred Drake08317ae2003-10-21 15:38:55 +0000963
Benjamin Peterson4e211002018-06-26 19:25:45 -0700964 parser->m_protocolEncodingName = NULL;
Victor Stinner93d0cb52017-08-18 23:43:54 +0200965
Benjamin Peterson4e211002018-06-26 19:25:45 -0700966 poolInit(&parser->m_tempPool, &(parser->m_mem));
967 poolInit(&parser->m_temp2Pool, &(parser->m_mem));
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000968 parserInit(parser, encodingName);
969
Benjamin Peterson52b94082019-09-25 21:33:58 -0700970 if (encodingName && ! parser->m_protocolEncodingName) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000971 XML_ParserFree(parser);
972 return NULL;
973 }
974
975 if (nameSep) {
Benjamin Peterson4e211002018-06-26 19:25:45 -0700976 parser->m_ns = XML_TRUE;
977 parser->m_internalEncoding = XmlGetInternalEncodingNS();
978 parser->m_namespaceSeparator = *nameSep;
Benjamin Peterson52b94082019-09-25 21:33:58 -0700979 } else {
Benjamin Peterson4e211002018-06-26 19:25:45 -0700980 parser->m_internalEncoding = XmlGetInternalEncoding();
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000981 }
982
983 return parser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000984}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000985
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000986static void
Benjamin Peterson52b94082019-09-25 21:33:58 -0700987parserInit(XML_Parser parser, const XML_Char *encodingName) {
Benjamin Peterson4e211002018-06-26 19:25:45 -0700988 parser->m_processor = prologInitProcessor;
989 XmlPrologStateInit(&parser->m_prologState);
Victor Stinner93d0cb52017-08-18 23:43:54 +0200990 if (encodingName != NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -0700991 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
Victor Stinner93d0cb52017-08-18 23:43:54 +0200992 }
Benjamin Peterson4e211002018-06-26 19:25:45 -0700993 parser->m_curBase = NULL;
994 XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
995 parser->m_userData = NULL;
996 parser->m_handlerArg = NULL;
997 parser->m_startElementHandler = NULL;
998 parser->m_endElementHandler = NULL;
999 parser->m_characterDataHandler = NULL;
1000 parser->m_processingInstructionHandler = NULL;
1001 parser->m_commentHandler = NULL;
1002 parser->m_startCdataSectionHandler = NULL;
1003 parser->m_endCdataSectionHandler = NULL;
1004 parser->m_defaultHandler = NULL;
1005 parser->m_startDoctypeDeclHandler = NULL;
1006 parser->m_endDoctypeDeclHandler = NULL;
1007 parser->m_unparsedEntityDeclHandler = NULL;
1008 parser->m_notationDeclHandler = NULL;
1009 parser->m_startNamespaceDeclHandler = NULL;
1010 parser->m_endNamespaceDeclHandler = NULL;
1011 parser->m_notStandaloneHandler = NULL;
1012 parser->m_externalEntityRefHandler = NULL;
1013 parser->m_externalEntityRefHandlerArg = parser;
1014 parser->m_skippedEntityHandler = NULL;
1015 parser->m_elementDeclHandler = NULL;
1016 parser->m_attlistDeclHandler = NULL;
1017 parser->m_entityDeclHandler = NULL;
1018 parser->m_xmlDeclHandler = NULL;
1019 parser->m_bufferPtr = parser->m_buffer;
1020 parser->m_bufferEnd = parser->m_buffer;
1021 parser->m_parseEndByteIndex = 0;
1022 parser->m_parseEndPtr = NULL;
1023 parser->m_declElementType = NULL;
1024 parser->m_declAttributeId = NULL;
1025 parser->m_declEntity = NULL;
1026 parser->m_doctypeName = NULL;
1027 parser->m_doctypeSysid = NULL;
1028 parser->m_doctypePubid = NULL;
1029 parser->m_declAttributeType = NULL;
1030 parser->m_declNotationName = NULL;
1031 parser->m_declNotationPublicId = NULL;
1032 parser->m_declAttributeIsCdata = XML_FALSE;
1033 parser->m_declAttributeIsId = XML_FALSE;
1034 memset(&parser->m_position, 0, sizeof(POSITION));
1035 parser->m_errorCode = XML_ERROR_NONE;
1036 parser->m_eventPtr = NULL;
1037 parser->m_eventEndPtr = NULL;
1038 parser->m_positionPtr = NULL;
1039 parser->m_openInternalEntities = NULL;
1040 parser->m_defaultExpandInternalEntities = XML_TRUE;
1041 parser->m_tagLevel = 0;
1042 parser->m_tagStack = NULL;
1043 parser->m_inheritedBindings = NULL;
1044 parser->m_nSpecifiedAtts = 0;
1045 parser->m_unknownEncodingMem = NULL;
1046 parser->m_unknownEncodingRelease = NULL;
1047 parser->m_unknownEncodingData = NULL;
1048 parser->m_parentParser = NULL;
1049 parser->m_parsingStatus.parsing = XML_INITIALIZED;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001050#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07001051 parser->m_isParamEntity = XML_FALSE;
1052 parser->m_useForeignDTD = XML_FALSE;
1053 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001054#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07001055 parser->m_hash_secret_salt = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001056}
1057
Benjamin Peterson4e211002018-06-26 19:25:45 -07001058/* moves list of bindings to m_freeBindingList */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001059static void FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001060moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001061 while (bindings) {
1062 BINDING *b = bindings;
1063 bindings = bindings->nextTagBinding;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001064 b->nextTagBinding = parser->m_freeBindingList;
1065 parser->m_freeBindingList = b;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001066 }
1067}
1068
Fred Drake08317ae2003-10-21 15:38:55 +00001069XML_Bool XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001070XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001071 TAG *tStk;
Fred Drake31d485c2004-08-03 07:06:22 +00001072 OPEN_INTERNAL_ENTITY *openEntityList;
Victor Stinner5ff71322017-06-21 14:39:22 +02001073
1074 if (parser == NULL)
Benjamin Peterson52b94082019-09-25 21:33:58 -07001075 return XML_FALSE;
Victor Stinner5ff71322017-06-21 14:39:22 +02001076
Benjamin Peterson4e211002018-06-26 19:25:45 -07001077 if (parser->m_parentParser)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001078 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001079 /* move m_tagStack to m_freeTagList */
1080 tStk = parser->m_tagStack;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001081 while (tStk) {
1082 TAG *tag = tStk;
1083 tStk = tStk->parent;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001084 tag->parent = parser->m_freeTagList;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001085 moveToFreeBindingList(parser, tag->bindings);
1086 tag->bindings = NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001087 parser->m_freeTagList = tag;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001088 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001089 /* move m_openInternalEntities to m_freeInternalEntities */
1090 openEntityList = parser->m_openInternalEntities;
Fred Drake31d485c2004-08-03 07:06:22 +00001091 while (openEntityList) {
1092 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1093 openEntityList = openEntity->next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001094 openEntity->next = parser->m_freeInternalEntities;
1095 parser->m_freeInternalEntities = openEntity;
Fred Drake31d485c2004-08-03 07:06:22 +00001096 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001097 moveToFreeBindingList(parser, parser->m_inheritedBindings);
1098 FREE(parser, parser->m_unknownEncodingMem);
1099 if (parser->m_unknownEncodingRelease)
1100 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1101 poolClear(&parser->m_tempPool);
1102 poolClear(&parser->m_temp2Pool);
1103 FREE(parser, (void *)parser->m_protocolEncodingName);
1104 parser->m_protocolEncodingName = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001105 parserInit(parser, encodingName);
Benjamin Peterson4e211002018-06-26 19:25:45 -07001106 dtdReset(parser->m_dtd, &parser->m_mem);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001107 return XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001108}
1109
Fred Drake08317ae2003-10-21 15:38:55 +00001110enum XML_Status XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001111XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001112 if (parser == NULL)
Benjamin Peterson52b94082019-09-25 21:33:58 -07001113 return XML_STATUS_ERROR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001114 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1115 XXX There's no way for the caller to determine which of the
1116 XXX possible error cases caused the XML_STATUS_ERROR return.
1117 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07001118 if (parser->m_parsingStatus.parsing == XML_PARSING
1119 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001120 return XML_STATUS_ERROR;
Victor Stinner93d0cb52017-08-18 23:43:54 +02001121
1122 /* Get rid of any previous encoding name */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001123 FREE(parser, (void *)parser->m_protocolEncodingName);
Victor Stinner93d0cb52017-08-18 23:43:54 +02001124
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001125 if (encodingName == NULL)
Victor Stinner93d0cb52017-08-18 23:43:54 +02001126 /* No new encoding name */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001127 parser->m_protocolEncodingName = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001128 else {
Victor Stinner93d0cb52017-08-18 23:43:54 +02001129 /* Copy the new encoding name into allocated memory */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001130 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
Benjamin Peterson52b94082019-09-25 21:33:58 -07001131 if (! parser->m_protocolEncodingName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001132 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001133 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001134 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001135}
1136
Fred Drake08317ae2003-10-21 15:38:55 +00001137XML_Parser XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001138XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1139 const XML_Char *encodingName) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001140 XML_Parser parser = oldParser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001141 DTD *newDtd = NULL;
Victor Stinner5ff71322017-06-21 14:39:22 +02001142 DTD *oldDtd;
1143 XML_StartElementHandler oldStartElementHandler;
1144 XML_EndElementHandler oldEndElementHandler;
1145 XML_CharacterDataHandler oldCharacterDataHandler;
1146 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1147 XML_CommentHandler oldCommentHandler;
1148 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1149 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1150 XML_DefaultHandler oldDefaultHandler;
1151 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1152 XML_NotationDeclHandler oldNotationDeclHandler;
1153 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1154 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1155 XML_NotStandaloneHandler oldNotStandaloneHandler;
1156 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1157 XML_SkippedEntityHandler oldSkippedEntityHandler;
1158 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1159 XML_ElementDeclHandler oldElementDeclHandler;
1160 XML_AttlistDeclHandler oldAttlistDeclHandler;
1161 XML_EntityDeclHandler oldEntityDeclHandler;
1162 XML_XmlDeclHandler oldXmlDeclHandler;
Benjamin Peterson52b94082019-09-25 21:33:58 -07001163 ELEMENT_TYPE *oldDeclElementType;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001164
Victor Stinner5ff71322017-06-21 14:39:22 +02001165 void *oldUserData;
1166 void *oldHandlerArg;
1167 XML_Bool oldDefaultExpandInternalEntities;
1168 XML_Parser oldExternalEntityRefHandlerArg;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001169#ifdef XML_DTD
Victor Stinner5ff71322017-06-21 14:39:22 +02001170 enum XML_ParamEntityParsing oldParamEntityParsing;
1171 int oldInEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001172#endif
Victor Stinner5ff71322017-06-21 14:39:22 +02001173 XML_Bool oldns_triplets;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001174 /* Note that the new parser shares the same hash secret as the old
1175 parser, so that dtdCopy and copyEntityTable can lookup values
1176 from hash tables associated with either parser without us having
1177 to worry which hash secrets each table has.
1178 */
Victor Stinner5ff71322017-06-21 14:39:22 +02001179 unsigned long oldhash_secret_salt;
1180
1181 /* Validate the oldParser parameter before we pull everything out of it */
1182 if (oldParser == NULL)
1183 return NULL;
1184
1185 /* Stash the original parser contents on the stack */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001186 oldDtd = parser->m_dtd;
1187 oldStartElementHandler = parser->m_startElementHandler;
1188 oldEndElementHandler = parser->m_endElementHandler;
1189 oldCharacterDataHandler = parser->m_characterDataHandler;
1190 oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1191 oldCommentHandler = parser->m_commentHandler;
1192 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1193 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1194 oldDefaultHandler = parser->m_defaultHandler;
1195 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1196 oldNotationDeclHandler = parser->m_notationDeclHandler;
1197 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1198 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1199 oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1200 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1201 oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1202 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1203 oldElementDeclHandler = parser->m_elementDeclHandler;
1204 oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1205 oldEntityDeclHandler = parser->m_entityDeclHandler;
1206 oldXmlDeclHandler = parser->m_xmlDeclHandler;
1207 oldDeclElementType = parser->m_declElementType;
Victor Stinner5ff71322017-06-21 14:39:22 +02001208
Benjamin Peterson4e211002018-06-26 19:25:45 -07001209 oldUserData = parser->m_userData;
1210 oldHandlerArg = parser->m_handlerArg;
1211 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1212 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
Victor Stinner5ff71322017-06-21 14:39:22 +02001213#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07001214 oldParamEntityParsing = parser->m_paramEntityParsing;
1215 oldInEntityValue = parser->m_prologState.inEntityValue;
Victor Stinner5ff71322017-06-21 14:39:22 +02001216#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07001217 oldns_triplets = parser->m_ns_triplets;
Victor Stinner5ff71322017-06-21 14:39:22 +02001218 /* Note that the new parser shares the same hash secret as the old
1219 parser, so that dtdCopy and copyEntityTable can lookup values
1220 from hash tables associated with either parser without us having
1221 to worry which hash secrets each table has.
1222 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001223 oldhash_secret_salt = parser->m_hash_secret_salt;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001224
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001225#ifdef XML_DTD
Benjamin Peterson52b94082019-09-25 21:33:58 -07001226 if (! context)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001227 newDtd = oldDtd;
1228#endif /* XML_DTD */
1229
1230 /* Note that the magical uses of the pre-processor to make field
1231 access look more like C++ require that `parser' be overwritten
1232 here. This makes this function more painful to follow than it
1233 would be otherwise.
1234 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001235 if (parser->m_ns) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001236 XML_Char tmp[2];
Benjamin Peterson4e211002018-06-26 19:25:45 -07001237 *tmp = parser->m_namespaceSeparator;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001238 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
Benjamin Peterson52b94082019-09-25 21:33:58 -07001239 } else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001240 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001241 }
1242
Benjamin Peterson52b94082019-09-25 21:33:58 -07001243 if (! parser)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001244 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001245
Benjamin Peterson4e211002018-06-26 19:25:45 -07001246 parser->m_startElementHandler = oldStartElementHandler;
1247 parser->m_endElementHandler = oldEndElementHandler;
1248 parser->m_characterDataHandler = oldCharacterDataHandler;
1249 parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1250 parser->m_commentHandler = oldCommentHandler;
1251 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1252 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1253 parser->m_defaultHandler = oldDefaultHandler;
1254 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1255 parser->m_notationDeclHandler = oldNotationDeclHandler;
1256 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1257 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1258 parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1259 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1260 parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1261 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1262 parser->m_elementDeclHandler = oldElementDeclHandler;
1263 parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1264 parser->m_entityDeclHandler = oldEntityDeclHandler;
1265 parser->m_xmlDeclHandler = oldXmlDeclHandler;
1266 parser->m_declElementType = oldDeclElementType;
1267 parser->m_userData = oldUserData;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001268 if (oldUserData == oldHandlerArg)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001269 parser->m_handlerArg = parser->m_userData;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001270 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001271 parser->m_handlerArg = parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001272 if (oldExternalEntityRefHandlerArg != oldParser)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001273 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1274 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1275 parser->m_ns_triplets = oldns_triplets;
1276 parser->m_hash_secret_salt = oldhash_secret_salt;
1277 parser->m_parentParser = oldParser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001278#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07001279 parser->m_paramEntityParsing = oldParamEntityParsing;
1280 parser->m_prologState.inEntityValue = oldInEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001281 if (context) {
1282#endif /* XML_DTD */
Benjamin Peterson52b94082019-09-25 21:33:58 -07001283 if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
1284 || ! setContext(parser, context)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001285 XML_ParserFree(parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001286 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001287 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001288 parser->m_processor = externalEntityInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001289#ifdef XML_DTD
Benjamin Peterson52b94082019-09-25 21:33:58 -07001290 } else {
1291 /* The DTD instance referenced by parser->m_dtd is shared between the
1292 document's root parser and external PE parsers, therefore one does not
1293 need to call setContext. In addition, one also *must* not call
1294 setContext, because this would overwrite existing prefix->binding
1295 pointers in parser->m_dtd with ones that get destroyed with the external
1296 PE parser. This would leave those prefixes with dangling pointers.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001297 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001298 parser->m_isParamEntity = XML_TRUE;
1299 XmlPrologStateInitExternalEntity(&parser->m_prologState);
1300 parser->m_processor = externalParEntInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001301 }
1302#endif /* XML_DTD */
1303 return parser;
1304}
1305
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001306static void FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001307destroyBindings(BINDING *bindings, XML_Parser parser) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001308 for (;;) {
1309 BINDING *b = bindings;
Benjamin Peterson52b94082019-09-25 21:33:58 -07001310 if (! b)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001311 break;
1312 bindings = b->nextTagBinding;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001313 FREE(parser, b->uri);
1314 FREE(parser, b);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001315 }
1316}
1317
Fred Drake08317ae2003-10-21 15:38:55 +00001318void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001319XML_ParserFree(XML_Parser parser) {
Fred Drake31d485c2004-08-03 07:06:22 +00001320 TAG *tagList;
1321 OPEN_INTERNAL_ENTITY *entityList;
1322 if (parser == NULL)
1323 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001324 /* free m_tagStack and m_freeTagList */
1325 tagList = parser->m_tagStack;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001326 for (;;) {
1327 TAG *p;
Fred Drake31d485c2004-08-03 07:06:22 +00001328 if (tagList == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001329 if (parser->m_freeTagList == NULL)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001330 break;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001331 tagList = parser->m_freeTagList;
1332 parser->m_freeTagList = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001333 }
Fred Drake31d485c2004-08-03 07:06:22 +00001334 p = tagList;
1335 tagList = tagList->parent;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001336 FREE(parser, p->buf);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001337 destroyBindings(p->bindings, parser);
Benjamin Peterson4e211002018-06-26 19:25:45 -07001338 FREE(parser, p);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001339 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001340 /* free m_openInternalEntities and m_freeInternalEntities */
1341 entityList = parser->m_openInternalEntities;
Fred Drake31d485c2004-08-03 07:06:22 +00001342 for (;;) {
1343 OPEN_INTERNAL_ENTITY *openEntity;
1344 if (entityList == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001345 if (parser->m_freeInternalEntities == NULL)
Fred Drake31d485c2004-08-03 07:06:22 +00001346 break;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001347 entityList = parser->m_freeInternalEntities;
1348 parser->m_freeInternalEntities = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00001349 }
1350 openEntity = entityList;
1351 entityList = entityList->next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001352 FREE(parser, openEntity);
Fred Drake31d485c2004-08-03 07:06:22 +00001353 }
1354
Benjamin Peterson4e211002018-06-26 19:25:45 -07001355 destroyBindings(parser->m_freeBindingList, parser);
1356 destroyBindings(parser->m_inheritedBindings, parser);
1357 poolDestroy(&parser->m_tempPool);
1358 poolDestroy(&parser->m_temp2Pool);
1359 FREE(parser, (void *)parser->m_protocolEncodingName);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001360#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001361 /* external parameter entity parsers share the DTD structure
1362 parser->m_dtd with the root parser, so we must not destroy it
1363 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07001364 if (! parser->m_isParamEntity && parser->m_dtd)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001365#else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001366 if (parser->m_dtd)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001367#endif /* XML_DTD */
Benjamin Peterson52b94082019-09-25 21:33:58 -07001368 dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser,
1369 &parser->m_mem);
Benjamin Peterson4e211002018-06-26 19:25:45 -07001370 FREE(parser, (void *)parser->m_atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001371#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -07001372 FREE(parser, (void *)parser->m_attInfo);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001373#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07001374 FREE(parser, parser->m_groupConnector);
1375 FREE(parser, parser->m_buffer);
1376 FREE(parser, parser->m_dataBuf);
1377 FREE(parser, parser->m_nsAtts);
1378 FREE(parser, parser->m_unknownEncodingMem);
1379 if (parser->m_unknownEncodingRelease)
1380 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1381 FREE(parser, parser);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001382}
1383
Fred Drake08317ae2003-10-21 15:38:55 +00001384void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001385XML_UseParserAsHandlerArg(XML_Parser parser) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001386 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001387 parser->m_handlerArg = parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001388}
1389
Fred Drake08317ae2003-10-21 15:38:55 +00001390enum XML_Error XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001391XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001392 if (parser == NULL)
1393 return XML_ERROR_INVALID_ARGUMENT;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001394#ifdef XML_DTD
1395 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Benjamin Peterson52b94082019-09-25 21:33:58 -07001396 if (parser->m_parsingStatus.parsing == XML_PARSING
1397 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001398 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001399 parser->m_useForeignDTD = useDTD;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001400 return XML_ERROR_NONE;
1401#else
1402 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1403#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001404}
1405
Fred Drake08317ae2003-10-21 15:38:55 +00001406void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001407XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001408 if (parser == NULL)
1409 return;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001410 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Benjamin Peterson52b94082019-09-25 21:33:58 -07001411 if (parser->m_parsingStatus.parsing == XML_PARSING
1412 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001413 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001414 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001415}
1416
Fred Drake08317ae2003-10-21 15:38:55 +00001417void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001418XML_SetUserData(XML_Parser parser, void *p) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001419 if (parser == NULL)
1420 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001421 if (parser->m_handlerArg == parser->m_userData)
1422 parser->m_handlerArg = parser->m_userData = p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001423 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001424 parser->m_userData = p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001425}
1426
Fred Drake08317ae2003-10-21 15:38:55 +00001427enum XML_Status XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001428XML_SetBase(XML_Parser parser, const XML_Char *p) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001429 if (parser == NULL)
1430 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001431 if (p) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001432 p = poolCopyString(&parser->m_dtd->pool, p);
Benjamin Peterson52b94082019-09-25 21:33:58 -07001433 if (! p)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001434 return XML_STATUS_ERROR;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001435 parser->m_curBase = p;
Benjamin Peterson52b94082019-09-25 21:33:58 -07001436 } else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001437 parser->m_curBase = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001438 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001439}
1440
Benjamin Peterson52b94082019-09-25 21:33:58 -07001441const XML_Char *XMLCALL
1442XML_GetBase(XML_Parser parser) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001443 if (parser == NULL)
1444 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001445 return parser->m_curBase;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001446}
1447
Fred Drake08317ae2003-10-21 15:38:55 +00001448int XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001449XML_GetSpecifiedAttributeCount(XML_Parser parser) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001450 if (parser == NULL)
1451 return -1;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001452 return parser->m_nSpecifiedAtts;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001453}
1454
Fred Drake08317ae2003-10-21 15:38:55 +00001455int XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001456XML_GetIdAttributeIndex(XML_Parser parser) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001457 if (parser == NULL)
1458 return -1;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001459 return parser->m_idAttIndex;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001460}
1461
#ifdef XML_ATTR_INFO
/* Return the parser's m_attInfo array, or NULL when `parser` is NULL.
   Only compiled when XML_ATTR_INFO is defined. */
const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser) {
  if (parser != NULL)
    return parser->m_attInfo;
  return NULL;
}
#endif
1470
Fred Drake08317ae2003-10-21 15:38:55 +00001471void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001472XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
1473 XML_EndElementHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001474 if (parser == NULL)
1475 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001476 parser->m_startElementHandler = start;
1477 parser->m_endElementHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001478}
1479
Fred Drake08317ae2003-10-21 15:38:55 +00001480void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001481XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001482 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001483 parser->m_startElementHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001484}
1485
Fred Drake08317ae2003-10-21 15:38:55 +00001486void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001487XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001488 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001489 parser->m_endElementHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001490}
1491
Fred Drake08317ae2003-10-21 15:38:55 +00001492void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001493XML_SetCharacterDataHandler(XML_Parser parser,
Benjamin Peterson52b94082019-09-25 21:33:58 -07001494 XML_CharacterDataHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001495 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001496 parser->m_characterDataHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001497}
1498
Fred Drake08317ae2003-10-21 15:38:55 +00001499void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001500XML_SetProcessingInstructionHandler(XML_Parser parser,
Benjamin Peterson52b94082019-09-25 21:33:58 -07001501 XML_ProcessingInstructionHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001502 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001503 parser->m_processingInstructionHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001504}
1505
Fred Drake08317ae2003-10-21 15:38:55 +00001506void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001507XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001508 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001509 parser->m_commentHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001510}
1511
Fred Drake08317ae2003-10-21 15:38:55 +00001512void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001513XML_SetCdataSectionHandler(XML_Parser parser,
1514 XML_StartCdataSectionHandler start,
Benjamin Peterson52b94082019-09-25 21:33:58 -07001515 XML_EndCdataSectionHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001516 if (parser == NULL)
1517 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001518 parser->m_startCdataSectionHandler = start;
1519 parser->m_endCdataSectionHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001520}
1521
Fred Drake08317ae2003-10-21 15:38:55 +00001522void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001523XML_SetStartCdataSectionHandler(XML_Parser parser,
1524 XML_StartCdataSectionHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001525 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001526 parser->m_startCdataSectionHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001527}
1528
Fred Drake08317ae2003-10-21 15:38:55 +00001529void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001530XML_SetEndCdataSectionHandler(XML_Parser parser,
1531 XML_EndCdataSectionHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001532 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001533 parser->m_endCdataSectionHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001534}
1535
Fred Drake08317ae2003-10-21 15:38:55 +00001536void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001537XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001538 if (parser == NULL)
1539 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001540 parser->m_defaultHandler = handler;
1541 parser->m_defaultExpandInternalEntities = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001542}
1543
Fred Drake08317ae2003-10-21 15:38:55 +00001544void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001545XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001546 if (parser == NULL)
1547 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001548 parser->m_defaultHandler = handler;
1549 parser->m_defaultExpandInternalEntities = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001550}
1551
Fred Drake08317ae2003-10-21 15:38:55 +00001552void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001553XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
1554 XML_EndDoctypeDeclHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001555 if (parser == NULL)
1556 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001557 parser->m_startDoctypeDeclHandler = start;
1558 parser->m_endDoctypeDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001559}
1560
Fred Drake08317ae2003-10-21 15:38:55 +00001561void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001562XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1563 XML_StartDoctypeDeclHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001564 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001565 parser->m_startDoctypeDeclHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001566}
1567
Fred Drake08317ae2003-10-21 15:38:55 +00001568void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001569XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001570 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001571 parser->m_endDoctypeDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001572}
1573
Fred Drake08317ae2003-10-21 15:38:55 +00001574void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001575XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
Benjamin Peterson52b94082019-09-25 21:33:58 -07001576 XML_UnparsedEntityDeclHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001577 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001578 parser->m_unparsedEntityDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001579}
1580
Fred Drake08317ae2003-10-21 15:38:55 +00001581void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001582XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001583 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001584 parser->m_notationDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001585}
1586
Fred Drake08317ae2003-10-21 15:38:55 +00001587void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001588XML_SetNamespaceDeclHandler(XML_Parser parser,
1589 XML_StartNamespaceDeclHandler start,
Benjamin Peterson52b94082019-09-25 21:33:58 -07001590 XML_EndNamespaceDeclHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001591 if (parser == NULL)
1592 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001593 parser->m_startNamespaceDeclHandler = start;
1594 parser->m_endNamespaceDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001595}
1596
Fred Drake08317ae2003-10-21 15:38:55 +00001597void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001598XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1599 XML_StartNamespaceDeclHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001600 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001601 parser->m_startNamespaceDeclHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001602}
1603
Fred Drake08317ae2003-10-21 15:38:55 +00001604void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001605XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1606 XML_EndNamespaceDeclHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001607 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001608 parser->m_endNamespaceDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001609}
1610
Fred Drake08317ae2003-10-21 15:38:55 +00001611void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001612XML_SetNotStandaloneHandler(XML_Parser parser,
Benjamin Peterson52b94082019-09-25 21:33:58 -07001613 XML_NotStandaloneHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001614 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001615 parser->m_notStandaloneHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001616}
1617
Fred Drake08317ae2003-10-21 15:38:55 +00001618void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001619XML_SetExternalEntityRefHandler(XML_Parser parser,
Benjamin Peterson52b94082019-09-25 21:33:58 -07001620 XML_ExternalEntityRefHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001621 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001622 parser->m_externalEntityRefHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001623}
1624
Fred Drake08317ae2003-10-21 15:38:55 +00001625void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001626XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001627 if (parser == NULL)
1628 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001629 if (arg)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001630 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001631 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001632 parser->m_externalEntityRefHandlerArg = parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001633}
1634
Fred Drake08317ae2003-10-21 15:38:55 +00001635void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001636XML_SetSkippedEntityHandler(XML_Parser parser,
Benjamin Peterson52b94082019-09-25 21:33:58 -07001637 XML_SkippedEntityHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001638 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001639 parser->m_skippedEntityHandler = handler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001640}
1641
Fred Drake08317ae2003-10-21 15:38:55 +00001642void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001643XML_SetUnknownEncodingHandler(XML_Parser parser,
Benjamin Peterson52b94082019-09-25 21:33:58 -07001644 XML_UnknownEncodingHandler handler, void *data) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001645 if (parser == NULL)
1646 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001647 parser->m_unknownEncodingHandler = handler;
1648 parser->m_unknownEncodingHandlerData = data;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001649}
1650
Fred Drake08317ae2003-10-21 15:38:55 +00001651void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001652XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001653 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001654 parser->m_elementDeclHandler = eldecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001655}
1656
Fred Drake08317ae2003-10-21 15:38:55 +00001657void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001658XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001659 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001660 parser->m_attlistDeclHandler = attdecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001661}
1662
Fred Drake08317ae2003-10-21 15:38:55 +00001663void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001664XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001665 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001666 parser->m_entityDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001667}
1668
Fred Drake08317ae2003-10-21 15:38:55 +00001669void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001670XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001671 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001672 parser->m_xmlDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001673}
1674
Fred Drake08317ae2003-10-21 15:38:55 +00001675int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001676XML_SetParamEntityParsing(XML_Parser parser,
Benjamin Peterson52b94082019-09-25 21:33:58 -07001677 enum XML_ParamEntityParsing peParsing) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001678 if (parser == NULL)
1679 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001680 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Benjamin Peterson52b94082019-09-25 21:33:58 -07001681 if (parser->m_parsingStatus.parsing == XML_PARSING
1682 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001683 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001684#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07001685 parser->m_paramEntityParsing = peParsing;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001686 return 1;
1687#else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001688 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001689#endif
1690}
1691
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001692int XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001693XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001694 if (parser == NULL)
1695 return 0;
1696 if (parser->m_parentParser)
1697 return XML_SetHashSalt(parser->m_parentParser, hash_salt);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001698 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Benjamin Peterson52b94082019-09-25 21:33:58 -07001699 if (parser->m_parsingStatus.parsing == XML_PARSING
1700 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001701 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001702 parser->m_hash_secret_salt = hash_salt;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001703 return 1;
1704}
1705
Fred Drake08317ae2003-10-21 15:38:55 +00001706enum XML_Status XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001707XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001708 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02001709 if (parser != NULL)
1710 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
Victor Stinner5ff71322017-06-21 14:39:22 +02001711 return XML_STATUS_ERROR;
1712 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001713 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001714 case XML_SUSPENDED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001715 parser->m_errorCode = XML_ERROR_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00001716 return XML_STATUS_ERROR;
1717 case XML_FINISHED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001718 parser->m_errorCode = XML_ERROR_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001719 return XML_STATUS_ERROR;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001720 case XML_INITIALIZED:
Benjamin Peterson52b94082019-09-25 21:33:58 -07001721 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001722 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001723 return XML_STATUS_ERROR;
1724 }
Benjamin Peterson5033aa72018-09-10 21:04:00 -07001725 /* fall through */
Fred Drake31d485c2004-08-03 07:06:22 +00001726 default:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001727 parser->m_parsingStatus.parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00001728 }
1729
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001730 if (len == 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001731 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
Benjamin Peterson52b94082019-09-25 21:33:58 -07001732 if (! isFinal)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001733 return XML_STATUS_OK;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001734 parser->m_positionPtr = parser->m_bufferPtr;
1735 parser->m_parseEndPtr = parser->m_bufferEnd;
Fred Drake31d485c2004-08-03 07:06:22 +00001736
1737 /* If data are left over from last buffer, and we now know that these
1738 data are the final chunk of input, then we have to check them again
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001739 to detect errors based on that fact.
Fred Drake31d485c2004-08-03 07:06:22 +00001740 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07001741 parser->m_errorCode
1742 = parser->m_processor(parser, parser->m_bufferPtr,
1743 parser->m_parseEndPtr, &parser->m_bufferPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00001744
Benjamin Peterson4e211002018-06-26 19:25:45 -07001745 if (parser->m_errorCode == XML_ERROR_NONE) {
1746 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001747 case XML_SUSPENDED:
Victor Stinner93d0cb52017-08-18 23:43:54 +02001748 /* It is hard to be certain, but it seems that this case
1749 * cannot occur. This code is cleaning up a previous parse
1750 * with no new data (since len == 0). Changing the parsing
1751 * state requires getting to execute a handler function, and
1752 * there doesn't seem to be an opportunity for that while in
1753 * this circumstance.
1754 *
1755 * Given the uncertainty, we retain the code but exclude it
1756 * from coverage tests.
1757 *
1758 * LCOV_EXCL_START
1759 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07001760 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
1761 parser->m_bufferPtr, &parser->m_position);
Benjamin Peterson4e211002018-06-26 19:25:45 -07001762 parser->m_positionPtr = parser->m_bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00001763 return XML_STATUS_SUSPENDED;
Victor Stinner93d0cb52017-08-18 23:43:54 +02001764 /* LCOV_EXCL_STOP */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001765 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00001766 case XML_PARSING:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001767 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001768 /* fall through */
1769 default:
1770 return XML_STATUS_OK;
1771 }
1772 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001773 parser->m_eventEndPtr = parser->m_eventPtr;
1774 parser->m_processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001775 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001776 }
1777#ifndef XML_CONTEXT_BYTES
Benjamin Peterson4e211002018-06-26 19:25:45 -07001778 else if (parser->m_bufferPtr == parser->m_bufferEnd) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001779 const char *end;
1780 int nLeftOver;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001781 enum XML_Status result;
Victor Stinner5ff71322017-06-21 14:39:22 +02001782 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001783 if (len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07001784 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1785 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1786 parser->m_processor = errorProcessor;
1787 return XML_STATUS_ERROR;
Victor Stinner5ff71322017-06-21 14:39:22 +02001788 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001789 parser->m_parseEndByteIndex += len;
1790 parser->m_positionPtr = s;
1791 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
Fred Drake31d485c2004-08-03 07:06:22 +00001792
Benjamin Peterson52b94082019-09-25 21:33:58 -07001793 parser->m_errorCode
1794 = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end);
Fred Drake31d485c2004-08-03 07:06:22 +00001795
Benjamin Peterson4e211002018-06-26 19:25:45 -07001796 if (parser->m_errorCode != XML_ERROR_NONE) {
1797 parser->m_eventEndPtr = parser->m_eventPtr;
1798 parser->m_processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001799 return XML_STATUS_ERROR;
Benjamin Peterson52b94082019-09-25 21:33:58 -07001800 } else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001801 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001802 case XML_SUSPENDED:
1803 result = XML_STATUS_SUSPENDED;
1804 break;
1805 case XML_INITIALIZED:
1806 case XML_PARSING:
Fred Drake31d485c2004-08-03 07:06:22 +00001807 if (isFinal) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001808 parser->m_parsingStatus.parsing = XML_FINISHED;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001809 return XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001810 }
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001811 /* fall through */
1812 default:
1813 result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001814 }
1815 }
1816
Benjamin Peterson52b94082019-09-25 21:33:58 -07001817 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
1818 &parser->m_position);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001819 nLeftOver = s + len - end;
1820 if (nLeftOver) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07001821 if (parser->m_buffer == NULL
1822 || nLeftOver > parser->m_bufferLim - parser->m_buffer) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001823 /* avoid _signed_ integer overflow */
1824 char *temp = NULL;
1825 const int bytesToAllocate = (int)((unsigned)len * 2U);
1826 if (bytesToAllocate > 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001827 temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate);
Victor Stinner5ff71322017-06-21 14:39:22 +02001828 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001829 if (temp == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001830 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1831 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1832 parser->m_processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001833 return XML_STATUS_ERROR;
1834 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001835 parser->m_buffer = temp;
1836 parser->m_bufferLim = parser->m_buffer + bytesToAllocate;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001837 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001838 memcpy(parser->m_buffer, end, nLeftOver);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001839 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001840 parser->m_bufferPtr = parser->m_buffer;
1841 parser->m_bufferEnd = parser->m_buffer + nLeftOver;
1842 parser->m_positionPtr = parser->m_bufferPtr;
1843 parser->m_parseEndPtr = parser->m_bufferEnd;
1844 parser->m_eventPtr = parser->m_bufferPtr;
1845 parser->m_eventEndPtr = parser->m_bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00001846 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001847 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07001848#endif /* not defined XML_CONTEXT_BYTES */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001849 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001850 void *buff = XML_GetBuffer(parser, len);
1851 if (buff == NULL)
1852 return XML_STATUS_ERROR;
1853 else {
1854 memcpy(buff, s, len);
1855 return XML_ParseBuffer(parser, len, isFinal);
1856 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001857 }
1858}
1859
Fred Drake08317ae2003-10-21 15:38:55 +00001860enum XML_Status XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001861XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
Fred Drake31d485c2004-08-03 07:06:22 +00001862 const char *start;
Neal Norwitz52ca0dd2006-01-07 21:21:16 +00001863 enum XML_Status result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001864
Victor Stinner5ff71322017-06-21 14:39:22 +02001865 if (parser == NULL)
1866 return XML_STATUS_ERROR;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001867 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001868 case XML_SUSPENDED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001869 parser->m_errorCode = XML_ERROR_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00001870 return XML_STATUS_ERROR;
1871 case XML_FINISHED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001872 parser->m_errorCode = XML_ERROR_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001873 return XML_STATUS_ERROR;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001874 case XML_INITIALIZED:
Benjamin Peterson52b94082019-09-25 21:33:58 -07001875 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001876 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001877 return XML_STATUS_ERROR;
1878 }
Benjamin Peterson5033aa72018-09-10 21:04:00 -07001879 /* fall through */
Fred Drake31d485c2004-08-03 07:06:22 +00001880 default:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001881 parser->m_parsingStatus.parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00001882 }
1883
Benjamin Peterson4e211002018-06-26 19:25:45 -07001884 start = parser->m_bufferPtr;
1885 parser->m_positionPtr = start;
1886 parser->m_bufferEnd += len;
1887 parser->m_parseEndPtr = parser->m_bufferEnd;
1888 parser->m_parseEndByteIndex += len;
1889 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
Fred Drake31d485c2004-08-03 07:06:22 +00001890
Benjamin Peterson52b94082019-09-25 21:33:58 -07001891 parser->m_errorCode = parser->m_processor(
1892 parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00001893
Benjamin Peterson4e211002018-06-26 19:25:45 -07001894 if (parser->m_errorCode != XML_ERROR_NONE) {
1895 parser->m_eventEndPtr = parser->m_eventPtr;
1896 parser->m_processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001897 return XML_STATUS_ERROR;
Benjamin Peterson52b94082019-09-25 21:33:58 -07001898 } else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001899 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001900 case XML_SUSPENDED:
1901 result = XML_STATUS_SUSPENDED;
1902 break;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001903 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00001904 case XML_PARSING:
1905 if (isFinal) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001906 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001907 return result;
1908 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07001909 default:; /* should not happen */
Fred Drake31d485c2004-08-03 07:06:22 +00001910 }
1911 }
1912
Benjamin Peterson52b94082019-09-25 21:33:58 -07001913 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
1914 parser->m_bufferPtr, &parser->m_position);
Benjamin Peterson4e211002018-06-26 19:25:45 -07001915 parser->m_positionPtr = parser->m_bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00001916 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001917}
1918
Benjamin Peterson52b94082019-09-25 21:33:58 -07001919void *XMLCALL
1920XML_GetBuffer(XML_Parser parser, int len) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001921 if (parser == NULL)
1922 return NULL;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001923 if (len < 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001924 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001925 return NULL;
1926 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001927 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001928 case XML_SUSPENDED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001929 parser->m_errorCode = XML_ERROR_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00001930 return NULL;
1931 case XML_FINISHED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001932 parser->m_errorCode = XML_ERROR_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001933 return NULL;
Benjamin Peterson52b94082019-09-25 21:33:58 -07001934 default:;
Fred Drake31d485c2004-08-03 07:06:22 +00001935 }
1936
Benjamin Peterson5033aa72018-09-10 21:04:00 -07001937 if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001938#ifdef XML_CONTEXT_BYTES
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001939 int keep;
Benjamin Peterson52b94082019-09-25 21:33:58 -07001940#endif /* defined XML_CONTEXT_BYTES */
Victor Stinner23ec4b52017-06-15 00:54:36 +02001941 /* Do not invoke signed arithmetic overflow: */
Benjamin Peterson52b94082019-09-25 21:33:58 -07001942 int neededSize = (int)((unsigned)len
1943 + (unsigned)EXPAT_SAFE_PTR_DIFF(
1944 parser->m_bufferEnd, parser->m_bufferPtr));
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001945 if (neededSize < 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001946 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001947 return NULL;
1948 }
1949#ifdef XML_CONTEXT_BYTES
Benjamin Peterson5033aa72018-09-10 21:04:00 -07001950 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001951 if (keep > XML_CONTEXT_BYTES)
1952 keep = XML_CONTEXT_BYTES;
1953 neededSize += keep;
Benjamin Peterson52b94082019-09-25 21:33:58 -07001954#endif /* defined XML_CONTEXT_BYTES */
1955 if (neededSize
1956 <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001957#ifdef XML_CONTEXT_BYTES
Benjamin Peterson5033aa72018-09-10 21:04:00 -07001958 if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07001959 int offset
1960 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
1961 - keep;
1962 /* The buffer pointers cannot be NULL here; we have at least some bytes
1963 * in the buffer */
1964 memmove(parser->m_buffer, &parser->m_buffer[offset],
1965 parser->m_bufferEnd - parser->m_bufferPtr + keep);
Benjamin Peterson4e211002018-06-26 19:25:45 -07001966 parser->m_bufferEnd -= offset;
1967 parser->m_bufferPtr -= offset;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001968 }
1969#else
Benjamin Peterson5033aa72018-09-10 21:04:00 -07001970 if (parser->m_buffer && parser->m_bufferPtr) {
1971 memmove(parser->m_buffer, parser->m_bufferPtr,
1972 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
Benjamin Peterson52b94082019-09-25 21:33:58 -07001973 parser->m_bufferEnd
1974 = parser->m_buffer
1975 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
Benjamin Peterson5033aa72018-09-10 21:04:00 -07001976 parser->m_bufferPtr = parser->m_buffer;
1977 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07001978#endif /* not defined XML_CONTEXT_BYTES */
1979 } else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001980 char *newBuf;
Benjamin Peterson52b94082019-09-25 21:33:58 -07001981 int bufferSize
1982 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001983 if (bufferSize == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001984 bufferSize = INIT_BUFFER_SIZE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001985 do {
Victor Stinner23ec4b52017-06-15 00:54:36 +02001986 /* Do not invoke signed arithmetic overflow: */
Benjamin Peterson52b94082019-09-25 21:33:58 -07001987 bufferSize = (int)(2U * (unsigned)bufferSize);
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001988 } while (bufferSize < neededSize && bufferSize > 0);
1989 if (bufferSize <= 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001990 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001991 return NULL;
1992 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001993 newBuf = (char *)MALLOC(parser, bufferSize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001994 if (newBuf == 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001995 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001996 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001997 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001998 parser->m_bufferLim = newBuf + bufferSize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001999#ifdef XML_CONTEXT_BYTES
Benjamin Peterson4e211002018-06-26 19:25:45 -07002000 if (parser->m_bufferPtr) {
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002001 memcpy(newBuf, &parser->m_bufferPtr[-keep],
Benjamin Peterson52b94082019-09-25 21:33:58 -07002002 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2003 + keep);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002004 FREE(parser, parser->m_buffer);
2005 parser->m_buffer = newBuf;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002006 parser->m_bufferEnd
2007 = parser->m_buffer
2008 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2009 + keep;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002010 parser->m_bufferPtr = parser->m_buffer + keep;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002011 } else {
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002012 /* This must be a brand new buffer with no data in it yet */
2013 parser->m_bufferEnd = newBuf;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002014 parser->m_bufferPtr = parser->m_buffer = newBuf;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002015 }
2016#else
Benjamin Peterson4e211002018-06-26 19:25:45 -07002017 if (parser->m_bufferPtr) {
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002018 memcpy(newBuf, parser->m_bufferPtr,
2019 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
Benjamin Peterson4e211002018-06-26 19:25:45 -07002020 FREE(parser, parser->m_buffer);
Benjamin Peterson52b94082019-09-25 21:33:58 -07002021 parser->m_bufferEnd
2022 = newBuf
2023 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2024 } else {
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002025 /* This must be a brand new buffer with no data in it yet */
2026 parser->m_bufferEnd = newBuf;
2027 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002028 parser->m_bufferPtr = parser->m_buffer = newBuf;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002029#endif /* not defined XML_CONTEXT_BYTES */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002030 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002031 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2032 parser->m_positionPtr = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002033 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002034 return parser->m_bufferEnd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002035}
2036
Fred Drake31d485c2004-08-03 07:06:22 +00002037enum XML_Status XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002038XML_StopParser(XML_Parser parser, XML_Bool resumable) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002039 if (parser == NULL)
2040 return XML_STATUS_ERROR;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002041 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002042 case XML_SUSPENDED:
2043 if (resumable) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002044 parser->m_errorCode = XML_ERROR_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00002045 return XML_STATUS_ERROR;
2046 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002047 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002048 break;
2049 case XML_FINISHED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002050 parser->m_errorCode = XML_ERROR_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002051 return XML_STATUS_ERROR;
2052 default:
2053 if (resumable) {
2054#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07002055 if (parser->m_isParamEntity) {
2056 parser->m_errorCode = XML_ERROR_SUSPEND_PE;
Fred Drake31d485c2004-08-03 07:06:22 +00002057 return XML_STATUS_ERROR;
2058 }
2059#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07002060 parser->m_parsingStatus.parsing = XML_SUSPENDED;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002061 } else
Benjamin Peterson4e211002018-06-26 19:25:45 -07002062 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002063 }
2064 return XML_STATUS_OK;
2065}
2066
2067enum XML_Status XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002068XML_ResumeParser(XML_Parser parser) {
Neal Norwitz52ca0dd2006-01-07 21:21:16 +00002069 enum XML_Status result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00002070
Victor Stinner5ff71322017-06-21 14:39:22 +02002071 if (parser == NULL)
2072 return XML_STATUS_ERROR;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002073 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2074 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00002075 return XML_STATUS_ERROR;
2076 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002077 parser->m_parsingStatus.parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00002078
Benjamin Peterson52b94082019-09-25 21:33:58 -07002079 parser->m_errorCode = parser->m_processor(
2080 parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00002081
Benjamin Peterson4e211002018-06-26 19:25:45 -07002082 if (parser->m_errorCode != XML_ERROR_NONE) {
2083 parser->m_eventEndPtr = parser->m_eventPtr;
2084 parser->m_processor = errorProcessor;
Fred Drake31d485c2004-08-03 07:06:22 +00002085 return XML_STATUS_ERROR;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002086 } else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002087 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002088 case XML_SUSPENDED:
2089 result = XML_STATUS_SUSPENDED;
2090 break;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002091 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00002092 case XML_PARSING:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002093 if (parser->m_parsingStatus.finalBuffer) {
2094 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002095 return result;
2096 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07002097 default:;
Fred Drake31d485c2004-08-03 07:06:22 +00002098 }
2099 }
2100
Benjamin Peterson52b94082019-09-25 21:33:58 -07002101 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2102 parser->m_bufferPtr, &parser->m_position);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002103 parser->m_positionPtr = parser->m_bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00002104 return result;
2105}
2106
2107void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002108XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002109 if (parser == NULL)
2110 return;
Fred Drake31d485c2004-08-03 07:06:22 +00002111 assert(status != NULL);
2112 *status = parser->m_parsingStatus;
2113}
2114
Fred Drake08317ae2003-10-21 15:38:55 +00002115enum XML_Error XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002116XML_GetErrorCode(XML_Parser parser) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002117 if (parser == NULL)
2118 return XML_ERROR_INVALID_ARGUMENT;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002119 return parser->m_errorCode;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002120}
2121
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002122XML_Index XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002123XML_GetCurrentByteIndex(XML_Parser parser) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002124 if (parser == NULL)
2125 return -1;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002126 if (parser->m_eventPtr)
Benjamin Peterson52b94082019-09-25 21:33:58 -07002127 return (XML_Index)(parser->m_parseEndByteIndex
2128 - (parser->m_parseEndPtr - parser->m_eventPtr));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002129 return -1;
2130}
2131
Fred Drake08317ae2003-10-21 15:38:55 +00002132int XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002133XML_GetCurrentByteCount(XML_Parser parser) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002134 if (parser == NULL)
2135 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002136 if (parser->m_eventEndPtr && parser->m_eventPtr)
2137 return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002138 return 0;
2139}
2140
Benjamin Peterson52b94082019-09-25 21:33:58 -07002141const char *XMLCALL
2142XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002143#ifdef XML_CONTEXT_BYTES
Victor Stinner5ff71322017-06-21 14:39:22 +02002144 if (parser == NULL)
2145 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002146 if (parser->m_eventPtr && parser->m_buffer) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002147 if (offset != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07002148 *offset = (int)(parser->m_eventPtr - parser->m_buffer);
Victor Stinner5ff71322017-06-21 14:39:22 +02002149 if (size != NULL)
Benjamin Peterson52b94082019-09-25 21:33:58 -07002150 *size = (int)(parser->m_bufferEnd - parser->m_buffer);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002151 return parser->m_buffer;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002152 }
Victor Stinner5ff71322017-06-21 14:39:22 +02002153#else
2154 (void)parser;
2155 (void)offset;
2156 (void)size;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002157#endif /* defined XML_CONTEXT_BYTES */
Benjamin Peterson52b94082019-09-25 21:33:58 -07002158 return (char *)0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002159}
2160
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002161XML_Size XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002162XML_GetCurrentLineNumber(XML_Parser parser) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002163 if (parser == NULL)
2164 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002165 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07002166 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2167 parser->m_eventPtr, &parser->m_position);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002168 parser->m_positionPtr = parser->m_eventPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002169 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002170 return parser->m_position.lineNumber + 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002171}
2172
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002173XML_Size XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002174XML_GetCurrentColumnNumber(XML_Parser parser) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002175 if (parser == NULL)
2176 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002177 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07002178 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2179 parser->m_eventPtr, &parser->m_position);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002180 parser->m_positionPtr = parser->m_eventPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002181 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002182 return parser->m_position.columnNumber;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002183}
2184
Fred Drake08317ae2003-10-21 15:38:55 +00002185void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002186XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002187 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07002188 FREE(parser, model);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002189}
2190
Benjamin Peterson52b94082019-09-25 21:33:58 -07002191void *XMLCALL
2192XML_MemMalloc(XML_Parser parser, size_t size) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002193 if (parser == NULL)
2194 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002195 return MALLOC(parser, size);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002196}
2197
Benjamin Peterson52b94082019-09-25 21:33:58 -07002198void *XMLCALL
2199XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002200 if (parser == NULL)
2201 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002202 return REALLOC(parser, ptr, size);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002203}
2204
Fred Drake08317ae2003-10-21 15:38:55 +00002205void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002206XML_MemFree(XML_Parser parser, void *ptr) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002207 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07002208 FREE(parser, ptr);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002209}
2210
Fred Drake08317ae2003-10-21 15:38:55 +00002211void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002212XML_DefaultCurrent(XML_Parser parser) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002213 if (parser == NULL)
2214 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002215 if (parser->m_defaultHandler) {
2216 if (parser->m_openInternalEntities)
Benjamin Peterson52b94082019-09-25 21:33:58 -07002217 reportDefault(parser, parser->m_internalEncoding,
Benjamin Peterson4e211002018-06-26 19:25:45 -07002218 parser->m_openInternalEntities->internalEventPtr,
2219 parser->m_openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002220 else
Benjamin Peterson52b94082019-09-25 21:33:58 -07002221 reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2222 parser->m_eventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002223 }
2224}
2225
Benjamin Peterson52b94082019-09-25 21:33:58 -07002226const XML_LChar *XMLCALL
2227XML_ErrorString(enum XML_Error code) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002228 switch (code) {
2229 case XML_ERROR_NONE:
2230 return NULL;
2231 case XML_ERROR_NO_MEMORY:
2232 return XML_L("out of memory");
2233 case XML_ERROR_SYNTAX:
2234 return XML_L("syntax error");
2235 case XML_ERROR_NO_ELEMENTS:
2236 return XML_L("no element found");
2237 case XML_ERROR_INVALID_TOKEN:
2238 return XML_L("not well-formed (invalid token)");
2239 case XML_ERROR_UNCLOSED_TOKEN:
2240 return XML_L("unclosed token");
2241 case XML_ERROR_PARTIAL_CHAR:
2242 return XML_L("partial character");
2243 case XML_ERROR_TAG_MISMATCH:
2244 return XML_L("mismatched tag");
2245 case XML_ERROR_DUPLICATE_ATTRIBUTE:
2246 return XML_L("duplicate attribute");
2247 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2248 return XML_L("junk after document element");
2249 case XML_ERROR_PARAM_ENTITY_REF:
2250 return XML_L("illegal parameter entity reference");
2251 case XML_ERROR_UNDEFINED_ENTITY:
2252 return XML_L("undefined entity");
2253 case XML_ERROR_RECURSIVE_ENTITY_REF:
2254 return XML_L("recursive entity reference");
2255 case XML_ERROR_ASYNC_ENTITY:
2256 return XML_L("asynchronous entity");
2257 case XML_ERROR_BAD_CHAR_REF:
2258 return XML_L("reference to invalid character number");
2259 case XML_ERROR_BINARY_ENTITY_REF:
2260 return XML_L("reference to binary entity");
2261 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2262 return XML_L("reference to external entity in attribute");
2263 case XML_ERROR_MISPLACED_XML_PI:
2264 return XML_L("XML or text declaration not at start of entity");
2265 case XML_ERROR_UNKNOWN_ENCODING:
2266 return XML_L("unknown encoding");
2267 case XML_ERROR_INCORRECT_ENCODING:
2268 return XML_L("encoding specified in XML declaration is incorrect");
2269 case XML_ERROR_UNCLOSED_CDATA_SECTION:
2270 return XML_L("unclosed CDATA section");
2271 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2272 return XML_L("error in processing external entity reference");
2273 case XML_ERROR_NOT_STANDALONE:
2274 return XML_L("document is not standalone");
2275 case XML_ERROR_UNEXPECTED_STATE:
2276 return XML_L("unexpected parser state - please send a bug report");
2277 case XML_ERROR_ENTITY_DECLARED_IN_PE:
2278 return XML_L("entity declared in parameter entity");
2279 case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2280 return XML_L("requested feature requires XML_DTD support in Expat");
2281 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2282 return XML_L("cannot change setting once parsing has begun");
2283 /* Added in 1.95.7. */
2284 case XML_ERROR_UNBOUND_PREFIX:
2285 return XML_L("unbound prefix");
2286 /* Added in 1.95.8. */
2287 case XML_ERROR_UNDECLARING_PREFIX:
2288 return XML_L("must not undeclare prefix");
2289 case XML_ERROR_INCOMPLETE_PE:
2290 return XML_L("incomplete markup in parameter entity");
2291 case XML_ERROR_XML_DECL:
2292 return XML_L("XML declaration not well-formed");
2293 case XML_ERROR_TEXT_DECL:
2294 return XML_L("text declaration not well-formed");
2295 case XML_ERROR_PUBLICID:
2296 return XML_L("illegal character(s) in public id");
2297 case XML_ERROR_SUSPENDED:
2298 return XML_L("parser suspended");
2299 case XML_ERROR_NOT_SUSPENDED:
2300 return XML_L("parser not suspended");
2301 case XML_ERROR_ABORTED:
2302 return XML_L("parsing aborted");
2303 case XML_ERROR_FINISHED:
2304 return XML_L("parsing finished");
2305 case XML_ERROR_SUSPEND_PE:
2306 return XML_L("cannot suspend in external parameter entity");
2307 /* Added in 2.0.0. */
2308 case XML_ERROR_RESERVED_PREFIX_XML:
Benjamin Peterson52b94082019-09-25 21:33:58 -07002309 return XML_L(
2310 "reserved prefix (xml) must not be undeclared or bound to another namespace name");
Benjamin Peterson4e211002018-06-26 19:25:45 -07002311 case XML_ERROR_RESERVED_PREFIX_XMLNS:
2312 return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2313 case XML_ERROR_RESERVED_NAMESPACE_URI:
Benjamin Peterson52b94082019-09-25 21:33:58 -07002314 return XML_L(
2315 "prefix must not be bound to one of the reserved namespace names");
Benjamin Peterson4e211002018-06-26 19:25:45 -07002316 /* Added in 2.2.5. */
Benjamin Peterson52b94082019-09-25 21:33:58 -07002317 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002318 return XML_L("invalid argument");
2319 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002320 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002321}
2322
Benjamin Peterson52b94082019-09-25 21:33:58 -07002323const XML_LChar *XMLCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002324XML_ExpatVersion(void) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002325 /* V1 is used to string-ize the version number. However, it would
2326 string-ize the actual version macro *names* unless we get them
2327 substituted before being passed to V1. CPP is defined to expand
2328 a macro, then rescan for more expansions. Thus, we use V2 to expand
2329 the version macros, then CPP will expand the resulting V1() macro
2330 with the correct numerals. */
2331 /* ### I'm assuming cpp is portable in this respect... */
2332
Benjamin Peterson52b94082019-09-25 21:33:58 -07002333#define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2334#define V2(a, b, c) XML_L("expat_") V1(a, b, c)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002335
2336 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2337
2338#undef V1
2339#undef V2
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002340}
2341
Fred Drake08317ae2003-10-21 15:38:55 +00002342XML_Expat_Version XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002343XML_ExpatVersionInfo(void) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002344 XML_Expat_Version version;
2345
2346 version.major = XML_MAJOR_VERSION;
2347 version.minor = XML_MINOR_VERSION;
2348 version.micro = XML_MICRO_VERSION;
2349
2350 return version;
2351}
2352
Benjamin Peterson52b94082019-09-25 21:33:58 -07002353const XML_Feature *XMLCALL
2354XML_GetFeatureList(void) {
2355 static const XML_Feature features[]
2356 = {{XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2357 sizeof(XML_Char)},
2358 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2359 sizeof(XML_LChar)},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002360#ifdef XML_UNICODE
Benjamin Peterson52b94082019-09-25 21:33:58 -07002361 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002362#endif
2363#ifdef XML_UNICODE_WCHAR_T
Benjamin Peterson52b94082019-09-25 21:33:58 -07002364 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002365#endif
2366#ifdef XML_DTD
Benjamin Peterson52b94082019-09-25 21:33:58 -07002367 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002368#endif
2369#ifdef XML_CONTEXT_BYTES
Benjamin Peterson52b94082019-09-25 21:33:58 -07002370 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2371 XML_CONTEXT_BYTES},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002372#endif
2373#ifdef XML_MIN_SIZE
Benjamin Peterson52b94082019-09-25 21:33:58 -07002374 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002375#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002376#ifdef XML_NS
Benjamin Peterson52b94082019-09-25 21:33:58 -07002377 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002378#endif
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002379#ifdef XML_LARGE_SIZE
Benjamin Peterson52b94082019-09-25 21:33:58 -07002380 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002381#endif
2382#ifdef XML_ATTR_INFO
Benjamin Peterson52b94082019-09-25 21:33:58 -07002383 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002384#endif
Benjamin Peterson52b94082019-09-25 21:33:58 -07002385 {XML_FEATURE_END, NULL, 0}};
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002386
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002387 return features;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002388}
2389
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002390/* Initially tag->rawName always points into the parse buffer;
2391 for those TAG instances opened while the current parse buffer was
2392 processed, and not yet closed, we need to store tag->rawName in a more
2393 permanent location, since the parse buffer is about to be discarded.
2394*/
2395static XML_Bool
Benjamin Peterson52b94082019-09-25 21:33:58 -07002396storeRawNames(XML_Parser parser) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002397 TAG *tag = parser->m_tagStack;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002398 while (tag) {
2399 int bufSize;
2400 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2401 char *rawNameBuf = tag->buf + nameLen;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002402 /* Stop if already stored. Since m_tagStack is a stack, we can stop
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002403 at the first entry that has already been copied; everything
2404 below it in the stack is already been accounted for in a
2405 previous call to this function.
2406 */
2407 if (tag->rawName == rawNameBuf)
2408 break;
2409 /* For re-use purposes we need to ensure that the
2410 size of tag->buf is a multiple of sizeof(XML_Char).
2411 */
2412 bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2413 if (bufSize > tag->bufEnd - tag->buf) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002414 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002415 if (temp == NULL)
2416 return XML_FALSE;
2417 /* if tag->name.str points to tag->buf (only when namespace
2418 processing is off) then we have to update it
2419 */
2420 if (tag->name.str == (XML_Char *)tag->buf)
2421 tag->name.str = (XML_Char *)temp;
2422 /* if tag->name.localPart is set (when namespace processing is on)
2423 then update it as well, since it will always point into tag->buf
2424 */
2425 if (tag->name.localPart)
Benjamin Peterson52b94082019-09-25 21:33:58 -07002426 tag->name.localPart
2427 = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002428 tag->buf = temp;
2429 tag->bufEnd = temp + bufSize;
2430 rawNameBuf = temp + nameLen;
2431 }
2432 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2433 tag->rawName = rawNameBuf;
2434 tag = tag->parent;
2435 }
2436 return XML_TRUE;
2437}
2438
2439static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002440contentProcessor(XML_Parser parser, const char *start, const char *end,
2441 const char **endPtr) {
2442 enum XML_Error result
2443 = doContent(parser, 0, parser->m_encoding, start, end, endPtr,
2444 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00002445 if (result == XML_ERROR_NONE) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07002446 if (! storeRawNames(parser))
Fred Drake31d485c2004-08-03 07:06:22 +00002447 return XML_ERROR_NO_MEMORY;
2448 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002449 return result;
2450}
2451
2452static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002453externalEntityInitProcessor(XML_Parser parser, const char *start,
2454 const char *end, const char **endPtr) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002455 enum XML_Error result = initializeEncoding(parser);
2456 if (result != XML_ERROR_NONE)
2457 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002458 parser->m_processor = externalEntityInitProcessor2;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002459 return externalEntityInitProcessor2(parser, start, end, endPtr);
2460}
2461
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002462static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002463externalEntityInitProcessor2(XML_Parser parser, const char *start,
2464 const char *end, const char **endPtr) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002465 const char *next = start; /* XmlContentTok doesn't always set the last arg */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002466 int tok = XmlContentTok(parser->m_encoding, start, end, &next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002467 switch (tok) {
2468 case XML_TOK_BOM:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002469 /* If we are at the end of the buffer, this would cause the next stage,
2470 i.e. externalEntityInitProcessor3, to pass control directly to
2471 doContent (by detecting XML_TOK_NONE) without processing any xml text
2472 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2473 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07002474 if (next == end && ! parser->m_parsingStatus.finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002475 *endPtr = next;
2476 return XML_ERROR_NONE;
2477 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002478 start = next;
2479 break;
2480 case XML_TOK_PARTIAL:
Benjamin Peterson52b94082019-09-25 21:33:58 -07002481 if (! parser->m_parsingStatus.finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002482 *endPtr = start;
2483 return XML_ERROR_NONE;
2484 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002485 parser->m_eventPtr = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002486 return XML_ERROR_UNCLOSED_TOKEN;
2487 case XML_TOK_PARTIAL_CHAR:
Benjamin Peterson52b94082019-09-25 21:33:58 -07002488 if (! parser->m_parsingStatus.finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002489 *endPtr = start;
2490 return XML_ERROR_NONE;
2491 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002492 parser->m_eventPtr = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002493 return XML_ERROR_PARTIAL_CHAR;
2494 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002495 parser->m_processor = externalEntityInitProcessor3;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002496 return externalEntityInitProcessor3(parser, start, end, endPtr);
2497}
2498
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002499static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002500externalEntityInitProcessor3(XML_Parser parser, const char *start,
2501 const char *end, const char **endPtr) {
Fred Drake31d485c2004-08-03 07:06:22 +00002502 int tok;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002503 const char *next = start; /* XmlContentTok doesn't always set the last arg */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002504 parser->m_eventPtr = start;
2505 tok = XmlContentTok(parser->m_encoding, start, end, &next);
2506 parser->m_eventEndPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00002507
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002508 switch (tok) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07002509 case XML_TOK_XML_DECL: {
2510 enum XML_Error result;
2511 result = processXmlDecl(parser, 1, start, next);
2512 if (result != XML_ERROR_NONE)
2513 return result;
2514 switch (parser->m_parsingStatus.parsing) {
2515 case XML_SUSPENDED:
2516 *endPtr = next;
2517 return XML_ERROR_NONE;
2518 case XML_FINISHED:
2519 return XML_ERROR_ABORTED;
2520 default:
2521 start = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002522 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07002523 } break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002524 case XML_TOK_PARTIAL:
Benjamin Peterson52b94082019-09-25 21:33:58 -07002525 if (! parser->m_parsingStatus.finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002526 *endPtr = start;
2527 return XML_ERROR_NONE;
2528 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002529 return XML_ERROR_UNCLOSED_TOKEN;
2530 case XML_TOK_PARTIAL_CHAR:
Benjamin Peterson52b94082019-09-25 21:33:58 -07002531 if (! parser->m_parsingStatus.finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002532 *endPtr = start;
2533 return XML_ERROR_NONE;
2534 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002535 return XML_ERROR_PARTIAL_CHAR;
2536 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002537 parser->m_processor = externalEntityContentProcessor;
2538 parser->m_tagLevel = 1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002539 return externalEntityContentProcessor(parser, start, end, endPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002540}
2541
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002542static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002543externalEntityContentProcessor(XML_Parser parser, const char *start,
2544 const char *end, const char **endPtr) {
2545 enum XML_Error result
2546 = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
2547 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00002548 if (result == XML_ERROR_NONE) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07002549 if (! storeRawNames(parser))
Fred Drake31d485c2004-08-03 07:06:22 +00002550 return XML_ERROR_NO_MEMORY;
2551 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002552 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002553}
2554
2555static enum XML_Error
Benjamin Peterson52b94082019-09-25 21:33:58 -07002556doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
2557 const char *s, const char *end, const char **nextPtr,
2558 XML_Bool haveMore) {
Fred Drake31d485c2004-08-03 07:06:22 +00002559 /* save one level of indirection */
Benjamin Peterson52b94082019-09-25 21:33:58 -07002560 DTD *const dtd = parser->m_dtd;
Fred Drake31d485c2004-08-03 07:06:22 +00002561
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002562 const char **eventPP;
2563 const char **eventEndPP;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002564 if (enc == parser->m_encoding) {
2565 eventPP = &parser->m_eventPtr;
2566 eventEndPP = &parser->m_eventEndPtr;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002567 } else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002568 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
2569 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002570 }
2571 *eventPP = s;
Fred Drake31d485c2004-08-03 07:06:22 +00002572
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002573 for (;;) {
2574 const char *next = s; /* XmlContentTok doesn't always set the last arg */
2575 int tok = XmlContentTok(enc, s, end, &next);
2576 *eventEndPP = next;
2577 switch (tok) {
2578 case XML_TOK_TRAILING_CR:
Fred Drake31d485c2004-08-03 07:06:22 +00002579 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002580 *nextPtr = s;
2581 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002582 }
2583 *eventEndPP = end;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002584 if (parser->m_characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002585 XML_Char c = 0xA;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002586 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
Benjamin Peterson52b94082019-09-25 21:33:58 -07002587 } else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002588 reportDefault(parser, enc, s, end);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002589 /* We are at the end of the final buffer, should we check for
2590 XML_SUSPENDED, XML_FINISHED?
Fred Drake31d485c2004-08-03 07:06:22 +00002591 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002592 if (startTagLevel == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002593 return XML_ERROR_NO_ELEMENTS;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002594 if (parser->m_tagLevel != startTagLevel)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002595 return XML_ERROR_ASYNC_ENTITY;
Fred Drake31d485c2004-08-03 07:06:22 +00002596 *nextPtr = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002597 return XML_ERROR_NONE;
2598 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00002599 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002600 *nextPtr = s;
2601 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002602 }
2603 if (startTagLevel > 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002604 if (parser->m_tagLevel != startTagLevel)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002605 return XML_ERROR_ASYNC_ENTITY;
Fred Drake31d485c2004-08-03 07:06:22 +00002606 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002607 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002608 }
2609 return XML_ERROR_NO_ELEMENTS;
2610 case XML_TOK_INVALID:
2611 *eventPP = next;
2612 return XML_ERROR_INVALID_TOKEN;
2613 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00002614 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002615 *nextPtr = s;
2616 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002617 }
2618 return XML_ERROR_UNCLOSED_TOKEN;
2619 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00002620 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002621 *nextPtr = s;
2622 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002623 }
2624 return XML_ERROR_PARTIAL_CHAR;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002625 case XML_TOK_ENTITY_REF: {
2626 const XML_Char *name;
2627 ENTITY *entity;
2628 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
2629 enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
2630 if (ch) {
2631 if (parser->m_characterDataHandler)
2632 parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002633 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002634 reportDefault(parser, enc, s, next);
2635 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002636 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07002637 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
2638 next - enc->minBytesPerChar);
2639 if (! name)
2640 return XML_ERROR_NO_MEMORY;
2641 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
2642 poolDiscard(&dtd->pool);
2643 /* First, determine if a check for an existing declaration is needed;
2644 if yes, check that the entity exists, and that it is internal,
2645 otherwise call the skipped entity or default handler.
2646 */
2647 if (! dtd->hasParamEntityRefs || dtd->standalone) {
2648 if (! entity)
2649 return XML_ERROR_UNDEFINED_ENTITY;
2650 else if (! entity->is_internal)
2651 return XML_ERROR_ENTITY_DECLARED_IN_PE;
2652 } else if (! entity) {
2653 if (parser->m_skippedEntityHandler)
2654 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
2655 else if (parser->m_defaultHandler)
2656 reportDefault(parser, enc, s, next);
2657 break;
2658 }
2659 if (entity->open)
2660 return XML_ERROR_RECURSIVE_ENTITY_REF;
2661 if (entity->notation)
2662 return XML_ERROR_BINARY_ENTITY_REF;
2663 if (entity->textPtr) {
2664 enum XML_Error result;
2665 if (! parser->m_defaultExpandInternalEntities) {
2666 if (parser->m_skippedEntityHandler)
2667 parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
2668 0);
2669 else if (parser->m_defaultHandler)
2670 reportDefault(parser, enc, s, next);
2671 break;
2672 }
2673 result = processInternalEntity(parser, entity, XML_FALSE);
2674 if (result != XML_ERROR_NONE)
2675 return result;
2676 } else if (parser->m_externalEntityRefHandler) {
2677 const XML_Char *context;
2678 entity->open = XML_TRUE;
2679 context = getContext(parser);
2680 entity->open = XML_FALSE;
2681 if (! context)
2682 return XML_ERROR_NO_MEMORY;
2683 if (! parser->m_externalEntityRefHandler(
2684 parser->m_externalEntityRefHandlerArg, context, entity->base,
2685 entity->systemId, entity->publicId))
2686 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2687 poolDiscard(&parser->m_tempPool);
2688 } else if (parser->m_defaultHandler)
2689 reportDefault(parser, enc, s, next);
2690 break;
2691 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002692 case XML_TOK_START_TAG_NO_ATTS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002693 /* fall through */
Benjamin Peterson52b94082019-09-25 21:33:58 -07002694 case XML_TOK_START_TAG_WITH_ATTS: {
2695 TAG *tag;
2696 enum XML_Error result;
2697 XML_Char *toPtr;
2698 if (parser->m_freeTagList) {
2699 tag = parser->m_freeTagList;
2700 parser->m_freeTagList = parser->m_freeTagList->parent;
2701 } else {
2702 tag = (TAG *)MALLOC(parser, sizeof(TAG));
2703 if (! tag)
2704 return XML_ERROR_NO_MEMORY;
2705 tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
2706 if (! tag->buf) {
2707 FREE(parser, tag);
2708 return XML_ERROR_NO_MEMORY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002709 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07002710 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002711 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07002712 tag->bindings = NULL;
2713 tag->parent = parser->m_tagStack;
2714 parser->m_tagStack = tag;
2715 tag->name.localPart = NULL;
2716 tag->name.prefix = NULL;
2717 tag->rawName = s + enc->minBytesPerChar;
2718 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
2719 ++parser->m_tagLevel;
2720 {
2721 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
2722 const char *fromPtr = tag->rawName;
2723 toPtr = (XML_Char *)tag->buf;
2724 for (;;) {
2725 int bufSize;
2726 int convLen;
2727 const enum XML_Convert_Result convert_res
2728 = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
2729 (ICHAR *)tag->bufEnd - 1);
2730 convLen = (int)(toPtr - (XML_Char *)tag->buf);
2731 if ((fromPtr >= rawNameEnd)
2732 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
2733 tag->name.strLen = convLen;
2734 break;
2735 }
2736 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
2737 {
2738 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2739 if (temp == NULL)
2740 return XML_ERROR_NO_MEMORY;
2741 tag->buf = temp;
2742 tag->bufEnd = temp + bufSize;
2743 toPtr = (XML_Char *)temp + convLen;
2744 }
2745 }
2746 }
2747 tag->name.str = (XML_Char *)tag->buf;
2748 *toPtr = XML_T('\0');
2749 result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
2750 if (result)
2751 return result;
2752 if (parser->m_startElementHandler)
2753 parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
2754 (const XML_Char **)parser->m_atts);
2755 else if (parser->m_defaultHandler)
2756 reportDefault(parser, enc, s, next);
2757 poolClear(&parser->m_tempPool);
2758 break;
2759 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002760 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002761 /* fall through */
Benjamin Peterson52b94082019-09-25 21:33:58 -07002762 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
2763 const char *rawName = s + enc->minBytesPerChar;
2764 enum XML_Error result;
2765 BINDING *bindings = NULL;
2766 XML_Bool noElmHandlers = XML_TRUE;
2767 TAG_NAME name;
2768 name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
2769 rawName + XmlNameLength(enc, rawName));
2770 if (! name.str)
2771 return XML_ERROR_NO_MEMORY;
2772 poolFinish(&parser->m_tempPool);
2773 result = storeAtts(parser, enc, s, &name, &bindings);
2774 if (result != XML_ERROR_NONE) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002775 freeBindings(parser, bindings);
Benjamin Peterson52b94082019-09-25 21:33:58 -07002776 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002777 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07002778 poolFinish(&parser->m_tempPool);
2779 if (parser->m_startElementHandler) {
2780 parser->m_startElementHandler(parser->m_handlerArg, name.str,
2781 (const XML_Char **)parser->m_atts);
2782 noElmHandlers = XML_FALSE;
2783 }
2784 if (parser->m_endElementHandler) {
2785 if (parser->m_startElementHandler)
2786 *eventPP = *eventEndPP;
2787 parser->m_endElementHandler(parser->m_handlerArg, name.str);
2788 noElmHandlers = XML_FALSE;
2789 }
2790 if (noElmHandlers && parser->m_defaultHandler)
2791 reportDefault(parser, enc, s, next);
2792 poolClear(&parser->m_tempPool);
2793 freeBindings(parser, bindings);
2794 }
2795 if ((parser->m_tagLevel == 0)
2796 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002797 if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
2798 parser->m_processor = epilogProcessor;
2799 else
2800 return epilogProcessor(parser, next, end, nextPtr);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002801 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002802 break;
2803 case XML_TOK_END_TAG:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002804 if (parser->m_tagLevel == startTagLevel)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002805 return XML_ERROR_ASYNC_ENTITY;
2806 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002807 int len;
2808 const char *rawName;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002809 TAG *tag = parser->m_tagStack;
2810 parser->m_tagStack = tag->parent;
2811 tag->parent = parser->m_freeTagList;
2812 parser->m_freeTagList = tag;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002813 rawName = s + enc->minBytesPerChar * 2;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002814 len = XmlNameLength(enc, rawName);
2815 if (len != tag->rawNameLength
2816 || memcmp(tag->rawName, rawName, len) != 0) {
2817 *eventPP = rawName;
2818 return XML_ERROR_TAG_MISMATCH;
2819 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002820 --parser->m_tagLevel;
2821 if (parser->m_endElementHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002822 const XML_Char *localPart;
2823 const XML_Char *prefix;
2824 XML_Char *uri;
2825 localPart = tag->name.localPart;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002826 if (parser->m_ns && localPart) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002827 /* localPart and prefix may have been overwritten in
2828 tag->name.str, since this points to the binding->uri
2829 buffer which gets re-used; so we have to add them again
2830 */
2831 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
2832 /* don't need to check for space - already done in storeAtts() */
Benjamin Peterson52b94082019-09-25 21:33:58 -07002833 while (*localPart)
2834 *uri++ = *localPart++;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002835 prefix = (XML_Char *)tag->name.prefix;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002836 if (parser->m_ns_triplets && prefix) {
2837 *uri++ = parser->m_namespaceSeparator;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002838 while (*prefix)
2839 *uri++ = *prefix++;
2840 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002841 *uri = XML_T('\0');
2842 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002843 parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
Benjamin Peterson52b94082019-09-25 21:33:58 -07002844 } else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002845 reportDefault(parser, enc, s, next);
2846 while (tag->bindings) {
2847 BINDING *b = tag->bindings;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002848 if (parser->m_endNamespaceDeclHandler)
Benjamin Peterson52b94082019-09-25 21:33:58 -07002849 parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
2850 b->prefix->name);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002851 tag->bindings = tag->bindings->nextTagBinding;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002852 b->nextTagBinding = parser->m_freeBindingList;
2853 parser->m_freeBindingList = b;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002854 b->prefix->binding = b->prevPrefixBinding;
2855 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07002856 if ((parser->m_tagLevel == 0)
2857 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
2858 if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
2859 parser->m_processor = epilogProcessor;
2860 else
2861 return epilogProcessor(parser, next, end, nextPtr);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002862 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002863 }
2864 break;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002865 case XML_TOK_CHAR_REF: {
2866 int n = XmlCharRefNumber(enc, s);
2867 if (n < 0)
2868 return XML_ERROR_BAD_CHAR_REF;
2869 if (parser->m_characterDataHandler) {
2870 XML_Char buf[XML_ENCODE_MAX];
2871 parser->m_characterDataHandler(parser->m_handlerArg, buf,
2872 XmlEncode(n, (ICHAR *)buf));
2873 } else if (parser->m_defaultHandler)
2874 reportDefault(parser, enc, s, next);
2875 } break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002876 case XML_TOK_XML_DECL:
2877 return XML_ERROR_MISPLACED_XML_PI;
2878 case XML_TOK_DATA_NEWLINE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002879 if (parser->m_characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002880 XML_Char c = 0xA;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002881 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
Benjamin Peterson52b94082019-09-25 21:33:58 -07002882 } else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002883 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002884 break;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002885 case XML_TOK_CDATA_SECT_OPEN: {
2886 enum XML_Error result;
2887 if (parser->m_startCdataSectionHandler)
2888 parser->m_startCdataSectionHandler(parser->m_handlerArg);
2889 /* BEGIN disabled code */
2890 /* Suppose you doing a transformation on a document that involves
2891 changing only the character data. You set up a defaultHandler
2892 and a characterDataHandler. The defaultHandler simply copies
2893 characters through. The characterDataHandler does the
2894 transformation and writes the characters out escaping them as
2895 necessary. This case will fail to work if we leave out the
2896 following two lines (because & and < inside CDATA sections will
2897 be incorrectly escaped).
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002898
Benjamin Peterson52b94082019-09-25 21:33:58 -07002899 However, now we have a start/endCdataSectionHandler, so it seems
2900 easier to let the user deal with this.
2901 */
2902 else if (0 && parser->m_characterDataHandler)
2903 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
2904 0);
2905 /* END disabled code */
2906 else if (parser->m_defaultHandler)
2907 reportDefault(parser, enc, s, next);
2908 result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore);
2909 if (result != XML_ERROR_NONE)
2910 return result;
2911 else if (! next) {
2912 parser->m_processor = cdataSectionProcessor;
2913 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002914 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07002915 } break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002916 case XML_TOK_TRAILING_RSQB:
Fred Drake31d485c2004-08-03 07:06:22 +00002917 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002918 *nextPtr = s;
2919 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002920 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002921 if (parser->m_characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002922 if (MUST_CONVERT(enc, s)) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002923 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
2924 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
Benjamin Peterson52b94082019-09-25 21:33:58 -07002925 parser->m_characterDataHandler(
2926 parser->m_handlerArg, parser->m_dataBuf,
2927 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
2928 } else
2929 parser->m_characterDataHandler(
2930 parser->m_handlerArg, (XML_Char *)s,
2931 (int)((XML_Char *)end - (XML_Char *)s));
2932 } else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002933 reportDefault(parser, enc, s, end);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002934 /* We are at the end of the final buffer, should we check for
2935 XML_SUSPENDED, XML_FINISHED?
Fred Drake31d485c2004-08-03 07:06:22 +00002936 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002937 if (startTagLevel == 0) {
2938 *eventPP = end;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002939 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002940 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002941 if (parser->m_tagLevel != startTagLevel) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002942 *eventPP = end;
2943 return XML_ERROR_ASYNC_ENTITY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002944 }
Fred Drake31d485c2004-08-03 07:06:22 +00002945 *nextPtr = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002946 return XML_ERROR_NONE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002947 case XML_TOK_DATA_CHARS: {
2948 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
2949 if (charDataHandler) {
2950 if (MUST_CONVERT(enc, s)) {
2951 for (;;) {
2952 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
2953 const enum XML_Convert_Result convert_res = XmlConvert(
2954 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
2955 *eventEndPP = s;
2956 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
2957 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
2958 if ((convert_res == XML_CONVERT_COMPLETED)
2959 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
2960 break;
2961 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002962 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07002963 } else
2964 charDataHandler(parser->m_handlerArg, (XML_Char *)s,
2965 (int)((XML_Char *)next - (XML_Char *)s));
2966 } else if (parser->m_defaultHandler)
2967 reportDefault(parser, enc, s, next);
2968 } break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002969 case XML_TOK_PI:
Benjamin Peterson52b94082019-09-25 21:33:58 -07002970 if (! reportProcessingInstruction(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002971 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002972 break;
2973 case XML_TOK_COMMENT:
Benjamin Peterson52b94082019-09-25 21:33:58 -07002974 if (! reportComment(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002975 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002976 break;
2977 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02002978 /* All of the tokens produced by XmlContentTok() have their own
2979 * explicit cases, so this default is not strictly necessary.
2980 * However it is a useful safety net, so we retain the code and
2981 * simply exclude it from the coverage tests.
2982 *
2983 * LCOV_EXCL_START
2984 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002985 if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002986 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002987 break;
Victor Stinner93d0cb52017-08-18 23:43:54 +02002988 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002989 }
2990 *eventPP = s = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002991 switch (parser->m_parsingStatus.parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002992 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00002993 *nextPtr = next;
2994 return XML_ERROR_NONE;
2995 case XML_FINISHED:
2996 return XML_ERROR_ABORTED;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002997 default:;
Fred Drake31d485c2004-08-03 07:06:22 +00002998 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002999 }
3000 /* not reached */
3001}
3002
Victor Stinner5ff71322017-06-21 14:39:22 +02003003/* This function does not call free() on the allocated memory, merely
Benjamin Peterson4e211002018-06-26 19:25:45 -07003004 * moving it to the parser's m_freeBindingList where it can be freed or
Victor Stinner5ff71322017-06-21 14:39:22 +02003005 * reused as appropriate.
3006 */
3007static void
Benjamin Peterson52b94082019-09-25 21:33:58 -07003008freeBindings(XML_Parser parser, BINDING *bindings) {
Victor Stinner5ff71322017-06-21 14:39:22 +02003009 while (bindings) {
3010 BINDING *b = bindings;
3011
Benjamin Peterson4e211002018-06-26 19:25:45 -07003012 /* m_startNamespaceDeclHandler will have been called for this
Victor Stinner5ff71322017-06-21 14:39:22 +02003013 * binding in addBindings(), so call the end handler now.
3014 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003015 if (parser->m_endNamespaceDeclHandler)
Benjamin Peterson52b94082019-09-25 21:33:58 -07003016 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
Victor Stinner5ff71322017-06-21 14:39:22 +02003017
3018 bindings = bindings->nextTagBinding;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003019 b->nextTagBinding = parser->m_freeBindingList;
3020 parser->m_freeBindingList = b;
Victor Stinner5ff71322017-06-21 14:39:22 +02003021 b->prefix->binding = b->prevPrefixBinding;
3022 }
3023}
3024
Fred Drake4faea012003-01-28 06:42:40 +00003025/* Precondition: all arguments must be non-NULL;
3026 Purpose:
3027 - normalize attributes
3028 - check attributes for well-formedness
3029 - generate namespace aware attribute names (URI, prefix)
3030 - build list of attributes for startElementHandler
3031 - default attributes
3032 - process namespace declarations (check and report them)
3033 - generate namespace aware element name (URI, prefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003034*/
3035static enum XML_Error
Benjamin Peterson52b94082019-09-25 21:33:58 -07003036storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
3037 TAG_NAME *tagNamePtr, BINDING **bindingsPtr) {
3038 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
Fred Drake08317ae2003-10-21 15:38:55 +00003039 ELEMENT_TYPE *elementType;
3040 int nDefaultAtts;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003041 const XML_Char **appAtts; /* the attribute list for the application */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003042 int attIndex = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003043 int prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003044 int i;
3045 int n;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003046 XML_Char *uri;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003047 int nPrefixes = 0;
3048 BINDING *binding;
3049 const XML_Char *localPart;
3050
3051 /* lookup the element type name */
Benjamin Peterson52b94082019-09-25 21:33:58 -07003052 elementType
3053 = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
3054 if (! elementType) {
Fred Drake4faea012003-01-28 06:42:40 +00003055 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
Benjamin Peterson52b94082019-09-25 21:33:58 -07003056 if (! name)
Fred Drake4faea012003-01-28 06:42:40 +00003057 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003058 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
Fred Drake4faea012003-01-28 06:42:40 +00003059 sizeof(ELEMENT_TYPE));
Benjamin Peterson52b94082019-09-25 21:33:58 -07003060 if (! elementType)
Fred Drake4faea012003-01-28 06:42:40 +00003061 return XML_ERROR_NO_MEMORY;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003062 if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
Fred Drake4faea012003-01-28 06:42:40 +00003063 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003064 }
Fred Drake4faea012003-01-28 06:42:40 +00003065 nDefaultAtts = elementType->nDefaultAtts;
3066
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003067 /* get the attributes from the tokenizer */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003068 n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3069 if (n + nDefaultAtts > parser->m_attsSize) {
3070 int oldAttsSize = parser->m_attsSize;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003071 ATTRIBUTE *temp;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003072#ifdef XML_ATTR_INFO
3073 XML_AttrInfo *temp2;
3074#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07003075 parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003076 temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
3077 parser->m_attsSize * sizeof(ATTRIBUTE));
Victor Stinner93d0cb52017-08-18 23:43:54 +02003078 if (temp == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003079 parser->m_attsSize = oldAttsSize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003080 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003081 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003082 parser->m_atts = temp;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003083#ifdef XML_ATTR_INFO
Benjamin Peterson52b94082019-09-25 21:33:58 -07003084 temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
3085 parser->m_attsSize * sizeof(XML_AttrInfo));
Victor Stinner93d0cb52017-08-18 23:43:54 +02003086 if (temp2 == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003087 parser->m_attsSize = oldAttsSize;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003088 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003089 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003090 parser->m_attInfo = temp2;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003091#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003092 if (n > oldAttsSize)
Benjamin Peterson4e211002018-06-26 19:25:45 -07003093 XmlGetAttributes(enc, attStr, n, parser->m_atts);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003094 }
Fred Drake4faea012003-01-28 06:42:40 +00003095
Benjamin Peterson4e211002018-06-26 19:25:45 -07003096 appAtts = (const XML_Char **)parser->m_atts;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003097 for (i = 0; i < n; i++) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003098 ATTRIBUTE *currAtt = &parser->m_atts[i];
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003099#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -07003100 XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003101#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003102 /* add the name and value to the attribute list */
Benjamin Peterson52b94082019-09-25 21:33:58 -07003103 ATTRIBUTE_ID *attId
3104 = getAttributeId(parser, enc, currAtt->name,
3105 currAtt->name + XmlNameLength(enc, currAtt->name));
3106 if (! attId)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003107 return XML_ERROR_NO_MEMORY;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003108#ifdef XML_ATTR_INFO
Benjamin Peterson52b94082019-09-25 21:33:58 -07003109 currAttInfo->nameStart
3110 = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
3111 currAttInfo->nameEnd
3112 = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
3113 currAttInfo->valueStart = parser->m_parseEndByteIndex
3114 - (parser->m_parseEndPtr - currAtt->valuePtr);
3115 currAttInfo->valueEnd = parser->m_parseEndByteIndex
3116 - (parser->m_parseEndPtr - currAtt->valueEnd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003117#endif
Fred Drake08317ae2003-10-21 15:38:55 +00003118 /* Detect duplicate attributes by their QNames. This does not work when
3119 namespace processing is turned on and different prefixes for the same
3120 namespace are used. For this case we have a check further down.
3121 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003122 if ((attId->name)[-1]) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003123 if (enc == parser->m_encoding)
3124 parser->m_eventPtr = parser->m_atts[i].name;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003125 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3126 }
3127 (attId->name)[-1] = 1;
3128 appAtts[attIndex++] = attId->name;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003129 if (! parser->m_atts[i].normalized) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003130 enum XML_Error result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003131 XML_Bool isCdata = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003132
3133 /* figure out whether declared as other than CDATA */
3134 if (attId->maybeTokenized) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003135 int j;
3136 for (j = 0; j < nDefaultAtts; j++) {
3137 if (attId == elementType->defaultAtts[j].id) {
3138 isCdata = elementType->defaultAtts[j].isCdata;
3139 break;
3140 }
3141 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003142 }
3143
3144 /* normalize the attribute value */
Benjamin Peterson52b94082019-09-25 21:33:58 -07003145 result = storeAttributeValue(
3146 parser, enc, isCdata, parser->m_atts[i].valuePtr,
3147 parser->m_atts[i].valueEnd, &parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003148 if (result)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003149 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003150 appAtts[attIndex] = poolStart(&parser->m_tempPool);
3151 poolFinish(&parser->m_tempPool);
Benjamin Peterson52b94082019-09-25 21:33:58 -07003152 } else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003153 /* the value did not need normalizing */
Benjamin Peterson52b94082019-09-25 21:33:58 -07003154 appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
3155 parser->m_atts[i].valuePtr,
Benjamin Peterson4e211002018-06-26 19:25:45 -07003156 parser->m_atts[i].valueEnd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003157 if (appAtts[attIndex] == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003158 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003159 poolFinish(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003160 }
3161 /* handle prefixed attribute names */
Fred Drake4faea012003-01-28 06:42:40 +00003162 if (attId->prefix) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003163 if (attId->xmlns) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003164 /* deal with namespace declarations here */
3165 enum XML_Error result = addBinding(parser, attId->prefix, attId,
3166 appAtts[attIndex], bindingsPtr);
3167 if (result)
3168 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003169 --attIndex;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003170 } else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003171 /* deal with other prefixed names later */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003172 attIndex++;
3173 nPrefixes++;
3174 (attId->name)[-1] = 2;
3175 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07003176 } else
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003177 attIndex++;
3178 }
Fred Drake4faea012003-01-28 06:42:40 +00003179
3180 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003181 parser->m_nSpecifiedAtts = attIndex;
Fred Drake4faea012003-01-28 06:42:40 +00003182 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3183 for (i = 0; i < attIndex; i += 2)
3184 if (appAtts[i] == elementType->idAtt->name) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003185 parser->m_idAttIndex = i;
Fred Drake4faea012003-01-28 06:42:40 +00003186 break;
3187 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07003188 } else
Benjamin Peterson4e211002018-06-26 19:25:45 -07003189 parser->m_idAttIndex = -1;
Fred Drake4faea012003-01-28 06:42:40 +00003190
3191 /* do attribute defaulting */
3192 for (i = 0; i < nDefaultAtts; i++) {
3193 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003194 if (! (da->id->name)[-1] && da->value) {
Fred Drake4faea012003-01-28 06:42:40 +00003195 if (da->id->prefix) {
3196 if (da->id->xmlns) {
3197 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3198 da->value, bindingsPtr);
3199 if (result)
3200 return result;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003201 } else {
Fred Drake4faea012003-01-28 06:42:40 +00003202 (da->id->name)[-1] = 2;
3203 nPrefixes++;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003204 appAtts[attIndex++] = da->id->name;
3205 appAtts[attIndex++] = da->value;
3206 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07003207 } else {
Fred Drake4faea012003-01-28 06:42:40 +00003208 (da->id->name)[-1] = 1;
3209 appAtts[attIndex++] = da->id->name;
3210 appAtts[attIndex++] = da->value;
3211 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003212 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003213 }
Fred Drake4faea012003-01-28 06:42:40 +00003214 appAtts[attIndex] = 0;
3215
Fred Drake08317ae2003-10-21 15:38:55 +00003216 /* expand prefixed attribute names, check for duplicates,
3217 and clear flags that say whether attributes were specified */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003218 i = 0;
3219 if (nPrefixes) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07003220 int j; /* hash table index */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003221 unsigned long version = parser->m_nsAttsVersion;
3222 int nsAttsSize = (int)1 << parser->m_nsAttsPower;
3223 unsigned char oldNsAttsPower = parser->m_nsAttsPower;
Fred Drake08317ae2003-10-21 15:38:55 +00003224 /* size of hash table must be at least 2 * (# of prefixed attributes) */
Benjamin Peterson52b94082019-09-25 21:33:58 -07003225 if ((nPrefixes << 1)
3226 >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
Fred Drake08317ae2003-10-21 15:38:55 +00003227 NS_ATT *temp;
3228 /* hash table size must also be a power of 2 and >= 8 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07003229 while (nPrefixes >> parser->m_nsAttsPower++)
3230 ;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003231 if (parser->m_nsAttsPower < 3)
3232 parser->m_nsAttsPower = 3;
3233 nsAttsSize = (int)1 << parser->m_nsAttsPower;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003234 temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
3235 nsAttsSize * sizeof(NS_ATT));
3236 if (! temp) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003237 /* Restore actual size of memory in m_nsAtts */
3238 parser->m_nsAttsPower = oldNsAttsPower;
Fred Drake08317ae2003-10-21 15:38:55 +00003239 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003240 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003241 parser->m_nsAtts = temp;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003242 version = 0; /* force re-initialization of m_nsAtts hash table */
Fred Drake08317ae2003-10-21 15:38:55 +00003243 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003244 /* using a version flag saves us from initializing m_nsAtts every time */
Benjamin Peterson52b94082019-09-25 21:33:58 -07003245 if (! version) { /* initialize version flags when version wraps around */
Fred Drake08317ae2003-10-21 15:38:55 +00003246 version = INIT_ATTS_VERSION;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003247 for (j = nsAttsSize; j != 0;)
Benjamin Peterson4e211002018-06-26 19:25:45 -07003248 parser->m_nsAtts[--j].version = version;
Fred Drake08317ae2003-10-21 15:38:55 +00003249 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003250 parser->m_nsAttsVersion = --version;
Fred Drake08317ae2003-10-21 15:38:55 +00003251
3252 /* expand prefixed names and check for duplicates */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003253 for (; i < attIndex; i += 2) {
Fred Drake08317ae2003-10-21 15:38:55 +00003254 const XML_Char *s = appAtts[i];
Benjamin Peterson52b94082019-09-25 21:33:58 -07003255 if (s[-1] == 2) { /* prefixed */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003256 ATTRIBUTE_ID *id;
Fred Drake08317ae2003-10-21 15:38:55 +00003257 const BINDING *b;
Victor Stinner5ff71322017-06-21 14:39:22 +02003258 unsigned long uriHash;
3259 struct siphash sip_state;
3260 struct sipkey sip_key;
3261
3262 copy_salt_to_sipkey(parser, &sip_key);
3263 sip24_init(&sip_state, &sip_key);
3264
Benjamin Peterson52b94082019-09-25 21:33:58 -07003265 ((XML_Char *)s)[-1] = 0; /* clear flag */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003266 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
Benjamin Peterson52b94082019-09-25 21:33:58 -07003267 if (! id || ! id->prefix) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02003268 /* This code is walking through the appAtts array, dealing
3269 * with (in this case) a prefixed attribute name. To be in
3270 * the array, the attribute must have already been bound, so
3271 * has to have passed through the hash table lookup once
3272 * already. That implies that an entry for it already
3273 * exists, so the lookup above will return a pointer to
Benjamin Peterson52b94082019-09-25 21:33:58 -07003274 * already allocated memory. There is no opportunaity for
Victor Stinner93d0cb52017-08-18 23:43:54 +02003275 * the allocator to fail, so the condition above cannot be
3276 * fulfilled.
3277 *
3278 * Since it is difficult to be certain that the above
3279 * analysis is complete, we retain the test and merely
3280 * remove the code from coverage tests.
3281 */
3282 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3283 }
Fred Drake08317ae2003-10-21 15:38:55 +00003284 b = id->prefix->binding;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003285 if (! b)
Fred Drake08317ae2003-10-21 15:38:55 +00003286 return XML_ERROR_UNBOUND_PREFIX;
3287
Fred Drake08317ae2003-10-21 15:38:55 +00003288 for (j = 0; j < b->uriLen; j++) {
3289 const XML_Char c = b->uri[j];
Benjamin Peterson52b94082019-09-25 21:33:58 -07003290 if (! poolAppendChar(&parser->m_tempPool, c))
Fred Drake08317ae2003-10-21 15:38:55 +00003291 return XML_ERROR_NO_MEMORY;
Fred Drake08317ae2003-10-21 15:38:55 +00003292 }
Victor Stinner5ff71322017-06-21 14:39:22 +02003293
3294 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3295
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003296 while (*s++ != XML_T(ASCII_COLON))
Fred Drake08317ae2003-10-21 15:38:55 +00003297 ;
Victor Stinner5ff71322017-06-21 14:39:22 +02003298
3299 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3300
Benjamin Peterson52b94082019-09-25 21:33:58 -07003301 do { /* copies null terminator */
3302 if (! poolAppendChar(&parser->m_tempPool, *s))
Fred Drake08317ae2003-10-21 15:38:55 +00003303 return XML_ERROR_NO_MEMORY;
Fred Drake08317ae2003-10-21 15:38:55 +00003304 } while (*s++);
3305
Victor Stinner5ff71322017-06-21 14:39:22 +02003306 uriHash = (unsigned long)sip24_final(&sip_state);
3307
Fred Drake08317ae2003-10-21 15:38:55 +00003308 { /* Check hash table for duplicate of expanded name (uriName).
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003309 Derived from code in lookup(parser, HASH_TABLE *table, ...).
Fred Drake08317ae2003-10-21 15:38:55 +00003310 */
3311 unsigned char step = 0;
3312 unsigned long mask = nsAttsSize - 1;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003313 j = uriHash & mask; /* index into hash table */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003314 while (parser->m_nsAtts[j].version == version) {
Fred Drake08317ae2003-10-21 15:38:55 +00003315 /* for speed we compare stored hash values first */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003316 if (uriHash == parser->m_nsAtts[j].hash) {
3317 const XML_Char *s1 = poolStart(&parser->m_tempPool);
3318 const XML_Char *s2 = parser->m_nsAtts[j].uriName;
Fred Drake08317ae2003-10-21 15:38:55 +00003319 /* s1 is null terminated, but not s2 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07003320 for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
3321 ;
Fred Drake08317ae2003-10-21 15:38:55 +00003322 if (*s1 == 0)
3323 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3324 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07003325 if (! step)
Benjamin Peterson4e211002018-06-26 19:25:45 -07003326 step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003327 j < step ? (j += nsAttsSize - step) : (j -= step);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003328 }
Fred Drake08317ae2003-10-21 15:38:55 +00003329 }
3330
Benjamin Peterson52b94082019-09-25 21:33:58 -07003331 if (parser->m_ns_triplets) { /* append namespace separator and prefix */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003332 parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
Fred Drake08317ae2003-10-21 15:38:55 +00003333 s = b->prefix->name;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003334 do {
Benjamin Peterson52b94082019-09-25 21:33:58 -07003335 if (! poolAppendChar(&parser->m_tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003336 return XML_ERROR_NO_MEMORY;
3337 } while (*s++);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003338 }
Fred Drake08317ae2003-10-21 15:38:55 +00003339
3340 /* store expanded name in attribute list */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003341 s = poolStart(&parser->m_tempPool);
3342 poolFinish(&parser->m_tempPool);
Fred Drake08317ae2003-10-21 15:38:55 +00003343 appAtts[i] = s;
3344
3345 /* fill empty slot with new version, uriName and hash value */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003346 parser->m_nsAtts[j].version = version;
3347 parser->m_nsAtts[j].hash = uriHash;
3348 parser->m_nsAtts[j].uriName = s;
Fred Drake08317ae2003-10-21 15:38:55 +00003349
Benjamin Peterson52b94082019-09-25 21:33:58 -07003350 if (! --nPrefixes) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003351 i += 2;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003352 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003353 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07003354 } else /* not prefixed */
3355 ((XML_Char *)s)[-1] = 0; /* clear flag */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003356 }
3357 }
Fred Drake08317ae2003-10-21 15:38:55 +00003358 /* clear flags for the remaining attributes */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003359 for (; i < attIndex; i += 2)
3360 ((XML_Char *)(appAtts[i]))[-1] = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003361 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3362 binding->attId->name[-1] = 0;
Fred Drake4faea012003-01-28 06:42:40 +00003363
Benjamin Peterson52b94082019-09-25 21:33:58 -07003364 if (! parser->m_ns)
Fred Drake08317ae2003-10-21 15:38:55 +00003365 return XML_ERROR_NONE;
3366
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003367 /* expand the element type name */
3368 if (elementType->prefix) {
3369 binding = elementType->prefix->binding;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003370 if (! binding)
Fred Drake08317ae2003-10-21 15:38:55 +00003371 return XML_ERROR_UNBOUND_PREFIX;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003372 localPart = tagNamePtr->str;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003373 while (*localPart++ != XML_T(ASCII_COLON))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003374 ;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003375 } else if (dtd->defaultPrefix.binding) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003376 binding = dtd->defaultPrefix.binding;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003377 localPart = tagNamePtr->str;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003378 } else
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003379 return XML_ERROR_NONE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003380 prefixLen = 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003381 if (parser->m_ns_triplets && binding->prefix->name) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003382 for (; binding->prefix->name[prefixLen++];)
Benjamin Peterson52b94082019-09-25 21:33:58 -07003383 ; /* prefixLen includes null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003384 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003385 tagNamePtr->localPart = localPart;
3386 tagNamePtr->uriLen = binding->uriLen;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003387 tagNamePtr->prefix = binding->prefix->name;
3388 tagNamePtr->prefixLen = prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003389 for (i = 0; localPart[i++];)
Benjamin Peterson52b94082019-09-25 21:33:58 -07003390 ; /* i includes null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003391 n = i + binding->uriLen + prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003392 if (n > binding->uriAlloc) {
3393 TAG *p;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003394 uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
Benjamin Peterson52b94082019-09-25 21:33:58 -07003395 if (! uri)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003396 return XML_ERROR_NO_MEMORY;
3397 binding->uriAlloc = n + EXPAND_SPARE;
3398 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
Benjamin Peterson4e211002018-06-26 19:25:45 -07003399 for (p = parser->m_tagStack; p; p = p->parent)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003400 if (p->name.str == binding->uri)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003401 p->name.str = uri;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003402 FREE(parser, binding->uri);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003403 binding->uri = uri;
3404 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003405 /* if m_namespaceSeparator != '\0' then uri includes it already */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003406 uri = binding->uri + binding->uriLen;
3407 memcpy(uri, localPart, i * sizeof(XML_Char));
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003408 /* we always have a namespace separator between localPart and prefix */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003409 if (prefixLen) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003410 uri += i - 1;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003411 *uri = parser->m_namespaceSeparator; /* replace null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003412 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3413 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003414 tagNamePtr->str = binding->uri;
3415 return XML_ERROR_NONE;
3416}
3417
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003418/* addBinding() overwrites the value of prefix->binding without checking.
3419 Therefore one must keep track of the old value outside of addBinding().
3420*/
3421static enum XML_Error
3422addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
Benjamin Peterson52b94082019-09-25 21:33:58 -07003423 const XML_Char *uri, BINDING **bindingsPtr) {
3424 static const XML_Char xmlNamespace[]
3425 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
3426 ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
3427 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o,
3428 ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M,
3429 ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9,
3430 ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m,
3431 ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3432 ASCII_e, '\0'};
3433 static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
3434 static const XML_Char xmlnsNamespace[]
3435 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
3436 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
3437 ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH,
3438 ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x,
3439 ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'};
3440 static const int xmlnsLen
3441 = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003442
3443 XML_Bool mustBeXML = XML_FALSE;
3444 XML_Bool isXML = XML_TRUE;
3445 XML_Bool isXMLNS = XML_TRUE;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003446
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003447 BINDING *b;
3448 int len;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003449
Fred Drake31d485c2004-08-03 07:06:22 +00003450 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003451 if (*uri == XML_T('\0') && prefix->name)
Fred Drake31d485c2004-08-03 07:06:22 +00003452 return XML_ERROR_UNDECLARING_PREFIX;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003453
Benjamin Peterson52b94082019-09-25 21:33:58 -07003454 if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003455 && prefix->name[1] == XML_T(ASCII_m)
3456 && prefix->name[2] == XML_T(ASCII_l)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003457 /* Not allowed to bind xmlns */
Benjamin Peterson52b94082019-09-25 21:33:58 -07003458 if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003459 && prefix->name[5] == XML_T('\0'))
3460 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3461
3462 if (prefix->name[3] == XML_T('\0'))
3463 mustBeXML = XML_TRUE;
3464 }
3465
3466 for (len = 0; uri[len]; len++) {
3467 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3468 isXML = XML_FALSE;
3469
Benjamin Peterson52b94082019-09-25 21:33:58 -07003470 if (! mustBeXML && isXMLNS
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003471 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3472 isXMLNS = XML_FALSE;
3473 }
3474 isXML = isXML && len == xmlLen;
3475 isXMLNS = isXMLNS && len == xmlnsLen;
3476
3477 if (mustBeXML != isXML)
3478 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3479 : XML_ERROR_RESERVED_NAMESPACE_URI;
3480
3481 if (isXMLNS)
3482 return XML_ERROR_RESERVED_NAMESPACE_URI;
3483
Benjamin Peterson4e211002018-06-26 19:25:45 -07003484 if (parser->m_namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003485 len++;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003486 if (parser->m_freeBindingList) {
3487 b = parser->m_freeBindingList;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003488 if (len > b->uriAlloc) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07003489 XML_Char *temp = (XML_Char *)REALLOC(
3490 parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003491 if (temp == NULL)
3492 return XML_ERROR_NO_MEMORY;
3493 b->uri = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003494 b->uriAlloc = len + EXPAND_SPARE;
3495 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003496 parser->m_freeBindingList = b->nextTagBinding;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003497 } else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003498 b = (BINDING *)MALLOC(parser, sizeof(BINDING));
Benjamin Peterson52b94082019-09-25 21:33:58 -07003499 if (! b)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003500 return XML_ERROR_NO_MEMORY;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003501 b->uri
3502 = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
3503 if (! b->uri) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003504 FREE(parser, b);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003505 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003506 }
3507 b->uriAlloc = len + EXPAND_SPARE;
3508 }
3509 b->uriLen = len;
3510 memcpy(b->uri, uri, len * sizeof(XML_Char));
Benjamin Peterson4e211002018-06-26 19:25:45 -07003511 if (parser->m_namespaceSeparator)
3512 b->uri[len - 1] = parser->m_namespaceSeparator;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003513 b->prefix = prefix;
3514 b->attId = attId;
3515 b->prevPrefixBinding = prefix->binding;
Fred Drake08317ae2003-10-21 15:38:55 +00003516 /* NULL binding when default namespace undeclared */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003517 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003518 prefix->binding = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003519 else
3520 prefix->binding = b;
3521 b->nextTagBinding = *bindingsPtr;
3522 *bindingsPtr = b;
Fred Drake31d485c2004-08-03 07:06:22 +00003523 /* if attId == NULL then we are not starting a namespace scope */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003524 if (attId && parser->m_startNamespaceDeclHandler)
3525 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
Benjamin Peterson52b94082019-09-25 21:33:58 -07003526 prefix->binding ? uri : 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003527 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003528}
3529
3530/* The idea here is to avoid using stack for each CDATA section when
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003531 the whole file is parsed with one call.
3532*/
3533static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07003534cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
3535 const char **endPtr) {
3536 enum XML_Error result
3537 = doCdataSection(parser, parser->m_encoding, &start, end, endPtr,
3538 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00003539 if (result != XML_ERROR_NONE)
3540 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003541 if (start) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07003542 if (parser->m_parentParser) { /* we are parsing an external entity */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003543 parser->m_processor = externalEntityContentProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003544 return externalEntityContentProcessor(parser, start, end, endPtr);
Benjamin Peterson52b94082019-09-25 21:33:58 -07003545 } else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003546 parser->m_processor = contentProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003547 return contentProcessor(parser, start, end, endPtr);
3548 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003549 }
3550 return result;
3551}
3552
Fred Drake31d485c2004-08-03 07:06:22 +00003553/* startPtr gets set to non-null if the section is closed, and to null if
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003554 the section is not yet closed.
3555*/
3556static enum XML_Error
Benjamin Peterson52b94082019-09-25 21:33:58 -07003557doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
3558 const char *end, const char **nextPtr, XML_Bool haveMore) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003559 const char *s = *startPtr;
3560 const char **eventPP;
3561 const char **eventEndPP;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003562 if (enc == parser->m_encoding) {
3563 eventPP = &parser->m_eventPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003564 *eventPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003565 eventEndPP = &parser->m_eventEndPtr;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003566 } else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003567 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3568 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003569 }
3570 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003571 *startPtr = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00003572
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003573 for (;;) {
3574 const char *next;
3575 int tok = XmlCdataSectionTok(enc, s, end, &next);
3576 *eventEndPP = next;
3577 switch (tok) {
3578 case XML_TOK_CDATA_SECT_CLOSE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07003579 if (parser->m_endCdataSectionHandler)
3580 parser->m_endCdataSectionHandler(parser->m_handlerArg);
Benjamin Peterson52b94082019-09-25 21:33:58 -07003581 /* BEGIN disabled code */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003582 /* see comment under XML_TOK_CDATA_SECT_OPEN */
Benjamin Peterson3b03b092019-06-27 20:54:44 -07003583 else if (0 && parser->m_characterDataHandler)
Benjamin Peterson52b94082019-09-25 21:33:58 -07003584 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3585 0);
3586 /* END disabled code */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003587 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003588 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003589 *startPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00003590 *nextPtr = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003591 if (parser->m_parsingStatus.parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00003592 return XML_ERROR_ABORTED;
3593 else
3594 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003595 case XML_TOK_DATA_NEWLINE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07003596 if (parser->m_characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003597 XML_Char c = 0xA;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003598 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
Benjamin Peterson52b94082019-09-25 21:33:58 -07003599 } else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003600 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003601 break;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003602 case XML_TOK_DATA_CHARS: {
3603 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3604 if (charDataHandler) {
3605 if (MUST_CONVERT(enc, s)) {
3606 for (;;) {
3607 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3608 const enum XML_Convert_Result convert_res = XmlConvert(
3609 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3610 *eventEndPP = next;
3611 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3612 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3613 if ((convert_res == XML_CONVERT_COMPLETED)
3614 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3615 break;
3616 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003617 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07003618 } else
3619 charDataHandler(parser->m_handlerArg, (XML_Char *)s,
3620 (int)((XML_Char *)next - (XML_Char *)s));
3621 } else if (parser->m_defaultHandler)
3622 reportDefault(parser, enc, s, next);
3623 } break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003624 case XML_TOK_INVALID:
3625 *eventPP = next;
3626 return XML_ERROR_INVALID_TOKEN;
3627 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003628 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003629 *nextPtr = s;
3630 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003631 }
3632 return XML_ERROR_PARTIAL_CHAR;
3633 case XML_TOK_PARTIAL:
3634 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00003635 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003636 *nextPtr = s;
3637 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003638 }
3639 return XML_ERROR_UNCLOSED_CDATA_SECTION;
3640 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02003641 /* Every token returned by XmlCdataSectionTok() has its own
3642 * explicit case, so this default case will never be executed.
3643 * We retain it as a safety net and exclude it from the coverage
3644 * statistics.
3645 *
3646 * LCOV_EXCL_START
Benjamin Peterson52b94082019-09-25 21:33:58 -07003647 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003648 *eventPP = next;
3649 return XML_ERROR_UNEXPECTED_STATE;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003650 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003651 }
Fred Drake31d485c2004-08-03 07:06:22 +00003652
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003653 *eventPP = s = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003654 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00003655 case XML_SUSPENDED:
3656 *nextPtr = next;
3657 return XML_ERROR_NONE;
3658 case XML_FINISHED:
3659 return XML_ERROR_ABORTED;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003660 default:;
Fred Drake31d485c2004-08-03 07:06:22 +00003661 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003662 }
3663 /* not reached */
3664}
3665
3666#ifdef XML_DTD
3667
3668/* The idea here is to avoid using stack for each IGNORE section when
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003669 the whole file is parsed with one call.
3670*/
3671static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07003672ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
3673 const char **endPtr) {
3674 enum XML_Error result
3675 = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
3676 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00003677 if (result != XML_ERROR_NONE)
3678 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003679 if (start) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003680 parser->m_processor = prologProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003681 return prologProcessor(parser, start, end, endPtr);
3682 }
3683 return result;
3684}
3685
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003686/* startPtr gets set to non-null is the section is closed, and to null
3687 if the section is not yet closed.
3688*/
3689static enum XML_Error
Benjamin Peterson52b94082019-09-25 21:33:58 -07003690doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
3691 const char *end, const char **nextPtr, XML_Bool haveMore) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003692 const char *next;
3693 int tok;
3694 const char *s = *startPtr;
3695 const char **eventPP;
3696 const char **eventEndPP;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003697 if (enc == parser->m_encoding) {
3698 eventPP = &parser->m_eventPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003699 *eventPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003700 eventEndPP = &parser->m_eventEndPtr;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003701 } else {
Victor Stinner93d0cb52017-08-18 23:43:54 +02003702 /* It's not entirely clear, but it seems the following two lines
3703 * of code cannot be executed. The only occasions on which 'enc'
Benjamin Peterson4e211002018-06-26 19:25:45 -07003704 * is not 'encoding' are when this function is called
Victor Stinner93d0cb52017-08-18 23:43:54 +02003705 * from the internal entity processing, and IGNORE sections are an
3706 * error in internal entities.
3707 *
3708 * Since it really isn't clear that this is true, we keep the code
3709 * and just remove it from our coverage tests.
3710 *
3711 * LCOV_EXCL_START
3712 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003713 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3714 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Victor Stinner93d0cb52017-08-18 23:43:54 +02003715 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003716 }
3717 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003718 *startPtr = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003719 tok = XmlIgnoreSectionTok(enc, s, end, &next);
3720 *eventEndPP = next;
3721 switch (tok) {
3722 case XML_TOK_IGNORE_SECT:
Benjamin Peterson4e211002018-06-26 19:25:45 -07003723 if (parser->m_defaultHandler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003724 reportDefault(parser, enc, s, next);
3725 *startPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00003726 *nextPtr = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003727 if (parser->m_parsingStatus.parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00003728 return XML_ERROR_ABORTED;
3729 else
3730 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003731 case XML_TOK_INVALID:
3732 *eventPP = next;
3733 return XML_ERROR_INVALID_TOKEN;
3734 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003735 if (haveMore) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003736 *nextPtr = s;
3737 return XML_ERROR_NONE;
3738 }
3739 return XML_ERROR_PARTIAL_CHAR;
3740 case XML_TOK_PARTIAL:
3741 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00003742 if (haveMore) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003743 *nextPtr = s;
3744 return XML_ERROR_NONE;
3745 }
3746 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
3747 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02003748 /* All of the tokens that XmlIgnoreSectionTok() returns have
3749 * explicit cases to handle them, so this default case is never
3750 * executed. We keep it as a safety net anyway, and remove it
3751 * from our test coverage statistics.
3752 *
3753 * LCOV_EXCL_START
3754 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003755 *eventPP = next;
3756 return XML_ERROR_UNEXPECTED_STATE;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003757 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003758 }
3759 /* not reached */
3760}
3761
3762#endif /* XML_DTD */
3763
3764static enum XML_Error
Benjamin Peterson52b94082019-09-25 21:33:58 -07003765initializeEncoding(XML_Parser parser) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003766 const char *s;
3767#ifdef XML_UNICODE
3768 char encodingBuf[128];
Victor Stinner93d0cb52017-08-18 23:43:54 +02003769 /* See comments abount `protoclEncodingName` in parserInit() */
Benjamin Peterson52b94082019-09-25 21:33:58 -07003770 if (! parser->m_protocolEncodingName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003771 s = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003772 else {
3773 int i;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003774 for (i = 0; parser->m_protocolEncodingName[i]; i++) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003775 if (i == sizeof(encodingBuf) - 1
Benjamin Peterson4e211002018-06-26 19:25:45 -07003776 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003777 encodingBuf[0] = '\0';
3778 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003779 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003780 encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003781 }
3782 encodingBuf[i] = '\0';
3783 s = encodingBuf;
3784 }
3785#else
Benjamin Peterson4e211002018-06-26 19:25:45 -07003786 s = parser->m_protocolEncodingName;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003787#endif
Benjamin Peterson52b94082019-09-25 21:33:58 -07003788 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
3789 &parser->m_initEncoding, &parser->m_encoding, s))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003790 return XML_ERROR_NONE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003791 return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003792}
3793
3794static enum XML_Error
Benjamin Peterson52b94082019-09-25 21:33:58 -07003795processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
3796 const char *next) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003797 const char *encodingName = NULL;
3798 const XML_Char *storedEncName = NULL;
3799 const ENCODING *newEncoding = NULL;
3800 const char *version = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003801 const char *versionend;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003802 const XML_Char *storedversion = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003803 int standalone = -1;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003804 if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
3805 isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
3806 &version, &versionend, &encodingName, &newEncoding, &standalone)) {
Fred Drake31d485c2004-08-03 07:06:22 +00003807 if (isGeneralTextEntity)
3808 return XML_ERROR_TEXT_DECL;
3809 else
3810 return XML_ERROR_XML_DECL;
3811 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07003812 if (! isGeneralTextEntity && standalone == 1) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003813 parser->m_dtd->standalone = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003814#ifdef XML_DTD
Benjamin Peterson52b94082019-09-25 21:33:58 -07003815 if (parser->m_paramEntityParsing
3816 == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
Benjamin Peterson4e211002018-06-26 19:25:45 -07003817 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003818#endif /* XML_DTD */
3819 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003820 if (parser->m_xmlDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003821 if (encodingName != NULL) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07003822 storedEncName = poolStoreString(
3823 &parser->m_temp2Pool, parser->m_encoding, encodingName,
3824 encodingName + XmlNameLength(parser->m_encoding, encodingName));
3825 if (! storedEncName)
3826 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003827 poolFinish(&parser->m_temp2Pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003828 }
3829 if (version) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07003830 storedversion
3831 = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
3832 versionend - parser->m_encoding->minBytesPerChar);
3833 if (! storedversion)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003834 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003835 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07003836 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
3837 standalone);
3838 } else if (parser->m_defaultHandler)
Benjamin Peterson4e211002018-06-26 19:25:45 -07003839 reportDefault(parser, parser->m_encoding, s, next);
3840 if (parser->m_protocolEncodingName == NULL) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003841 if (newEncoding) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02003842 /* Check that the specified encoding does not conflict with what
3843 * the parser has already deduced. Do we have the same number
3844 * of bytes in the smallest representation of a character? If
3845 * this is UTF-16, is it the same endianness?
3846 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003847 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
Benjamin Peterson52b94082019-09-25 21:33:58 -07003848 || (newEncoding->minBytesPerChar == 2
3849 && newEncoding != parser->m_encoding)) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003850 parser->m_eventPtr = encodingName;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003851 return XML_ERROR_INCORRECT_ENCODING;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003852 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003853 parser->m_encoding = newEncoding;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003854 } else if (encodingName) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003855 enum XML_Error result;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003856 if (! storedEncName) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003857 storedEncName = poolStoreString(
Benjamin Peterson52b94082019-09-25 21:33:58 -07003858 &parser->m_temp2Pool, parser->m_encoding, encodingName,
3859 encodingName + XmlNameLength(parser->m_encoding, encodingName));
3860 if (! storedEncName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003861 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003862 }
3863 result = handleUnknownEncoding(parser, storedEncName);
Benjamin Peterson4e211002018-06-26 19:25:45 -07003864 poolClear(&parser->m_temp2Pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003865 if (result == XML_ERROR_UNKNOWN_ENCODING)
Benjamin Peterson4e211002018-06-26 19:25:45 -07003866 parser->m_eventPtr = encodingName;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003867 return result;
3868 }
3869 }
3870
3871 if (storedEncName || storedversion)
Benjamin Peterson4e211002018-06-26 19:25:45 -07003872 poolClear(&parser->m_temp2Pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003873
3874 return XML_ERROR_NONE;
3875}
3876
3877static enum XML_Error
Benjamin Peterson52b94082019-09-25 21:33:58 -07003878handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003879 if (parser->m_unknownEncodingHandler) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003880 XML_Encoding info;
3881 int i;
3882 for (i = 0; i < 256; i++)
3883 info.map[i] = -1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003884 info.convert = NULL;
3885 info.data = NULL;
3886 info.release = NULL;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003887 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
3888 encodingName, &info)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003889 ENCODING *enc;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003890 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
Benjamin Peterson52b94082019-09-25 21:33:58 -07003891 if (! parser->m_unknownEncodingMem) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003892 if (info.release)
3893 info.release(info.data);
3894 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003895 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07003896 enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
3897 parser->m_unknownEncodingMem, info.map, info.convert, info.data);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003898 if (enc) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003899 parser->m_unknownEncodingData = info.data;
3900 parser->m_unknownEncodingRelease = info.release;
3901 parser->m_encoding = enc;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003902 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003903 }
3904 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003905 if (info.release != NULL)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003906 info.release(info.data);
3907 }
3908 return XML_ERROR_UNKNOWN_ENCODING;
3909}
3910
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003911static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07003912prologInitProcessor(XML_Parser parser, const char *s, const char *end,
3913 const char **nextPtr) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003914 enum XML_Error result = initializeEncoding(parser);
3915 if (result != XML_ERROR_NONE)
3916 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003917 parser->m_processor = prologProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003918 return prologProcessor(parser, s, end, nextPtr);
3919}
3920
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003921#ifdef XML_DTD
3922
3923static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07003924externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
3925 const char **nextPtr) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003926 enum XML_Error result = initializeEncoding(parser);
3927 if (result != XML_ERROR_NONE)
3928 return result;
3929
3930 /* we know now that XML_Parse(Buffer) has been called,
3931 so we consider the external parameter entity read */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003932 parser->m_dtd->paramEntityRead = XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003933
Benjamin Peterson4e211002018-06-26 19:25:45 -07003934 if (parser->m_prologState.inEntityValue) {
3935 parser->m_processor = entityValueInitProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003936 return entityValueInitProcessor(parser, s, end, nextPtr);
Benjamin Peterson52b94082019-09-25 21:33:58 -07003937 } else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003938 parser->m_processor = externalParEntProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003939 return externalParEntProcessor(parser, s, end, nextPtr);
3940 }
3941}
3942
3943static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07003944entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
3945 const char **nextPtr) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003946 int tok;
Fred Drake31d485c2004-08-03 07:06:22 +00003947 const char *start = s;
3948 const char *next = start;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003949 parser->m_eventPtr = start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003950
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003951 for (;;) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003952 tok = XmlPrologTok(parser->m_encoding, start, end, &next);
3953 parser->m_eventEndPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003954 if (tok <= 0) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07003955 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
Fred Drake31d485c2004-08-03 07:06:22 +00003956 *nextPtr = s;
3957 return XML_ERROR_NONE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003958 }
3959 switch (tok) {
3960 case XML_TOK_INVALID:
Fred Drake31d485c2004-08-03 07:06:22 +00003961 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003962 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00003963 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003964 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003965 return XML_ERROR_PARTIAL_CHAR;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003966 case XML_TOK_NONE: /* start == end */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003967 default:
3968 break;
3969 }
Fred Drake31d485c2004-08-03 07:06:22 +00003970 /* found end of entity value - can store it now */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003971 return storeEntityValue(parser, parser->m_encoding, s, end);
Benjamin Peterson52b94082019-09-25 21:33:58 -07003972 } else if (tok == XML_TOK_XML_DECL) {
Fred Drake31d485c2004-08-03 07:06:22 +00003973 enum XML_Error result;
3974 result = processXmlDecl(parser, 0, start, next);
3975 if (result != XML_ERROR_NONE)
3976 return result;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003977 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For
3978 * that to happen, a parameter entity parsing handler must have attempted
3979 * to suspend the parser, which fails and raises an error. The parser can
3980 * be aborted, but can't be suspended.
Victor Stinner93d0cb52017-08-18 23:43:54 +02003981 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003982 if (parser->m_parsingStatus.parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00003983 return XML_ERROR_ABORTED;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003984 *nextPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003985 /* stop scanning for text declaration - we found one */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003986 parser->m_processor = entityValueProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003987 return entityValueProcessor(parser, next, end, nextPtr);
3988 }
3989 /* If we are at the end of the buffer, this would cause XmlPrologTok to
3990 return XML_TOK_NONE on the next call, which would then cause the
3991 function to exit with *nextPtr set to s - that is what we want for other
3992 tokens, but not for the BOM - we would rather like to skip it;
3993 then, when this routine is entered the next time, XmlPrologTok will
3994 return XML_TOK_INVALID, since the BOM is still in the buffer
3995 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07003996 else if (tok == XML_TOK_BOM && next == end
3997 && ! parser->m_parsingStatus.finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003998 *nextPtr = next;
3999 return XML_ERROR_NONE;
4000 }
Victor Stinner5ff71322017-06-21 14:39:22 +02004001 /* If we get this token, we have the start of what might be a
4002 normal tag, but not a declaration (i.e. it doesn't begin with
4003 "<!"). In a DTD context, that isn't legal.
4004 */
4005 else if (tok == XML_TOK_INSTANCE_START) {
4006 *nextPtr = next;
4007 return XML_ERROR_SYNTAX;
4008 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004009 start = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004010 parser->m_eventPtr = start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004011 }
4012}
4013
4014static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07004015externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
4016 const char **nextPtr) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004017 const char *next = s;
4018 int tok;
4019
Benjamin Peterson4e211002018-06-26 19:25:45 -07004020 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004021 if (tok <= 0) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004022 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004023 *nextPtr = s;
4024 return XML_ERROR_NONE;
4025 }
4026 switch (tok) {
4027 case XML_TOK_INVALID:
4028 return XML_ERROR_INVALID_TOKEN;
4029 case XML_TOK_PARTIAL:
4030 return XML_ERROR_UNCLOSED_TOKEN;
4031 case XML_TOK_PARTIAL_CHAR:
4032 return XML_ERROR_PARTIAL_CHAR;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004033 case XML_TOK_NONE: /* start == end */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004034 default:
4035 break;
4036 }
4037 }
4038 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4039 However, when parsing an external subset, doProlog will not accept a BOM
4040 as valid, and report a syntax error, so we have to skip the BOM
4041 */
4042 else if (tok == XML_TOK_BOM) {
4043 s = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004044 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004045 }
4046
Benjamin Peterson4e211002018-06-26 19:25:45 -07004047 parser->m_processor = prologProcessor;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004048 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4049 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004050}
4051
4052static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07004053entityValueProcessor(XML_Parser parser, const char *s, const char *end,
4054 const char **nextPtr) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004055 const char *start = s;
4056 const char *next = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004057 const ENCODING *enc = parser->m_encoding;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004058 int tok;
4059
4060 for (;;) {
4061 tok = XmlPrologTok(enc, start, end, &next);
4062 if (tok <= 0) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004063 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004064 *nextPtr = s;
4065 return XML_ERROR_NONE;
4066 }
4067 switch (tok) {
4068 case XML_TOK_INVALID:
Fred Drake31d485c2004-08-03 07:06:22 +00004069 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004070 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00004071 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004072 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00004073 return XML_ERROR_PARTIAL_CHAR;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004074 case XML_TOK_NONE: /* start == end */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004075 default:
4076 break;
4077 }
Fred Drake31d485c2004-08-03 07:06:22 +00004078 /* found end of entity value - can store it now */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004079 return storeEntityValue(parser, enc, s, end);
4080 }
4081 start = next;
4082 }
4083}
4084
4085#endif /* XML_DTD */
4086
4087static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07004088prologProcessor(XML_Parser parser, const char *s, const char *end,
4089 const char **nextPtr) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004090 const char *next = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004091 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
Benjamin Peterson52b94082019-09-25 21:33:58 -07004092 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4093 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004094}
4095
4096static enum XML_Error
Benjamin Peterson52b94082019-09-25 21:33:58 -07004097doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
4098 int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
4099 XML_Bool allowClosingDoctype) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004100#ifdef XML_DTD
Benjamin Peterson52b94082019-09-25 21:33:58 -07004101 static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004102#endif /* XML_DTD */
Benjamin Peterson52b94082019-09-25 21:33:58 -07004103 static const XML_Char atypeCDATA[]
4104 = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
4105 static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
4106 static const XML_Char atypeIDREF[]
4107 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
4108 static const XML_Char atypeIDREFS[]
4109 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
4110 static const XML_Char atypeENTITY[]
4111 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
4112 static const XML_Char atypeENTITIES[]
4113 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
4114 ASCII_I, ASCII_E, ASCII_S, '\0'};
4115 static const XML_Char atypeNMTOKEN[]
4116 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
4117 static const XML_Char atypeNMTOKENS[]
4118 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
4119 ASCII_E, ASCII_N, ASCII_S, '\0'};
4120 static const XML_Char notationPrefix[]
4121 = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
4122 ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
4123 static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
4124 static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004125
Fred Drake31d485c2004-08-03 07:06:22 +00004126 /* save one level of indirection */
Benjamin Peterson52b94082019-09-25 21:33:58 -07004127 DTD *const dtd = parser->m_dtd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004128
4129 const char **eventPP;
4130 const char **eventEndPP;
4131 enum XML_Content_Quant quant;
4132
Benjamin Peterson4e211002018-06-26 19:25:45 -07004133 if (enc == parser->m_encoding) {
4134 eventPP = &parser->m_eventPtr;
4135 eventEndPP = &parser->m_eventEndPtr;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004136 } else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004137 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4138 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004139 }
Fred Drake31d485c2004-08-03 07:06:22 +00004140
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004141 for (;;) {
4142 int role;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004143 XML_Bool handleDefault = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004144 *eventPP = s;
4145 *eventEndPP = next;
4146 if (tok <= 0) {
Fred Drake31d485c2004-08-03 07:06:22 +00004147 if (haveMore && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004148 *nextPtr = s;
4149 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004150 }
4151 switch (tok) {
4152 case XML_TOK_INVALID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004153 *eventPP = next;
4154 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004155 case XML_TOK_PARTIAL:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004156 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004157 case XML_TOK_PARTIAL_CHAR:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004158 return XML_ERROR_PARTIAL_CHAR;
Matthias Klose865e33b2010-01-22 01:13:15 +00004159 case -XML_TOK_PROLOG_S:
4160 tok = -tok;
4161 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004162 case XML_TOK_NONE:
4163#ifdef XML_DTD
Fred Drake31d485c2004-08-03 07:06:22 +00004164 /* for internal PE NOT referenced between declarations */
Benjamin Peterson52b94082019-09-25 21:33:58 -07004165 if (enc != parser->m_encoding
4166 && ! parser->m_openInternalEntities->betweenDecl) {
Fred Drake31d485c2004-08-03 07:06:22 +00004167 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004168 return XML_ERROR_NONE;
Fred Drake31d485c2004-08-03 07:06:22 +00004169 }
4170 /* WFC: PE Between Declarations - must check that PE contains
4171 complete markup, not only for external PEs, but also for
4172 internal PEs if the reference occurs between declarations.
4173 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004174 if (parser->m_isParamEntity || enc != parser->m_encoding) {
4175 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004176 == XML_ROLE_ERROR)
Fred Drake31d485c2004-08-03 07:06:22 +00004177 return XML_ERROR_INCOMPLETE_PE;
4178 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004179 return XML_ERROR_NONE;
4180 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004181#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004182 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004183 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004184 tok = -tok;
4185 next = end;
4186 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004187 }
4188 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004189 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004190 switch (role) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004191 case XML_ROLE_XML_DECL: {
4192 enum XML_Error result = processXmlDecl(parser, 0, s, next);
4193 if (result != XML_ERROR_NONE)
4194 return result;
4195 enc = parser->m_encoding;
4196 handleDefault = XML_FALSE;
4197 } break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004198 case XML_ROLE_DOCTYPE_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004199 if (parser->m_startDoctypeDeclHandler) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004200 parser->m_doctypeName
4201 = poolStoreString(&parser->m_tempPool, enc, s, next);
4202 if (! parser->m_doctypeName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004203 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004204 poolFinish(&parser->m_tempPool);
4205 parser->m_doctypePubid = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004206 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004207 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004208 parser->m_doctypeSysid = NULL; /* always initialize to NULL */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004209 break;
4210 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004211 if (parser->m_startDoctypeDeclHandler) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004212 parser->m_startDoctypeDeclHandler(
4213 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4214 parser->m_doctypePubid, 1);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004215 parser->m_doctypeName = NULL;
4216 poolClear(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004217 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004218 }
4219 break;
4220#ifdef XML_DTD
Benjamin Peterson52b94082019-09-25 21:33:58 -07004221 case XML_ROLE_TEXT_DECL: {
4222 enum XML_Error result = processXmlDecl(parser, 1, s, next);
4223 if (result != XML_ERROR_NONE)
4224 return result;
4225 enc = parser->m_encoding;
4226 handleDefault = XML_FALSE;
4227 } break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004228#endif /* XML_DTD */
4229 case XML_ROLE_DOCTYPE_PUBLIC_ID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004230#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07004231 parser->m_useForeignDTD = XML_FALSE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004232 parser->m_declEntity = (ENTITY *)lookup(
4233 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4234 if (! parser->m_declEntity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004235 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004236#endif /* XML_DTD */
Fred Drake31d485c2004-08-03 07:06:22 +00004237 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004238 if (parser->m_startDoctypeDeclHandler) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004239 XML_Char *pubId;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004240 if (! XmlIsPublicId(enc, s, next, eventPP))
Fred Drake31d485c2004-08-03 07:06:22 +00004241 return XML_ERROR_PUBLICID;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004242 pubId = poolStoreString(&parser->m_tempPool, enc,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004243 s + enc->minBytesPerChar,
4244 next - enc->minBytesPerChar);
Benjamin Peterson52b94082019-09-25 21:33:58 -07004245 if (! pubId)
Fred Drake31d485c2004-08-03 07:06:22 +00004246 return XML_ERROR_NO_MEMORY;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004247 normalizePublicId(pubId);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004248 poolFinish(&parser->m_tempPool);
4249 parser->m_doctypePubid = pubId;
Fred Drake31d485c2004-08-03 07:06:22 +00004250 handleDefault = XML_FALSE;
4251 goto alreadyChecked;
4252 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004253 /* fall through */
4254 case XML_ROLE_ENTITY_PUBLIC_ID:
Benjamin Peterson52b94082019-09-25 21:33:58 -07004255 if (! XmlIsPublicId(enc, s, next, eventPP))
Fred Drake31d485c2004-08-03 07:06:22 +00004256 return XML_ERROR_PUBLICID;
4257 alreadyChecked:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004258 if (dtd->keepProcessing && parser->m_declEntity) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004259 XML_Char *tem
4260 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4261 next - enc->minBytesPerChar);
4262 if (! tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004263 return XML_ERROR_NO_MEMORY;
4264 normalizePublicId(tem);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004265 parser->m_declEntity->publicId = tem;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004266 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004267 /* Don't suppress the default handler if we fell through from
4268 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4269 */
4270 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004271 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004272 }
4273 break;
4274 case XML_ROLE_DOCTYPE_CLOSE:
Benjamin Peterson52b94082019-09-25 21:33:58 -07004275 if (allowClosingDoctype != XML_TRUE) {
4276 /* Must not close doctype from within expanded parameter entities */
4277 return XML_ERROR_INVALID_TOKEN;
4278 }
4279
Benjamin Peterson4e211002018-06-26 19:25:45 -07004280 if (parser->m_doctypeName) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004281 parser->m_startDoctypeDeclHandler(
4282 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4283 parser->m_doctypePubid, 0);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004284 poolClear(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004285 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004286 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004287 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
4288 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004289 was not set, indicating an external subset
4290 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004291#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07004292 if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004293 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4294 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004295 if (parser->m_paramEntityParsing
4296 && parser->m_externalEntityRefHandler) {
4297 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4298 externalSubsetName, sizeof(ENTITY));
4299 if (! entity) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02004300 /* The external subset name "#" will have already been
4301 * inserted into the hash table at the start of the
4302 * external entity parsing, so no allocation will happen
4303 * and lookup() cannot fail.
4304 */
4305 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4306 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004307 if (parser->m_useForeignDTD)
4308 entity->base = parser->m_curBase;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004309 dtd->paramEntityRead = XML_FALSE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004310 if (! parser->m_externalEntityRefHandler(
4311 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4312 entity->systemId, entity->publicId))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004313 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004314 if (dtd->paramEntityRead) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004315 if (! dtd->standalone && parser->m_notStandaloneHandler
4316 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004317 return XML_ERROR_NOT_STANDALONE;
4318 }
4319 /* if we didn't read the foreign DTD then this means that there
4320 is no external subset and we must reset dtd->hasParamEntityRefs
4321 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07004322 else if (! parser->m_doctypeSysid)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004323 dtd->hasParamEntityRefs = hadParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004324 /* end of DTD - no need to update dtd->keepProcessing */
4325 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004326 parser->m_useForeignDTD = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004327 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004328#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004329 if (parser->m_endDoctypeDeclHandler) {
4330 parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004331 handleDefault = XML_FALSE;
4332 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004333 break;
4334 case XML_ROLE_INSTANCE_START:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004335#ifdef XML_DTD
4336 /* if there is no DOCTYPE declaration then now is the
4337 last chance to read the foreign DTD
4338 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004339 if (parser->m_useForeignDTD) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004340 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004341 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004342 if (parser->m_paramEntityParsing
4343 && parser->m_externalEntityRefHandler) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004344 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
Benjamin Peterson52b94082019-09-25 21:33:58 -07004345 externalSubsetName, sizeof(ENTITY));
4346 if (! entity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004347 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004348 entity->base = parser->m_curBase;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004349 dtd->paramEntityRead = XML_FALSE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004350 if (! parser->m_externalEntityRefHandler(
4351 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4352 entity->systemId, entity->publicId))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004353 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004354 if (dtd->paramEntityRead) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004355 if (! dtd->standalone && parser->m_notStandaloneHandler
4356 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004357 return XML_ERROR_NOT_STANDALONE;
4358 }
4359 /* if we didn't read the foreign DTD then this means that there
4360 is no external subset and we must reset dtd->hasParamEntityRefs
4361 */
4362 else
4363 dtd->hasParamEntityRefs = hadParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004364 /* end of DTD - no need to update dtd->keepProcessing */
4365 }
4366 }
4367#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004368 parser->m_processor = contentProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004369 return contentProcessor(parser, s, end, nextPtr);
4370 case XML_ROLE_ATTLIST_ELEMENT_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004371 parser->m_declElementType = getElementType(parser, enc, s, next);
Benjamin Peterson52b94082019-09-25 21:33:58 -07004372 if (! parser->m_declElementType)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004373 return XML_ERROR_NO_MEMORY;
4374 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004375 case XML_ROLE_ATTRIBUTE_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004376 parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
Benjamin Peterson52b94082019-09-25 21:33:58 -07004377 if (! parser->m_declAttributeId)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004378 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004379 parser->m_declAttributeIsCdata = XML_FALSE;
4380 parser->m_declAttributeType = NULL;
4381 parser->m_declAttributeIsId = XML_FALSE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004382 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004383 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004384 parser->m_declAttributeIsCdata = XML_TRUE;
4385 parser->m_declAttributeType = atypeCDATA;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004386 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004387 case XML_ROLE_ATTRIBUTE_TYPE_ID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004388 parser->m_declAttributeIsId = XML_TRUE;
4389 parser->m_declAttributeType = atypeID;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004390 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004391 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004392 parser->m_declAttributeType = atypeIDREF;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004393 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004394 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004395 parser->m_declAttributeType = atypeIDREFS;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004396 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004397 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004398 parser->m_declAttributeType = atypeENTITY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004399 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004400 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004401 parser->m_declAttributeType = atypeENTITIES;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004402 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004403 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004404 parser->m_declAttributeType = atypeNMTOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004405 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004406 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004407 parser->m_declAttributeType = atypeNMTOKENS;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004408 checkAttListDeclHandler:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004409 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004410 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004411 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004412 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
4413 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004414 if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004415 const XML_Char *prefix;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004416 if (parser->m_declAttributeType) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004417 prefix = enumValueSep;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004418 } else {
4419 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
4420 : enumValueStart);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004421 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07004422 if (! poolAppendString(&parser->m_tempPool, prefix))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004423 return XML_ERROR_NO_MEMORY;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004424 if (! poolAppend(&parser->m_tempPool, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004425 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004426 parser->m_declAttributeType = parser->m_tempPool.start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004427 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004428 }
4429 break;
4430 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
4431 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004432 if (dtd->keepProcessing) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004433 if (! defineAttribute(parser->m_declElementType,
4434 parser->m_declAttributeId,
4435 parser->m_declAttributeIsCdata,
4436 parser->m_declAttributeIsId, 0, parser))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004437 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004438 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4439 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4440 || (*parser->m_declAttributeType == XML_T(ASCII_N)
4441 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004442 /* Enumerated or Notation type */
Benjamin Peterson52b94082019-09-25 21:33:58 -07004443 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4444 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004445 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004446 parser->m_declAttributeType = parser->m_tempPool.start;
4447 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004448 }
4449 *eventEndPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004450 parser->m_attlistDeclHandler(
4451 parser->m_handlerArg, parser->m_declElementType->name,
4452 parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
4453 role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004454 poolClear(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004455 handleDefault = XML_FALSE;
4456 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004457 }
4458 break;
4459 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
4460 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004461 if (dtd->keepProcessing) {
4462 const XML_Char *attVal;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004463 enum XML_Error result = storeAttributeValue(
4464 parser, enc, parser->m_declAttributeIsCdata,
4465 s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004466 if (result)
4467 return result;
4468 attVal = poolStart(&dtd->pool);
4469 poolFinish(&dtd->pool);
4470 /* ID attributes aren't allowed to have a default */
Benjamin Peterson52b94082019-09-25 21:33:58 -07004471 if (! defineAttribute(
4472 parser->m_declElementType, parser->m_declAttributeId,
4473 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004474 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004475 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4476 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4477 || (*parser->m_declAttributeType == XML_T(ASCII_N)
4478 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004479 /* Enumerated or Notation type */
Benjamin Peterson52b94082019-09-25 21:33:58 -07004480 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4481 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004482 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004483 parser->m_declAttributeType = parser->m_tempPool.start;
4484 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004485 }
4486 *eventEndPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004487 parser->m_attlistDeclHandler(
4488 parser->m_handlerArg, parser->m_declElementType->name,
4489 parser->m_declAttributeId->name, parser->m_declAttributeType,
4490 attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004491 poolClear(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004492 handleDefault = XML_FALSE;
4493 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004494 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004495 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004496 case XML_ROLE_ENTITY_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004497 if (dtd->keepProcessing) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004498 enum XML_Error result = storeEntityValue(
4499 parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004500 if (parser->m_declEntity) {
4501 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
Benjamin Peterson52b94082019-09-25 21:33:58 -07004502 parser->m_declEntity->textLen
4503 = (int)(poolLength(&dtd->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004504 poolFinish(&dtd->entityValuePool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004505 if (parser->m_entityDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004506 *eventEndPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004507 parser->m_entityDeclHandler(
4508 parser->m_handlerArg, parser->m_declEntity->name,
4509 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
4510 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004511 handleDefault = XML_FALSE;
4512 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07004513 } else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004514 poolDiscard(&dtd->entityValuePool);
4515 if (result != XML_ERROR_NONE)
4516 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004517 }
4518 break;
4519 case XML_ROLE_DOCTYPE_SYSTEM_ID:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004520#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07004521 parser->m_useForeignDTD = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004522#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004523 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004524 if (parser->m_startDoctypeDeclHandler) {
4525 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
Benjamin Peterson52b94082019-09-25 21:33:58 -07004526 s + enc->minBytesPerChar,
4527 next - enc->minBytesPerChar);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004528 if (parser->m_doctypeSysid == NULL)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004529 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004530 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004531 handleDefault = XML_FALSE;
4532 }
4533#ifdef XML_DTD
4534 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07004535 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
4536 for the case where no parser->m_startDoctypeDeclHandler is set */
4537 parser->m_doctypeSysid = externalSubsetName;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004538#endif /* XML_DTD */
Benjamin Peterson52b94082019-09-25 21:33:58 -07004539 if (! dtd->standalone
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004540#ifdef XML_DTD
Benjamin Peterson52b94082019-09-25 21:33:58 -07004541 && ! parser->m_paramEntityParsing
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004542#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004543 && parser->m_notStandaloneHandler
Benjamin Peterson52b94082019-09-25 21:33:58 -07004544 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004545 return XML_ERROR_NOT_STANDALONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004546#ifndef XML_DTD
4547 break;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004548#else /* XML_DTD */
4549 if (! parser->m_declEntity) {
4550 parser->m_declEntity = (ENTITY *)lookup(
4551 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4552 if (! parser->m_declEntity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004553 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004554 parser->m_declEntity->publicId = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004555 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004556#endif /* XML_DTD */
Benjamin Peterson5033aa72018-09-10 21:04:00 -07004557 /* fall through */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004558 case XML_ROLE_ENTITY_SYSTEM_ID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004559 if (dtd->keepProcessing && parser->m_declEntity) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004560 parser->m_declEntity->systemId
4561 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4562 next - enc->minBytesPerChar);
4563 if (! parser->m_declEntity->systemId)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004564 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004565 parser->m_declEntity->base = parser->m_curBase;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004566 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004567 /* Don't suppress the default handler if we fell through from
4568 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
4569 */
4570 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004571 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004572 }
4573 break;
4574 case XML_ROLE_ENTITY_COMPLETE:
Benjamin Peterson52b94082019-09-25 21:33:58 -07004575 if (dtd->keepProcessing && parser->m_declEntity
4576 && parser->m_entityDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004577 *eventEndPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004578 parser->m_entityDeclHandler(
4579 parser->m_handlerArg, parser->m_declEntity->name,
4580 parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
4581 parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004582 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004583 }
4584 break;
4585 case XML_ROLE_ENTITY_NOTATION_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004586 if (dtd->keepProcessing && parser->m_declEntity) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004587 parser->m_declEntity->notation
4588 = poolStoreString(&dtd->pool, enc, s, next);
4589 if (! parser->m_declEntity->notation)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004590 return XML_ERROR_NO_MEMORY;
4591 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004592 if (parser->m_unparsedEntityDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004593 *eventEndPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004594 parser->m_unparsedEntityDeclHandler(
4595 parser->m_handlerArg, parser->m_declEntity->name,
4596 parser->m_declEntity->base, parser->m_declEntity->systemId,
4597 parser->m_declEntity->publicId, parser->m_declEntity->notation);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004598 handleDefault = XML_FALSE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004599 } else if (parser->m_entityDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004600 *eventEndPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004601 parser->m_entityDeclHandler(
4602 parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
4603 parser->m_declEntity->base, parser->m_declEntity->systemId,
4604 parser->m_declEntity->publicId, parser->m_declEntity->notation);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004605 handleDefault = XML_FALSE;
4606 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004607 }
4608 break;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004609 case XML_ROLE_GENERAL_ENTITY_NAME: {
4610 if (XmlPredefinedEntityName(enc, s, next)) {
4611 parser->m_declEntity = NULL;
4612 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004613 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004614 if (dtd->keepProcessing) {
4615 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
Benjamin Peterson52b94082019-09-25 21:33:58 -07004616 if (! name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004617 return XML_ERROR_NO_MEMORY;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004618 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
4619 name, sizeof(ENTITY));
4620 if (! parser->m_declEntity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004621 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004622 if (parser->m_declEntity->name != name) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004623 poolDiscard(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004624 parser->m_declEntity = NULL;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004625 } else {
4626 poolFinish(&dtd->pool);
4627 parser->m_declEntity->publicId = NULL;
4628 parser->m_declEntity->is_param = XML_FALSE;
4629 /* if we have a parent parser or are reading an internal parameter
4630 entity, then the entity declaration is not considered "internal"
4631 */
4632 parser->m_declEntity->is_internal
4633 = ! (parser->m_parentParser || parser->m_openInternalEntities);
4634 if (parser->m_entityDeclHandler)
4635 handleDefault = XML_FALSE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004636 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07004637 } else {
4638 poolDiscard(&dtd->pool);
4639 parser->m_declEntity = NULL;
4640 }
4641 } break;
4642 case XML_ROLE_PARAM_ENTITY_NAME:
4643#ifdef XML_DTD
4644 if (dtd->keepProcessing) {
4645 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4646 if (! name)
4647 return XML_ERROR_NO_MEMORY;
4648 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4649 name, sizeof(ENTITY));
4650 if (! parser->m_declEntity)
4651 return XML_ERROR_NO_MEMORY;
4652 if (parser->m_declEntity->name != name) {
4653 poolDiscard(&dtd->pool);
4654 parser->m_declEntity = NULL;
4655 } else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004656 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004657 parser->m_declEntity->publicId = NULL;
4658 parser->m_declEntity->is_param = XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004659 /* if we have a parent parser or are reading an internal parameter
4660 entity, then the entity declaration is not considered "internal"
4661 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07004662 parser->m_declEntity->is_internal
4663 = ! (parser->m_parentParser || parser->m_openInternalEntities);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004664 if (parser->m_entityDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004665 handleDefault = XML_FALSE;
4666 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07004667 } else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004668 poolDiscard(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004669 parser->m_declEntity = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004670 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07004671#else /* not XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004672 parser->m_declEntity = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004673#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004674 break;
4675 case XML_ROLE_NOTATION_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004676 parser->m_declNotationPublicId = NULL;
4677 parser->m_declNotationName = NULL;
4678 if (parser->m_notationDeclHandler) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004679 parser->m_declNotationName
4680 = poolStoreString(&parser->m_tempPool, enc, s, next);
4681 if (! parser->m_declNotationName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004682 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004683 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004684 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004685 }
4686 break;
4687 case XML_ROLE_NOTATION_PUBLIC_ID:
Benjamin Peterson52b94082019-09-25 21:33:58 -07004688 if (! XmlIsPublicId(enc, s, next, eventPP))
Fred Drake31d485c2004-08-03 07:06:22 +00004689 return XML_ERROR_PUBLICID;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004690 if (parser
4691 ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
4692 XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004693 s + enc->minBytesPerChar,
4694 next - enc->minBytesPerChar);
Benjamin Peterson52b94082019-09-25 21:33:58 -07004695 if (! tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004696 return XML_ERROR_NO_MEMORY;
4697 normalizePublicId(tem);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004698 parser->m_declNotationPublicId = tem;
4699 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004700 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004701 }
4702 break;
4703 case XML_ROLE_NOTATION_SYSTEM_ID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004704 if (parser->m_declNotationName && parser->m_notationDeclHandler) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004705 const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
4706 s + enc->minBytesPerChar,
4707 next - enc->minBytesPerChar);
4708 if (! systemId)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004709 return XML_ERROR_NO_MEMORY;
4710 *eventEndPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004711 parser->m_notationDeclHandler(
4712 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
4713 systemId, parser->m_declNotationPublicId);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004714 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004715 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004716 poolClear(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004717 break;
4718 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004719 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004720 *eventEndPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004721 parser->m_notationDeclHandler(
4722 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
4723 0, parser->m_declNotationPublicId);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004724 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004725 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004726 poolClear(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004727 break;
4728 case XML_ROLE_ERROR:
4729 switch (tok) {
4730 case XML_TOK_PARAM_ENTITY_REF:
Fred Drake31d485c2004-08-03 07:06:22 +00004731 /* PE references in internal subset are
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004732 not allowed within declarations. */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004733 return XML_ERROR_PARAM_ENTITY_REF;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004734 case XML_TOK_XML_DECL:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004735 return XML_ERROR_MISPLACED_XML_PI;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004736 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004737 return XML_ERROR_SYNTAX;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004738 }
4739#ifdef XML_DTD
Benjamin Peterson52b94082019-09-25 21:33:58 -07004740 case XML_ROLE_IGNORE_SECT: {
4741 enum XML_Error result;
4742 if (parser->m_defaultHandler)
4743 reportDefault(parser, enc, s, next);
4744 handleDefault = XML_FALSE;
4745 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
4746 if (result != XML_ERROR_NONE)
4747 return result;
4748 else if (! next) {
4749 parser->m_processor = ignoreSectionProcessor;
4750 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004751 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07004752 } break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004753#endif /* XML_DTD */
4754 case XML_ROLE_GROUP_OPEN:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004755 if (parser->m_prologState.level >= parser->m_groupSize) {
4756 if (parser->m_groupSize) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004757 {
4758 char *const new_connector = (char *)REALLOC(
4759 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
4760 if (new_connector == NULL) {
4761 parser->m_groupSize /= 2;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004762 return XML_ERROR_NO_MEMORY;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004763 }
4764 parser->m_groupConnector = new_connector;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004765 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07004766
4767 if (dtd->scaffIndex) {
4768 int *const new_scaff_index = (int *)REALLOC(
4769 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
4770 if (new_scaff_index == NULL)
4771 return XML_ERROR_NO_MEMORY;
4772 dtd->scaffIndex = new_scaff_index;
4773 }
4774 } else {
4775 parser->m_groupConnector
4776 = (char *)MALLOC(parser, parser->m_groupSize = 32);
4777 if (! parser->m_groupConnector) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004778 parser->m_groupSize = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004779 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02004780 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004781 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004782 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004783 parser->m_groupConnector[parser->m_prologState.level] = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004784 if (dtd->in_eldecl) {
4785 int myindex = nextScaffoldPart(parser);
4786 if (myindex < 0)
4787 return XML_ERROR_NO_MEMORY;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004788 assert(dtd->scaffIndex != NULL);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004789 dtd->scaffIndex[dtd->scaffLevel] = myindex;
4790 dtd->scaffLevel++;
4791 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004792 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004793 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004794 }
4795 break;
4796 case XML_ROLE_GROUP_SEQUENCE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004797 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004798 return XML_ERROR_SYNTAX;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004799 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
4800 if (dtd->in_eldecl && parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004801 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004802 break;
4803 case XML_ROLE_GROUP_CHOICE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004804 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004805 return XML_ERROR_SYNTAX;
4806 if (dtd->in_eldecl
Benjamin Peterson52b94082019-09-25 21:33:58 -07004807 && ! parser->m_groupConnector[parser->m_prologState.level]
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004808 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
Benjamin Peterson52b94082019-09-25 21:33:58 -07004809 != XML_CTYPE_MIXED)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004810 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
4811 = XML_CTYPE_CHOICE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004812 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004813 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004814 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004815 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004816 break;
4817 case XML_ROLE_PARAM_ENTITY_REF:
4818#ifdef XML_DTD
4819 case XML_ROLE_INNER_PARAM_ENTITY_REF:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004820 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004821 if (! parser->m_paramEntityParsing)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004822 dtd->keepProcessing = dtd->standalone;
4823 else {
4824 const XML_Char *name;
4825 ENTITY *entity;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004826 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4827 next - enc->minBytesPerChar);
4828 if (! name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004829 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004830 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004831 poolDiscard(&dtd->pool);
4832 /* first, determine if a check for an existing declaration is needed;
4833 if yes, check that the entity exists, and that it is internal,
4834 otherwise call the skipped entity handler
4835 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07004836 if (parser->m_prologState.documentEntity
4837 && (dtd->standalone ? ! parser->m_openInternalEntities
4838 : ! dtd->hasParamEntityRefs)) {
4839 if (! entity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004840 return XML_ERROR_UNDEFINED_ENTITY;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004841 else if (! entity->is_internal) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02004842 /* It's hard to exhaustively search the code to be sure,
4843 * but there doesn't seem to be a way of executing the
4844 * following line. There are two cases:
4845 *
4846 * If 'standalone' is false, the DTD must have no
4847 * parameter entities or we wouldn't have passed the outer
4848 * 'if' statement. That means the only entity in the hash
4849 * table is the external subset name "#" which cannot be
4850 * given as a parameter entity name in XML syntax, so the
4851 * lookup must have returned NULL and we don't even reach
4852 * the test for an internal entity.
4853 *
4854 * If 'standalone' is true, it does not seem to be
4855 * possible to create entities taking this code path that
4856 * are not internal entities, so fail the test above.
4857 *
4858 * Because this analysis is very uncertain, the code is
4859 * being left in place and merely removed from the
4860 * coverage test statistics.
4861 */
4862 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
4863 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07004864 } else if (! entity) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004865 dtd->keepProcessing = dtd->standalone;
4866 /* cannot report skipped entities in declarations */
Benjamin Peterson52b94082019-09-25 21:33:58 -07004867 if ((role == XML_ROLE_PARAM_ENTITY_REF)
4868 && parser->m_skippedEntityHandler) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004869 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004870 handleDefault = XML_FALSE;
4871 }
4872 break;
4873 }
4874 if (entity->open)
4875 return XML_ERROR_RECURSIVE_ENTITY_REF;
4876 if (entity->textPtr) {
4877 enum XML_Error result;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004878 XML_Bool betweenDecl
4879 = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
Fred Drake31d485c2004-08-03 07:06:22 +00004880 result = processInternalEntity(parser, entity, betweenDecl);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004881 if (result != XML_ERROR_NONE)
4882 return result;
4883 handleDefault = XML_FALSE;
4884 break;
4885 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004886 if (parser->m_externalEntityRefHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004887 dtd->paramEntityRead = XML_FALSE;
4888 entity->open = XML_TRUE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004889 if (! parser->m_externalEntityRefHandler(
4890 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4891 entity->systemId, entity->publicId)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004892 entity->open = XML_FALSE;
4893 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
4894 }
4895 entity->open = XML_FALSE;
4896 handleDefault = XML_FALSE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004897 if (! dtd->paramEntityRead) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004898 dtd->keepProcessing = dtd->standalone;
4899 break;
4900 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07004901 } else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004902 dtd->keepProcessing = dtd->standalone;
4903 break;
4904 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004905 }
4906#endif /* XML_DTD */
Benjamin Peterson52b94082019-09-25 21:33:58 -07004907 if (! dtd->standalone && parser->m_notStandaloneHandler
4908 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004909 return XML_ERROR_NOT_STANDALONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004910 break;
4911
Benjamin Peterson52b94082019-09-25 21:33:58 -07004912 /* Element declaration stuff */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004913
4914 case XML_ROLE_ELEMENT_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004915 if (parser->m_elementDeclHandler) {
4916 parser->m_declElementType = getElementType(parser, enc, s, next);
Benjamin Peterson52b94082019-09-25 21:33:58 -07004917 if (! parser->m_declElementType)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004918 return XML_ERROR_NO_MEMORY;
4919 dtd->scaffLevel = 0;
4920 dtd->scaffCount = 0;
4921 dtd->in_eldecl = XML_TRUE;
4922 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004923 }
4924 break;
4925
4926 case XML_ROLE_CONTENT_ANY:
4927 case XML_ROLE_CONTENT_EMPTY:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004928 if (dtd->in_eldecl) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004929 if (parser->m_elementDeclHandler) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004930 XML_Content *content
4931 = (XML_Content *)MALLOC(parser, sizeof(XML_Content));
4932 if (! content)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004933 return XML_ERROR_NO_MEMORY;
4934 content->quant = XML_CQUANT_NONE;
4935 content->name = NULL;
4936 content->numchildren = 0;
4937 content->children = NULL;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004938 content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
4939 : XML_CTYPE_EMPTY);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004940 *eventEndPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004941 parser->m_elementDeclHandler(
4942 parser->m_handlerArg, parser->m_declElementType->name, content);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004943 handleDefault = XML_FALSE;
4944 }
4945 dtd->in_eldecl = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004946 }
4947 break;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004948
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004949 case XML_ROLE_CONTENT_PCDATA:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004950 if (dtd->in_eldecl) {
4951 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
4952 = XML_CTYPE_MIXED;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004953 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004954 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004955 }
4956 break;
4957
4958 case XML_ROLE_CONTENT_ELEMENT:
4959 quant = XML_CQUANT_NONE;
4960 goto elementContent;
4961 case XML_ROLE_CONTENT_ELEMENT_OPT:
4962 quant = XML_CQUANT_OPT;
4963 goto elementContent;
4964 case XML_ROLE_CONTENT_ELEMENT_REP:
4965 quant = XML_CQUANT_REP;
4966 goto elementContent;
4967 case XML_ROLE_CONTENT_ELEMENT_PLUS:
4968 quant = XML_CQUANT_PLUS;
4969 elementContent:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004970 if (dtd->in_eldecl) {
4971 ELEMENT_TYPE *el;
4972 const XML_Char *name;
4973 int nameLen;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004974 const char *nxt
4975 = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004976 int myindex = nextScaffoldPart(parser);
4977 if (myindex < 0)
4978 return XML_ERROR_NO_MEMORY;
4979 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
4980 dtd->scaffold[myindex].quant = quant;
4981 el = getElementType(parser, enc, s, nxt);
Benjamin Peterson52b94082019-09-25 21:33:58 -07004982 if (! el)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004983 return XML_ERROR_NO_MEMORY;
4984 name = el->name;
4985 dtd->scaffold[myindex].name = name;
4986 nameLen = 0;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004987 for (; name[nameLen++];)
4988 ;
4989 dtd->contentStringLen += nameLen;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004990 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004991 handleDefault = XML_FALSE;
4992 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004993 break;
4994
4995 case XML_ROLE_GROUP_CLOSE:
4996 quant = XML_CQUANT_NONE;
4997 goto closeGroup;
4998 case XML_ROLE_GROUP_CLOSE_OPT:
4999 quant = XML_CQUANT_OPT;
5000 goto closeGroup;
5001 case XML_ROLE_GROUP_CLOSE_REP:
5002 quant = XML_CQUANT_REP;
5003 goto closeGroup;
5004 case XML_ROLE_GROUP_CLOSE_PLUS:
5005 quant = XML_CQUANT_PLUS;
5006 closeGroup:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005007 if (dtd->in_eldecl) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005008 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005009 handleDefault = XML_FALSE;
5010 dtd->scaffLevel--;
5011 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5012 if (dtd->scaffLevel == 0) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07005013 if (! handleDefault) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005014 XML_Content *model = build_model(parser);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005015 if (! model)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005016 return XML_ERROR_NO_MEMORY;
5017 *eventEndPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005018 parser->m_elementDeclHandler(
5019 parser->m_handlerArg, parser->m_declElementType->name, model);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005020 }
5021 dtd->in_eldecl = XML_FALSE;
5022 dtd->contentStringLen = 0;
5023 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005024 }
5025 break;
5026 /* End element declaration stuff */
5027
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005028 case XML_ROLE_PI:
Benjamin Peterson52b94082019-09-25 21:33:58 -07005029 if (! reportProcessingInstruction(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005030 return XML_ERROR_NO_MEMORY;
5031 handleDefault = XML_FALSE;
5032 break;
5033 case XML_ROLE_COMMENT:
Benjamin Peterson52b94082019-09-25 21:33:58 -07005034 if (! reportComment(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005035 return XML_ERROR_NO_MEMORY;
5036 handleDefault = XML_FALSE;
5037 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005038 case XML_ROLE_NONE:
5039 switch (tok) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005040 case XML_TOK_BOM:
5041 handleDefault = XML_FALSE;
5042 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005043 }
5044 break;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005045 case XML_ROLE_DOCTYPE_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005046 if (parser->m_startDoctypeDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005047 handleDefault = XML_FALSE;
5048 break;
5049 case XML_ROLE_ENTITY_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005050 if (dtd->keepProcessing && parser->m_entityDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005051 handleDefault = XML_FALSE;
5052 break;
5053 case XML_ROLE_NOTATION_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005054 if (parser->m_notationDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005055 handleDefault = XML_FALSE;
5056 break;
5057 case XML_ROLE_ATTLIST_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005058 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005059 handleDefault = XML_FALSE;
5060 break;
5061 case XML_ROLE_ELEMENT_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005062 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005063 handleDefault = XML_FALSE;
5064 break;
5065 } /* end of big switch */
5066
Benjamin Peterson4e211002018-06-26 19:25:45 -07005067 if (handleDefault && parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005068 reportDefault(parser, enc, s, next);
5069
Benjamin Peterson4e211002018-06-26 19:25:45 -07005070 switch (parser->m_parsingStatus.parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005071 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00005072 *nextPtr = next;
5073 return XML_ERROR_NONE;
5074 case XML_FINISHED:
5075 return XML_ERROR_ABORTED;
5076 default:
5077 s = next;
5078 tok = XmlPrologTok(enc, s, end, &next);
5079 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005080 }
5081 /* not reached */
5082}
5083
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005084static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07005085epilogProcessor(XML_Parser parser, const char *s, const char *end,
5086 const char **nextPtr) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005087 parser->m_processor = epilogProcessor;
5088 parser->m_eventPtr = s;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005089 for (;;) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005090 const char *next = NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005091 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5092 parser->m_eventEndPtr = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005093 switch (tok) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005094 /* report partial linebreak - it might be the last token */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005095 case -XML_TOK_PROLOG_S:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005096 if (parser->m_defaultHandler) {
5097 reportDefault(parser, parser->m_encoding, s, next);
5098 if (parser->m_parsingStatus.parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00005099 return XML_ERROR_ABORTED;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005100 }
Fred Drake31d485c2004-08-03 07:06:22 +00005101 *nextPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005102 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005103 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00005104 *nextPtr = s;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005105 return XML_ERROR_NONE;
5106 case XML_TOK_PROLOG_S:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005107 if (parser->m_defaultHandler)
5108 reportDefault(parser, parser->m_encoding, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005109 break;
5110 case XML_TOK_PI:
Benjamin Peterson52b94082019-09-25 21:33:58 -07005111 if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005112 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005113 break;
5114 case XML_TOK_COMMENT:
Benjamin Peterson52b94082019-09-25 21:33:58 -07005115 if (! reportComment(parser, parser->m_encoding, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005116 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005117 break;
5118 case XML_TOK_INVALID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005119 parser->m_eventPtr = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005120 return XML_ERROR_INVALID_TOKEN;
5121 case XML_TOK_PARTIAL:
Benjamin Peterson52b94082019-09-25 21:33:58 -07005122 if (! parser->m_parsingStatus.finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005123 *nextPtr = s;
5124 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005125 }
5126 return XML_ERROR_UNCLOSED_TOKEN;
5127 case XML_TOK_PARTIAL_CHAR:
Benjamin Peterson52b94082019-09-25 21:33:58 -07005128 if (! parser->m_parsingStatus.finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005129 *nextPtr = s;
5130 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005131 }
5132 return XML_ERROR_PARTIAL_CHAR;
5133 default:
5134 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5135 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07005136 parser->m_eventPtr = s = next;
5137 switch (parser->m_parsingStatus.parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005138 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00005139 *nextPtr = next;
5140 return XML_ERROR_NONE;
5141 case XML_FINISHED:
5142 return XML_ERROR_ABORTED;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005143 default:;
Fred Drake31d485c2004-08-03 07:06:22 +00005144 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005145 }
5146}
5147
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005148static enum XML_Error
Benjamin Peterson52b94082019-09-25 21:33:58 -07005149processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
Fred Drake31d485c2004-08-03 07:06:22 +00005150 const char *textStart, *textEnd;
5151 const char *next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005152 enum XML_Error result;
Fred Drake31d485c2004-08-03 07:06:22 +00005153 OPEN_INTERNAL_ENTITY *openEntity;
5154
Benjamin Peterson4e211002018-06-26 19:25:45 -07005155 if (parser->m_freeInternalEntities) {
5156 openEntity = parser->m_freeInternalEntities;
5157 parser->m_freeInternalEntities = openEntity->next;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005158 } else {
5159 openEntity
5160 = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
5161 if (! openEntity)
Fred Drake31d485c2004-08-03 07:06:22 +00005162 return XML_ERROR_NO_MEMORY;
5163 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005164 entity->open = XML_TRUE;
Fred Drake31d485c2004-08-03 07:06:22 +00005165 entity->processed = 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005166 openEntity->next = parser->m_openInternalEntities;
5167 parser->m_openInternalEntities = openEntity;
Fred Drake31d485c2004-08-03 07:06:22 +00005168 openEntity->entity = entity;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005169 openEntity->startTagLevel = parser->m_tagLevel;
Fred Drake31d485c2004-08-03 07:06:22 +00005170 openEntity->betweenDecl = betweenDecl;
5171 openEntity->internalEventPtr = NULL;
5172 openEntity->internalEventEndPtr = NULL;
5173 textStart = (char *)entity->textPtr;
5174 textEnd = (char *)(entity->textPtr + entity->textLen);
Victor Stinner5ff71322017-06-21 14:39:22 +02005175 /* Set a safe default value in case 'next' does not get set */
5176 next = textStart;
Fred Drake31d485c2004-08-03 07:06:22 +00005177
5178#ifdef XML_DTD
5179 if (entity->is_param) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07005180 int tok
5181 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5182 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5183 tok, next, &next, XML_FALSE, XML_FALSE);
5184 } else
Fred Drake31d485c2004-08-03 07:06:22 +00005185#endif /* XML_DTD */
Benjamin Peterson52b94082019-09-25 21:33:58 -07005186 result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
5187 textStart, textEnd, &next, XML_FALSE);
Fred Drake31d485c2004-08-03 07:06:22 +00005188
5189 if (result == XML_ERROR_NONE) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005190 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005191 entity->processed = (int)(next - textStart);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005192 parser->m_processor = internalEntityProcessor;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005193 } else {
Fred Drake31d485c2004-08-03 07:06:22 +00005194 entity->open = XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005195 parser->m_openInternalEntities = openEntity->next;
Fred Drake31d485c2004-08-03 07:06:22 +00005196 /* put openEntity back in list of free instances */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005197 openEntity->next = parser->m_freeInternalEntities;
5198 parser->m_freeInternalEntities = openEntity;
Fred Drake31d485c2004-08-03 07:06:22 +00005199 }
5200 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005201 return result;
5202}
5203
Fred Drake31d485c2004-08-03 07:06:22 +00005204static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07005205internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
5206 const char **nextPtr) {
Fred Drake31d485c2004-08-03 07:06:22 +00005207 ENTITY *entity;
5208 const char *textStart, *textEnd;
5209 const char *next;
5210 enum XML_Error result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005211 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005212 if (! openEntity)
Fred Drake31d485c2004-08-03 07:06:22 +00005213 return XML_ERROR_UNEXPECTED_STATE;
5214
5215 entity = openEntity->entity;
5216 textStart = ((char *)entity->textPtr) + entity->processed;
5217 textEnd = (char *)(entity->textPtr + entity->textLen);
Victor Stinner5ff71322017-06-21 14:39:22 +02005218 /* Set a safe default value in case 'next' does not get set */
5219 next = textStart;
Fred Drake31d485c2004-08-03 07:06:22 +00005220
5221#ifdef XML_DTD
5222 if (entity->is_param) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07005223 int tok
5224 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5225 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
5226 tok, next, &next, XML_FALSE, XML_TRUE);
5227 } else
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005228#endif /* XML_DTD */
Benjamin Peterson52b94082019-09-25 21:33:58 -07005229 result = doContent(parser, openEntity->startTagLevel,
5230 parser->m_internalEncoding, textStart, textEnd, &next,
5231 XML_FALSE);
Fred Drake31d485c2004-08-03 07:06:22 +00005232
5233 if (result != XML_ERROR_NONE)
5234 return result;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005235 else if (textEnd != next
5236 && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005237 entity->processed = (int)(next - (char *)entity->textPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00005238 return result;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005239 } else {
Fred Drake31d485c2004-08-03 07:06:22 +00005240 entity->open = XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005241 parser->m_openInternalEntities = openEntity->next;
Fred Drake31d485c2004-08-03 07:06:22 +00005242 /* put openEntity back in list of free instances */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005243 openEntity->next = parser->m_freeInternalEntities;
5244 parser->m_freeInternalEntities = openEntity;
Fred Drake31d485c2004-08-03 07:06:22 +00005245 }
5246
5247#ifdef XML_DTD
5248 if (entity->is_param) {
5249 int tok;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005250 parser->m_processor = prologProcessor;
5251 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5252 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
Benjamin Peterson52b94082019-09-25 21:33:58 -07005253 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE);
5254 } else
Fred Drake31d485c2004-08-03 07:06:22 +00005255#endif /* XML_DTD */
5256 {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005257 parser->m_processor = contentProcessor;
Fred Drake31d485c2004-08-03 07:06:22 +00005258 /* see externalEntityContentProcessor vs contentProcessor */
Benjamin Peterson52b94082019-09-25 21:33:58 -07005259 return doContent(parser, parser->m_parentParser ? 1 : 0, parser->m_encoding,
5260 s, end, nextPtr,
5261 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005262 }
Fred Drake31d485c2004-08-03 07:06:22 +00005263}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005264
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005265static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07005266errorProcessor(XML_Parser parser, const char *s, const char *end,
5267 const char **nextPtr) {
5268 UNUSED_P(s);
5269 UNUSED_P(end);
5270 UNUSED_P(nextPtr);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005271 return parser->m_errorCode;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005272}
5273
5274static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005275storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
Benjamin Peterson52b94082019-09-25 21:33:58 -07005276 const char *ptr, const char *end, STRING_POOL *pool) {
5277 enum XML_Error result
5278 = appendAttributeValue(parser, enc, isCdata, ptr, end, pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005279 if (result)
5280 return result;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005281 if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005282 poolChop(pool);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005283 if (! poolAppendChar(pool, XML_T('\0')))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005284 return XML_ERROR_NO_MEMORY;
5285 return XML_ERROR_NONE;
5286}
5287
5288static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005289appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
Benjamin Peterson52b94082019-09-25 21:33:58 -07005290 const char *ptr, const char *end, STRING_POOL *pool) {
5291 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005292 for (;;) {
5293 const char *next;
5294 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
5295 switch (tok) {
5296 case XML_TOK_NONE:
5297 return XML_ERROR_NONE;
5298 case XML_TOK_INVALID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005299 if (enc == parser->m_encoding)
5300 parser->m_eventPtr = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005301 return XML_ERROR_INVALID_TOKEN;
5302 case XML_TOK_PARTIAL:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005303 if (enc == parser->m_encoding)
5304 parser->m_eventPtr = ptr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005305 return XML_ERROR_INVALID_TOKEN;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005306 case XML_TOK_CHAR_REF: {
5307 XML_Char buf[XML_ENCODE_MAX];
5308 int i;
5309 int n = XmlCharRefNumber(enc, ptr);
5310 if (n < 0) {
5311 if (enc == parser->m_encoding)
5312 parser->m_eventPtr = ptr;
5313 return XML_ERROR_BAD_CHAR_REF;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005314 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07005315 if (! isCdata && n == 0x20 /* space */
5316 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5317 break;
5318 n = XmlEncode(n, (ICHAR *)buf);
5319 /* The XmlEncode() functions can never return 0 here. That
5320 * error return happens if the code point passed in is either
5321 * negative or greater than or equal to 0x110000. The
5322 * XmlCharRefNumber() functions will all return a number
5323 * strictly less than 0x110000 or a negative value if an error
5324 * occurred. The negative value is intercepted above, so
5325 * XmlEncode() is never passed a value it might return an
5326 * error for.
5327 */
5328 for (i = 0; i < n; i++) {
5329 if (! poolAppendChar(pool, buf[i]))
5330 return XML_ERROR_NO_MEMORY;
5331 }
5332 } break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005333 case XML_TOK_DATA_CHARS:
Benjamin Peterson52b94082019-09-25 21:33:58 -07005334 if (! poolAppend(pool, enc, ptr, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005335 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005336 break;
5337 case XML_TOK_TRAILING_CR:
5338 next = ptr + enc->minBytesPerChar;
5339 /* fall through */
5340 case XML_TOK_ATTRIBUTE_VALUE_S:
5341 case XML_TOK_DATA_NEWLINE:
Benjamin Peterson52b94082019-09-25 21:33:58 -07005342 if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005343 break;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005344 if (! poolAppendChar(pool, 0x20))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005345 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005346 break;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005347 case XML_TOK_ENTITY_REF: {
5348 const XML_Char *name;
5349 ENTITY *entity;
5350 char checkEntityDecl;
5351 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
5352 enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
5353 if (ch) {
5354 if (! poolAppendChar(pool, ch))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005355 return XML_ERROR_NO_MEMORY;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005356 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005357 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07005358 name = poolStoreString(&parser->m_temp2Pool, enc,
5359 ptr + enc->minBytesPerChar,
5360 next - enc->minBytesPerChar);
5361 if (! name)
5362 return XML_ERROR_NO_MEMORY;
5363 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
5364 poolDiscard(&parser->m_temp2Pool);
5365 /* First, determine if a check for an existing declaration is needed;
5366 if yes, check that the entity exists, and that it is internal.
5367 */
5368 if (pool == &dtd->pool) /* are we called from prolog? */
5369 checkEntityDecl =
5370#ifdef XML_DTD
5371 parser->m_prologState.documentEntity &&
5372#endif /* XML_DTD */
5373 (dtd->standalone ? ! parser->m_openInternalEntities
5374 : ! dtd->hasParamEntityRefs);
5375 else /* if (pool == &parser->m_tempPool): we are called from content */
5376 checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
5377 if (checkEntityDecl) {
5378 if (! entity)
5379 return XML_ERROR_UNDEFINED_ENTITY;
5380 else if (! entity->is_internal)
5381 return XML_ERROR_ENTITY_DECLARED_IN_PE;
5382 } else if (! entity) {
5383 /* Cannot report skipped entity here - see comments on
5384 parser->m_skippedEntityHandler.
5385 if (parser->m_skippedEntityHandler)
5386 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
5387 */
5388 /* Cannot call the default handler because this would be
5389 out of sync with the call to the startElementHandler.
5390 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
5391 reportDefault(parser, enc, ptr, next);
5392 */
5393 break;
5394 }
5395 if (entity->open) {
5396 if (enc == parser->m_encoding) {
5397 /* It does not appear that this line can be executed.
5398 *
5399 * The "if (entity->open)" check catches recursive entity
5400 * definitions. In order to be called with an open
5401 * entity, it must have gone through this code before and
5402 * been through the recursive call to
5403 * appendAttributeValue() some lines below. That call
5404 * sets the local encoding ("enc") to the parser's
5405 * internal encoding (internal_utf8 or internal_utf16),
5406 * which can never be the same as the principle encoding.
5407 * It doesn't appear there is another code path that gets
5408 * here with entity->open being TRUE.
5409 *
5410 * Since it is not certain that this logic is watertight,
5411 * we keep the line and merely exclude it from coverage
5412 * tests.
5413 */
5414 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
5415 }
5416 return XML_ERROR_RECURSIVE_ENTITY_REF;
5417 }
5418 if (entity->notation) {
5419 if (enc == parser->m_encoding)
5420 parser->m_eventPtr = ptr;
5421 return XML_ERROR_BINARY_ENTITY_REF;
5422 }
5423 if (! entity->textPtr) {
5424 if (enc == parser->m_encoding)
5425 parser->m_eventPtr = ptr;
5426 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
5427 } else {
5428 enum XML_Error result;
5429 const XML_Char *textEnd = entity->textPtr + entity->textLen;
5430 entity->open = XML_TRUE;
5431 result = appendAttributeValue(parser, parser->m_internalEncoding,
5432 isCdata, (char *)entity->textPtr,
5433 (char *)textEnd, pool);
5434 entity->open = XML_FALSE;
5435 if (result)
5436 return result;
5437 }
5438 } break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005439 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02005440 /* The only token returned by XmlAttributeValueTok() that does
5441 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
5442 * Getting that would require an entity name to contain an
5443 * incomplete XML character (e.g. \xE2\x82); however previous
5444 * tokenisers will have already recognised and rejected such
5445 * names before XmlAttributeValueTok() gets a look-in. This
5446 * default case should be retained as a safety net, but the code
5447 * excluded from coverage tests.
5448 *
5449 * LCOV_EXCL_START
5450 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005451 if (enc == parser->m_encoding)
5452 parser->m_eventPtr = ptr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005453 return XML_ERROR_UNEXPECTED_STATE;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005454 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005455 }
5456 ptr = next;
5457 }
5458 /* not reached */
5459}
5460
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005461static enum XML_Error
Benjamin Peterson52b94082019-09-25 21:33:58 -07005462storeEntityValue(XML_Parser parser, const ENCODING *enc,
5463 const char *entityTextPtr, const char *entityTextEnd) {
5464 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005465 STRING_POOL *pool = &(dtd->entityValuePool);
5466 enum XML_Error result = XML_ERROR_NONE;
5467#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07005468 int oldInEntityValue = parser->m_prologState.inEntityValue;
5469 parser->m_prologState.inEntityValue = 1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005470#endif /* XML_DTD */
5471 /* never return Null for the value argument in EntityDeclHandler,
5472 since this would indicate an external entity; therefore we
5473 have to make sure that entityValuePool.start is not null */
Benjamin Peterson52b94082019-09-25 21:33:58 -07005474 if (! pool->blocks) {
5475 if (! poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005476 return XML_ERROR_NO_MEMORY;
5477 }
5478
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005479 for (;;) {
5480 const char *next;
5481 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
5482 switch (tok) {
5483 case XML_TOK_PARAM_ENTITY_REF:
5484#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07005485 if (parser->m_isParamEntity || enc != parser->m_encoding) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005486 const XML_Char *name;
5487 ENTITY *entity;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005488 name = poolStoreString(&parser->m_tempPool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005489 entityTextPtr + enc->minBytesPerChar,
5490 next - enc->minBytesPerChar);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005491 if (! name) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005492 result = XML_ERROR_NO_MEMORY;
5493 goto endEntityValue;
5494 }
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005495 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005496 poolDiscard(&parser->m_tempPool);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005497 if (! entity) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005498 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
5499 /* cannot report skipped entity here - see comments on
Benjamin Peterson4e211002018-06-26 19:25:45 -07005500 parser->m_skippedEntityHandler
5501 if (parser->m_skippedEntityHandler)
5502 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005503 */
5504 dtd->keepProcessing = dtd->standalone;
5505 goto endEntityValue;
5506 }
5507 if (entity->open) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005508 if (enc == parser->m_encoding)
5509 parser->m_eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005510 result = XML_ERROR_RECURSIVE_ENTITY_REF;
5511 goto endEntityValue;
5512 }
5513 if (entity->systemId) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005514 if (parser->m_externalEntityRefHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005515 dtd->paramEntityRead = XML_FALSE;
5516 entity->open = XML_TRUE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005517 if (! parser->m_externalEntityRefHandler(
5518 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5519 entity->systemId, entity->publicId)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005520 entity->open = XML_FALSE;
5521 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5522 goto endEntityValue;
5523 }
5524 entity->open = XML_FALSE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005525 if (! dtd->paramEntityRead)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005526 dtd->keepProcessing = dtd->standalone;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005527 } else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005528 dtd->keepProcessing = dtd->standalone;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005529 } else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005530 entity->open = XML_TRUE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005531 result = storeEntityValue(
5532 parser, parser->m_internalEncoding, (char *)entity->textPtr,
5533 (char *)(entity->textPtr + entity->textLen));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005534 entity->open = XML_FALSE;
5535 if (result)
5536 goto endEntityValue;
5537 }
5538 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005539 }
5540#endif /* XML_DTD */
Fred Drake31d485c2004-08-03 07:06:22 +00005541 /* In the internal subset, PE references are not legal
5542 within markup declarations, e.g entity values in this case. */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005543 parser->m_eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005544 result = XML_ERROR_PARAM_ENTITY_REF;
5545 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005546 case XML_TOK_NONE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005547 result = XML_ERROR_NONE;
5548 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005549 case XML_TOK_ENTITY_REF:
5550 case XML_TOK_DATA_CHARS:
Benjamin Peterson52b94082019-09-25 21:33:58 -07005551 if (! poolAppend(pool, enc, entityTextPtr, next)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005552 result = XML_ERROR_NO_MEMORY;
5553 goto endEntityValue;
5554 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005555 break;
5556 case XML_TOK_TRAILING_CR:
5557 next = entityTextPtr + enc->minBytesPerChar;
5558 /* fall through */
5559 case XML_TOK_DATA_NEWLINE:
Benjamin Peterson52b94082019-09-25 21:33:58 -07005560 if (pool->end == pool->ptr && ! poolGrow(pool)) {
5561 result = XML_ERROR_NO_MEMORY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005562 goto endEntityValue;
5563 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005564 *(pool->ptr)++ = 0xA;
5565 break;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005566 case XML_TOK_CHAR_REF: {
5567 XML_Char buf[XML_ENCODE_MAX];
5568 int i;
5569 int n = XmlCharRefNumber(enc, entityTextPtr);
5570 if (n < 0) {
5571 if (enc == parser->m_encoding)
5572 parser->m_eventPtr = entityTextPtr;
5573 result = XML_ERROR_BAD_CHAR_REF;
5574 goto endEntityValue;
5575 }
5576 n = XmlEncode(n, (ICHAR *)buf);
5577 /* The XmlEncode() functions can never return 0 here. That
5578 * error return happens if the code point passed in is either
5579 * negative or greater than or equal to 0x110000. The
5580 * XmlCharRefNumber() functions will all return a number
5581 * strictly less than 0x110000 or a negative value if an error
5582 * occurred. The negative value is intercepted above, so
5583 * XmlEncode() is never passed a value it might return an
5584 * error for.
5585 */
5586 for (i = 0; i < n; i++) {
5587 if (pool->end == pool->ptr && ! poolGrow(pool)) {
5588 result = XML_ERROR_NO_MEMORY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005589 goto endEntityValue;
5590 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07005591 *(pool->ptr)++ = buf[i];
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005592 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07005593 } break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005594 case XML_TOK_PARTIAL:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005595 if (enc == parser->m_encoding)
5596 parser->m_eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005597 result = XML_ERROR_INVALID_TOKEN;
5598 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005599 case XML_TOK_INVALID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005600 if (enc == parser->m_encoding)
5601 parser->m_eventPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005602 result = XML_ERROR_INVALID_TOKEN;
5603 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005604 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02005605 /* This default case should be unnecessary -- all the tokens
5606 * that XmlEntityValueTok() can return have their own explicit
5607 * cases -- but should be retained for safety. We do however
5608 * exclude it from the coverage statistics.
5609 *
5610 * LCOV_EXCL_START
5611 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005612 if (enc == parser->m_encoding)
5613 parser->m_eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005614 result = XML_ERROR_UNEXPECTED_STATE;
5615 goto endEntityValue;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005616 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005617 }
5618 entityTextPtr = next;
5619 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005620endEntityValue:
5621#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07005622 parser->m_prologState.inEntityValue = oldInEntityValue;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005623#endif /* XML_DTD */
5624 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005625}
5626
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005627static void FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07005628normalizeLines(XML_Char *s) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005629 XML_Char *p;
5630 for (;; s++) {
5631 if (*s == XML_T('\0'))
5632 return;
5633 if (*s == 0xD)
5634 break;
5635 }
5636 p = s;
5637 do {
5638 if (*s == 0xD) {
5639 *p++ = 0xA;
5640 if (*++s == 0xA)
5641 s++;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005642 } else
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005643 *p++ = *s++;
5644 } while (*s);
5645 *p = XML_T('\0');
5646}
5647
5648static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005649reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
Benjamin Peterson52b94082019-09-25 21:33:58 -07005650 const char *start, const char *end) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005651 const XML_Char *target;
5652 XML_Char *data;
5653 const char *tem;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005654 if (! parser->m_processingInstructionHandler) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005655 if (parser->m_defaultHandler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005656 reportDefault(parser, enc, start, end);
5657 return 1;
5658 }
5659 start += enc->minBytesPerChar * 2;
5660 tem = start + XmlNameLength(enc, start);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005661 target = poolStoreString(&parser->m_tempPool, enc, start, tem);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005662 if (! target)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005663 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005664 poolFinish(&parser->m_tempPool);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005665 data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
5666 end - enc->minBytesPerChar * 2);
5667 if (! data)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005668 return 0;
5669 normalizeLines(data);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005670 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
5671 poolClear(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005672 return 1;
5673}
5674
5675static int
Benjamin Peterson52b94082019-09-25 21:33:58 -07005676reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
5677 const char *end) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005678 XML_Char *data;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005679 if (! parser->m_commentHandler) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005680 if (parser->m_defaultHandler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005681 reportDefault(parser, enc, start, end);
5682 return 1;
5683 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07005684 data = poolStoreString(&parser->m_tempPool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005685 start + enc->minBytesPerChar * 4,
5686 end - enc->minBytesPerChar * 3);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005687 if (! data)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005688 return 0;
5689 normalizeLines(data);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005690 parser->m_commentHandler(parser->m_handlerArg, data);
5691 poolClear(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005692 return 1;
5693}
5694
5695static void
Benjamin Peterson52b94082019-09-25 21:33:58 -07005696reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
5697 const char *end) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005698 if (MUST_CONVERT(enc, s)) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02005699 enum XML_Convert_Result convert_res;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005700 const char **eventPP;
5701 const char **eventEndPP;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005702 if (enc == parser->m_encoding) {
5703 eventPP = &parser->m_eventPtr;
5704 eventEndPP = &parser->m_eventEndPtr;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005705 } else {
Victor Stinner93d0cb52017-08-18 23:43:54 +02005706 /* To get here, two things must be true; the parser must be
5707 * using a character encoding that is not the same as the
5708 * encoding passed in, and the encoding passed in must need
5709 * conversion to the internal format (UTF-8 unless XML_UNICODE
5710 * is defined). The only occasions on which the encoding passed
5711 * in is not the same as the parser's encoding are when it is
5712 * the internal encoding (e.g. a previously defined parameter
5713 * entity, already converted to internal format). This by
5714 * definition doesn't need conversion, so the whole branch never
5715 * gets executed.
5716 *
5717 * For safety's sake we don't delete these lines and merely
5718 * exclude them from coverage statistics.
5719 *
5720 * LCOV_EXCL_START
5721 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005722 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
5723 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Victor Stinner93d0cb52017-08-18 23:43:54 +02005724 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005725 }
5726 do {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005727 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005728 convert_res
5729 = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005730 *eventEndPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005731 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
5732 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005733 *eventPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005734 } while ((convert_res != XML_CONVERT_COMPLETED)
5735 && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
5736 } else
5737 parser->m_defaultHandler(parser->m_handlerArg, (XML_Char *)s,
5738 (int)((XML_Char *)end - (XML_Char *)s));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005739}
5740
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005741static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005742defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
Benjamin Peterson52b94082019-09-25 21:33:58 -07005743 XML_Bool isId, const XML_Char *value, XML_Parser parser) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005744 DEFAULT_ATTRIBUTE *att;
5745 if (value || isId) {
5746 /* The handling of default attributes gets messed up if we have
5747 a default which duplicates a non-default. */
5748 int i;
5749 for (i = 0; i < type->nDefaultAtts; i++)
5750 if (attId == type->defaultAtts[i].id)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005751 return 1;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005752 if (isId && ! type->idAtt && ! attId->xmlns)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005753 type->idAtt = attId;
5754 }
5755 if (type->nDefaultAtts == type->allocDefaultAtts) {
5756 if (type->allocDefaultAtts == 0) {
5757 type->allocDefaultAtts = 8;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005758 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(
5759 parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
5760 if (! type->defaultAtts) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005761 type->allocDefaultAtts = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005762 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005763 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07005764 } else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005765 DEFAULT_ATTRIBUTE *temp;
5766 int count = type->allocDefaultAtts * 2;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005767 temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
5768 (count * sizeof(DEFAULT_ATTRIBUTE)));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005769 if (temp == NULL)
5770 return 0;
5771 type->allocDefaultAtts = count;
5772 type->defaultAtts = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005773 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005774 }
5775 att = type->defaultAtts + type->nDefaultAtts;
5776 att->id = attId;
5777 att->value = value;
5778 att->isCdata = isCdata;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005779 if (! isCdata)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005780 attId->maybeTokenized = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005781 type->nDefaultAtts += 1;
5782 return 1;
5783}
5784
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005785static int
Benjamin Peterson52b94082019-09-25 21:33:58 -07005786setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
5787 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005788 const XML_Char *name;
5789 for (name = elementType->name; *name; name++) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005790 if (*name == XML_T(ASCII_COLON)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005791 PREFIX *prefix;
5792 const XML_Char *s;
5793 for (s = elementType->name; s != name; s++) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07005794 if (! poolAppendChar(&dtd->pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005795 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005796 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07005797 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005798 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005799 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005800 sizeof(PREFIX));
Benjamin Peterson52b94082019-09-25 21:33:58 -07005801 if (! prefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005802 return 0;
5803 if (prefix->name == poolStart(&dtd->pool))
5804 poolFinish(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005805 else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005806 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005807 elementType->prefix = prefix;
Benjamin Peterson3b03b092019-06-27 20:54:44 -07005808 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005809 }
5810 }
5811 return 1;
5812}
5813
5814static ATTRIBUTE_ID *
Benjamin Peterson52b94082019-09-25 21:33:58 -07005815getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
5816 const char *end) {
5817 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005818 ATTRIBUTE_ID *id;
5819 const XML_Char *name;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005820 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005821 return NULL;
5822 name = poolStoreString(&dtd->pool, enc, start, end);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005823 if (! name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005824 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00005825 /* skip quotation mark - its storage will be re-used (like in name[-1]) */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005826 ++name;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005827 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
5828 sizeof(ATTRIBUTE_ID));
5829 if (! id)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005830 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005831 if (id->name != name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005832 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005833 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005834 poolFinish(&dtd->pool);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005835 if (! parser->m_ns)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005836 ;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005837 else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
5838 && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
5839 && name[4] == XML_T(ASCII_s)
5840 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005841 if (name[5] == XML_T('\0'))
5842 id->prefix = &dtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005843 else
Benjamin Peterson52b94082019-09-25 21:33:58 -07005844 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
5845 sizeof(PREFIX));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005846 id->xmlns = XML_TRUE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005847 } else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005848 int i;
5849 for (i = 0; name[i]; i++) {
Fred Drake08317ae2003-10-21 15:38:55 +00005850 /* attributes without prefix are *not* in the default namespace */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005851 if (name[i] == XML_T(ASCII_COLON)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005852 int j;
5853 for (j = 0; j < i; j++) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07005854 if (! poolAppendChar(&dtd->pool, name[j]))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005855 return NULL;
5856 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07005857 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005858 return NULL;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005859 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
5860 poolStart(&dtd->pool), sizeof(PREFIX));
5861 if (! id->prefix)
Benjamin Peterson196d7db2016-06-11 13:28:56 -07005862 return NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005863 if (id->prefix->name == poolStart(&dtd->pool))
5864 poolFinish(&dtd->pool);
5865 else
5866 poolDiscard(&dtd->pool);
5867 break;
5868 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005869 }
5870 }
5871 }
5872 return id;
5873}
5874
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005875#define CONTEXT_SEP XML_T(ASCII_FF)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005876
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005877static const XML_Char *
Benjamin Peterson52b94082019-09-25 21:33:58 -07005878getContext(XML_Parser parser) {
5879 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005880 HASH_TABLE_ITER iter;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005881 XML_Bool needSep = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005882
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005883 if (dtd->defaultPrefix.binding) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005884 int i;
5885 int len;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005886 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005887 return NULL;
5888 len = dtd->defaultPrefix.binding->uriLen;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005889 if (parser->m_namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005890 len--;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005891 for (i = 0; i < len; i++) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07005892 if (! poolAppendChar(&parser->m_tempPool,
5893 dtd->defaultPrefix.binding->uri[i])) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02005894 /* Because of memory caching, I don't believe this line can be
5895 * executed.
5896 *
5897 * This is part of a loop copying the default prefix binding
5898 * URI into the parser's temporary string pool. Previously,
5899 * that URI was copied into the same string pool, with a
5900 * terminating NUL character, as part of setContext(). When
5901 * the pool was cleared, that leaves a block definitely big
5902 * enough to hold the URI on the free block list of the pool.
5903 * The URI copy in getContext() therefore cannot run out of
5904 * memory.
5905 *
5906 * If the pool is used between the setContext() and
5907 * getContext() calls, the worst it can do is leave a bigger
5908 * block on the front of the free list. Given that this is
5909 * all somewhat inobvious and program logic can be changed, we
5910 * don't delete the line but we do exclude it from the test
5911 * coverage statistics.
5912 */
5913 return NULL; /* LCOV_EXCL_LINE */
5914 }
5915 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005916 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005917 }
5918
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005919 hashTableIterInit(&iter, &(dtd->prefixes));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005920 for (;;) {
5921 int i;
5922 int len;
5923 const XML_Char *s;
5924 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005925 if (! prefix)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005926 break;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005927 if (! prefix->binding) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02005928 /* This test appears to be (justifiable) paranoia. There does
5929 * not seem to be a way of injecting a prefix without a binding
5930 * that doesn't get errored long before this function is called.
5931 * The test should remain for safety's sake, so we instead
5932 * exclude the following line from the coverage statistics.
5933 */
5934 continue; /* LCOV_EXCL_LINE */
5935 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07005936 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005937 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005938 for (s = prefix->name; *s; s++)
Benjamin Peterson52b94082019-09-25 21:33:58 -07005939 if (! poolAppendChar(&parser->m_tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005940 return NULL;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005941 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005942 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005943 len = prefix->binding->uriLen;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005944 if (parser->m_namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005945 len--;
5946 for (i = 0; i < len; i++)
Benjamin Peterson52b94082019-09-25 21:33:58 -07005947 if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005948 return NULL;
5949 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005950 }
5951
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005952 hashTableIterInit(&iter, &(dtd->generalEntities));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005953 for (;;) {
5954 const XML_Char *s;
5955 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005956 if (! e)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005957 break;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005958 if (! e->open)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005959 continue;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005960 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005961 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005962 for (s = e->name; *s; s++)
Benjamin Peterson52b94082019-09-25 21:33:58 -07005963 if (! poolAppendChar(&parser->m_tempPool, *s))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005964 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005965 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005966 }
5967
Benjamin Peterson52b94082019-09-25 21:33:58 -07005968 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005969 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005970 return parser->m_tempPool.start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005971}
5972
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005973static XML_Bool
Benjamin Peterson52b94082019-09-25 21:33:58 -07005974setContext(XML_Parser parser, const XML_Char *context) {
5975 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005976 const XML_Char *s = context;
5977
5978 while (*context != XML_T('\0')) {
5979 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
5980 ENTITY *e;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005981 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005982 return XML_FALSE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005983 e = (ENTITY *)lookup(parser, &dtd->generalEntities,
5984 poolStart(&parser->m_tempPool), 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005985 if (e)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005986 e->open = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005987 if (*s != XML_T('\0'))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005988 s++;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005989 context = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005990 poolDiscard(&parser->m_tempPool);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005991 } else if (*s == XML_T(ASCII_EQUALS)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005992 PREFIX *prefix;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005993 if (poolLength(&parser->m_tempPool) == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005994 prefix = &dtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005995 else {
Benjamin Peterson52b94082019-09-25 21:33:58 -07005996 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005997 return XML_FALSE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005998 prefix
5999 = (PREFIX *)lookup(parser, &dtd->prefixes,
6000 poolStart(&parser->m_tempPool), sizeof(PREFIX));
6001 if (! prefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006002 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006003 if (prefix->name == poolStart(&parser->m_tempPool)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006004 prefix->name = poolCopyString(&dtd->pool, prefix->name);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006005 if (! prefix->name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006006 return XML_FALSE;
6007 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07006008 poolDiscard(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006009 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07006010 for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006011 context++)
Benjamin Peterson52b94082019-09-25 21:33:58 -07006012 if (! poolAppendChar(&parser->m_tempPool, *context))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006013 return XML_FALSE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006014 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006015 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006016 if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
Benjamin Peterson52b94082019-09-25 21:33:58 -07006017 &parser->m_inheritedBindings)
6018 != XML_ERROR_NONE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006019 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006020 poolDiscard(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006021 if (*context != XML_T('\0'))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006022 ++context;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006023 s = context;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006024 } else {
6025 if (! poolAppendChar(&parser->m_tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006026 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006027 s++;
6028 }
6029 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006030 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006031}
6032
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006033static void FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07006034normalizePublicId(XML_Char *publicId) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006035 XML_Char *p = publicId;
6036 XML_Char *s;
6037 for (s = publicId; *s; s++) {
6038 switch (*s) {
6039 case 0x20:
6040 case 0xD:
6041 case 0xA:
6042 if (p != publicId && p[-1] != 0x20)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006043 *p++ = 0x20;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006044 break;
6045 default:
6046 *p++ = *s;
6047 }
6048 }
6049 if (p != publicId && p[-1] == 0x20)
6050 --p;
6051 *p = XML_T('\0');
6052}
6053
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006054static DTD *
Benjamin Peterson52b94082019-09-25 21:33:58 -07006055dtdCreate(const XML_Memory_Handling_Suite *ms) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006056 DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD));
6057 if (p == NULL)
6058 return p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006059 poolInit(&(p->pool), ms);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006060 poolInit(&(p->entityValuePool), ms);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006061 hashTableInit(&(p->generalEntities), ms);
6062 hashTableInit(&(p->elementTypes), ms);
6063 hashTableInit(&(p->attributeIds), ms);
6064 hashTableInit(&(p->prefixes), ms);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006065#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006066 p->paramEntityRead = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006067 hashTableInit(&(p->paramEntities), ms);
6068#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006069 p->defaultPrefix.name = NULL;
6070 p->defaultPrefix.binding = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006071
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006072 p->in_eldecl = XML_FALSE;
6073 p->scaffIndex = NULL;
6074 p->scaffold = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006075 p->scaffLevel = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006076 p->scaffSize = 0;
6077 p->scaffCount = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006078 p->contentStringLen = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006079
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006080 p->keepProcessing = XML_TRUE;
6081 p->hasParamEntityRefs = XML_FALSE;
6082 p->standalone = XML_FALSE;
6083 return p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006084}
6085
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006086static void
Benjamin Peterson52b94082019-09-25 21:33:58 -07006087dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006088 HASH_TABLE_ITER iter;
6089 hashTableIterInit(&iter, &(p->elementTypes));
6090 for (;;) {
6091 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006092 if (! e)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006093 break;
6094 if (e->allocDefaultAtts != 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006095 ms->free_fcn(e->defaultAtts);
6096 }
6097 hashTableClear(&(p->generalEntities));
6098#ifdef XML_DTD
6099 p->paramEntityRead = XML_FALSE;
6100 hashTableClear(&(p->paramEntities));
6101#endif /* XML_DTD */
6102 hashTableClear(&(p->elementTypes));
6103 hashTableClear(&(p->attributeIds));
6104 hashTableClear(&(p->prefixes));
6105 poolClear(&(p->pool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006106 poolClear(&(p->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006107 p->defaultPrefix.name = NULL;
6108 p->defaultPrefix.binding = NULL;
6109
6110 p->in_eldecl = XML_FALSE;
Fred Drake08317ae2003-10-21 15:38:55 +00006111
6112 ms->free_fcn(p->scaffIndex);
6113 p->scaffIndex = NULL;
6114 ms->free_fcn(p->scaffold);
6115 p->scaffold = NULL;
6116
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006117 p->scaffLevel = 0;
6118 p->scaffSize = 0;
6119 p->scaffCount = 0;
6120 p->contentStringLen = 0;
6121
6122 p->keepProcessing = XML_TRUE;
6123 p->hasParamEntityRefs = XML_FALSE;
6124 p->standalone = XML_FALSE;
6125}
6126
6127static void
Benjamin Peterson52b94082019-09-25 21:33:58 -07006128dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006129 HASH_TABLE_ITER iter;
6130 hashTableIterInit(&iter, &(p->elementTypes));
6131 for (;;) {
6132 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006133 if (! e)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006134 break;
6135 if (e->allocDefaultAtts != 0)
6136 ms->free_fcn(e->defaultAtts);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006137 }
6138 hashTableDestroy(&(p->generalEntities));
6139#ifdef XML_DTD
6140 hashTableDestroy(&(p->paramEntities));
6141#endif /* XML_DTD */
6142 hashTableDestroy(&(p->elementTypes));
6143 hashTableDestroy(&(p->attributeIds));
6144 hashTableDestroy(&(p->prefixes));
6145 poolDestroy(&(p->pool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006146 poolDestroy(&(p->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006147 if (isDocEntity) {
Fred Drake08317ae2003-10-21 15:38:55 +00006148 ms->free_fcn(p->scaffIndex);
6149 ms->free_fcn(p->scaffold);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006150 }
6151 ms->free_fcn(p);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006152}
6153
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006154/* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6155 The new DTD has already been initialized.
6156*/
6157static int
Benjamin Peterson52b94082019-09-25 21:33:58 -07006158dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
6159 const XML_Memory_Handling_Suite *ms) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006160 HASH_TABLE_ITER iter;
6161
6162 /* Copy the prefix table. */
6163
6164 hashTableIterInit(&iter, &(oldDtd->prefixes));
6165 for (;;) {
6166 const XML_Char *name;
6167 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006168 if (! oldP)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006169 break;
6170 name = poolCopyString(&(newDtd->pool), oldP->name);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006171 if (! name)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006172 return 0;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006173 if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006174 return 0;
6175 }
6176
6177 hashTableIterInit(&iter, &(oldDtd->attributeIds));
6178
6179 /* Copy the attribute id table. */
6180
6181 for (;;) {
6182 ATTRIBUTE_ID *newA;
6183 const XML_Char *name;
6184 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
6185
Benjamin Peterson52b94082019-09-25 21:33:58 -07006186 if (! oldA)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006187 break;
6188 /* Remember to allocate the scratch byte before the name. */
Benjamin Peterson52b94082019-09-25 21:33:58 -07006189 if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006190 return 0;
6191 name = poolCopyString(&(newDtd->pool), oldA->name);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006192 if (! name)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006193 return 0;
6194 ++name;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006195 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006196 sizeof(ATTRIBUTE_ID));
Benjamin Peterson52b94082019-09-25 21:33:58 -07006197 if (! newA)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006198 return 0;
6199 newA->maybeTokenized = oldA->maybeTokenized;
6200 if (oldA->prefix) {
6201 newA->xmlns = oldA->xmlns;
6202 if (oldA->prefix == &oldDtd->defaultPrefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006203 newA->prefix = &newDtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006204 else
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006205 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006206 oldA->prefix->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006207 }
6208 }
6209
6210 /* Copy the element type table. */
6211
6212 hashTableIterInit(&iter, &(oldDtd->elementTypes));
6213
6214 for (;;) {
6215 int i;
6216 ELEMENT_TYPE *newE;
6217 const XML_Char *name;
6218 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006219 if (! oldE)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006220 break;
6221 name = poolCopyString(&(newDtd->pool), oldE->name);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006222 if (! name)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006223 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006224 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006225 sizeof(ELEMENT_TYPE));
Benjamin Peterson52b94082019-09-25 21:33:58 -07006226 if (! newE)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006227 return 0;
6228 if (oldE->nDefaultAtts) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07006229 newE->defaultAtts = (DEFAULT_ATTRIBUTE *)ms->malloc_fcn(
6230 oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6231 if (! newE->defaultAtts) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006232 return 0;
6233 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006234 }
6235 if (oldE->idAtt)
Benjamin Peterson52b94082019-09-25 21:33:58 -07006236 newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
6237 oldE->idAtt->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006238 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
6239 if (oldE->prefix)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006240 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006241 oldE->prefix->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006242 for (i = 0; i < newE->nDefaultAtts; i++) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07006243 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
6244 oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006245 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
6246 if (oldE->defaultAtts[i].value) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006247 newE->defaultAtts[i].value
6248 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006249 if (! newE->defaultAtts[i].value)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006250 return 0;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006251 } else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006252 newE->defaultAtts[i].value = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006253 }
6254 }
6255
6256 /* Copy the entity tables. */
Benjamin Peterson52b94082019-09-25 21:33:58 -07006257 if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
6258 &(oldDtd->generalEntities)))
6259 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006260
6261#ifdef XML_DTD
Benjamin Peterson52b94082019-09-25 21:33:58 -07006262 if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
6263 &(oldDtd->paramEntities)))
6264 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006265 newDtd->paramEntityRead = oldDtd->paramEntityRead;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006266#endif /* XML_DTD */
6267
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006268 newDtd->keepProcessing = oldDtd->keepProcessing;
6269 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006270 newDtd->standalone = oldDtd->standalone;
6271
6272 /* Don't want deep copying for scaffolding */
6273 newDtd->in_eldecl = oldDtd->in_eldecl;
6274 newDtd->scaffold = oldDtd->scaffold;
6275 newDtd->contentStringLen = oldDtd->contentStringLen;
6276 newDtd->scaffSize = oldDtd->scaffSize;
6277 newDtd->scaffLevel = oldDtd->scaffLevel;
6278 newDtd->scaffIndex = oldDtd->scaffIndex;
6279
6280 return 1;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006281} /* End dtdCopy */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006282
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006283static int
Benjamin Peterson52b94082019-09-25 21:33:58 -07006284copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
6285 STRING_POOL *newPool, const HASH_TABLE *oldTable) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006286 HASH_TABLE_ITER iter;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006287 const XML_Char *cachedOldBase = NULL;
6288 const XML_Char *cachedNewBase = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006289
6290 hashTableIterInit(&iter, oldTable);
6291
6292 for (;;) {
6293 ENTITY *newE;
6294 const XML_Char *name;
6295 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006296 if (! oldE)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006297 break;
6298 name = poolCopyString(newPool, oldE->name);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006299 if (! name)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006300 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006301 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
Benjamin Peterson52b94082019-09-25 21:33:58 -07006302 if (! newE)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006303 return 0;
6304 if (oldE->systemId) {
6305 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006306 if (! tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006307 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006308 newE->systemId = tem;
6309 if (oldE->base) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006310 if (oldE->base == cachedOldBase)
6311 newE->base = cachedNewBase;
6312 else {
6313 cachedOldBase = oldE->base;
6314 tem = poolCopyString(newPool, cachedOldBase);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006315 if (! tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006316 return 0;
6317 cachedNewBase = newE->base = tem;
6318 }
6319 }
6320 if (oldE->publicId) {
6321 tem = poolCopyString(newPool, oldE->publicId);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006322 if (! tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006323 return 0;
6324 newE->publicId = tem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006325 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07006326 } else {
6327 const XML_Char *tem
6328 = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
6329 if (! tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006330 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006331 newE->textPtr = tem;
6332 newE->textLen = oldE->textLen;
6333 }
6334 if (oldE->notation) {
6335 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006336 if (! tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006337 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006338 newE->notation = tem;
6339 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006340 newE->is_param = oldE->is_param;
6341 newE->is_internal = oldE->is_internal;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006342 }
6343 return 1;
6344}
6345
Fred Drake08317ae2003-10-21 15:38:55 +00006346#define INIT_POWER 6
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006347
Fred Drake08317ae2003-10-21 15:38:55 +00006348static XML_Bool FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07006349keyeq(KEY s1, KEY s2) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006350 for (; *s1 == *s2; s1++, s2++)
6351 if (*s1 == 0)
Fred Drake08317ae2003-10-21 15:38:55 +00006352 return XML_TRUE;
6353 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006354}
6355
Victor Stinner5ff71322017-06-21 14:39:22 +02006356static size_t
Benjamin Peterson52b94082019-09-25 21:33:58 -07006357keylen(KEY s) {
Victor Stinner5ff71322017-06-21 14:39:22 +02006358 size_t len = 0;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006359 for (; *s; s++, len++)
6360 ;
Victor Stinner5ff71322017-06-21 14:39:22 +02006361 return len;
6362}
6363
6364static void
Benjamin Peterson52b94082019-09-25 21:33:58 -07006365copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
Victor Stinner5ff71322017-06-21 14:39:22 +02006366 key->k[0] = 0;
6367 key->k[1] = get_hash_secret_salt(parser);
6368}
6369
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006370static unsigned long FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07006371hash(XML_Parser parser, KEY s) {
Victor Stinner5ff71322017-06-21 14:39:22 +02006372 struct siphash state;
6373 struct sipkey key;
Victor Stinner5ff71322017-06-21 14:39:22 +02006374 (void)sip24_valid;
6375 copy_salt_to_sipkey(parser, &key);
6376 sip24_init(&state, &key);
6377 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
6378 return (unsigned long)sip24_final(&state);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006379}
6380
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006381static NAMED *
Benjamin Peterson52b94082019-09-25 21:33:58 -07006382lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006383 size_t i;
6384 if (table->size == 0) {
6385 size_t tsize;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006386 if (! createSize)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006387 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006388 table->power = INIT_POWER;
6389 /* table->size is a power of 2 */
6390 table->size = (size_t)1 << INIT_POWER;
6391 tsize = table->size * sizeof(NAMED *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006392 table->v = (NAMED **)table->mem->malloc_fcn(tsize);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006393 if (! table->v) {
Fred Drake31d485c2004-08-03 07:06:22 +00006394 table->size = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006395 return NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00006396 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006397 memset(table->v, 0, tsize);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006398 i = hash(parser, name) & ((unsigned long)table->size - 1);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006399 } else {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006400 unsigned long h = hash(parser, name);
Fred Drake08317ae2003-10-21 15:38:55 +00006401 unsigned long mask = (unsigned long)table->size - 1;
6402 unsigned char step = 0;
6403 i = h & mask;
6404 while (table->v[i]) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006405 if (keyeq(name, table->v[i]->name))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006406 return table->v[i];
Benjamin Peterson52b94082019-09-25 21:33:58 -07006407 if (! step)
Fred Drake08317ae2003-10-21 15:38:55 +00006408 step = PROBE_STEP(h, mask, table->power);
6409 i < step ? (i += table->size - step) : (i -= step);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006410 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07006411 if (! createSize)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006412 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006413
6414 /* check for overflow (table is half full) */
6415 if (table->used >> (table->power - 1)) {
6416 unsigned char newPower = table->power + 1;
6417 size_t newSize = (size_t)1 << newPower;
6418 unsigned long newMask = (unsigned long)newSize - 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006419 size_t tsize = newSize * sizeof(NAMED *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006420 NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006421 if (! newV)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006422 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006423 memset(newV, 0, tsize);
6424 for (i = 0; i < table->size; i++)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006425 if (table->v[i]) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006426 unsigned long newHash = hash(parser, table->v[i]->name);
Fred Drake08317ae2003-10-21 15:38:55 +00006427 size_t j = newHash & newMask;
6428 step = 0;
6429 while (newV[j]) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07006430 if (! step)
Fred Drake08317ae2003-10-21 15:38:55 +00006431 step = PROBE_STEP(newHash, newMask, newPower);
6432 j < step ? (j += newSize - step) : (j -= step);
6433 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006434 newV[j] = table->v[i];
6435 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006436 table->mem->free_fcn(table->v);
6437 table->v = newV;
Fred Drake08317ae2003-10-21 15:38:55 +00006438 table->power = newPower;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006439 table->size = newSize;
Fred Drake08317ae2003-10-21 15:38:55 +00006440 i = h & newMask;
6441 step = 0;
6442 while (table->v[i]) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07006443 if (! step)
Fred Drake08317ae2003-10-21 15:38:55 +00006444 step = PROBE_STEP(h, newMask, newPower);
6445 i < step ? (i += newSize - step) : (i -= step);
6446 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006447 }
6448 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006449 table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006450 if (! table->v[i])
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006451 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006452 memset(table->v[i], 0, createSize);
6453 table->v[i]->name = name;
6454 (table->used)++;
6455 return table->v[i];
6456}
6457
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006458static void FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07006459hashTableClear(HASH_TABLE *table) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006460 size_t i;
6461 for (i = 0; i < table->size; i++) {
Fred Drake08317ae2003-10-21 15:38:55 +00006462 table->mem->free_fcn(table->v[i]);
6463 table->v[i] = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006464 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006465 table->used = 0;
6466}
6467
6468static void FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07006469hashTableDestroy(HASH_TABLE *table) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006470 size_t i;
Fred Drake08317ae2003-10-21 15:38:55 +00006471 for (i = 0; i < table->size; i++)
6472 table->mem->free_fcn(table->v[i]);
6473 table->mem->free_fcn(table->v);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006474}
6475
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006476static void FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07006477hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) {
Fred Drake08317ae2003-10-21 15:38:55 +00006478 p->power = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006479 p->size = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006480 p->used = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006481 p->v = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006482 p->mem = ms;
6483}
6484
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006485static void FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07006486hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006487 iter->p = table->v;
6488 iter->end = iter->p + table->size;
6489}
6490
Benjamin Peterson52b94082019-09-25 21:33:58 -07006491static NAMED *FASTCALL
6492hashTableIterNext(HASH_TABLE_ITER *iter) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006493 while (iter->p != iter->end) {
6494 NAMED *tem = *(iter->p)++;
6495 if (tem)
6496 return tem;
6497 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006498 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006499}
6500
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006501static void FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07006502poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006503 pool->blocks = NULL;
6504 pool->freeBlocks = NULL;
6505 pool->start = NULL;
6506 pool->ptr = NULL;
6507 pool->end = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006508 pool->mem = ms;
6509}
6510
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006511static void FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07006512poolClear(STRING_POOL *pool) {
6513 if (! pool->freeBlocks)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006514 pool->freeBlocks = pool->blocks;
6515 else {
6516 BLOCK *p = pool->blocks;
6517 while (p) {
6518 BLOCK *tem = p->next;
6519 p->next = pool->freeBlocks;
6520 pool->freeBlocks = p;
6521 p = tem;
6522 }
6523 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006524 pool->blocks = NULL;
6525 pool->start = NULL;
6526 pool->ptr = NULL;
6527 pool->end = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006528}
6529
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006530static void FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07006531poolDestroy(STRING_POOL *pool) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006532 BLOCK *p = pool->blocks;
6533 while (p) {
6534 BLOCK *tem = p->next;
6535 pool->mem->free_fcn(p);
6536 p = tem;
6537 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006538 p = pool->freeBlocks;
6539 while (p) {
6540 BLOCK *tem = p->next;
6541 pool->mem->free_fcn(p);
6542 p = tem;
6543 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006544}
6545
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006546static XML_Char *
Benjamin Peterson52b94082019-09-25 21:33:58 -07006547poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
6548 const char *end) {
6549 if (! pool->ptr && ! poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006550 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006551 for (;;) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07006552 const enum XML_Convert_Result convert_res = XmlConvert(
6553 enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
6554 if ((convert_res == XML_CONVERT_COMPLETED)
6555 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006556 break;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006557 if (! poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006558 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006559 }
6560 return pool->start;
6561}
6562
Benjamin Peterson52b94082019-09-25 21:33:58 -07006563static const XML_Char *FASTCALL
6564poolCopyString(STRING_POOL *pool, const XML_Char *s) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006565 do {
Benjamin Peterson52b94082019-09-25 21:33:58 -07006566 if (! poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006567 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006568 } while (*s++);
6569 s = pool->start;
6570 poolFinish(pool);
6571 return s;
6572}
6573
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006574static const XML_Char *
Benjamin Peterson52b94082019-09-25 21:33:58 -07006575poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
6576 if (! pool->ptr && ! poolGrow(pool)) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02006577 /* The following line is unreachable given the current usage of
6578 * poolCopyStringN(). Currently it is called from exactly one
6579 * place to copy the text of a simple general entity. By that
6580 * point, the name of the entity is already stored in the pool, so
6581 * pool->ptr cannot be NULL.
6582 *
6583 * If poolCopyStringN() is used elsewhere as it well might be,
6584 * this line may well become executable again. Regardless, this
6585 * sort of check shouldn't be removed lightly, so we just exclude
6586 * it from the coverage statistics.
6587 */
6588 return NULL; /* LCOV_EXCL_LINE */
6589 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006590 for (; n > 0; --n, s++) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07006591 if (! poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006592 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006593 }
6594 s = pool->start;
6595 poolFinish(pool);
6596 return s;
6597}
6598
Benjamin Peterson52b94082019-09-25 21:33:58 -07006599static const XML_Char *FASTCALL
6600poolAppendString(STRING_POOL *pool, const XML_Char *s) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006601 while (*s) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07006602 if (! poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006603 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006604 s++;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006605 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006606 return pool->start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006607}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006608
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006609static XML_Char *
Benjamin Peterson52b94082019-09-25 21:33:58 -07006610poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
6611 const char *end) {
6612 if (! poolAppend(pool, enc, ptr, end))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006613 return NULL;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006614 if (pool->ptr == pool->end && ! poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006615 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006616 *(pool->ptr)++ = 0;
6617 return pool->start;
6618}
6619
Victor Stinner5ff71322017-06-21 14:39:22 +02006620static size_t
Benjamin Peterson52b94082019-09-25 21:33:58 -07006621poolBytesToAllocateFor(int blockSize) {
Victor Stinner5ff71322017-06-21 14:39:22 +02006622 /* Unprotected math would be:
6623 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
6624 **
6625 ** Detect overflow, avoiding _signed_ overflow undefined behavior
6626 ** For a + b * c we check b * c in isolation first, so that addition of a
6627 ** on top has no chance of making us accept a small non-negative number
6628 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07006629 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
Victor Stinner5ff71322017-06-21 14:39:22 +02006630
6631 if (blockSize <= 0)
6632 return 0;
6633
6634 if (blockSize > (int)(INT_MAX / stretch))
6635 return 0;
6636
6637 {
6638 const int stretchedBlockSize = blockSize * (int)stretch;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006639 const int bytesToAllocate
6640 = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
Victor Stinner5ff71322017-06-21 14:39:22 +02006641 if (bytesToAllocate < 0)
6642 return 0;
6643
6644 return (size_t)bytesToAllocate;
6645 }
6646}
6647
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006648static XML_Bool FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07006649poolGrow(STRING_POOL *pool) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006650 if (pool->freeBlocks) {
6651 if (pool->start == 0) {
6652 pool->blocks = pool->freeBlocks;
6653 pool->freeBlocks = pool->freeBlocks->next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006654 pool->blocks->next = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006655 pool->start = pool->blocks->s;
6656 pool->end = pool->start + pool->blocks->size;
6657 pool->ptr = pool->start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006658 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006659 }
6660 if (pool->end - pool->start < pool->freeBlocks->size) {
6661 BLOCK *tem = pool->freeBlocks->next;
6662 pool->freeBlocks->next = pool->blocks;
6663 pool->blocks = pool->freeBlocks;
6664 pool->freeBlocks = tem;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006665 memcpy(pool->blocks->s, pool->start,
6666 (pool->end - pool->start) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006667 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
6668 pool->start = pool->blocks->s;
6669 pool->end = pool->start + pool->blocks->size;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006670 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006671 }
6672 }
6673 if (pool->blocks && pool->start == pool->blocks->s) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02006674 BLOCK *temp;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006675 int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
Victor Stinner5ff71322017-06-21 14:39:22 +02006676 size_t bytesToAllocate;
Victor Stinner23ec4b52017-06-15 00:54:36 +02006677
Benjamin Peterson4e211002018-06-26 19:25:45 -07006678 /* NOTE: Needs to be calculated prior to calling `realloc`
6679 to avoid dangling pointers: */
Victor Stinner93d0cb52017-08-18 23:43:54 +02006680 const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
6681
6682 if (blockSize < 0) {
6683 /* This condition traps a situation where either more than
6684 * INT_MAX/2 bytes have already been allocated. This isn't
6685 * readily testable, since it is unlikely that an average
6686 * machine will have that much memory, so we exclude it from the
6687 * coverage statistics.
6688 */
6689 return XML_FALSE; /* LCOV_EXCL_LINE */
6690 }
Victor Stinner23ec4b52017-06-15 00:54:36 +02006691
Victor Stinner5ff71322017-06-21 14:39:22 +02006692 bytesToAllocate = poolBytesToAllocateFor(blockSize);
6693 if (bytesToAllocate == 0)
6694 return XML_FALSE;
6695
Benjamin Peterson52b94082019-09-25 21:33:58 -07006696 temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks,
6697 (unsigned)bytesToAllocate);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006698 if (temp == NULL)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006699 return XML_FALSE;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006700 pool->blocks = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006701 pool->blocks->size = blockSize;
Victor Stinner93d0cb52017-08-18 23:43:54 +02006702 pool->ptr = pool->blocks->s + offsetInsideBlock;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006703 pool->start = pool->blocks->s;
6704 pool->end = pool->start + blockSize;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006705 } else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006706 BLOCK *tem;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00006707 int blockSize = (int)(pool->end - pool->start);
Victor Stinner5ff71322017-06-21 14:39:22 +02006708 size_t bytesToAllocate;
Victor Stinner23ec4b52017-06-15 00:54:36 +02006709
Victor Stinner93d0cb52017-08-18 23:43:54 +02006710 if (blockSize < 0) {
6711 /* This condition traps a situation where either more than
6712 * INT_MAX bytes have already been allocated (which is prevented
6713 * by various pieces of program logic, not least this one, never
6714 * mind the unlikelihood of actually having that much memory) or
6715 * the pool control fields have been corrupted (which could
6716 * conceivably happen in an extremely buggy user handler
6717 * function). Either way it isn't readily testable, so we
6718 * exclude it from the coverage statistics.
6719 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07006720 return XML_FALSE; /* LCOV_EXCL_LINE */
Victor Stinner93d0cb52017-08-18 23:43:54 +02006721 }
Victor Stinner23ec4b52017-06-15 00:54:36 +02006722
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006723 if (blockSize < INIT_BLOCK_SIZE)
6724 blockSize = INIT_BLOCK_SIZE;
Victor Stinner5ff71322017-06-21 14:39:22 +02006725 else {
6726 /* Detect overflow, avoiding _signed_ overflow undefined behavior */
6727 if ((int)((unsigned)blockSize * 2U) < 0) {
6728 return XML_FALSE;
6729 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006730 blockSize *= 2;
Victor Stinner5ff71322017-06-21 14:39:22 +02006731 }
6732
6733 bytesToAllocate = poolBytesToAllocateFor(blockSize);
6734 if (bytesToAllocate == 0)
6735 return XML_FALSE;
6736
6737 tem = (BLOCK *)pool->mem->malloc_fcn(bytesToAllocate);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006738 if (! tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006739 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006740 tem->size = blockSize;
6741 tem->next = pool->blocks;
6742 pool->blocks = tem;
6743 if (pool->ptr != pool->start)
Benjamin Peterson52b94082019-09-25 21:33:58 -07006744 memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006745 pool->ptr = tem->s + (pool->ptr - pool->start);
6746 pool->start = tem->s;
6747 pool->end = tem->s + blockSize;
6748 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006749 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006750}
6751
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006752static int FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07006753nextScaffoldPart(XML_Parser parser) {
6754 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6755 CONTENT_SCAFFOLD *me;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006756 int next;
6757
Benjamin Peterson52b94082019-09-25 21:33:58 -07006758 if (! dtd->scaffIndex) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07006759 dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
Benjamin Peterson52b94082019-09-25 21:33:58 -07006760 if (! dtd->scaffIndex)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006761 return -1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006762 dtd->scaffIndex[0] = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006763 }
6764
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006765 if (dtd->scaffCount >= dtd->scaffSize) {
6766 CONTENT_SCAFFOLD *temp;
6767 if (dtd->scaffold) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07006768 temp = (CONTENT_SCAFFOLD *)REALLOC(
6769 parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006770 if (temp == NULL)
6771 return -1;
6772 dtd->scaffSize *= 2;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006773 } else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07006774 temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
Benjamin Peterson52b94082019-09-25 21:33:58 -07006775 * sizeof(CONTENT_SCAFFOLD));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006776 if (temp == NULL)
6777 return -1;
6778 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006779 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006780 dtd->scaffold = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006781 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006782 next = dtd->scaffCount++;
6783 me = &dtd->scaffold[next];
6784 if (dtd->scaffLevel) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07006785 CONTENT_SCAFFOLD *parent
6786 = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006787 if (parent->lastchild) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006788 dtd->scaffold[parent->lastchild].nextsib = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006789 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07006790 if (! parent->childcnt)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006791 parent->firstchild = next;
6792 parent->lastchild = next;
6793 parent->childcnt++;
6794 }
6795 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
6796 return next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006797}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006798
6799static void
Benjamin Peterson52b94082019-09-25 21:33:58 -07006800build_node(XML_Parser parser, int src_node, XML_Content *dest,
6801 XML_Content **contpos, XML_Char **strpos) {
6802 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006803 dest->type = dtd->scaffold[src_node].type;
6804 dest->quant = dtd->scaffold[src_node].quant;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006805 if (dest->type == XML_CTYPE_NAME) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006806 const XML_Char *src;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006807 dest->name = *strpos;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006808 src = dtd->scaffold[src_node].name;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006809 for (;;) {
6810 *(*strpos)++ = *src;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006811 if (! *src)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006812 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006813 src++;
6814 }
6815 dest->numchildren = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006816 dest->children = NULL;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006817 } else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006818 unsigned int i;
6819 int cn;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006820 dest->numchildren = dtd->scaffold[src_node].childcnt;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006821 dest->children = *contpos;
6822 *contpos += dest->numchildren;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006823 for (i = 0, cn = dtd->scaffold[src_node].firstchild; i < dest->numchildren;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006824 i++, cn = dtd->scaffold[cn].nextsib) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006825 build_node(parser, cn, &(dest->children[i]), contpos, strpos);
6826 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006827 dest->name = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006828 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006829}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006830
6831static XML_Content *
Benjamin Peterson52b94082019-09-25 21:33:58 -07006832build_model(XML_Parser parser) {
6833 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006834 XML_Content *ret;
6835 XML_Content *cpos;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006836 XML_Char *str;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006837 int allocsize = (dtd->scaffCount * sizeof(XML_Content)
6838 + (dtd->contentStringLen * sizeof(XML_Char)));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006839
Benjamin Peterson4e211002018-06-26 19:25:45 -07006840 ret = (XML_Content *)MALLOC(parser, allocsize);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006841 if (! ret)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006842 return NULL;
6843
Benjamin Peterson52b94082019-09-25 21:33:58 -07006844 str = (XML_Char *)(&ret[dtd->scaffCount]);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006845 cpos = &ret[1];
6846
6847 build_node(parser, 0, ret, &cpos, &str);
6848 return ret;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006849}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006850
6851static ELEMENT_TYPE *
Benjamin Peterson52b94082019-09-25 21:33:58 -07006852getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
6853 const char *end) {
6854 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006855 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006856 ELEMENT_TYPE *ret;
6857
Benjamin Peterson52b94082019-09-25 21:33:58 -07006858 if (! name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006859 return NULL;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006860 ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
6861 sizeof(ELEMENT_TYPE));
6862 if (! ret)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006863 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006864 if (ret->name != name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006865 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006866 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006867 poolFinish(&dtd->pool);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006868 if (! setElementTypePrefix(parser, ret))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006869 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006870 }
6871 return ret;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006872}
Victor Stinner93d0cb52017-08-18 23:43:54 +02006873
6874static XML_Char *
Benjamin Peterson52b94082019-09-25 21:33:58 -07006875copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
6876 int charsRequired = 0;
6877 XML_Char *result;
Victor Stinner93d0cb52017-08-18 23:43:54 +02006878
Benjamin Peterson52b94082019-09-25 21:33:58 -07006879 /* First determine how long the string is */
6880 while (s[charsRequired] != 0) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02006881 charsRequired++;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006882 }
6883 /* Include the terminator */
6884 charsRequired++;
Victor Stinner93d0cb52017-08-18 23:43:54 +02006885
Benjamin Peterson52b94082019-09-25 21:33:58 -07006886 /* Now allocate space for the copy */
6887 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
6888 if (result == NULL)
6889 return NULL;
6890 /* Copy the original into place */
6891 memcpy(result, s, charsRequired * sizeof(XML_Char));
6892 return result;
Victor Stinner93d0cb52017-08-18 23:43:54 +02006893}