/* 8539b9040d9d901366a62560a064af7cb99811335784b363abc039c5b0ebc416 (2.4.1+)
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
   Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
   Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
   Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
   Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
   Copyright (c) 2005-2009 Steven Solie <ssolie@users.sourceforge.net>
   Copyright (c) 2016      Eric Rahm <erahm@mozilla.com>
   Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
   Copyright (c) 2016      Gaurav <g.gupta@samsung.com>
   Copyright (c) 2016      Thomas Beutlich <tc@tbeu.de>
   Copyright (c) 2016      Gustavo Grieco <gustavo.grieco@imag.fr>
   Copyright (c) 2016      Pascal Cuoq <cuoq@trust-in-soft.com>
   Copyright (c) 2016      Ed Schouten <ed@nuxi.nl>
   Copyright (c) 2017-2018 Rhodri James <rhodri@wildebeest.org.uk>
   Copyright (c) 2017      Václav Slavík <vaclav@slavik.io>
   Copyright (c) 2017      Viktor Szakats <commit@vsz.me>
   Copyright (c) 2017      Chanho Park <chanho61.park@samsung.com>
   Copyright (c) 2017      Rolf Eike Beer <eike@sf-mail.de>
   Copyright (c) 2017      Hans Wennborg <hans@chromium.org>
   Copyright (c) 2018      Anton Maklakov <antmak.pub@gmail.com>
   Copyright (c) 2018      Benjamin Peterson <benjamin@python.org>
   Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
   Copyright (c) 2018      Mariusz Zaborski <oshogbo@vexillium.org>
   Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
   Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
   Copyright (c) 2019      Vadim Zeitlin <vadim@zeitlins.org>
   Licensed under the MIT license:

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit
   persons to whom the Software is furnished to do so, subject to the
   following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
   NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
   DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
56
/* Switches that are set ahead of the #include lines that follow them in
   this file.  Each one is only defined when the build has not already
   provided it. */
#if ! defined(_GNU_SOURCE)
#  define _GNU_SOURCE 1 /* syscall prototype */
#endif

#ifdef _WIN32
/* force stdlib to define rand_s() */
#  if ! defined(_CRT_RAND_S)
#    define _CRT_RAND_S
#  endif
#endif
Victor Stinner5ff71322017-06-21 14:39:22 +020067
Victor Stinner23ec4b52017-06-15 00:54:36 +020068#include <stddef.h>
Benjamin Peterson52b94082019-09-25 21:33:58 -070069#include <string.h> /* memset(), memcpy() */
Victor Stinner23ec4b52017-06-15 00:54:36 +020070#include <assert.h>
Benjamin Peterson52b94082019-09-25 21:33:58 -070071#include <limits.h> /* UINT_MAX */
72#include <stdio.h> /* fprintf */
73#include <stdlib.h> /* getenv, rand_s */
Miss Islington (bot)27067852021-08-29 07:32:50 -070074#include <stdint.h> /* uintptr_t */
75#include <math.h> /* isnan */
Victor Stinner23ec4b52017-06-15 00:54:36 +020076
Victor Stinner5ff71322017-06-21 14:39:22 +020077#ifdef _WIN32
Benjamin Peterson52b94082019-09-25 21:33:58 -070078# define getpid GetCurrentProcessId
Victor Stinner23ec4b52017-06-15 00:54:36 +020079#else
Benjamin Peterson52b94082019-09-25 21:33:58 -070080# include <sys/time.h> /* gettimeofday() */
81# include <sys/types.h> /* getpid() */
82# include <unistd.h> /* getpid() */
83# include <fcntl.h> /* O_RDONLY */
84# include <errno.h>
Victor Stinner23ec4b52017-06-15 00:54:36 +020085#endif
86
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070087#define XML_BUILDING_EXPAT 1
88
Victor Stinner5ff71322017-06-21 14:39:22 +020089#ifdef _WIN32
Benjamin Peterson52b94082019-09-25 21:33:58 -070090# include "winconfig.h"
Miss Islington (bot)27067852021-08-29 07:32:50 -070091#endif
92
93#include <expat_config.h>
Christian Heimesaa152762013-12-06 23:43:50 +010094
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070095#include "ascii.h"
Fred Drake08317ae2003-10-21 15:38:55 +000096#include "expat.h"
Victor Stinner5ff71322017-06-21 14:39:22 +020097#include "siphash.h"
Fred Drake08317ae2003-10-21 15:38:55 +000098
/* Headers and fallback constants for the configured entropy source.
   HAVE_GETRANDOM selects the libc wrapper header; HAVE_SYSCALL_GETRANDOM
   alone selects the raw syscall headers.  GRND_NONBLOCK is supplied here
   only when the included headers did not define it. */
#if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
#  if defined(HAVE_GETRANDOM)
#    include <sys/random.h> /* getrandom */
#  else
#    include <unistd.h>      /* syscall */
#    include <sys/syscall.h> /* SYS_getrandom */
#  endif
#  if ! defined(GRND_NONBLOCK)
#    define GRND_NONBLOCK 0x0001
#  endif /* ! defined(GRND_NONBLOCK) */
#endif   /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */

#if defined(HAVE_LIBBSD)                                                       \
    && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
#  include <bsd/stdlib.h>
#endif

/* Fallback value for Windows builds whose headers lack this constant. */
#if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
#  define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
#endif
119
Benjamin Peterson52b94082019-09-25 21:33:58 -0700120#if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \
121 && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \
122 && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \
123 && ! defined(XML_POOR_ENTROPY)
124# error You do not have support for any sources of high quality entropy \
Victor Stinner93d0cb52017-08-18 23:43:54 +0200125 enabled. For end user security, that is probably not what you want. \
126 \
127 Your options include: \
Miss Islington (bot)27067852021-08-29 07:32:50 -0700128 * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
129 * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
Victor Stinner93d0cb52017-08-18 23:43:54 +0200130 * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
Miss Islington (bot)27067852021-08-29 07:32:50 -0700131 * BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \
Victor Stinner93d0cb52017-08-18 23:43:54 +0200132 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
133 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
Miss Islington (bot)27067852021-08-29 07:32:50 -0700134 * Linux (including <3.17) / BSD / macOS (including <10.7) (/dev/urandom): XML_DEV_URANDOM, \
135 * Windows >=Vista (rand_s): _WIN32. \
Victor Stinner93d0cb52017-08-18 23:43:54 +0200136 \
137 If insist on not using any of these, bypass this error by defining \
138 XML_POOR_ENTROPY; you have been warned. \
139 \
Victor Stinner93d0cb52017-08-18 23:43:54 +0200140 If you have reasons to patch this detection code away or need changes \
141 to the build system, please open a bug. Thank you!
142#endif
143
/* Map the generic Xml* names onto the UTF-16 helpers when XML_UNICODE is
   defined and onto the UTF-8 helpers otherwise, and pick the matching ICHAR
   type.

   MUST_CONVERT(enc, s) is non-zero when enc is not flagged as the selected
   encoding (isUtf16 / isUtf8).  The UTF-16 variant is also non-zero when s
   sits at an odd address. */
#ifdef XML_UNICODE
#  define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#  define XmlConvert XmlUtf16Convert
#  define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#  define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
#  define XmlEncode XmlUtf16Encode
#  define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
typedef unsigned short ICHAR;
#else
#  define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#  define XmlConvert XmlUtf8Convert
#  define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#  define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
#  define XmlEncode XmlUtf8Encode
#  define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
typedef char ICHAR;
#endif
161
/* Without XML_NS, every *NS name is an alias for its non-namespace
   counterpart. */
#ifndef XML_NS

#  define XmlInitEncodingNS XmlInitEncoding
#  define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
#  undef XmlGetInternalEncodingNS
#  define XmlGetInternalEncodingNS XmlGetInternalEncoding
#  define XmlParseXmlDeclNS XmlParseXmlDecl

#endif
171
/* XML_T(x) casts x to the wide character type in XML_UNICODE builds and is
   the identity otherwise.  XML_L(x) pastes an L prefix onto x only in
   XML_UNICODE_WCHAR_T builds and is the identity otherwise. */
#ifdef XML_UNICODE

#  ifdef XML_UNICODE_WCHAR_T
#    define XML_T(x) (const wchar_t) x
#    define XML_L(x) L##x
#  else
#    define XML_T(x) (const unsigned short)x
#    define XML_L(x) x
#  endif

#else

#  define XML_T(x) x
#  define XML_L(x) x

#endif
188
/* Round up n to be a multiple of sz, where sz is a power of 2.
   Both arguments are evaluated more than once (sz twice), so they must be
   free of side effects. */
#define ROUND_UP(n, sz) (((n) + ((sz)-1)) & ~((sz)-1))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000191
/* Do safe (NULL-aware) pointer arithmetic: yields p - q when both pointers
   are non-NULL and 0 when either one is NULL.  Each argument may be
   evaluated twice. */
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
194
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000195#include "internal.h"
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000196#include "xmltok.h"
197#include "xmlrole.h"
198
199typedef const XML_Char *KEY;
200
201typedef struct {
202 KEY name;
203} NAMED;
204
205typedef struct {
206 NAMED **v;
Fred Drake08317ae2003-10-21 15:38:55 +0000207 unsigned char power;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000208 size_t size;
209 size_t used;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000210 const XML_Memory_Handling_Suite *mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000211} HASH_TABLE;
212
Benjamin Peterson52b94082019-09-25 21:33:58 -0700213static size_t keylen(KEY s);
Fred Drake08317ae2003-10-21 15:38:55 +0000214
Benjamin Peterson52b94082019-09-25 21:33:58 -0700215static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
Fred Drake08317ae2003-10-21 15:38:55 +0000216
/* For probing (after a collision) we need a step size relatively prime
   to the hash table size, which is a power of 2. We use double-hashing,
   since we can calculate a second hash value cheaply by taking those bits
   of the first hash value that were discarded (masked out) when the table
   index was calculated: index = hash & mask, where mask = table->size - 1.
   We limit the maximum step size to table->size / 4 (mask >> 2) and make
   it odd, since odd numbers are always relatively prime to a power of 2.

   PROBE_STEP narrows the result to unsigned char.
*/
#define SECOND_HASH(hash, mask, power)                                         \
  ((((hash) & ~(mask)) >> ((power)-1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power)                                          \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
229
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000230typedef struct {
231 NAMED **p;
232 NAMED **end;
233} HASH_TABLE_ITER;
234
/* Initial sizes and sentinels used by the parser's buffers and tables. */
#define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE 1024
#define INIT_ATTS_SIZE 16
#define INIT_ATTS_VERSION 0xFFFFFFFF
#define INIT_BLOCK_SIZE 1024
#define INIT_BUFFER_SIZE 1024

#define EXPAND_SPARE 24
243
244typedef struct binding {
245 struct prefix *prefix;
246 struct binding *nextTagBinding;
247 struct binding *prevPrefixBinding;
248 const struct attribute_id *attId;
249 XML_Char *uri;
250 int uriLen;
251 int uriAlloc;
252} BINDING;
253
254typedef struct prefix {
255 const XML_Char *name;
256 BINDING *binding;
257} PREFIX;
258
259typedef struct {
260 const XML_Char *str;
261 const XML_Char *localPart;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000262 const XML_Char *prefix;
263 int strLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000264 int uriLen;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000265 int prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000266} TAG_NAME;
267
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000268/* TAG represents an open element.
269 The name of the element is stored in both the document and API
270 encodings. The memory buffer 'buf' is a separately-allocated
271 memory area which stores the name. During the XML_Parse()/
272 XMLParseBuffer() when the element is open, the memory for the 'raw'
273 version of the name (in the document encoding) is shared with the
274 document buffer. If the element is open across calls to
275 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
276 contain the 'raw' name as well.
277
278 A parser re-uses these structures, maintaining a list of allocated
279 TAG objects in a free list.
280*/
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000281typedef struct tag {
Benjamin Peterson52b94082019-09-25 21:33:58 -0700282 struct tag *parent; /* parent of this element */
283 const char *rawName; /* tagName in the original encoding */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000284 int rawNameLength;
Benjamin Peterson52b94082019-09-25 21:33:58 -0700285 TAG_NAME name; /* tagName in the API encoding */
286 char *buf; /* buffer for name components */
287 char *bufEnd; /* end of the buffer */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000288 BINDING *bindings;
289} TAG;
290
291typedef struct {
292 const XML_Char *name;
293 const XML_Char *textPtr;
Benjamin Peterson52b94082019-09-25 21:33:58 -0700294 int textLen; /* length in XML_Chars */
295 int processed; /* # of processed bytes - when suspended */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000296 const XML_Char *systemId;
297 const XML_Char *base;
298 const XML_Char *publicId;
299 const XML_Char *notation;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000300 XML_Bool open;
301 XML_Bool is_param;
302 XML_Bool is_internal; /* true if declared in internal subset outside PE */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000303} ENTITY;
304
305typedef struct {
Benjamin Peterson52b94082019-09-25 21:33:58 -0700306 enum XML_Content_Type type;
307 enum XML_Content_Quant quant;
308 const XML_Char *name;
309 int firstchild;
310 int lastchild;
311 int childcnt;
312 int nextsib;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000313} CONTENT_SCAFFOLD;
314
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000315#define INIT_SCAFFOLD_ELEMENTS 32
316
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000317typedef struct block {
318 struct block *next;
319 int size;
320 XML_Char s[1];
321} BLOCK;
322
323typedef struct {
324 BLOCK *blocks;
325 BLOCK *freeBlocks;
326 const XML_Char *end;
327 XML_Char *ptr;
328 XML_Char *start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000329 const XML_Memory_Handling_Suite *mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000330} STRING_POOL;
331
332/* The XML_Char before the name is used to determine whether
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000333 an attribute has been specified. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000334typedef struct attribute_id {
335 XML_Char *name;
336 PREFIX *prefix;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000337 XML_Bool maybeTokenized;
338 XML_Bool xmlns;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000339} ATTRIBUTE_ID;
340
341typedef struct {
342 const ATTRIBUTE_ID *id;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000343 XML_Bool isCdata;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000344 const XML_Char *value;
345} DEFAULT_ATTRIBUTE;
346
347typedef struct {
Fred Drake08317ae2003-10-21 15:38:55 +0000348 unsigned long version;
349 unsigned long hash;
350 const XML_Char *uriName;
351} NS_ATT;
352
353typedef struct {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000354 const XML_Char *name;
355 PREFIX *prefix;
356 const ATTRIBUTE_ID *idAtt;
357 int nDefaultAtts;
358 int allocDefaultAtts;
359 DEFAULT_ATTRIBUTE *defaultAtts;
360} ELEMENT_TYPE;
361
362typedef struct {
363 HASH_TABLE generalEntities;
364 HASH_TABLE elementTypes;
365 HASH_TABLE attributeIds;
366 HASH_TABLE prefixes;
367 STRING_POOL pool;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000368 STRING_POOL entityValuePool;
369 /* false once a parameter entity reference has been skipped */
370 XML_Bool keepProcessing;
371 /* true once an internal or external PE reference has been encountered;
372 this includes the reference to an external subset */
373 XML_Bool hasParamEntityRefs;
374 XML_Bool standalone;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000375#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000376 /* indicates if external PE has been read */
377 XML_Bool paramEntityRead;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000378 HASH_TABLE paramEntities;
379#endif /* XML_DTD */
380 PREFIX defaultPrefix;
381 /* === scaffolding for building content model === */
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000382 XML_Bool in_eldecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000383 CONTENT_SCAFFOLD *scaffold;
384 unsigned contentStringLen;
385 unsigned scaffSize;
386 unsigned scaffCount;
387 int scaffLevel;
388 int *scaffIndex;
389} DTD;
390
391typedef struct open_internal_entity {
392 const char *internalEventPtr;
393 const char *internalEventEndPtr;
394 struct open_internal_entity *next;
395 ENTITY *entity;
Fred Drake31d485c2004-08-03 07:06:22 +0000396 int startTagLevel;
397 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000398} OPEN_INTERNAL_ENTITY;
399
/* How a span of bytes should be counted by the accounting code. */
enum XML_Account {
  XML_ACCOUNT_DIRECT,           /* bytes directly passed to the Expat parser */
  XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity
                                   expansion */
  XML_ACCOUNT_NONE              /* i.e. do not account, was accounted already */
};
406
#ifdef XML_DTD
/* Counter type for byte totals. */
typedef unsigned long long XmlBigCount;

/* Byte counters (direct and indirect) plus the settings stored alongside
   them: a debug level, a maximum amplification factor and an activation
   threshold in bytes. */
typedef struct accounting {
  XmlBigCount countBytesDirect;
  XmlBigCount countBytesIndirect;
  int debugLevel;
  float maximumAmplificationFactor; // >=1.0
  unsigned long long activationThresholdBytes;
} ACCOUNTING;

/* Entity counters: how many were ever opened, the current depth and the
   maximum depth seen, plus a debug level. */
typedef struct entity_stats {
  unsigned int countEverOpened;
  unsigned int currentDepth;
  unsigned int maximumDepthSeen;
  int debugLevel;
} ENTITY_STATS;
#endif /* XML_DTD */
424
Benjamin Peterson52b94082019-09-25 21:33:58 -0700425typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
426 const char *end, const char **endPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000427
428static Processor prologProcessor;
429static Processor prologInitProcessor;
430static Processor contentProcessor;
431static Processor cdataSectionProcessor;
432#ifdef XML_DTD
433static Processor ignoreSectionProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000434static Processor externalParEntProcessor;
435static Processor externalParEntInitProcessor;
436static Processor entityValueProcessor;
437static Processor entityValueInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000438#endif /* XML_DTD */
439static Processor epilogProcessor;
440static Processor errorProcessor;
441static Processor externalEntityInitProcessor;
442static Processor externalEntityInitProcessor2;
443static Processor externalEntityInitProcessor3;
444static Processor externalEntityContentProcessor;
Fred Drake31d485c2004-08-03 07:06:22 +0000445static Processor internalEntityProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000446
Benjamin Peterson52b94082019-09-25 21:33:58 -0700447static enum XML_Error handleUnknownEncoding(XML_Parser parser,
448 const XML_Char *encodingName);
449static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
450 const char *s, const char *next);
451static enum XML_Error initializeEncoding(XML_Parser parser);
452static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
453 const char *s, const char *end, int tok,
454 const char *next, const char **nextPtr,
Miss Islington (bot)27067852021-08-29 07:32:50 -0700455 XML_Bool haveMore, XML_Bool allowClosingDoctype,
456 enum XML_Account account);
Benjamin Peterson52b94082019-09-25 21:33:58 -0700457static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
458 XML_Bool betweenDecl);
459static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
460 const ENCODING *enc, const char *start,
461 const char *end, const char **endPtr,
Miss Islington (bot)27067852021-08-29 07:32:50 -0700462 XML_Bool haveMore, enum XML_Account account);
Benjamin Peterson52b94082019-09-25 21:33:58 -0700463static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *,
464 const char **startPtr, const char *end,
Miss Islington (bot)27067852021-08-29 07:32:50 -0700465 const char **nextPtr, XML_Bool haveMore,
466 enum XML_Account account);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000467#ifdef XML_DTD
Benjamin Peterson52b94082019-09-25 21:33:58 -0700468static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *,
469 const char **startPtr, const char *end,
470 const char **nextPtr, XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000471#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000472
Benjamin Peterson52b94082019-09-25 21:33:58 -0700473static void freeBindings(XML_Parser parser, BINDING *bindings);
474static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *,
475 const char *s, TAG_NAME *tagNamePtr,
Miss Islington (bot)27067852021-08-29 07:32:50 -0700476 BINDING **bindingsPtr,
477 enum XML_Account account);
Benjamin Peterson52b94082019-09-25 21:33:58 -0700478static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
479 const ATTRIBUTE_ID *attId, const XML_Char *uri,
480 BINDING **bindingsPtr);
481static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
482 XML_Bool isId, const XML_Char *dfltValue,
483 XML_Parser parser);
484static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *,
485 XML_Bool isCdata, const char *,
Miss Islington (bot)27067852021-08-29 07:32:50 -0700486 const char *, STRING_POOL *,
487 enum XML_Account account);
Benjamin Peterson52b94082019-09-25 21:33:58 -0700488static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *,
489 XML_Bool isCdata, const char *,
Miss Islington (bot)27067852021-08-29 07:32:50 -0700490 const char *, STRING_POOL *,
491 enum XML_Account account);
Benjamin Peterson52b94082019-09-25 21:33:58 -0700492static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
493 const char *start, const char *end);
494static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
495static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
Miss Islington (bot)27067852021-08-29 07:32:50 -0700496 const char *start, const char *end,
497 enum XML_Account account);
Benjamin Peterson52b94082019-09-25 21:33:58 -0700498static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
499 const char *start, const char *end);
500static int reportComment(XML_Parser parser, const ENCODING *enc,
501 const char *start, const char *end);
502static void reportDefault(XML_Parser parser, const ENCODING *enc,
503 const char *start, const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000504
Benjamin Peterson52b94082019-09-25 21:33:58 -0700505static const XML_Char *getContext(XML_Parser parser);
506static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000507
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000508static void FASTCALL normalizePublicId(XML_Char *s);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000509
Benjamin Peterson52b94082019-09-25 21:33:58 -0700510static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms);
Benjamin Peterson4e211002018-06-26 19:25:45 -0700511/* do not call if m_parentParser != NULL */
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000512static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
Benjamin Peterson52b94082019-09-25 21:33:58 -0700513static void dtdDestroy(DTD *p, XML_Bool isDocEntity,
514 const XML_Memory_Handling_Suite *ms);
515static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
516 const XML_Memory_Handling_Suite *ms);
517static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *, STRING_POOL *,
518 const HASH_TABLE *);
519static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
520 size_t createSize);
521static void FASTCALL hashTableInit(HASH_TABLE *,
522 const XML_Memory_Handling_Suite *ms);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000523static void FASTCALL hashTableClear(HASH_TABLE *);
524static void FASTCALL hashTableDestroy(HASH_TABLE *);
Benjamin Peterson52b94082019-09-25 21:33:58 -0700525static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
526static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000527
Benjamin Peterson52b94082019-09-25 21:33:58 -0700528static void FASTCALL poolInit(STRING_POOL *,
529 const XML_Memory_Handling_Suite *ms);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000530static void FASTCALL poolClear(STRING_POOL *);
531static void FASTCALL poolDestroy(STRING_POOL *);
Benjamin Peterson52b94082019-09-25 21:33:58 -0700532static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
533 const char *ptr, const char *end);
534static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
535 const char *ptr, const char *end);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000536static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
Benjamin Peterson52b94082019-09-25 21:33:58 -0700537static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
538 const XML_Char *s);
539static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
540 int n);
541static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
542 const XML_Char *s);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000543
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000544static int FASTCALL nextScaffoldPart(XML_Parser parser);
Benjamin Peterson52b94082019-09-25 21:33:58 -0700545static XML_Content *build_model(XML_Parser parser);
546static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
547 const char *ptr, const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000548
Victor Stinner93d0cb52017-08-18 23:43:54 +0200549static XML_Char *copyString(const XML_Char *s,
550 const XML_Memory_Handling_Suite *memsuite);
551
Victor Stinner23ec4b52017-06-15 00:54:36 +0200552static unsigned long generate_hash_secret_salt(XML_Parser parser);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700553static XML_Bool startParsing(XML_Parser parser);
554
Benjamin Peterson52b94082019-09-25 21:33:58 -0700555static XML_Parser parserCreate(const XML_Char *encodingName,
556 const XML_Memory_Handling_Suite *memsuite,
557 const XML_Char *nameSep, DTD *dtd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700558
Benjamin Peterson52b94082019-09-25 21:33:58 -0700559static void parserInit(XML_Parser parser, const XML_Char *encodingName);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000560
/* Forward declarations: accounting and entity tracking helpers (XML_DTD
   builds only) and the debug level reader.  The definitions appear later in
   the file. */
#ifdef XML_DTD
static float accountingGetCurrentAmplification(XML_Parser rootParser);
static void accountingReportStats(XML_Parser originParser, const char *epilog);
static void accountingOnAbort(XML_Parser originParser);
static void accountingReportDiff(XML_Parser rootParser,
                                 unsigned int levelsAwayFromRootParser,
                                 const char *before, const char *after,
                                 ptrdiff_t bytesMore, int source_line,
                                 enum XML_Account account);
static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
                                        const char *before, const char *after,
                                        int source_line,
                                        enum XML_Account account);

static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
                                      const char *action, int sourceLine);
static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
                                 int sourceLine);
static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
                                  int sourceLine);

static XML_Parser getRootParserOf(XML_Parser parser,
                                  unsigned int *outLevelDiff);
#endif /* XML_DTD */

static unsigned long getDebugLevel(const char *variableName,
                                   unsigned long defaultDebugLevel);
588
/* Accessors for a STRING_POOL; pool is a pointer expression and is
   evaluated more than once by several of these macros.

   poolStart / poolEnd   first character / one past the last character of
                         the string under construction
   poolLength            number of characters between start and ptr
   poolChop              drop the last character
   poolLastChar          the last character (lvalue)
   poolDiscard           reset ptr to start
   poolFinish            move start up to ptr
   poolAppendChar        store c at ptr and advance; yields 1 on success, or
                         0 when the pool is full and poolGrow() fails

   Every use of the pool argument is parenthesized so that any pointer
   expression, e.g. &localPool, can be passed. */
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000600
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000601struct XML_ParserStruct {
Benjamin Peterson4e211002018-06-26 19:25:45 -0700602 /* The first member must be m_userData so that the XML_GetUserData
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000603 macro works. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000604 void *m_userData;
605 void *m_handlerArg;
606 char *m_buffer;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000607 const XML_Memory_Handling_Suite m_mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000608 /* first character to be parsed */
609 const char *m_bufferPtr;
610 /* past last character to be parsed */
611 char *m_bufferEnd;
Benjamin Peterson4e211002018-06-26 19:25:45 -0700612 /* allocated end of m_buffer */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000613 const char *m_bufferLim;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000614 XML_Index m_parseEndByteIndex;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000615 const char *m_parseEndPtr;
616 XML_Char *m_dataBuf;
617 XML_Char *m_dataBufEnd;
618 XML_StartElementHandler m_startElementHandler;
619 XML_EndElementHandler m_endElementHandler;
620 XML_CharacterDataHandler m_characterDataHandler;
621 XML_ProcessingInstructionHandler m_processingInstructionHandler;
622 XML_CommentHandler m_commentHandler;
623 XML_StartCdataSectionHandler m_startCdataSectionHandler;
624 XML_EndCdataSectionHandler m_endCdataSectionHandler;
625 XML_DefaultHandler m_defaultHandler;
626 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
627 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
628 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
629 XML_NotationDeclHandler m_notationDeclHandler;
630 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
631 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
632 XML_NotStandaloneHandler m_notStandaloneHandler;
633 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000634 XML_Parser m_externalEntityRefHandlerArg;
635 XML_SkippedEntityHandler m_skippedEntityHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000636 XML_UnknownEncodingHandler m_unknownEncodingHandler;
637 XML_ElementDeclHandler m_elementDeclHandler;
638 XML_AttlistDeclHandler m_attlistDeclHandler;
639 XML_EntityDeclHandler m_entityDeclHandler;
640 XML_XmlDeclHandler m_xmlDeclHandler;
641 const ENCODING *m_encoding;
642 INIT_ENCODING m_initEncoding;
643 const ENCODING *m_internalEncoding;
644 const XML_Char *m_protocolEncodingName;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000645 XML_Bool m_ns;
646 XML_Bool m_ns_triplets;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000647 void *m_unknownEncodingMem;
648 void *m_unknownEncodingData;
649 void *m_unknownEncodingHandlerData;
Benjamin Peterson52b94082019-09-25 21:33:58 -0700650 void(XMLCALL *m_unknownEncodingRelease)(void *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000651 PROLOG_STATE m_prologState;
652 Processor *m_processor;
653 enum XML_Error m_errorCode;
654 const char *m_eventPtr;
655 const char *m_eventEndPtr;
656 const char *m_positionPtr;
657 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
Fred Drake31d485c2004-08-03 07:06:22 +0000658 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000659 XML_Bool m_defaultExpandInternalEntities;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000660 int m_tagLevel;
661 ENTITY *m_declEntity;
662 const XML_Char *m_doctypeName;
663 const XML_Char *m_doctypeSysid;
664 const XML_Char *m_doctypePubid;
665 const XML_Char *m_declAttributeType;
666 const XML_Char *m_declNotationName;
667 const XML_Char *m_declNotationPublicId;
668 ELEMENT_TYPE *m_declElementType;
669 ATTRIBUTE_ID *m_declAttributeId;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000670 XML_Bool m_declAttributeIsCdata;
671 XML_Bool m_declAttributeIsId;
672 DTD *m_dtd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000673 const XML_Char *m_curBase;
674 TAG *m_tagStack;
675 TAG *m_freeTagList;
676 BINDING *m_inheritedBindings;
677 BINDING *m_freeBindingList;
678 int m_attsSize;
679 int m_nSpecifiedAtts;
680 int m_idAttIndex;
681 ATTRIBUTE *m_atts;
Fred Drake08317ae2003-10-21 15:38:55 +0000682 NS_ATT *m_nsAtts;
683 unsigned long m_nsAttsVersion;
684 unsigned char m_nsAttsPower;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700685#ifdef XML_ATTR_INFO
686 XML_AttrInfo *m_attInfo;
687#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000688 POSITION m_position;
689 STRING_POOL m_tempPool;
690 STRING_POOL m_temp2Pool;
691 char *m_groupConnector;
Fred Drake08317ae2003-10-21 15:38:55 +0000692 unsigned int m_groupSize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000693 XML_Char m_namespaceSeparator;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000694 XML_Parser m_parentParser;
Fred Drake31d485c2004-08-03 07:06:22 +0000695 XML_ParsingStatus m_parsingStatus;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000696#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000697 XML_Bool m_isParamEntity;
698 XML_Bool m_useForeignDTD;
699 enum XML_ParamEntityParsing m_paramEntityParsing;
700#endif
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700701 unsigned long m_hash_secret_salt;
Miss Islington (bot)27067852021-08-29 07:32:50 -0700702#ifdef XML_DTD
703 ACCOUNTING m_accounting;
704 ENTITY_STATS m_entity_stats;
705#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000706};
707
/* Allocation helpers that dispatch to the functions stored in the parser's
   m_mem member.  "parser" is any expression yielding a pointer to the parser
   object; it is parenthesized so that arguments such as &obj or p + 1
   expand correctly. */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000711
Fred Drake08317ae2003-10-21 15:38:55 +0000712XML_Parser XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -0700713XML_ParserCreate(const XML_Char *encodingName) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000714 return XML_ParserCreate_MM(encodingName, NULL, NULL);
715}
716
Fred Drake08317ae2003-10-21 15:38:55 +0000717XML_Parser XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -0700718XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000719 XML_Char tmp[2];
720 *tmp = nsSep;
721 return XML_ParserCreate_MM(encodingName, NULL, tmp);
722}
723
Benjamin Peterson52b94082019-09-25 21:33:58 -0700724static const XML_Char implicitContext[]
725 = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h,
726 ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
727 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD,
728 ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r,
729 ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
730 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8,
731 ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
732 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e,
733 '\0'};
Victor Stinner5ff71322017-06-21 14:39:22 +0200734
Benjamin Peterson4e211002018-06-26 19:25:45 -0700735/* To avoid warnings about unused functions: */
736#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
737
Benjamin Peterson52b94082019-09-25 21:33:58 -0700738# if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
Victor Stinner5ff71322017-06-21 14:39:22 +0200739
/* Obtain entropy on Linux 3.17+
 *
 * Fills target[0..count) using getrandom() (or the raw SYS_getrandom
 * syscall) with GRND_NONBLOCK.
 *
 * Returns 1 once all count bytes have been written, 0 otherwise.
 *
 * errno is consulted only after a call that actually failed, so a stale
 * EINTR left over from earlier code cannot influence the loop.  A short
 * write is followed by a request for the remainder; a call interrupted by a
 * signal (EINTR) is retried; any other outcome gives up.
 */
static int
writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = GRND_NONBLOCK;

  for (;;) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    /* long holds the result of either variant without narrowing */
    const long bytesWrittenMore =
#  if defined(HAVE_GETRANDOM)
        getrandom(currentTarget, bytesToWrite, getrandomFlags);
#  else
        syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
#  endif

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count)
        return 1; /* full count bytes written */
      continue;   /* short write: fetch the rest */
    }

    if (bytesWrittenMore < 0 && errno == EINTR)
      continue; /* interrupted before anything was written: retry */

    return 0; /* real failure, or no progress */
  }
}
767
Benjamin Peterson52b94082019-09-25 21:33:58 -0700768# endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
Victor Stinner5ff71322017-06-21 14:39:22 +0200769
Benjamin Peterson52b94082019-09-25 21:33:58 -0700770# if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
Victor Stinner93d0cb52017-08-18 23:43:54 +0200771
/* Extract entropy from /dev/urandom
 *
 * Fills target[0..count) by reading from /dev/urandom.
 *
 * Returns 1 once all count bytes have been read, 0 if the device cannot be
 * opened or reading stops early.
 *
 * errno is consulted only after read() reported failure (-1), so a stale
 * EINTR cannot keep the loop spinning.  Short reads are continued, reads
 * interrupted by a signal are retried, and a 0-byte read (no progress) ends
 * the attempt.
 */
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  int success = 0; /* full count bytes written? */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  while (! success) {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count)
        success = 1;
    } else if (bytesWrittenMore == 0 || errno != EINTR) {
      break; /* no progress, or an error other than an interrupted call */
    }
  }

  close(fd);
  return success;
}
799
Benjamin Peterson52b94082019-09-25 21:33:58 -0700800# endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
Victor Stinner93d0cb52017-08-18 23:43:54 +0200801
Benjamin Peterson52b94082019-09-25 21:33:58 -0700802#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
Victor Stinner93d0cb52017-08-18 23:43:54 +0200803
Benjamin Peterson3b03b092019-06-27 20:54:44 -0700804#if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
Victor Stinner93d0cb52017-08-18 23:43:54 +0200805
/* Fill target[0..count) with bytes taken from arc4random().  One 32-bit
   value is fetched for every group of sizeof(uint32_t) output bytes and is
   consumed least significant byte first. */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  uint32_t word = 0;
  size_t pos;

  for (pos = 0; pos < count; pos++) {
    const size_t lane = pos % sizeof(word);

    if (lane == 0)
      word = arc4random(); /* start of a new group: fetch fresh bits */
    out[pos] = (uint8_t)(word >> (lane * 8));
  }
}
821
Benjamin Peterson52b94082019-09-25 21:33:58 -0700822#endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
Victor Stinner93d0cb52017-08-18 23:43:54 +0200823
Victor Stinner5ff71322017-06-21 14:39:22 +0200824#ifdef _WIN32
825
Miss Islington (bot)27067852021-08-29 07:32:50 -0700826/* Provide declaration of rand_s() for MinGW-32 (not 64, which has it),
827 as it didn't declare it in its header prior to version 5.3.0 of its
828 runtime package (mingwrt, containing stdlib.h). The upstream fix
829 was introduced at https://osdn.net/projects/mingw/ticket/39658 . */
830# if defined(__MINGW32__) && defined(__MINGW32_VERSION) \
831 && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
832__declspec(dllimport) int rand_s(unsigned int *);
833# endif
834
/* Obtain entropy on Windows using the rand_s() function which
 * generates cryptographically secure random numbers.  Internally it
 * uses RtlGenRandom API which is present in Windows XP and later.
 *
 * Fills target[0..count); one rand_s() value is fetched for every group of
 * sizeof(unsigned int) output bytes and consumed least significant byte
 * first.  Returns 1 on success, 0 as soon as rand_s() reports an error.
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  unsigned int word = 0;
  size_t pos;

  for (pos = 0; pos < count; pos++) {
    const size_t lane = pos % sizeof(word);

    if (lane == 0) {
      word = 0;
      if (rand_s(&word))
        return 0; /* failure */
    }
    out[pos] = (uint8_t)(word >> (lane * 8));
  }
  return 1; /* success */
}
858
859#endif /* _WIN32 */
860
Victor Stinner93d0cb52017-08-18 23:43:54 +0200861#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
862
/* Low-quality entropy derived from the current time.  On Windows the two
   halves of the FILETIME are XORed together; elsewhere the microseconds
   field reported by gettimeofday() is returned. */
static unsigned long
gather_time_entropy(void) {
# ifdef _WIN32
  FILETIME now;

  GetSystemTimeAsFileTime(&now); /* never fails */
  return now.dwHighDateTime ^ now.dwLowDateTime;
# else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  /* Checked in debug builds only; the cast keeps NDEBUG builds free of
     "unused variable" warnings. */
  assert(rc == 0);
  (void)rc;

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
# endif
}
885
Benjamin Peterson52b94082019-09-25 21:33:58 -0700886#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
Victor Stinner5ff71322017-06-21 14:39:22 +0200887
/* Pass-through helper: returns entropy unchanged.  When
   getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) yields at least 1, the value and
   the name of its source (label) are also reported on stderr. */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  const int hexDigits = (int)sizeof(entropy) * 2; /* two digits per byte */
  const unsigned long byteCount = (unsigned long)sizeof(entropy);

  if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) >= 1u)
    fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
            hexDigits, entropy, byteCount);

  return entropy;
}
896
Victor Stinner23ec4b52017-06-15 00:54:36 +0200897static unsigned long
Benjamin Peterson52b94082019-09-25 21:33:58 -0700898generate_hash_secret_salt(XML_Parser parser) {
Victor Stinner5ff71322017-06-21 14:39:22 +0200899 unsigned long entropy;
900 (void)parser;
Benjamin Peterson4e211002018-06-26 19:25:45 -0700901
902 /* "Failproof" high quality providers: */
Victor Stinner93d0cb52017-08-18 23:43:54 +0200903#if defined(HAVE_ARC4RANDOM_BUF)
Victor Stinner5ff71322017-06-21 14:39:22 +0200904 arc4random_buf(&entropy, sizeof(entropy));
905 return ENTROPY_DEBUG("arc4random_buf", entropy);
Victor Stinner93d0cb52017-08-18 23:43:54 +0200906#elif defined(HAVE_ARC4RANDOM)
907 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
908 return ENTROPY_DEBUG("arc4random", entropy);
Victor Stinner5ff71322017-06-21 14:39:22 +0200909#else
910 /* Try high quality providers first .. */
Benjamin Peterson52b94082019-09-25 21:33:58 -0700911# ifdef _WIN32
912 if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
913 return ENTROPY_DEBUG("rand_s", entropy);
Victor Stinner5ff71322017-06-21 14:39:22 +0200914 }
Benjamin Peterson52b94082019-09-25 21:33:58 -0700915# elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
Victor Stinner93d0cb52017-08-18 23:43:54 +0200916 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
Victor Stinner5ff71322017-06-21 14:39:22 +0200917 return ENTROPY_DEBUG("getrandom", entropy);
918 }
Benjamin Peterson52b94082019-09-25 21:33:58 -0700919# endif
920# if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
Victor Stinner93d0cb52017-08-18 23:43:54 +0200921 if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
922 return ENTROPY_DEBUG("/dev/urandom", entropy);
923 }
Benjamin Peterson52b94082019-09-25 21:33:58 -0700924# endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
Victor Stinner5ff71322017-06-21 14:39:22 +0200925 /* .. and self-made low quality for backup: */
926
927 /* Process ID is 0 bits entropy if attacker has local access */
928 entropy = gather_time_entropy() ^ getpid();
Victor Stinner23ec4b52017-06-15 00:54:36 +0200929
930 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
931 if (sizeof(unsigned long) == 4) {
Victor Stinner5ff71322017-06-21 14:39:22 +0200932 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200933 } else {
Victor Stinner5ff71322017-06-21 14:39:22 +0200934 return ENTROPY_DEBUG("fallback(8)",
Benjamin Peterson52b94082019-09-25 21:33:58 -0700935 entropy * (unsigned long)2305843009213693951ULL);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200936 }
Victor Stinner5ff71322017-06-21 14:39:22 +0200937#endif
938}
939
940static unsigned long
941get_hash_secret_salt(XML_Parser parser) {
942 if (parser->m_parentParser != NULL)
943 return get_hash_secret_salt(parser->m_parentParser);
944 return parser->m_hash_secret_salt;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700945}
946
Benjamin Peterson52b94082019-09-25 21:33:58 -0700947static XML_Bool /* only valid for root parser */
948startParsing(XML_Parser parser) {
949 /* hash functions must be initialized before setContext() is called */
950 if (parser->m_hash_secret_salt == 0)
951 parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
952 if (parser->m_ns) {
953 /* implicit context only set for root parser, since child
954 parsers (i.e. external entity parsers) will inherit it
955 */
956 return setContext(parser, implicitContext);
957 }
958 return XML_TRUE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700959}
960
961XML_Parser XMLCALL
962XML_ParserCreate_MM(const XML_Char *encodingName,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700963 const XML_Memory_Handling_Suite *memsuite,
Benjamin Peterson52b94082019-09-25 21:33:58 -0700964 const XML_Char *nameSep) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700965 return parserCreate(encodingName, memsuite, nameSep, NULL);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000966}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000967
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000968static XML_Parser
969parserCreate(const XML_Char *encodingName,
Benjamin Peterson52b94082019-09-25 21:33:58 -0700970 const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
971 DTD *dtd) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000972 XML_Parser parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000973
974 if (memsuite) {
975 XML_Memory_Handling_Suite *mtemp;
Benjamin Peterson52b94082019-09-25 21:33:58 -0700976 parser = (XML_Parser)memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000977 if (parser != NULL) {
978 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
979 mtemp->malloc_fcn = memsuite->malloc_fcn;
980 mtemp->realloc_fcn = memsuite->realloc_fcn;
981 mtemp->free_fcn = memsuite->free_fcn;
982 }
Benjamin Peterson52b94082019-09-25 21:33:58 -0700983 } else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000984 XML_Memory_Handling_Suite *mtemp;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000985 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
986 if (parser != NULL) {
987 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
988 mtemp->malloc_fcn = malloc;
989 mtemp->realloc_fcn = realloc;
990 mtemp->free_fcn = free;
991 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000992 }
993
Benjamin Peterson52b94082019-09-25 21:33:58 -0700994 if (! parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000995 return parser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000996
Benjamin Peterson4e211002018-06-26 19:25:45 -0700997 parser->m_buffer = NULL;
998 parser->m_bufferLim = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000999
Benjamin Peterson4e211002018-06-26 19:25:45 -07001000 parser->m_attsSize = INIT_ATTS_SIZE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07001001 parser->m_atts
1002 = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
Benjamin Peterson4e211002018-06-26 19:25:45 -07001003 if (parser->m_atts == NULL) {
1004 FREE(parser, parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001005 return NULL;
1006 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001007#ifdef XML_ATTR_INFO
Benjamin Peterson52b94082019-09-25 21:33:58 -07001008 parser->m_attInfo = (XML_AttrInfo *)MALLOC(
1009 parser, parser->m_attsSize * sizeof(XML_AttrInfo));
Benjamin Peterson4e211002018-06-26 19:25:45 -07001010 if (parser->m_attInfo == NULL) {
1011 FREE(parser, parser->m_atts);
1012 FREE(parser, parser);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001013 return NULL;
1014 }
1015#endif
Benjamin Peterson52b94082019-09-25 21:33:58 -07001016 parser->m_dataBuf
1017 = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
Benjamin Peterson4e211002018-06-26 19:25:45 -07001018 if (parser->m_dataBuf == NULL) {
1019 FREE(parser, parser->m_atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001020#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -07001021 FREE(parser, parser->m_attInfo);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001022#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07001023 FREE(parser, parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001024 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001025 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001026 parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001027
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001028 if (dtd)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001029 parser->m_dtd = dtd;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001030 else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001031 parser->m_dtd = dtdCreate(&parser->m_mem);
1032 if (parser->m_dtd == NULL) {
1033 FREE(parser, parser->m_dataBuf);
1034 FREE(parser, parser->m_atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001035#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -07001036 FREE(parser, parser->m_attInfo);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001037#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07001038 FREE(parser, parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001039 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001040 }
1041 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001042
Benjamin Peterson4e211002018-06-26 19:25:45 -07001043 parser->m_freeBindingList = NULL;
1044 parser->m_freeTagList = NULL;
1045 parser->m_freeInternalEntities = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001046
Benjamin Peterson4e211002018-06-26 19:25:45 -07001047 parser->m_groupSize = 0;
1048 parser->m_groupConnector = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001049
Benjamin Peterson4e211002018-06-26 19:25:45 -07001050 parser->m_unknownEncodingHandler = NULL;
1051 parser->m_unknownEncodingHandlerData = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001052
Benjamin Peterson4e211002018-06-26 19:25:45 -07001053 parser->m_namespaceSeparator = ASCII_EXCL;
1054 parser->m_ns = XML_FALSE;
1055 parser->m_ns_triplets = XML_FALSE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001056
Benjamin Peterson4e211002018-06-26 19:25:45 -07001057 parser->m_nsAtts = NULL;
1058 parser->m_nsAttsVersion = 0;
1059 parser->m_nsAttsPower = 0;
Fred Drake08317ae2003-10-21 15:38:55 +00001060
Benjamin Peterson4e211002018-06-26 19:25:45 -07001061 parser->m_protocolEncodingName = NULL;
Victor Stinner93d0cb52017-08-18 23:43:54 +02001062
Benjamin Peterson4e211002018-06-26 19:25:45 -07001063 poolInit(&parser->m_tempPool, &(parser->m_mem));
1064 poolInit(&parser->m_temp2Pool, &(parser->m_mem));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001065 parserInit(parser, encodingName);
1066
Benjamin Peterson52b94082019-09-25 21:33:58 -07001067 if (encodingName && ! parser->m_protocolEncodingName) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001068 XML_ParserFree(parser);
1069 return NULL;
1070 }
1071
1072 if (nameSep) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001073 parser->m_ns = XML_TRUE;
1074 parser->m_internalEncoding = XmlGetInternalEncodingNS();
1075 parser->m_namespaceSeparator = *nameSep;
Benjamin Peterson52b94082019-09-25 21:33:58 -07001076 } else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001077 parser->m_internalEncoding = XmlGetInternalEncoding();
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001078 }
1079
1080 return parser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001081}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001082
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001083static void
Benjamin Peterson52b94082019-09-25 21:33:58 -07001084parserInit(XML_Parser parser, const XML_Char *encodingName) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001085 parser->m_processor = prologInitProcessor;
1086 XmlPrologStateInit(&parser->m_prologState);
Victor Stinner93d0cb52017-08-18 23:43:54 +02001087 if (encodingName != NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001088 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
Victor Stinner93d0cb52017-08-18 23:43:54 +02001089 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001090 parser->m_curBase = NULL;
1091 XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1092 parser->m_userData = NULL;
1093 parser->m_handlerArg = NULL;
1094 parser->m_startElementHandler = NULL;
1095 parser->m_endElementHandler = NULL;
1096 parser->m_characterDataHandler = NULL;
1097 parser->m_processingInstructionHandler = NULL;
1098 parser->m_commentHandler = NULL;
1099 parser->m_startCdataSectionHandler = NULL;
1100 parser->m_endCdataSectionHandler = NULL;
1101 parser->m_defaultHandler = NULL;
1102 parser->m_startDoctypeDeclHandler = NULL;
1103 parser->m_endDoctypeDeclHandler = NULL;
1104 parser->m_unparsedEntityDeclHandler = NULL;
1105 parser->m_notationDeclHandler = NULL;
1106 parser->m_startNamespaceDeclHandler = NULL;
1107 parser->m_endNamespaceDeclHandler = NULL;
1108 parser->m_notStandaloneHandler = NULL;
1109 parser->m_externalEntityRefHandler = NULL;
1110 parser->m_externalEntityRefHandlerArg = parser;
1111 parser->m_skippedEntityHandler = NULL;
1112 parser->m_elementDeclHandler = NULL;
1113 parser->m_attlistDeclHandler = NULL;
1114 parser->m_entityDeclHandler = NULL;
1115 parser->m_xmlDeclHandler = NULL;
1116 parser->m_bufferPtr = parser->m_buffer;
1117 parser->m_bufferEnd = parser->m_buffer;
1118 parser->m_parseEndByteIndex = 0;
1119 parser->m_parseEndPtr = NULL;
1120 parser->m_declElementType = NULL;
1121 parser->m_declAttributeId = NULL;
1122 parser->m_declEntity = NULL;
1123 parser->m_doctypeName = NULL;
1124 parser->m_doctypeSysid = NULL;
1125 parser->m_doctypePubid = NULL;
1126 parser->m_declAttributeType = NULL;
1127 parser->m_declNotationName = NULL;
1128 parser->m_declNotationPublicId = NULL;
1129 parser->m_declAttributeIsCdata = XML_FALSE;
1130 parser->m_declAttributeIsId = XML_FALSE;
1131 memset(&parser->m_position, 0, sizeof(POSITION));
1132 parser->m_errorCode = XML_ERROR_NONE;
1133 parser->m_eventPtr = NULL;
1134 parser->m_eventEndPtr = NULL;
1135 parser->m_positionPtr = NULL;
1136 parser->m_openInternalEntities = NULL;
1137 parser->m_defaultExpandInternalEntities = XML_TRUE;
1138 parser->m_tagLevel = 0;
1139 parser->m_tagStack = NULL;
1140 parser->m_inheritedBindings = NULL;
1141 parser->m_nSpecifiedAtts = 0;
1142 parser->m_unknownEncodingMem = NULL;
1143 parser->m_unknownEncodingRelease = NULL;
1144 parser->m_unknownEncodingData = NULL;
1145 parser->m_parentParser = NULL;
1146 parser->m_parsingStatus.parsing = XML_INITIALIZED;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001147#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07001148 parser->m_isParamEntity = XML_FALSE;
1149 parser->m_useForeignDTD = XML_FALSE;
1150 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001151#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07001152 parser->m_hash_secret_salt = 0;
Miss Islington (bot)27067852021-08-29 07:32:50 -07001153
1154#ifdef XML_DTD
1155 memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
1156 parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
1157 parser->m_accounting.maximumAmplificationFactor
1158 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
1159 parser->m_accounting.activationThresholdBytes
1160 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;
1161
1162 memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
1163 parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
1164#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001165}
1166
Benjamin Peterson4e211002018-06-26 19:25:45 -07001167/* moves list of bindings to m_freeBindingList */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001168static void FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001169moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001170 while (bindings) {
1171 BINDING *b = bindings;
1172 bindings = bindings->nextTagBinding;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001173 b->nextTagBinding = parser->m_freeBindingList;
1174 parser->m_freeBindingList = b;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001175 }
1176}
1177
Fred Drake08317ae2003-10-21 15:38:55 +00001178XML_Bool XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001179XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001180 TAG *tStk;
Fred Drake31d485c2004-08-03 07:06:22 +00001181 OPEN_INTERNAL_ENTITY *openEntityList;
Victor Stinner5ff71322017-06-21 14:39:22 +02001182
1183 if (parser == NULL)
Benjamin Peterson52b94082019-09-25 21:33:58 -07001184 return XML_FALSE;
Victor Stinner5ff71322017-06-21 14:39:22 +02001185
Benjamin Peterson4e211002018-06-26 19:25:45 -07001186 if (parser->m_parentParser)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001187 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001188 /* move m_tagStack to m_freeTagList */
1189 tStk = parser->m_tagStack;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001190 while (tStk) {
1191 TAG *tag = tStk;
1192 tStk = tStk->parent;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001193 tag->parent = parser->m_freeTagList;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001194 moveToFreeBindingList(parser, tag->bindings);
1195 tag->bindings = NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001196 parser->m_freeTagList = tag;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001197 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001198 /* move m_openInternalEntities to m_freeInternalEntities */
1199 openEntityList = parser->m_openInternalEntities;
Fred Drake31d485c2004-08-03 07:06:22 +00001200 while (openEntityList) {
1201 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1202 openEntityList = openEntity->next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001203 openEntity->next = parser->m_freeInternalEntities;
1204 parser->m_freeInternalEntities = openEntity;
Fred Drake31d485c2004-08-03 07:06:22 +00001205 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001206 moveToFreeBindingList(parser, parser->m_inheritedBindings);
1207 FREE(parser, parser->m_unknownEncodingMem);
1208 if (parser->m_unknownEncodingRelease)
1209 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1210 poolClear(&parser->m_tempPool);
1211 poolClear(&parser->m_temp2Pool);
1212 FREE(parser, (void *)parser->m_protocolEncodingName);
1213 parser->m_protocolEncodingName = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001214 parserInit(parser, encodingName);
Benjamin Peterson4e211002018-06-26 19:25:45 -07001215 dtdReset(parser->m_dtd, &parser->m_mem);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001216 return XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001217}
1218
Fred Drake08317ae2003-10-21 15:38:55 +00001219enum XML_Status XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001220XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001221 if (parser == NULL)
Benjamin Peterson52b94082019-09-25 21:33:58 -07001222 return XML_STATUS_ERROR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001223 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1224 XXX There's no way for the caller to determine which of the
1225 XXX possible error cases caused the XML_STATUS_ERROR return.
1226 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07001227 if (parser->m_parsingStatus.parsing == XML_PARSING
1228 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001229 return XML_STATUS_ERROR;
Victor Stinner93d0cb52017-08-18 23:43:54 +02001230
1231 /* Get rid of any previous encoding name */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001232 FREE(parser, (void *)parser->m_protocolEncodingName);
Victor Stinner93d0cb52017-08-18 23:43:54 +02001233
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001234 if (encodingName == NULL)
Victor Stinner93d0cb52017-08-18 23:43:54 +02001235 /* No new encoding name */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001236 parser->m_protocolEncodingName = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001237 else {
Victor Stinner93d0cb52017-08-18 23:43:54 +02001238 /* Copy the new encoding name into allocated memory */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001239 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
Benjamin Peterson52b94082019-09-25 21:33:58 -07001240 if (! parser->m_protocolEncodingName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001241 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001242 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001243 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001244}
1245
Fred Drake08317ae2003-10-21 15:38:55 +00001246XML_Parser XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001247XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1248 const XML_Char *encodingName) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001249 XML_Parser parser = oldParser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001250 DTD *newDtd = NULL;
Victor Stinner5ff71322017-06-21 14:39:22 +02001251 DTD *oldDtd;
1252 XML_StartElementHandler oldStartElementHandler;
1253 XML_EndElementHandler oldEndElementHandler;
1254 XML_CharacterDataHandler oldCharacterDataHandler;
1255 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1256 XML_CommentHandler oldCommentHandler;
1257 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1258 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1259 XML_DefaultHandler oldDefaultHandler;
1260 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1261 XML_NotationDeclHandler oldNotationDeclHandler;
1262 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1263 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1264 XML_NotStandaloneHandler oldNotStandaloneHandler;
1265 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1266 XML_SkippedEntityHandler oldSkippedEntityHandler;
1267 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1268 XML_ElementDeclHandler oldElementDeclHandler;
1269 XML_AttlistDeclHandler oldAttlistDeclHandler;
1270 XML_EntityDeclHandler oldEntityDeclHandler;
1271 XML_XmlDeclHandler oldXmlDeclHandler;
Benjamin Peterson52b94082019-09-25 21:33:58 -07001272 ELEMENT_TYPE *oldDeclElementType;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001273
Victor Stinner5ff71322017-06-21 14:39:22 +02001274 void *oldUserData;
1275 void *oldHandlerArg;
1276 XML_Bool oldDefaultExpandInternalEntities;
1277 XML_Parser oldExternalEntityRefHandlerArg;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001278#ifdef XML_DTD
Victor Stinner5ff71322017-06-21 14:39:22 +02001279 enum XML_ParamEntityParsing oldParamEntityParsing;
1280 int oldInEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001281#endif
Victor Stinner5ff71322017-06-21 14:39:22 +02001282 XML_Bool oldns_triplets;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001283 /* Note that the new parser shares the same hash secret as the old
1284 parser, so that dtdCopy and copyEntityTable can lookup values
1285 from hash tables associated with either parser without us having
1286 to worry which hash secrets each table has.
1287 */
Victor Stinner5ff71322017-06-21 14:39:22 +02001288 unsigned long oldhash_secret_salt;
1289
1290 /* Validate the oldParser parameter before we pull everything out of it */
1291 if (oldParser == NULL)
1292 return NULL;
1293
1294 /* Stash the original parser contents on the stack */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001295 oldDtd = parser->m_dtd;
1296 oldStartElementHandler = parser->m_startElementHandler;
1297 oldEndElementHandler = parser->m_endElementHandler;
1298 oldCharacterDataHandler = parser->m_characterDataHandler;
1299 oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1300 oldCommentHandler = parser->m_commentHandler;
1301 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1302 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1303 oldDefaultHandler = parser->m_defaultHandler;
1304 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1305 oldNotationDeclHandler = parser->m_notationDeclHandler;
1306 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1307 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1308 oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1309 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1310 oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1311 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1312 oldElementDeclHandler = parser->m_elementDeclHandler;
1313 oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1314 oldEntityDeclHandler = parser->m_entityDeclHandler;
1315 oldXmlDeclHandler = parser->m_xmlDeclHandler;
1316 oldDeclElementType = parser->m_declElementType;
Victor Stinner5ff71322017-06-21 14:39:22 +02001317
Benjamin Peterson4e211002018-06-26 19:25:45 -07001318 oldUserData = parser->m_userData;
1319 oldHandlerArg = parser->m_handlerArg;
1320 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1321 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
Victor Stinner5ff71322017-06-21 14:39:22 +02001322#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07001323 oldParamEntityParsing = parser->m_paramEntityParsing;
1324 oldInEntityValue = parser->m_prologState.inEntityValue;
Victor Stinner5ff71322017-06-21 14:39:22 +02001325#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07001326 oldns_triplets = parser->m_ns_triplets;
Victor Stinner5ff71322017-06-21 14:39:22 +02001327 /* Note that the new parser shares the same hash secret as the old
1328 parser, so that dtdCopy and copyEntityTable can lookup values
1329 from hash tables associated with either parser without us having
1330 to worry which hash secrets each table has.
1331 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001332 oldhash_secret_salt = parser->m_hash_secret_salt;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001333
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001334#ifdef XML_DTD
Benjamin Peterson52b94082019-09-25 21:33:58 -07001335 if (! context)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001336 newDtd = oldDtd;
1337#endif /* XML_DTD */
1338
1339 /* Note that the magical uses of the pre-processor to make field
1340 access look more like C++ require that `parser' be overwritten
1341 here. This makes this function more painful to follow than it
1342 would be otherwise.
1343 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001344 if (parser->m_ns) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001345 XML_Char tmp[2];
Benjamin Peterson4e211002018-06-26 19:25:45 -07001346 *tmp = parser->m_namespaceSeparator;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001347 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
Benjamin Peterson52b94082019-09-25 21:33:58 -07001348 } else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001349 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001350 }
1351
Benjamin Peterson52b94082019-09-25 21:33:58 -07001352 if (! parser)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001353 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001354
Benjamin Peterson4e211002018-06-26 19:25:45 -07001355 parser->m_startElementHandler = oldStartElementHandler;
1356 parser->m_endElementHandler = oldEndElementHandler;
1357 parser->m_characterDataHandler = oldCharacterDataHandler;
1358 parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1359 parser->m_commentHandler = oldCommentHandler;
1360 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1361 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1362 parser->m_defaultHandler = oldDefaultHandler;
1363 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1364 parser->m_notationDeclHandler = oldNotationDeclHandler;
1365 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1366 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1367 parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1368 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1369 parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1370 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1371 parser->m_elementDeclHandler = oldElementDeclHandler;
1372 parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1373 parser->m_entityDeclHandler = oldEntityDeclHandler;
1374 parser->m_xmlDeclHandler = oldXmlDeclHandler;
1375 parser->m_declElementType = oldDeclElementType;
1376 parser->m_userData = oldUserData;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001377 if (oldUserData == oldHandlerArg)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001378 parser->m_handlerArg = parser->m_userData;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001379 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001380 parser->m_handlerArg = parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001381 if (oldExternalEntityRefHandlerArg != oldParser)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001382 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1383 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1384 parser->m_ns_triplets = oldns_triplets;
1385 parser->m_hash_secret_salt = oldhash_secret_salt;
1386 parser->m_parentParser = oldParser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001387#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07001388 parser->m_paramEntityParsing = oldParamEntityParsing;
1389 parser->m_prologState.inEntityValue = oldInEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001390 if (context) {
1391#endif /* XML_DTD */
Benjamin Peterson52b94082019-09-25 21:33:58 -07001392 if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
1393 || ! setContext(parser, context)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001394 XML_ParserFree(parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001395 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001396 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001397 parser->m_processor = externalEntityInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001398#ifdef XML_DTD
Benjamin Peterson52b94082019-09-25 21:33:58 -07001399 } else {
1400 /* The DTD instance referenced by parser->m_dtd is shared between the
1401 document's root parser and external PE parsers, therefore one does not
1402 need to call setContext. In addition, one also *must* not call
1403 setContext, because this would overwrite existing prefix->binding
1404 pointers in parser->m_dtd with ones that get destroyed with the external
1405 PE parser. This would leave those prefixes with dangling pointers.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001406 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07001407 parser->m_isParamEntity = XML_TRUE;
1408 XmlPrologStateInitExternalEntity(&parser->m_prologState);
1409 parser->m_processor = externalParEntInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001410 }
1411#endif /* XML_DTD */
1412 return parser;
1413}
1414
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001415static void FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001416destroyBindings(BINDING *bindings, XML_Parser parser) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001417 for (;;) {
1418 BINDING *b = bindings;
Benjamin Peterson52b94082019-09-25 21:33:58 -07001419 if (! b)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001420 break;
1421 bindings = b->nextTagBinding;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001422 FREE(parser, b->uri);
1423 FREE(parser, b);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001424 }
1425}
1426
Fred Drake08317ae2003-10-21 15:38:55 +00001427void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001428XML_ParserFree(XML_Parser parser) {
Fred Drake31d485c2004-08-03 07:06:22 +00001429 TAG *tagList;
1430 OPEN_INTERNAL_ENTITY *entityList;
1431 if (parser == NULL)
1432 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001433 /* free m_tagStack and m_freeTagList */
1434 tagList = parser->m_tagStack;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001435 for (;;) {
1436 TAG *p;
Fred Drake31d485c2004-08-03 07:06:22 +00001437 if (tagList == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001438 if (parser->m_freeTagList == NULL)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001439 break;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001440 tagList = parser->m_freeTagList;
1441 parser->m_freeTagList = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001442 }
Fred Drake31d485c2004-08-03 07:06:22 +00001443 p = tagList;
1444 tagList = tagList->parent;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001445 FREE(parser, p->buf);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001446 destroyBindings(p->bindings, parser);
Benjamin Peterson4e211002018-06-26 19:25:45 -07001447 FREE(parser, p);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001448 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001449 /* free m_openInternalEntities and m_freeInternalEntities */
1450 entityList = parser->m_openInternalEntities;
Fred Drake31d485c2004-08-03 07:06:22 +00001451 for (;;) {
1452 OPEN_INTERNAL_ENTITY *openEntity;
1453 if (entityList == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001454 if (parser->m_freeInternalEntities == NULL)
Fred Drake31d485c2004-08-03 07:06:22 +00001455 break;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001456 entityList = parser->m_freeInternalEntities;
1457 parser->m_freeInternalEntities = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00001458 }
1459 openEntity = entityList;
1460 entityList = entityList->next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001461 FREE(parser, openEntity);
Fred Drake31d485c2004-08-03 07:06:22 +00001462 }
1463
Benjamin Peterson4e211002018-06-26 19:25:45 -07001464 destroyBindings(parser->m_freeBindingList, parser);
1465 destroyBindings(parser->m_inheritedBindings, parser);
1466 poolDestroy(&parser->m_tempPool);
1467 poolDestroy(&parser->m_temp2Pool);
1468 FREE(parser, (void *)parser->m_protocolEncodingName);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001469#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001470 /* external parameter entity parsers share the DTD structure
1471 parser->m_dtd with the root parser, so we must not destroy it
1472 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07001473 if (! parser->m_isParamEntity && parser->m_dtd)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001474#else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001475 if (parser->m_dtd)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001476#endif /* XML_DTD */
Benjamin Peterson52b94082019-09-25 21:33:58 -07001477 dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser,
1478 &parser->m_mem);
Benjamin Peterson4e211002018-06-26 19:25:45 -07001479 FREE(parser, (void *)parser->m_atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001480#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -07001481 FREE(parser, (void *)parser->m_attInfo);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001482#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07001483 FREE(parser, parser->m_groupConnector);
1484 FREE(parser, parser->m_buffer);
1485 FREE(parser, parser->m_dataBuf);
1486 FREE(parser, parser->m_nsAtts);
1487 FREE(parser, parser->m_unknownEncodingMem);
1488 if (parser->m_unknownEncodingRelease)
1489 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1490 FREE(parser, parser);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001491}
1492
Fred Drake08317ae2003-10-21 15:38:55 +00001493void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001494XML_UseParserAsHandlerArg(XML_Parser parser) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001495 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001496 parser->m_handlerArg = parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001497}
1498
Fred Drake08317ae2003-10-21 15:38:55 +00001499enum XML_Error XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001500XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001501 if (parser == NULL)
1502 return XML_ERROR_INVALID_ARGUMENT;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001503#ifdef XML_DTD
1504 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Benjamin Peterson52b94082019-09-25 21:33:58 -07001505 if (parser->m_parsingStatus.parsing == XML_PARSING
1506 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001507 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001508 parser->m_useForeignDTD = useDTD;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001509 return XML_ERROR_NONE;
1510#else
Miss Islington (bot)27067852021-08-29 07:32:50 -07001511 UNUSED_P(useDTD);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001512 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1513#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001514}
1515
Fred Drake08317ae2003-10-21 15:38:55 +00001516void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001517XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001518 if (parser == NULL)
1519 return;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001520 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Benjamin Peterson52b94082019-09-25 21:33:58 -07001521 if (parser->m_parsingStatus.parsing == XML_PARSING
1522 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001523 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001524 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001525}
1526
Fred Drake08317ae2003-10-21 15:38:55 +00001527void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001528XML_SetUserData(XML_Parser parser, void *p) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001529 if (parser == NULL)
1530 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001531 if (parser->m_handlerArg == parser->m_userData)
1532 parser->m_handlerArg = parser->m_userData = p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001533 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001534 parser->m_userData = p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001535}
1536
Fred Drake08317ae2003-10-21 15:38:55 +00001537enum XML_Status XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001538XML_SetBase(XML_Parser parser, const XML_Char *p) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001539 if (parser == NULL)
1540 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001541 if (p) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001542 p = poolCopyString(&parser->m_dtd->pool, p);
Benjamin Peterson52b94082019-09-25 21:33:58 -07001543 if (! p)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001544 return XML_STATUS_ERROR;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001545 parser->m_curBase = p;
Benjamin Peterson52b94082019-09-25 21:33:58 -07001546 } else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001547 parser->m_curBase = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001548 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001549}
1550
Benjamin Peterson52b94082019-09-25 21:33:58 -07001551const XML_Char *XMLCALL
1552XML_GetBase(XML_Parser parser) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001553 if (parser == NULL)
1554 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001555 return parser->m_curBase;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001556}
1557
Fred Drake08317ae2003-10-21 15:38:55 +00001558int XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001559XML_GetSpecifiedAttributeCount(XML_Parser parser) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001560 if (parser == NULL)
1561 return -1;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001562 return parser->m_nSpecifiedAtts;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001563}
1564
Fred Drake08317ae2003-10-21 15:38:55 +00001565int XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001566XML_GetIdAttributeIndex(XML_Parser parser) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001567 if (parser == NULL)
1568 return -1;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001569 return parser->m_idAttIndex;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001570}
1571
#ifdef XML_ATTR_INFO
/* Return the parser's m_attInfo pointer, or NULL for a NULL parser.
   Only compiled when XML_ATTR_INFO is defined. */
const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser) {
  return (parser != NULL) ? parser->m_attInfo : NULL;
}
#endif
1580
Fred Drake08317ae2003-10-21 15:38:55 +00001581void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001582XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
1583 XML_EndElementHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001584 if (parser == NULL)
1585 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001586 parser->m_startElementHandler = start;
1587 parser->m_endElementHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001588}
1589
Fred Drake08317ae2003-10-21 15:38:55 +00001590void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001591XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001592 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001593 parser->m_startElementHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001594}
1595
Fred Drake08317ae2003-10-21 15:38:55 +00001596void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001597XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001598 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001599 parser->m_endElementHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001600}
1601
Fred Drake08317ae2003-10-21 15:38:55 +00001602void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001603XML_SetCharacterDataHandler(XML_Parser parser,
Benjamin Peterson52b94082019-09-25 21:33:58 -07001604 XML_CharacterDataHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001605 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001606 parser->m_characterDataHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001607}
1608
Fred Drake08317ae2003-10-21 15:38:55 +00001609void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001610XML_SetProcessingInstructionHandler(XML_Parser parser,
Benjamin Peterson52b94082019-09-25 21:33:58 -07001611 XML_ProcessingInstructionHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001612 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001613 parser->m_processingInstructionHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001614}
1615
Fred Drake08317ae2003-10-21 15:38:55 +00001616void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001617XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001618 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001619 parser->m_commentHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001620}
1621
Fred Drake08317ae2003-10-21 15:38:55 +00001622void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001623XML_SetCdataSectionHandler(XML_Parser parser,
1624 XML_StartCdataSectionHandler start,
Benjamin Peterson52b94082019-09-25 21:33:58 -07001625 XML_EndCdataSectionHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001626 if (parser == NULL)
1627 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001628 parser->m_startCdataSectionHandler = start;
1629 parser->m_endCdataSectionHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001630}
1631
Fred Drake08317ae2003-10-21 15:38:55 +00001632void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001633XML_SetStartCdataSectionHandler(XML_Parser parser,
1634 XML_StartCdataSectionHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001635 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001636 parser->m_startCdataSectionHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001637}
1638
Fred Drake08317ae2003-10-21 15:38:55 +00001639void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001640XML_SetEndCdataSectionHandler(XML_Parser parser,
1641 XML_EndCdataSectionHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001642 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001643 parser->m_endCdataSectionHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001644}
1645
Fred Drake08317ae2003-10-21 15:38:55 +00001646void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001647XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001648 if (parser == NULL)
1649 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001650 parser->m_defaultHandler = handler;
1651 parser->m_defaultExpandInternalEntities = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001652}
1653
Fred Drake08317ae2003-10-21 15:38:55 +00001654void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001655XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001656 if (parser == NULL)
1657 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001658 parser->m_defaultHandler = handler;
1659 parser->m_defaultExpandInternalEntities = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001660}
1661
Fred Drake08317ae2003-10-21 15:38:55 +00001662void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001663XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
1664 XML_EndDoctypeDeclHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001665 if (parser == NULL)
1666 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001667 parser->m_startDoctypeDeclHandler = start;
1668 parser->m_endDoctypeDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001669}
1670
Fred Drake08317ae2003-10-21 15:38:55 +00001671void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001672XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1673 XML_StartDoctypeDeclHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001674 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001675 parser->m_startDoctypeDeclHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001676}
1677
Fred Drake08317ae2003-10-21 15:38:55 +00001678void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001679XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001680 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001681 parser->m_endDoctypeDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001682}
1683
Fred Drake08317ae2003-10-21 15:38:55 +00001684void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001685XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
Benjamin Peterson52b94082019-09-25 21:33:58 -07001686 XML_UnparsedEntityDeclHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001687 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001688 parser->m_unparsedEntityDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001689}
1690
Fred Drake08317ae2003-10-21 15:38:55 +00001691void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001692XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001693 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001694 parser->m_notationDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001695}
1696
Fred Drake08317ae2003-10-21 15:38:55 +00001697void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001698XML_SetNamespaceDeclHandler(XML_Parser parser,
1699 XML_StartNamespaceDeclHandler start,
Benjamin Peterson52b94082019-09-25 21:33:58 -07001700 XML_EndNamespaceDeclHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001701 if (parser == NULL)
1702 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001703 parser->m_startNamespaceDeclHandler = start;
1704 parser->m_endNamespaceDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001705}
1706
Fred Drake08317ae2003-10-21 15:38:55 +00001707void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001708XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1709 XML_StartNamespaceDeclHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001710 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001711 parser->m_startNamespaceDeclHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001712}
1713
Fred Drake08317ae2003-10-21 15:38:55 +00001714void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001715XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1716 XML_EndNamespaceDeclHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001717 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001718 parser->m_endNamespaceDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001719}
1720
Fred Drake08317ae2003-10-21 15:38:55 +00001721void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001722XML_SetNotStandaloneHandler(XML_Parser parser,
Benjamin Peterson52b94082019-09-25 21:33:58 -07001723 XML_NotStandaloneHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001724 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001725 parser->m_notStandaloneHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001726}
1727
Fred Drake08317ae2003-10-21 15:38:55 +00001728void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001729XML_SetExternalEntityRefHandler(XML_Parser parser,
Benjamin Peterson52b94082019-09-25 21:33:58 -07001730 XML_ExternalEntityRefHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001731 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001732 parser->m_externalEntityRefHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001733}
1734
Fred Drake08317ae2003-10-21 15:38:55 +00001735void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001736XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001737 if (parser == NULL)
1738 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001739 if (arg)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001740 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001741 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07001742 parser->m_externalEntityRefHandlerArg = parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001743}
1744
Fred Drake08317ae2003-10-21 15:38:55 +00001745void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001746XML_SetSkippedEntityHandler(XML_Parser parser,
Benjamin Peterson52b94082019-09-25 21:33:58 -07001747 XML_SkippedEntityHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001748 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001749 parser->m_skippedEntityHandler = handler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001750}
1751
Fred Drake08317ae2003-10-21 15:38:55 +00001752void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001753XML_SetUnknownEncodingHandler(XML_Parser parser,
Benjamin Peterson52b94082019-09-25 21:33:58 -07001754 XML_UnknownEncodingHandler handler, void *data) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001755 if (parser == NULL)
1756 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001757 parser->m_unknownEncodingHandler = handler;
1758 parser->m_unknownEncodingHandlerData = data;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001759}
1760
Fred Drake08317ae2003-10-21 15:38:55 +00001761void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001762XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001763 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001764 parser->m_elementDeclHandler = eldecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001765}
1766
Fred Drake08317ae2003-10-21 15:38:55 +00001767void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001768XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001769 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001770 parser->m_attlistDeclHandler = attdecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001771}
1772
Fred Drake08317ae2003-10-21 15:38:55 +00001773void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001774XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001775 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001776 parser->m_entityDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001777}
1778
Fred Drake08317ae2003-10-21 15:38:55 +00001779void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001780XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001781 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07001782 parser->m_xmlDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001783}
1784
Fred Drake08317ae2003-10-21 15:38:55 +00001785int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001786XML_SetParamEntityParsing(XML_Parser parser,
Benjamin Peterson52b94082019-09-25 21:33:58 -07001787 enum XML_ParamEntityParsing peParsing) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001788 if (parser == NULL)
1789 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001790 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Benjamin Peterson52b94082019-09-25 21:33:58 -07001791 if (parser->m_parsingStatus.parsing == XML_PARSING
1792 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001793 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001794#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07001795 parser->m_paramEntityParsing = peParsing;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001796 return 1;
1797#else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001798 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001799#endif
1800}
1801
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001802int XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001803XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001804 if (parser == NULL)
1805 return 0;
1806 if (parser->m_parentParser)
1807 return XML_SetHashSalt(parser->m_parentParser, hash_salt);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001808 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Benjamin Peterson52b94082019-09-25 21:33:58 -07001809 if (parser->m_parsingStatus.parsing == XML_PARSING
1810 || parser->m_parsingStatus.parsing == XML_SUSPENDED)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001811 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001812 parser->m_hash_secret_salt = hash_salt;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001813 return 1;
1814}
1815
Fred Drake08317ae2003-10-21 15:38:55 +00001816enum XML_Status XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001817XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001818 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02001819 if (parser != NULL)
1820 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
Victor Stinner5ff71322017-06-21 14:39:22 +02001821 return XML_STATUS_ERROR;
1822 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001823 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001824 case XML_SUSPENDED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001825 parser->m_errorCode = XML_ERROR_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00001826 return XML_STATUS_ERROR;
1827 case XML_FINISHED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001828 parser->m_errorCode = XML_ERROR_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001829 return XML_STATUS_ERROR;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001830 case XML_INITIALIZED:
Benjamin Peterson52b94082019-09-25 21:33:58 -07001831 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001832 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001833 return XML_STATUS_ERROR;
1834 }
Benjamin Peterson5033aa72018-09-10 21:04:00 -07001835 /* fall through */
Fred Drake31d485c2004-08-03 07:06:22 +00001836 default:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001837 parser->m_parsingStatus.parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00001838 }
1839
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001840 if (len == 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001841 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
Benjamin Peterson52b94082019-09-25 21:33:58 -07001842 if (! isFinal)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001843 return XML_STATUS_OK;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001844 parser->m_positionPtr = parser->m_bufferPtr;
1845 parser->m_parseEndPtr = parser->m_bufferEnd;
Fred Drake31d485c2004-08-03 07:06:22 +00001846
1847 /* If data are left over from last buffer, and we now know that these
1848 data are the final chunk of input, then we have to check them again
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001849 to detect errors based on that fact.
Fred Drake31d485c2004-08-03 07:06:22 +00001850 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07001851 parser->m_errorCode
1852 = parser->m_processor(parser, parser->m_bufferPtr,
1853 parser->m_parseEndPtr, &parser->m_bufferPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00001854
Benjamin Peterson4e211002018-06-26 19:25:45 -07001855 if (parser->m_errorCode == XML_ERROR_NONE) {
1856 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001857 case XML_SUSPENDED:
Victor Stinner93d0cb52017-08-18 23:43:54 +02001858 /* It is hard to be certain, but it seems that this case
1859 * cannot occur. This code is cleaning up a previous parse
1860 * with no new data (since len == 0). Changing the parsing
1861 * state requires getting to execute a handler function, and
1862 * there doesn't seem to be an opportunity for that while in
1863 * this circumstance.
1864 *
1865 * Given the uncertainty, we retain the code but exclude it
1866 * from coverage tests.
1867 *
1868 * LCOV_EXCL_START
1869 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07001870 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
1871 parser->m_bufferPtr, &parser->m_position);
Benjamin Peterson4e211002018-06-26 19:25:45 -07001872 parser->m_positionPtr = parser->m_bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00001873 return XML_STATUS_SUSPENDED;
Victor Stinner93d0cb52017-08-18 23:43:54 +02001874 /* LCOV_EXCL_STOP */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001875 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00001876 case XML_PARSING:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001877 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001878 /* fall through */
1879 default:
1880 return XML_STATUS_OK;
1881 }
1882 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001883 parser->m_eventEndPtr = parser->m_eventPtr;
1884 parser->m_processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001885 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001886 }
1887#ifndef XML_CONTEXT_BYTES
Benjamin Peterson4e211002018-06-26 19:25:45 -07001888 else if (parser->m_bufferPtr == parser->m_bufferEnd) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001889 const char *end;
1890 int nLeftOver;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001891 enum XML_Status result;
Victor Stinner5ff71322017-06-21 14:39:22 +02001892 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
Miss Islington (bot)27067852021-08-29 07:32:50 -07001893 if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07001894 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1895 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1896 parser->m_processor = errorProcessor;
1897 return XML_STATUS_ERROR;
Victor Stinner5ff71322017-06-21 14:39:22 +02001898 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001899 parser->m_parseEndByteIndex += len;
1900 parser->m_positionPtr = s;
1901 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
Fred Drake31d485c2004-08-03 07:06:22 +00001902
Benjamin Peterson52b94082019-09-25 21:33:58 -07001903 parser->m_errorCode
1904 = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end);
Fred Drake31d485c2004-08-03 07:06:22 +00001905
Benjamin Peterson4e211002018-06-26 19:25:45 -07001906 if (parser->m_errorCode != XML_ERROR_NONE) {
1907 parser->m_eventEndPtr = parser->m_eventPtr;
1908 parser->m_processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001909 return XML_STATUS_ERROR;
Benjamin Peterson52b94082019-09-25 21:33:58 -07001910 } else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001911 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001912 case XML_SUSPENDED:
1913 result = XML_STATUS_SUSPENDED;
1914 break;
1915 case XML_INITIALIZED:
1916 case XML_PARSING:
Fred Drake31d485c2004-08-03 07:06:22 +00001917 if (isFinal) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001918 parser->m_parsingStatus.parsing = XML_FINISHED;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001919 return XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001920 }
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001921 /* fall through */
1922 default:
1923 result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001924 }
1925 }
1926
Benjamin Peterson52b94082019-09-25 21:33:58 -07001927 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
1928 &parser->m_position);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001929 nLeftOver = s + len - end;
1930 if (nLeftOver) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07001931 if (parser->m_buffer == NULL
1932 || nLeftOver > parser->m_bufferLim - parser->m_buffer) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001933 /* avoid _signed_ integer overflow */
1934 char *temp = NULL;
1935 const int bytesToAllocate = (int)((unsigned)len * 2U);
1936 if (bytesToAllocate > 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001937 temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate);
Victor Stinner5ff71322017-06-21 14:39:22 +02001938 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001939 if (temp == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001940 parser->m_errorCode = XML_ERROR_NO_MEMORY;
1941 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
1942 parser->m_processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001943 return XML_STATUS_ERROR;
1944 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001945 parser->m_buffer = temp;
1946 parser->m_bufferLim = parser->m_buffer + bytesToAllocate;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001947 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001948 memcpy(parser->m_buffer, end, nLeftOver);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001949 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07001950 parser->m_bufferPtr = parser->m_buffer;
1951 parser->m_bufferEnd = parser->m_buffer + nLeftOver;
1952 parser->m_positionPtr = parser->m_bufferPtr;
1953 parser->m_parseEndPtr = parser->m_bufferEnd;
1954 parser->m_eventPtr = parser->m_bufferPtr;
1955 parser->m_eventEndPtr = parser->m_bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00001956 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001957 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07001958#endif /* not defined XML_CONTEXT_BYTES */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001959 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001960 void *buff = XML_GetBuffer(parser, len);
1961 if (buff == NULL)
1962 return XML_STATUS_ERROR;
1963 else {
1964 memcpy(buff, s, len);
1965 return XML_ParseBuffer(parser, len, isFinal);
1966 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001967 }
1968}
1969
Fred Drake08317ae2003-10-21 15:38:55 +00001970enum XML_Status XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07001971XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
Fred Drake31d485c2004-08-03 07:06:22 +00001972 const char *start;
Neal Norwitz52ca0dd2006-01-07 21:21:16 +00001973 enum XML_Status result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001974
Victor Stinner5ff71322017-06-21 14:39:22 +02001975 if (parser == NULL)
1976 return XML_STATUS_ERROR;
Benjamin Peterson4e211002018-06-26 19:25:45 -07001977 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001978 case XML_SUSPENDED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001979 parser->m_errorCode = XML_ERROR_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00001980 return XML_STATUS_ERROR;
1981 case XML_FINISHED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001982 parser->m_errorCode = XML_ERROR_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001983 return XML_STATUS_ERROR;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001984 case XML_INITIALIZED:
Miss Islington (bot)27067852021-08-29 07:32:50 -07001985 /* Has someone called XML_GetBuffer successfully before? */
1986 if (! parser->m_bufferPtr) {
1987 parser->m_errorCode = XML_ERROR_NO_BUFFER;
1988 return XML_STATUS_ERROR;
1989 }
1990
Benjamin Peterson52b94082019-09-25 21:33:58 -07001991 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07001992 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001993 return XML_STATUS_ERROR;
1994 }
Benjamin Peterson5033aa72018-09-10 21:04:00 -07001995 /* fall through */
Fred Drake31d485c2004-08-03 07:06:22 +00001996 default:
Benjamin Peterson4e211002018-06-26 19:25:45 -07001997 parser->m_parsingStatus.parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00001998 }
1999
Benjamin Peterson4e211002018-06-26 19:25:45 -07002000 start = parser->m_bufferPtr;
2001 parser->m_positionPtr = start;
2002 parser->m_bufferEnd += len;
2003 parser->m_parseEndPtr = parser->m_bufferEnd;
2004 parser->m_parseEndByteIndex += len;
2005 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
Fred Drake31d485c2004-08-03 07:06:22 +00002006
Benjamin Peterson52b94082019-09-25 21:33:58 -07002007 parser->m_errorCode = parser->m_processor(
2008 parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00002009
Benjamin Peterson4e211002018-06-26 19:25:45 -07002010 if (parser->m_errorCode != XML_ERROR_NONE) {
2011 parser->m_eventEndPtr = parser->m_eventPtr;
2012 parser->m_processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002013 return XML_STATUS_ERROR;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002014 } else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002015 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002016 case XML_SUSPENDED:
2017 result = XML_STATUS_SUSPENDED;
2018 break;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002019 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00002020 case XML_PARSING:
2021 if (isFinal) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002022 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002023 return result;
2024 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07002025 default:; /* should not happen */
Fred Drake31d485c2004-08-03 07:06:22 +00002026 }
2027 }
2028
Benjamin Peterson52b94082019-09-25 21:33:58 -07002029 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2030 parser->m_bufferPtr, &parser->m_position);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002031 parser->m_positionPtr = parser->m_bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00002032 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002033}
2034
Benjamin Peterson52b94082019-09-25 21:33:58 -07002035void *XMLCALL
2036XML_GetBuffer(XML_Parser parser, int len) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002037 if (parser == NULL)
2038 return NULL;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002039 if (len < 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002040 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002041 return NULL;
2042 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002043 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002044 case XML_SUSPENDED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002045 parser->m_errorCode = XML_ERROR_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00002046 return NULL;
2047 case XML_FINISHED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002048 parser->m_errorCode = XML_ERROR_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002049 return NULL;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002050 default:;
Fred Drake31d485c2004-08-03 07:06:22 +00002051 }
2052
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002053 if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002054#ifdef XML_CONTEXT_BYTES
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002055 int keep;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002056#endif /* defined XML_CONTEXT_BYTES */
Victor Stinner23ec4b52017-06-15 00:54:36 +02002057 /* Do not invoke signed arithmetic overflow: */
Benjamin Peterson52b94082019-09-25 21:33:58 -07002058 int neededSize = (int)((unsigned)len
2059 + (unsigned)EXPAT_SAFE_PTR_DIFF(
2060 parser->m_bufferEnd, parser->m_bufferPtr));
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002061 if (neededSize < 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002062 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002063 return NULL;
2064 }
2065#ifdef XML_CONTEXT_BYTES
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002066 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002067 if (keep > XML_CONTEXT_BYTES)
2068 keep = XML_CONTEXT_BYTES;
2069 neededSize += keep;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002070#endif /* defined XML_CONTEXT_BYTES */
2071 if (neededSize
2072 <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002073#ifdef XML_CONTEXT_BYTES
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002074 if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07002075 int offset
2076 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
2077 - keep;
2078 /* The buffer pointers cannot be NULL here; we have at least some bytes
2079 * in the buffer */
2080 memmove(parser->m_buffer, &parser->m_buffer[offset],
2081 parser->m_bufferEnd - parser->m_bufferPtr + keep);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002082 parser->m_bufferEnd -= offset;
2083 parser->m_bufferPtr -= offset;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002084 }
2085#else
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002086 if (parser->m_buffer && parser->m_bufferPtr) {
2087 memmove(parser->m_buffer, parser->m_bufferPtr,
2088 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
Benjamin Peterson52b94082019-09-25 21:33:58 -07002089 parser->m_bufferEnd
2090 = parser->m_buffer
2091 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002092 parser->m_bufferPtr = parser->m_buffer;
2093 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07002094#endif /* not defined XML_CONTEXT_BYTES */
2095 } else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002096 char *newBuf;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002097 int bufferSize
2098 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002099 if (bufferSize == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002100 bufferSize = INIT_BUFFER_SIZE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002101 do {
Victor Stinner23ec4b52017-06-15 00:54:36 +02002102 /* Do not invoke signed arithmetic overflow: */
Benjamin Peterson52b94082019-09-25 21:33:58 -07002103 bufferSize = (int)(2U * (unsigned)bufferSize);
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002104 } while (bufferSize < neededSize && bufferSize > 0);
2105 if (bufferSize <= 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002106 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002107 return NULL;
2108 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002109 newBuf = (char *)MALLOC(parser, bufferSize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002110 if (newBuf == 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002111 parser->m_errorCode = XML_ERROR_NO_MEMORY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002112 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002113 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002114 parser->m_bufferLim = newBuf + bufferSize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002115#ifdef XML_CONTEXT_BYTES
Benjamin Peterson4e211002018-06-26 19:25:45 -07002116 if (parser->m_bufferPtr) {
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002117 memcpy(newBuf, &parser->m_bufferPtr[-keep],
Benjamin Peterson52b94082019-09-25 21:33:58 -07002118 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2119 + keep);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002120 FREE(parser, parser->m_buffer);
2121 parser->m_buffer = newBuf;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002122 parser->m_bufferEnd
2123 = parser->m_buffer
2124 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2125 + keep;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002126 parser->m_bufferPtr = parser->m_buffer + keep;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002127 } else {
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002128 /* This must be a brand new buffer with no data in it yet */
2129 parser->m_bufferEnd = newBuf;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002130 parser->m_bufferPtr = parser->m_buffer = newBuf;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002131 }
2132#else
Benjamin Peterson4e211002018-06-26 19:25:45 -07002133 if (parser->m_bufferPtr) {
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002134 memcpy(newBuf, parser->m_bufferPtr,
2135 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
Benjamin Peterson4e211002018-06-26 19:25:45 -07002136 FREE(parser, parser->m_buffer);
Benjamin Peterson52b94082019-09-25 21:33:58 -07002137 parser->m_bufferEnd
2138 = newBuf
2139 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2140 } else {
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002141 /* This must be a brand new buffer with no data in it yet */
2142 parser->m_bufferEnd = newBuf;
2143 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002144 parser->m_bufferPtr = parser->m_buffer = newBuf;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002145#endif /* not defined XML_CONTEXT_BYTES */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002146 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002147 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2148 parser->m_positionPtr = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002149 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002150 return parser->m_bufferEnd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002151}
2152
Fred Drake31d485c2004-08-03 07:06:22 +00002153enum XML_Status XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002154XML_StopParser(XML_Parser parser, XML_Bool resumable) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002155 if (parser == NULL)
2156 return XML_STATUS_ERROR;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002157 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002158 case XML_SUSPENDED:
2159 if (resumable) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002160 parser->m_errorCode = XML_ERROR_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00002161 return XML_STATUS_ERROR;
2162 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002163 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002164 break;
2165 case XML_FINISHED:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002166 parser->m_errorCode = XML_ERROR_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002167 return XML_STATUS_ERROR;
2168 default:
2169 if (resumable) {
2170#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07002171 if (parser->m_isParamEntity) {
2172 parser->m_errorCode = XML_ERROR_SUSPEND_PE;
Fred Drake31d485c2004-08-03 07:06:22 +00002173 return XML_STATUS_ERROR;
2174 }
2175#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07002176 parser->m_parsingStatus.parsing = XML_SUSPENDED;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002177 } else
Benjamin Peterson4e211002018-06-26 19:25:45 -07002178 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002179 }
2180 return XML_STATUS_OK;
2181}
2182
2183enum XML_Status XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002184XML_ResumeParser(XML_Parser parser) {
Neal Norwitz52ca0dd2006-01-07 21:21:16 +00002185 enum XML_Status result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00002186
Victor Stinner5ff71322017-06-21 14:39:22 +02002187 if (parser == NULL)
2188 return XML_STATUS_ERROR;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002189 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2190 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00002191 return XML_STATUS_ERROR;
2192 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002193 parser->m_parsingStatus.parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00002194
Benjamin Peterson52b94082019-09-25 21:33:58 -07002195 parser->m_errorCode = parser->m_processor(
2196 parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00002197
Benjamin Peterson4e211002018-06-26 19:25:45 -07002198 if (parser->m_errorCode != XML_ERROR_NONE) {
2199 parser->m_eventEndPtr = parser->m_eventPtr;
2200 parser->m_processor = errorProcessor;
Fred Drake31d485c2004-08-03 07:06:22 +00002201 return XML_STATUS_ERROR;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002202 } else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002203 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002204 case XML_SUSPENDED:
2205 result = XML_STATUS_SUSPENDED;
2206 break;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002207 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00002208 case XML_PARSING:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002209 if (parser->m_parsingStatus.finalBuffer) {
2210 parser->m_parsingStatus.parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002211 return result;
2212 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07002213 default:;
Fred Drake31d485c2004-08-03 07:06:22 +00002214 }
2215 }
2216
Benjamin Peterson52b94082019-09-25 21:33:58 -07002217 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2218 parser->m_bufferPtr, &parser->m_position);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002219 parser->m_positionPtr = parser->m_bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00002220 return result;
2221}
2222
2223void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002224XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002225 if (parser == NULL)
2226 return;
Fred Drake31d485c2004-08-03 07:06:22 +00002227 assert(status != NULL);
2228 *status = parser->m_parsingStatus;
2229}
2230
Fred Drake08317ae2003-10-21 15:38:55 +00002231enum XML_Error XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002232XML_GetErrorCode(XML_Parser parser) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002233 if (parser == NULL)
2234 return XML_ERROR_INVALID_ARGUMENT;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002235 return parser->m_errorCode;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002236}
2237
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002238XML_Index XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002239XML_GetCurrentByteIndex(XML_Parser parser) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002240 if (parser == NULL)
2241 return -1;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002242 if (parser->m_eventPtr)
Benjamin Peterson52b94082019-09-25 21:33:58 -07002243 return (XML_Index)(parser->m_parseEndByteIndex
2244 - (parser->m_parseEndPtr - parser->m_eventPtr));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002245 return -1;
2246}
2247
Fred Drake08317ae2003-10-21 15:38:55 +00002248int XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002249XML_GetCurrentByteCount(XML_Parser parser) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002250 if (parser == NULL)
2251 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002252 if (parser->m_eventEndPtr && parser->m_eventPtr)
2253 return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002254 return 0;
2255}
2256
Benjamin Peterson52b94082019-09-25 21:33:58 -07002257const char *XMLCALL
2258XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002259#ifdef XML_CONTEXT_BYTES
Victor Stinner5ff71322017-06-21 14:39:22 +02002260 if (parser == NULL)
2261 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002262 if (parser->m_eventPtr && parser->m_buffer) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002263 if (offset != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07002264 *offset = (int)(parser->m_eventPtr - parser->m_buffer);
Victor Stinner5ff71322017-06-21 14:39:22 +02002265 if (size != NULL)
Benjamin Peterson52b94082019-09-25 21:33:58 -07002266 *size = (int)(parser->m_bufferEnd - parser->m_buffer);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002267 return parser->m_buffer;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002268 }
Victor Stinner5ff71322017-06-21 14:39:22 +02002269#else
2270 (void)parser;
2271 (void)offset;
2272 (void)size;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002273#endif /* defined XML_CONTEXT_BYTES */
Miss Islington (bot)27067852021-08-29 07:32:50 -07002274 return (const char *)0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002275}
2276
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002277XML_Size XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002278XML_GetCurrentLineNumber(XML_Parser parser) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002279 if (parser == NULL)
2280 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002281 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07002282 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2283 parser->m_eventPtr, &parser->m_position);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002284 parser->m_positionPtr = parser->m_eventPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002285 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002286 return parser->m_position.lineNumber + 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002287}
2288
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002289XML_Size XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002290XML_GetCurrentColumnNumber(XML_Parser parser) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002291 if (parser == NULL)
2292 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002293 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07002294 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2295 parser->m_eventPtr, &parser->m_position);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002296 parser->m_positionPtr = parser->m_eventPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002297 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002298 return parser->m_position.columnNumber;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002299}
2300
Fred Drake08317ae2003-10-21 15:38:55 +00002301void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002302XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002303 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07002304 FREE(parser, model);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002305}
2306
Benjamin Peterson52b94082019-09-25 21:33:58 -07002307void *XMLCALL
2308XML_MemMalloc(XML_Parser parser, size_t size) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002309 if (parser == NULL)
2310 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002311 return MALLOC(parser, size);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002312}
2313
Benjamin Peterson52b94082019-09-25 21:33:58 -07002314void *XMLCALL
2315XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002316 if (parser == NULL)
2317 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002318 return REALLOC(parser, ptr, size);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002319}
2320
Fred Drake08317ae2003-10-21 15:38:55 +00002321void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002322XML_MemFree(XML_Parser parser, void *ptr) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002323 if (parser != NULL)
Benjamin Peterson4e211002018-06-26 19:25:45 -07002324 FREE(parser, ptr);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002325}
2326
Fred Drake08317ae2003-10-21 15:38:55 +00002327void XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002328XML_DefaultCurrent(XML_Parser parser) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002329 if (parser == NULL)
2330 return;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002331 if (parser->m_defaultHandler) {
2332 if (parser->m_openInternalEntities)
Benjamin Peterson52b94082019-09-25 21:33:58 -07002333 reportDefault(parser, parser->m_internalEncoding,
Benjamin Peterson4e211002018-06-26 19:25:45 -07002334 parser->m_openInternalEntities->internalEventPtr,
2335 parser->m_openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002336 else
Benjamin Peterson52b94082019-09-25 21:33:58 -07002337 reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2338 parser->m_eventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002339 }
2340}
2341
Benjamin Peterson52b94082019-09-25 21:33:58 -07002342const XML_LChar *XMLCALL
2343XML_ErrorString(enum XML_Error code) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002344 switch (code) {
2345 case XML_ERROR_NONE:
2346 return NULL;
2347 case XML_ERROR_NO_MEMORY:
2348 return XML_L("out of memory");
2349 case XML_ERROR_SYNTAX:
2350 return XML_L("syntax error");
2351 case XML_ERROR_NO_ELEMENTS:
2352 return XML_L("no element found");
2353 case XML_ERROR_INVALID_TOKEN:
2354 return XML_L("not well-formed (invalid token)");
2355 case XML_ERROR_UNCLOSED_TOKEN:
2356 return XML_L("unclosed token");
2357 case XML_ERROR_PARTIAL_CHAR:
2358 return XML_L("partial character");
2359 case XML_ERROR_TAG_MISMATCH:
2360 return XML_L("mismatched tag");
2361 case XML_ERROR_DUPLICATE_ATTRIBUTE:
2362 return XML_L("duplicate attribute");
2363 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2364 return XML_L("junk after document element");
2365 case XML_ERROR_PARAM_ENTITY_REF:
2366 return XML_L("illegal parameter entity reference");
2367 case XML_ERROR_UNDEFINED_ENTITY:
2368 return XML_L("undefined entity");
2369 case XML_ERROR_RECURSIVE_ENTITY_REF:
2370 return XML_L("recursive entity reference");
2371 case XML_ERROR_ASYNC_ENTITY:
2372 return XML_L("asynchronous entity");
2373 case XML_ERROR_BAD_CHAR_REF:
2374 return XML_L("reference to invalid character number");
2375 case XML_ERROR_BINARY_ENTITY_REF:
2376 return XML_L("reference to binary entity");
2377 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2378 return XML_L("reference to external entity in attribute");
2379 case XML_ERROR_MISPLACED_XML_PI:
2380 return XML_L("XML or text declaration not at start of entity");
2381 case XML_ERROR_UNKNOWN_ENCODING:
2382 return XML_L("unknown encoding");
2383 case XML_ERROR_INCORRECT_ENCODING:
2384 return XML_L("encoding specified in XML declaration is incorrect");
2385 case XML_ERROR_UNCLOSED_CDATA_SECTION:
2386 return XML_L("unclosed CDATA section");
2387 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2388 return XML_L("error in processing external entity reference");
2389 case XML_ERROR_NOT_STANDALONE:
2390 return XML_L("document is not standalone");
2391 case XML_ERROR_UNEXPECTED_STATE:
2392 return XML_L("unexpected parser state - please send a bug report");
2393 case XML_ERROR_ENTITY_DECLARED_IN_PE:
2394 return XML_L("entity declared in parameter entity");
2395 case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2396 return XML_L("requested feature requires XML_DTD support in Expat");
2397 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2398 return XML_L("cannot change setting once parsing has begun");
2399 /* Added in 1.95.7. */
2400 case XML_ERROR_UNBOUND_PREFIX:
2401 return XML_L("unbound prefix");
2402 /* Added in 1.95.8. */
2403 case XML_ERROR_UNDECLARING_PREFIX:
2404 return XML_L("must not undeclare prefix");
2405 case XML_ERROR_INCOMPLETE_PE:
2406 return XML_L("incomplete markup in parameter entity");
2407 case XML_ERROR_XML_DECL:
2408 return XML_L("XML declaration not well-formed");
2409 case XML_ERROR_TEXT_DECL:
2410 return XML_L("text declaration not well-formed");
2411 case XML_ERROR_PUBLICID:
2412 return XML_L("illegal character(s) in public id");
2413 case XML_ERROR_SUSPENDED:
2414 return XML_L("parser suspended");
2415 case XML_ERROR_NOT_SUSPENDED:
2416 return XML_L("parser not suspended");
2417 case XML_ERROR_ABORTED:
2418 return XML_L("parsing aborted");
2419 case XML_ERROR_FINISHED:
2420 return XML_L("parsing finished");
2421 case XML_ERROR_SUSPEND_PE:
2422 return XML_L("cannot suspend in external parameter entity");
2423 /* Added in 2.0.0. */
2424 case XML_ERROR_RESERVED_PREFIX_XML:
Benjamin Peterson52b94082019-09-25 21:33:58 -07002425 return XML_L(
2426 "reserved prefix (xml) must not be undeclared or bound to another namespace name");
Benjamin Peterson4e211002018-06-26 19:25:45 -07002427 case XML_ERROR_RESERVED_PREFIX_XMLNS:
2428 return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2429 case XML_ERROR_RESERVED_NAMESPACE_URI:
Benjamin Peterson52b94082019-09-25 21:33:58 -07002430 return XML_L(
2431 "prefix must not be bound to one of the reserved namespace names");
Benjamin Peterson4e211002018-06-26 19:25:45 -07002432 /* Added in 2.2.5. */
Benjamin Peterson52b94082019-09-25 21:33:58 -07002433 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002434 return XML_L("invalid argument");
Miss Islington (bot)27067852021-08-29 07:32:50 -07002435 /* Added in 2.3.0. */
2436 case XML_ERROR_NO_BUFFER:
2437 return XML_L(
2438 "a successful prior call to function XML_GetBuffer is required");
2439 /* Added in 2.4.0. */
2440 case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
2441 return XML_L(
2442 "limit on input amplification factor (from DTD and entities) breached");
Benjamin Peterson4e211002018-06-26 19:25:45 -07002443 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002444 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002445}
2446
Benjamin Peterson52b94082019-09-25 21:33:58 -07002447const XML_LChar *XMLCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002448XML_ExpatVersion(void) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002449 /* V1 is used to string-ize the version number. However, it would
2450 string-ize the actual version macro *names* unless we get them
2451 substituted before being passed to V1. CPP is defined to expand
2452 a macro, then rescan for more expansions. Thus, we use V2 to expand
2453 the version macros, then CPP will expand the resulting V1() macro
2454 with the correct numerals. */
2455 /* ### I'm assuming cpp is portable in this respect... */
2456
Benjamin Peterson52b94082019-09-25 21:33:58 -07002457#define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2458#define V2(a, b, c) XML_L("expat_") V1(a, b, c)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002459
2460 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2461
2462#undef V1
2463#undef V2
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002464}
2465
Fred Drake08317ae2003-10-21 15:38:55 +00002466XML_Expat_Version XMLCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002467XML_ExpatVersionInfo(void) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002468 XML_Expat_Version version;
2469
2470 version.major = XML_MAJOR_VERSION;
2471 version.minor = XML_MINOR_VERSION;
2472 version.micro = XML_MICRO_VERSION;
2473
2474 return version;
2475}
2476
Benjamin Peterson52b94082019-09-25 21:33:58 -07002477const XML_Feature *XMLCALL
2478XML_GetFeatureList(void) {
Miss Islington (bot)27067852021-08-29 07:32:50 -07002479 static const XML_Feature features[] = {
2480 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2481 sizeof(XML_Char)},
2482 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2483 sizeof(XML_LChar)},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002484#ifdef XML_UNICODE
Miss Islington (bot)27067852021-08-29 07:32:50 -07002485 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002486#endif
2487#ifdef XML_UNICODE_WCHAR_T
Miss Islington (bot)27067852021-08-29 07:32:50 -07002488 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002489#endif
2490#ifdef XML_DTD
Miss Islington (bot)27067852021-08-29 07:32:50 -07002491 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002492#endif
2493#ifdef XML_CONTEXT_BYTES
Miss Islington (bot)27067852021-08-29 07:32:50 -07002494 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2495 XML_CONTEXT_BYTES},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002496#endif
2497#ifdef XML_MIN_SIZE
Miss Islington (bot)27067852021-08-29 07:32:50 -07002498 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002499#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002500#ifdef XML_NS
Miss Islington (bot)27067852021-08-29 07:32:50 -07002501 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002502#endif
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002503#ifdef XML_LARGE_SIZE
Miss Islington (bot)27067852021-08-29 07:32:50 -07002504 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002505#endif
2506#ifdef XML_ATTR_INFO
Miss Islington (bot)27067852021-08-29 07:32:50 -07002507 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002508#endif
Miss Islington (bot)27067852021-08-29 07:32:50 -07002509#ifdef XML_DTD
2510 /* Added in Expat 2.4.0. */
2511 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
2512 XML_L("XML_BLAP_MAX_AMP"),
2513 (long int)
2514 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
2515 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
2516 XML_L("XML_BLAP_ACT_THRES"),
2517 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
2518#endif
2519 {XML_FEATURE_END, NULL, 0}};
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002520
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002521 return features;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002522}
2523
Miss Islington (bot)27067852021-08-29 07:32:50 -07002524#ifdef XML_DTD
2525XML_Bool XMLCALL
2526XML_SetBillionLaughsAttackProtectionMaximumAmplification(
2527 XML_Parser parser, float maximumAmplificationFactor) {
2528 if ((parser == NULL) || (parser->m_parentParser != NULL)
2529 || isnan(maximumAmplificationFactor)
2530 || (maximumAmplificationFactor < 1.0f)) {
2531 return XML_FALSE;
2532 }
2533 parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
2534 return XML_TRUE;
2535}
2536
2537XML_Bool XMLCALL
2538XML_SetBillionLaughsAttackProtectionActivationThreshold(
2539 XML_Parser parser, unsigned long long activationThresholdBytes) {
2540 if ((parser == NULL) || (parser->m_parentParser != NULL)) {
2541 return XML_FALSE;
2542 }
2543 parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
2544 return XML_TRUE;
2545}
2546#endif /* XML_DTD */
2547
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002548/* Initially tag->rawName always points into the parse buffer;
2549 for those TAG instances opened while the current parse buffer was
2550 processed, and not yet closed, we need to store tag->rawName in a more
2551 permanent location, since the parse buffer is about to be discarded.
2552*/
2553static XML_Bool
Benjamin Peterson52b94082019-09-25 21:33:58 -07002554storeRawNames(XML_Parser parser) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002555 TAG *tag = parser->m_tagStack;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002556 while (tag) {
2557 int bufSize;
2558 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2559 char *rawNameBuf = tag->buf + nameLen;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002560 /* Stop if already stored. Since m_tagStack is a stack, we can stop
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002561 at the first entry that has already been copied; everything
2562 below it in the stack is already been accounted for in a
2563 previous call to this function.
2564 */
2565 if (tag->rawName == rawNameBuf)
2566 break;
2567 /* For re-use purposes we need to ensure that the
2568 size of tag->buf is a multiple of sizeof(XML_Char).
2569 */
2570 bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2571 if (bufSize > tag->bufEnd - tag->buf) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002572 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002573 if (temp == NULL)
2574 return XML_FALSE;
2575 /* if tag->name.str points to tag->buf (only when namespace
2576 processing is off) then we have to update it
2577 */
2578 if (tag->name.str == (XML_Char *)tag->buf)
2579 tag->name.str = (XML_Char *)temp;
2580 /* if tag->name.localPart is set (when namespace processing is on)
2581 then update it as well, since it will always point into tag->buf
2582 */
2583 if (tag->name.localPart)
Benjamin Peterson52b94082019-09-25 21:33:58 -07002584 tag->name.localPart
2585 = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002586 tag->buf = temp;
2587 tag->bufEnd = temp + bufSize;
2588 rawNameBuf = temp + nameLen;
2589 }
2590 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2591 tag->rawName = rawNameBuf;
2592 tag = tag->parent;
2593 }
2594 return XML_TRUE;
2595}
2596
2597static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002598contentProcessor(XML_Parser parser, const char *start, const char *end,
2599 const char **endPtr) {
Miss Islington (bot)27067852021-08-29 07:32:50 -07002600 enum XML_Error result = doContent(
2601 parser, 0, parser->m_encoding, start, end, endPtr,
2602 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
Fred Drake31d485c2004-08-03 07:06:22 +00002603 if (result == XML_ERROR_NONE) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07002604 if (! storeRawNames(parser))
Fred Drake31d485c2004-08-03 07:06:22 +00002605 return XML_ERROR_NO_MEMORY;
2606 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002607 return result;
2608}
2609
2610static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002611externalEntityInitProcessor(XML_Parser parser, const char *start,
2612 const char *end, const char **endPtr) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002613 enum XML_Error result = initializeEncoding(parser);
2614 if (result != XML_ERROR_NONE)
2615 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002616 parser->m_processor = externalEntityInitProcessor2;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002617 return externalEntityInitProcessor2(parser, start, end, endPtr);
2618}
2619
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002620static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002621externalEntityInitProcessor2(XML_Parser parser, const char *start,
2622 const char *end, const char **endPtr) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002623 const char *next = start; /* XmlContentTok doesn't always set the last arg */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002624 int tok = XmlContentTok(parser->m_encoding, start, end, &next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002625 switch (tok) {
2626 case XML_TOK_BOM:
Miss Islington (bot)27067852021-08-29 07:32:50 -07002627#ifdef XML_DTD
2628 if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
2629 XML_ACCOUNT_DIRECT)) {
2630 accountingOnAbort(parser);
2631 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2632 }
2633#endif /* XML_DTD */
2634
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002635 /* If we are at the end of the buffer, this would cause the next stage,
2636 i.e. externalEntityInitProcessor3, to pass control directly to
2637 doContent (by detecting XML_TOK_NONE) without processing any xml text
2638 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2639 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07002640 if (next == end && ! parser->m_parsingStatus.finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002641 *endPtr = next;
2642 return XML_ERROR_NONE;
2643 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002644 start = next;
2645 break;
2646 case XML_TOK_PARTIAL:
Benjamin Peterson52b94082019-09-25 21:33:58 -07002647 if (! parser->m_parsingStatus.finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002648 *endPtr = start;
2649 return XML_ERROR_NONE;
2650 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002651 parser->m_eventPtr = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002652 return XML_ERROR_UNCLOSED_TOKEN;
2653 case XML_TOK_PARTIAL_CHAR:
Benjamin Peterson52b94082019-09-25 21:33:58 -07002654 if (! parser->m_parsingStatus.finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002655 *endPtr = start;
2656 return XML_ERROR_NONE;
2657 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002658 parser->m_eventPtr = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002659 return XML_ERROR_PARTIAL_CHAR;
2660 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002661 parser->m_processor = externalEntityInitProcessor3;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002662 return externalEntityInitProcessor3(parser, start, end, endPtr);
2663}
2664
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002665static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002666externalEntityInitProcessor3(XML_Parser parser, const char *start,
2667 const char *end, const char **endPtr) {
Fred Drake31d485c2004-08-03 07:06:22 +00002668 int tok;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002669 const char *next = start; /* XmlContentTok doesn't always set the last arg */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002670 parser->m_eventPtr = start;
2671 tok = XmlContentTok(parser->m_encoding, start, end, &next);
Miss Islington (bot)27067852021-08-29 07:32:50 -07002672 /* Note: These bytes are accounted later in:
2673 - processXmlDecl
2674 - externalEntityContentProcessor
2675 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07002676 parser->m_eventEndPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00002677
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002678 switch (tok) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07002679 case XML_TOK_XML_DECL: {
2680 enum XML_Error result;
2681 result = processXmlDecl(parser, 1, start, next);
2682 if (result != XML_ERROR_NONE)
2683 return result;
2684 switch (parser->m_parsingStatus.parsing) {
2685 case XML_SUSPENDED:
2686 *endPtr = next;
2687 return XML_ERROR_NONE;
2688 case XML_FINISHED:
2689 return XML_ERROR_ABORTED;
2690 default:
2691 start = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002692 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07002693 } break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002694 case XML_TOK_PARTIAL:
Benjamin Peterson52b94082019-09-25 21:33:58 -07002695 if (! parser->m_parsingStatus.finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002696 *endPtr = start;
2697 return XML_ERROR_NONE;
2698 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002699 return XML_ERROR_UNCLOSED_TOKEN;
2700 case XML_TOK_PARTIAL_CHAR:
Benjamin Peterson52b94082019-09-25 21:33:58 -07002701 if (! parser->m_parsingStatus.finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002702 *endPtr = start;
2703 return XML_ERROR_NONE;
2704 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002705 return XML_ERROR_PARTIAL_CHAR;
2706 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07002707 parser->m_processor = externalEntityContentProcessor;
2708 parser->m_tagLevel = 1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002709 return externalEntityContentProcessor(parser, start, end, endPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002710}
2711
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002712static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07002713externalEntityContentProcessor(XML_Parser parser, const char *start,
2714 const char *end, const char **endPtr) {
2715 enum XML_Error result
2716 = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
Miss Islington (bot)27067852021-08-29 07:32:50 -07002717 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
2718 XML_ACCOUNT_ENTITY_EXPANSION);
Fred Drake31d485c2004-08-03 07:06:22 +00002719 if (result == XML_ERROR_NONE) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07002720 if (! storeRawNames(parser))
Fred Drake31d485c2004-08-03 07:06:22 +00002721 return XML_ERROR_NO_MEMORY;
2722 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002723 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002724}
2725
2726static enum XML_Error
Benjamin Peterson52b94082019-09-25 21:33:58 -07002727doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
2728 const char *s, const char *end, const char **nextPtr,
Miss Islington (bot)27067852021-08-29 07:32:50 -07002729 XML_Bool haveMore, enum XML_Account account) {
Fred Drake31d485c2004-08-03 07:06:22 +00002730 /* save one level of indirection */
Benjamin Peterson52b94082019-09-25 21:33:58 -07002731 DTD *const dtd = parser->m_dtd;
Fred Drake31d485c2004-08-03 07:06:22 +00002732
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002733 const char **eventPP;
2734 const char **eventEndPP;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002735 if (enc == parser->m_encoding) {
2736 eventPP = &parser->m_eventPtr;
2737 eventEndPP = &parser->m_eventEndPtr;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002738 } else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002739 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
2740 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002741 }
2742 *eventPP = s;
Fred Drake31d485c2004-08-03 07:06:22 +00002743
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002744 for (;;) {
2745 const char *next = s; /* XmlContentTok doesn't always set the last arg */
2746 int tok = XmlContentTok(enc, s, end, &next);
Miss Islington (bot)27067852021-08-29 07:32:50 -07002747#ifdef XML_DTD
2748 const char *accountAfter
2749 = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
2750 ? (haveMore ? s /* i.e. 0 bytes */ : end)
2751 : next;
2752 if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
2753 account)) {
2754 accountingOnAbort(parser);
2755 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2756 }
2757#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002758 *eventEndPP = next;
2759 switch (tok) {
2760 case XML_TOK_TRAILING_CR:
Fred Drake31d485c2004-08-03 07:06:22 +00002761 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002762 *nextPtr = s;
2763 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002764 }
2765 *eventEndPP = end;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002766 if (parser->m_characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002767 XML_Char c = 0xA;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002768 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
Benjamin Peterson52b94082019-09-25 21:33:58 -07002769 } else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002770 reportDefault(parser, enc, s, end);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002771 /* We are at the end of the final buffer, should we check for
2772 XML_SUSPENDED, XML_FINISHED?
Fred Drake31d485c2004-08-03 07:06:22 +00002773 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002774 if (startTagLevel == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002775 return XML_ERROR_NO_ELEMENTS;
Benjamin Peterson4e211002018-06-26 19:25:45 -07002776 if (parser->m_tagLevel != startTagLevel)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002777 return XML_ERROR_ASYNC_ENTITY;
Fred Drake31d485c2004-08-03 07:06:22 +00002778 *nextPtr = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002779 return XML_ERROR_NONE;
2780 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00002781 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002782 *nextPtr = s;
2783 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002784 }
2785 if (startTagLevel > 0) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07002786 if (parser->m_tagLevel != startTagLevel)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002787 return XML_ERROR_ASYNC_ENTITY;
Fred Drake31d485c2004-08-03 07:06:22 +00002788 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002789 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002790 }
2791 return XML_ERROR_NO_ELEMENTS;
2792 case XML_TOK_INVALID:
2793 *eventPP = next;
2794 return XML_ERROR_INVALID_TOKEN;
2795 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00002796 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002797 *nextPtr = s;
2798 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002799 }
2800 return XML_ERROR_UNCLOSED_TOKEN;
2801 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00002802 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002803 *nextPtr = s;
2804 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002805 }
2806 return XML_ERROR_PARTIAL_CHAR;
Benjamin Peterson52b94082019-09-25 21:33:58 -07002807 case XML_TOK_ENTITY_REF: {
2808 const XML_Char *name;
2809 ENTITY *entity;
2810 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
2811 enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
2812 if (ch) {
Miss Islington (bot)27067852021-08-29 07:32:50 -07002813#ifdef XML_DTD
2814 /* NOTE: We are replacing 4-6 characters original input for 1 character
2815 * so there is no amplification and hence recording without
2816 * protection. */
2817 accountingDiffTolerated(parser, tok, (char *)&ch,
2818 ((char *)&ch) + sizeof(XML_Char), __LINE__,
2819 XML_ACCOUNT_ENTITY_EXPANSION);
2820#endif /* XML_DTD */
Benjamin Peterson52b94082019-09-25 21:33:58 -07002821 if (parser->m_characterDataHandler)
2822 parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002823 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002824 reportDefault(parser, enc, s, next);
2825 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002826 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07002827 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
2828 next - enc->minBytesPerChar);
2829 if (! name)
2830 return XML_ERROR_NO_MEMORY;
2831 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
2832 poolDiscard(&dtd->pool);
2833 /* First, determine if a check for an existing declaration is needed;
2834 if yes, check that the entity exists, and that it is internal,
2835 otherwise call the skipped entity or default handler.
2836 */
2837 if (! dtd->hasParamEntityRefs || dtd->standalone) {
2838 if (! entity)
2839 return XML_ERROR_UNDEFINED_ENTITY;
2840 else if (! entity->is_internal)
2841 return XML_ERROR_ENTITY_DECLARED_IN_PE;
2842 } else if (! entity) {
2843 if (parser->m_skippedEntityHandler)
2844 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
2845 else if (parser->m_defaultHandler)
2846 reportDefault(parser, enc, s, next);
2847 break;
2848 }
2849 if (entity->open)
2850 return XML_ERROR_RECURSIVE_ENTITY_REF;
2851 if (entity->notation)
2852 return XML_ERROR_BINARY_ENTITY_REF;
2853 if (entity->textPtr) {
2854 enum XML_Error result;
2855 if (! parser->m_defaultExpandInternalEntities) {
2856 if (parser->m_skippedEntityHandler)
2857 parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
2858 0);
2859 else if (parser->m_defaultHandler)
2860 reportDefault(parser, enc, s, next);
2861 break;
2862 }
2863 result = processInternalEntity(parser, entity, XML_FALSE);
2864 if (result != XML_ERROR_NONE)
2865 return result;
2866 } else if (parser->m_externalEntityRefHandler) {
2867 const XML_Char *context;
2868 entity->open = XML_TRUE;
2869 context = getContext(parser);
2870 entity->open = XML_FALSE;
2871 if (! context)
2872 return XML_ERROR_NO_MEMORY;
2873 if (! parser->m_externalEntityRefHandler(
2874 parser->m_externalEntityRefHandlerArg, context, entity->base,
2875 entity->systemId, entity->publicId))
2876 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2877 poolDiscard(&parser->m_tempPool);
2878 } else if (parser->m_defaultHandler)
2879 reportDefault(parser, enc, s, next);
2880 break;
2881 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002882 case XML_TOK_START_TAG_NO_ATTS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002883 /* fall through */
Benjamin Peterson52b94082019-09-25 21:33:58 -07002884 case XML_TOK_START_TAG_WITH_ATTS: {
2885 TAG *tag;
2886 enum XML_Error result;
2887 XML_Char *toPtr;
2888 if (parser->m_freeTagList) {
2889 tag = parser->m_freeTagList;
2890 parser->m_freeTagList = parser->m_freeTagList->parent;
2891 } else {
2892 tag = (TAG *)MALLOC(parser, sizeof(TAG));
2893 if (! tag)
2894 return XML_ERROR_NO_MEMORY;
2895 tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
2896 if (! tag->buf) {
2897 FREE(parser, tag);
2898 return XML_ERROR_NO_MEMORY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002899 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07002900 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002901 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07002902 tag->bindings = NULL;
2903 tag->parent = parser->m_tagStack;
2904 parser->m_tagStack = tag;
2905 tag->name.localPart = NULL;
2906 tag->name.prefix = NULL;
2907 tag->rawName = s + enc->minBytesPerChar;
2908 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
2909 ++parser->m_tagLevel;
2910 {
2911 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
2912 const char *fromPtr = tag->rawName;
2913 toPtr = (XML_Char *)tag->buf;
2914 for (;;) {
2915 int bufSize;
2916 int convLen;
2917 const enum XML_Convert_Result convert_res
2918 = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
2919 (ICHAR *)tag->bufEnd - 1);
2920 convLen = (int)(toPtr - (XML_Char *)tag->buf);
2921 if ((fromPtr >= rawNameEnd)
2922 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
2923 tag->name.strLen = convLen;
2924 break;
2925 }
2926 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
2927 {
2928 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2929 if (temp == NULL)
2930 return XML_ERROR_NO_MEMORY;
2931 tag->buf = temp;
2932 tag->bufEnd = temp + bufSize;
2933 toPtr = (XML_Char *)temp + convLen;
2934 }
2935 }
2936 }
2937 tag->name.str = (XML_Char *)tag->buf;
2938 *toPtr = XML_T('\0');
Miss Islington (bot)27067852021-08-29 07:32:50 -07002939 result
2940 = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
Benjamin Peterson52b94082019-09-25 21:33:58 -07002941 if (result)
2942 return result;
2943 if (parser->m_startElementHandler)
2944 parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
2945 (const XML_Char **)parser->m_atts);
2946 else if (parser->m_defaultHandler)
2947 reportDefault(parser, enc, s, next);
2948 poolClear(&parser->m_tempPool);
2949 break;
2950 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002951 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002952 /* fall through */
Benjamin Peterson52b94082019-09-25 21:33:58 -07002953 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
2954 const char *rawName = s + enc->minBytesPerChar;
2955 enum XML_Error result;
2956 BINDING *bindings = NULL;
2957 XML_Bool noElmHandlers = XML_TRUE;
2958 TAG_NAME name;
2959 name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
2960 rawName + XmlNameLength(enc, rawName));
2961 if (! name.str)
2962 return XML_ERROR_NO_MEMORY;
2963 poolFinish(&parser->m_tempPool);
Miss Islington (bot)27067852021-08-29 07:32:50 -07002964 result = storeAtts(parser, enc, s, &name, &bindings,
2965 XML_ACCOUNT_NONE /* token spans whole start tag */);
Benjamin Peterson52b94082019-09-25 21:33:58 -07002966 if (result != XML_ERROR_NONE) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002967 freeBindings(parser, bindings);
Benjamin Peterson52b94082019-09-25 21:33:58 -07002968 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002969 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07002970 poolFinish(&parser->m_tempPool);
2971 if (parser->m_startElementHandler) {
2972 parser->m_startElementHandler(parser->m_handlerArg, name.str,
2973 (const XML_Char **)parser->m_atts);
2974 noElmHandlers = XML_FALSE;
2975 }
2976 if (parser->m_endElementHandler) {
2977 if (parser->m_startElementHandler)
2978 *eventPP = *eventEndPP;
2979 parser->m_endElementHandler(parser->m_handlerArg, name.str);
2980 noElmHandlers = XML_FALSE;
2981 }
2982 if (noElmHandlers && parser->m_defaultHandler)
2983 reportDefault(parser, enc, s, next);
2984 poolClear(&parser->m_tempPool);
2985 freeBindings(parser, bindings);
2986 }
2987 if ((parser->m_tagLevel == 0)
2988 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
Benjamin Peterson5033aa72018-09-10 21:04:00 -07002989 if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
2990 parser->m_processor = epilogProcessor;
2991 else
2992 return epilogProcessor(parser, next, end, nextPtr);
Benjamin Peterson4e211002018-06-26 19:25:45 -07002993 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002994 break;
2995 case XML_TOK_END_TAG:
Benjamin Peterson4e211002018-06-26 19:25:45 -07002996 if (parser->m_tagLevel == startTagLevel)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002997 return XML_ERROR_ASYNC_ENTITY;
2998 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002999 int len;
3000 const char *rawName;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003001 TAG *tag = parser->m_tagStack;
3002 parser->m_tagStack = tag->parent;
3003 tag->parent = parser->m_freeTagList;
3004 parser->m_freeTagList = tag;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003005 rawName = s + enc->minBytesPerChar * 2;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003006 len = XmlNameLength(enc, rawName);
3007 if (len != tag->rawNameLength
3008 || memcmp(tag->rawName, rawName, len) != 0) {
3009 *eventPP = rawName;
3010 return XML_ERROR_TAG_MISMATCH;
3011 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003012 --parser->m_tagLevel;
3013 if (parser->m_endElementHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003014 const XML_Char *localPart;
3015 const XML_Char *prefix;
3016 XML_Char *uri;
3017 localPart = tag->name.localPart;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003018 if (parser->m_ns && localPart) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003019 /* localPart and prefix may have been overwritten in
3020 tag->name.str, since this points to the binding->uri
3021 buffer which gets re-used; so we have to add them again
3022 */
3023 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
3024 /* don't need to check for space - already done in storeAtts() */
Benjamin Peterson52b94082019-09-25 21:33:58 -07003025 while (*localPart)
3026 *uri++ = *localPart++;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003027 prefix = (XML_Char *)tag->name.prefix;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003028 if (parser->m_ns_triplets && prefix) {
3029 *uri++ = parser->m_namespaceSeparator;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003030 while (*prefix)
3031 *uri++ = *prefix++;
3032 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003033 *uri = XML_T('\0');
3034 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003035 parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
Benjamin Peterson52b94082019-09-25 21:33:58 -07003036 } else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003037 reportDefault(parser, enc, s, next);
3038 while (tag->bindings) {
3039 BINDING *b = tag->bindings;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003040 if (parser->m_endNamespaceDeclHandler)
Benjamin Peterson52b94082019-09-25 21:33:58 -07003041 parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
3042 b->prefix->name);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003043 tag->bindings = tag->bindings->nextTagBinding;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003044 b->nextTagBinding = parser->m_freeBindingList;
3045 parser->m_freeBindingList = b;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003046 b->prefix->binding = b->prevPrefixBinding;
3047 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07003048 if ((parser->m_tagLevel == 0)
3049 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3050 if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
3051 parser->m_processor = epilogProcessor;
3052 else
3053 return epilogProcessor(parser, next, end, nextPtr);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003054 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003055 }
3056 break;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003057 case XML_TOK_CHAR_REF: {
3058 int n = XmlCharRefNumber(enc, s);
3059 if (n < 0)
3060 return XML_ERROR_BAD_CHAR_REF;
3061 if (parser->m_characterDataHandler) {
3062 XML_Char buf[XML_ENCODE_MAX];
3063 parser->m_characterDataHandler(parser->m_handlerArg, buf,
3064 XmlEncode(n, (ICHAR *)buf));
3065 } else if (parser->m_defaultHandler)
3066 reportDefault(parser, enc, s, next);
3067 } break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003068 case XML_TOK_XML_DECL:
3069 return XML_ERROR_MISPLACED_XML_PI;
3070 case XML_TOK_DATA_NEWLINE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07003071 if (parser->m_characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003072 XML_Char c = 0xA;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003073 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
Benjamin Peterson52b94082019-09-25 21:33:58 -07003074 } else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003075 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003076 break;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003077 case XML_TOK_CDATA_SECT_OPEN: {
3078 enum XML_Error result;
3079 if (parser->m_startCdataSectionHandler)
3080 parser->m_startCdataSectionHandler(parser->m_handlerArg);
3081 /* BEGIN disabled code */
3082 /* Suppose you are doing a transformation on a document that involves
3083 changing only the character data. You set up a defaultHandler
3084 and a characterDataHandler. The defaultHandler simply copies
3085 characters through. The characterDataHandler does the
3086 transformation and writes the characters out escaping them as
3087 necessary. This case will fail to work if we leave out the
3088 following two lines (because & and < inside CDATA sections will
3089 be incorrectly escaped).
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003090
Benjamin Peterson52b94082019-09-25 21:33:58 -07003091 However, now we have a start/endCdataSectionHandler, so it seems
3092 easier to let the user deal with this.
3093 */
3094 else if (0 && parser->m_characterDataHandler)
3095 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3096 0);
3097 /* END disabled code */
3098 else if (parser->m_defaultHandler)
3099 reportDefault(parser, enc, s, next);
Miss Islington (bot)27067852021-08-29 07:32:50 -07003100 result
3101 = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
Benjamin Peterson52b94082019-09-25 21:33:58 -07003102 if (result != XML_ERROR_NONE)
3103 return result;
3104 else if (! next) {
3105 parser->m_processor = cdataSectionProcessor;
3106 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003107 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07003108 } break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003109 case XML_TOK_TRAILING_RSQB:
Fred Drake31d485c2004-08-03 07:06:22 +00003110 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003111 *nextPtr = s;
3112 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003113 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003114 if (parser->m_characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003115 if (MUST_CONVERT(enc, s)) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003116 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3117 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
Benjamin Peterson52b94082019-09-25 21:33:58 -07003118 parser->m_characterDataHandler(
3119 parser->m_handlerArg, parser->m_dataBuf,
3120 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3121 } else
3122 parser->m_characterDataHandler(
3123 parser->m_handlerArg, (XML_Char *)s,
3124 (int)((XML_Char *)end - (XML_Char *)s));
3125 } else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003126 reportDefault(parser, enc, s, end);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003127 /* We are at the end of the final buffer, should we check for
3128 XML_SUSPENDED, XML_FINISHED?
Fred Drake31d485c2004-08-03 07:06:22 +00003129 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003130 if (startTagLevel == 0) {
3131 *eventPP = end;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003132 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003133 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003134 if (parser->m_tagLevel != startTagLevel) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003135 *eventPP = end;
3136 return XML_ERROR_ASYNC_ENTITY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003137 }
Fred Drake31d485c2004-08-03 07:06:22 +00003138 *nextPtr = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003139 return XML_ERROR_NONE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003140 case XML_TOK_DATA_CHARS: {
3141 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3142 if (charDataHandler) {
3143 if (MUST_CONVERT(enc, s)) {
3144 for (;;) {
3145 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3146 const enum XML_Convert_Result convert_res = XmlConvert(
3147 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3148 *eventEndPP = s;
3149 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3150 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3151 if ((convert_res == XML_CONVERT_COMPLETED)
3152 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3153 break;
3154 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003155 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07003156 } else
3157 charDataHandler(parser->m_handlerArg, (XML_Char *)s,
3158 (int)((XML_Char *)next - (XML_Char *)s));
3159 } else if (parser->m_defaultHandler)
3160 reportDefault(parser, enc, s, next);
3161 } break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003162 case XML_TOK_PI:
Benjamin Peterson52b94082019-09-25 21:33:58 -07003163 if (! reportProcessingInstruction(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003164 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003165 break;
3166 case XML_TOK_COMMENT:
Benjamin Peterson52b94082019-09-25 21:33:58 -07003167 if (! reportComment(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003168 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003169 break;
3170 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02003171 /* All of the tokens produced by XmlContentTok() have their own
3172 * explicit cases, so this default is not strictly necessary.
3173 * However it is a useful safety net, so we retain the code and
3174 * simply exclude it from the coverage tests.
3175 *
3176 * LCOV_EXCL_START
3177 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003178 if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003179 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003180 break;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003181 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003182 }
3183 *eventPP = s = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003184 switch (parser->m_parsingStatus.parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003185 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00003186 *nextPtr = next;
3187 return XML_ERROR_NONE;
3188 case XML_FINISHED:
3189 return XML_ERROR_ABORTED;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003190 default:;
Fred Drake31d485c2004-08-03 07:06:22 +00003191 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003192 }
3193 /* not reached */
3194}
3195
Victor Stinner5ff71322017-06-21 14:39:22 +02003196/* This function does not call free() on the allocated memory, merely
Benjamin Peterson4e211002018-06-26 19:25:45 -07003197 * moving it to the parser's m_freeBindingList where it can be freed or
Victor Stinner5ff71322017-06-21 14:39:22 +02003198 * reused as appropriate.
3199 */
3200static void
Benjamin Peterson52b94082019-09-25 21:33:58 -07003201freeBindings(XML_Parser parser, BINDING *bindings) {
Victor Stinner5ff71322017-06-21 14:39:22 +02003202 while (bindings) {
3203 BINDING *b = bindings;
3204
Benjamin Peterson4e211002018-06-26 19:25:45 -07003205 /* m_startNamespaceDeclHandler will have been called for this
Victor Stinner5ff71322017-06-21 14:39:22 +02003206 * binding in addBindings(), so call the end handler now.
3207 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003208 if (parser->m_endNamespaceDeclHandler)
Benjamin Peterson52b94082019-09-25 21:33:58 -07003209 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
Victor Stinner5ff71322017-06-21 14:39:22 +02003210
3211 bindings = bindings->nextTagBinding;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003212 b->nextTagBinding = parser->m_freeBindingList;
3213 parser->m_freeBindingList = b;
Victor Stinner5ff71322017-06-21 14:39:22 +02003214 b->prefix->binding = b->prevPrefixBinding;
3215 }
3216}
3217
Fred Drake4faea012003-01-28 06:42:40 +00003218/* Precondition: all arguments must be non-NULL;
3219 Purpose:
3220 - normalize attributes
3221 - check attributes for well-formedness
3222 - generate namespace aware attribute names (URI, prefix)
3223 - build list of attributes for startElementHandler
3224 - default attributes
3225 - process namespace declarations (check and report them)
3226 - generate namespace aware element name (URI, prefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003227*/
3228static enum XML_Error
Benjamin Peterson52b94082019-09-25 21:33:58 -07003229storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
Miss Islington (bot)27067852021-08-29 07:32:50 -07003230 TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
3231 enum XML_Account account) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07003232 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
Fred Drake08317ae2003-10-21 15:38:55 +00003233 ELEMENT_TYPE *elementType;
3234 int nDefaultAtts;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003235 const XML_Char **appAtts; /* the attribute list for the application */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003236 int attIndex = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003237 int prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003238 int i;
3239 int n;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003240 XML_Char *uri;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003241 int nPrefixes = 0;
3242 BINDING *binding;
3243 const XML_Char *localPart;
3244
3245 /* lookup the element type name */
Benjamin Peterson52b94082019-09-25 21:33:58 -07003246 elementType
3247 = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
3248 if (! elementType) {
Fred Drake4faea012003-01-28 06:42:40 +00003249 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
Benjamin Peterson52b94082019-09-25 21:33:58 -07003250 if (! name)
Fred Drake4faea012003-01-28 06:42:40 +00003251 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003252 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
Fred Drake4faea012003-01-28 06:42:40 +00003253 sizeof(ELEMENT_TYPE));
Benjamin Peterson52b94082019-09-25 21:33:58 -07003254 if (! elementType)
Fred Drake4faea012003-01-28 06:42:40 +00003255 return XML_ERROR_NO_MEMORY;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003256 if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
Fred Drake4faea012003-01-28 06:42:40 +00003257 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003258 }
Fred Drake4faea012003-01-28 06:42:40 +00003259 nDefaultAtts = elementType->nDefaultAtts;
3260
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003261 /* get the attributes from the tokenizer */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003262 n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3263 if (n + nDefaultAtts > parser->m_attsSize) {
3264 int oldAttsSize = parser->m_attsSize;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003265 ATTRIBUTE *temp;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003266#ifdef XML_ATTR_INFO
3267 XML_AttrInfo *temp2;
3268#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07003269 parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003270 temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
3271 parser->m_attsSize * sizeof(ATTRIBUTE));
Victor Stinner93d0cb52017-08-18 23:43:54 +02003272 if (temp == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003273 parser->m_attsSize = oldAttsSize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003274 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003275 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003276 parser->m_atts = temp;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003277#ifdef XML_ATTR_INFO
Benjamin Peterson52b94082019-09-25 21:33:58 -07003278 temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
3279 parser->m_attsSize * sizeof(XML_AttrInfo));
Victor Stinner93d0cb52017-08-18 23:43:54 +02003280 if (temp2 == NULL) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003281 parser->m_attsSize = oldAttsSize;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003282 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003283 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003284 parser->m_attInfo = temp2;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003285#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003286 if (n > oldAttsSize)
Benjamin Peterson4e211002018-06-26 19:25:45 -07003287 XmlGetAttributes(enc, attStr, n, parser->m_atts);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003288 }
Fred Drake4faea012003-01-28 06:42:40 +00003289
Benjamin Peterson4e211002018-06-26 19:25:45 -07003290 appAtts = (const XML_Char **)parser->m_atts;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003291 for (i = 0; i < n; i++) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003292 ATTRIBUTE *currAtt = &parser->m_atts[i];
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003293#ifdef XML_ATTR_INFO
Benjamin Peterson4e211002018-06-26 19:25:45 -07003294 XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003295#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003296 /* add the name and value to the attribute list */
Benjamin Peterson52b94082019-09-25 21:33:58 -07003297 ATTRIBUTE_ID *attId
3298 = getAttributeId(parser, enc, currAtt->name,
3299 currAtt->name + XmlNameLength(enc, currAtt->name));
3300 if (! attId)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003301 return XML_ERROR_NO_MEMORY;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003302#ifdef XML_ATTR_INFO
Benjamin Peterson52b94082019-09-25 21:33:58 -07003303 currAttInfo->nameStart
3304 = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
3305 currAttInfo->nameEnd
3306 = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
3307 currAttInfo->valueStart = parser->m_parseEndByteIndex
3308 - (parser->m_parseEndPtr - currAtt->valuePtr);
3309 currAttInfo->valueEnd = parser->m_parseEndByteIndex
3310 - (parser->m_parseEndPtr - currAtt->valueEnd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003311#endif
Fred Drake08317ae2003-10-21 15:38:55 +00003312 /* Detect duplicate attributes by their QNames. This does not work when
3313 namespace processing is turned on and different prefixes for the same
3314 namespace are used. For this case we have a check further down.
3315 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003316 if ((attId->name)[-1]) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003317 if (enc == parser->m_encoding)
3318 parser->m_eventPtr = parser->m_atts[i].name;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003319 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3320 }
3321 (attId->name)[-1] = 1;
3322 appAtts[attIndex++] = attId->name;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003323 if (! parser->m_atts[i].normalized) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003324 enum XML_Error result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003325 XML_Bool isCdata = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003326
3327 /* figure out whether declared as other than CDATA */
3328 if (attId->maybeTokenized) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003329 int j;
3330 for (j = 0; j < nDefaultAtts; j++) {
3331 if (attId == elementType->defaultAtts[j].id) {
3332 isCdata = elementType->defaultAtts[j].isCdata;
3333 break;
3334 }
3335 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003336 }
3337
3338 /* normalize the attribute value */
Benjamin Peterson52b94082019-09-25 21:33:58 -07003339 result = storeAttributeValue(
3340 parser, enc, isCdata, parser->m_atts[i].valuePtr,
Miss Islington (bot)27067852021-08-29 07:32:50 -07003341 parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003342 if (result)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003343 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003344 appAtts[attIndex] = poolStart(&parser->m_tempPool);
3345 poolFinish(&parser->m_tempPool);
Benjamin Peterson52b94082019-09-25 21:33:58 -07003346 } else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003347 /* the value did not need normalizing */
Benjamin Peterson52b94082019-09-25 21:33:58 -07003348 appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
3349 parser->m_atts[i].valuePtr,
Benjamin Peterson4e211002018-06-26 19:25:45 -07003350 parser->m_atts[i].valueEnd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003351 if (appAtts[attIndex] == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003352 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003353 poolFinish(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003354 }
3355 /* handle prefixed attribute names */
Fred Drake4faea012003-01-28 06:42:40 +00003356 if (attId->prefix) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003357 if (attId->xmlns) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003358 /* deal with namespace declarations here */
3359 enum XML_Error result = addBinding(parser, attId->prefix, attId,
3360 appAtts[attIndex], bindingsPtr);
3361 if (result)
3362 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003363 --attIndex;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003364 } else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003365 /* deal with other prefixed names later */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003366 attIndex++;
3367 nPrefixes++;
3368 (attId->name)[-1] = 2;
3369 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07003370 } else
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003371 attIndex++;
3372 }
Fred Drake4faea012003-01-28 06:42:40 +00003373
3374 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003375 parser->m_nSpecifiedAtts = attIndex;
Fred Drake4faea012003-01-28 06:42:40 +00003376 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3377 for (i = 0; i < attIndex; i += 2)
3378 if (appAtts[i] == elementType->idAtt->name) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003379 parser->m_idAttIndex = i;
Fred Drake4faea012003-01-28 06:42:40 +00003380 break;
3381 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07003382 } else
Benjamin Peterson4e211002018-06-26 19:25:45 -07003383 parser->m_idAttIndex = -1;
Fred Drake4faea012003-01-28 06:42:40 +00003384
3385 /* do attribute defaulting */
3386 for (i = 0; i < nDefaultAtts; i++) {
3387 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003388 if (! (da->id->name)[-1] && da->value) {
Fred Drake4faea012003-01-28 06:42:40 +00003389 if (da->id->prefix) {
3390 if (da->id->xmlns) {
3391 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3392 da->value, bindingsPtr);
3393 if (result)
3394 return result;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003395 } else {
Fred Drake4faea012003-01-28 06:42:40 +00003396 (da->id->name)[-1] = 2;
3397 nPrefixes++;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003398 appAtts[attIndex++] = da->id->name;
3399 appAtts[attIndex++] = da->value;
3400 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07003401 } else {
Fred Drake4faea012003-01-28 06:42:40 +00003402 (da->id->name)[-1] = 1;
3403 appAtts[attIndex++] = da->id->name;
3404 appAtts[attIndex++] = da->value;
3405 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003406 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003407 }
Fred Drake4faea012003-01-28 06:42:40 +00003408 appAtts[attIndex] = 0;
3409
Fred Drake08317ae2003-10-21 15:38:55 +00003410 /* expand prefixed attribute names, check for duplicates,
3411 and clear flags that say whether attributes were specified */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003412 i = 0;
3413 if (nPrefixes) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07003414 int j; /* hash table index */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003415 unsigned long version = parser->m_nsAttsVersion;
3416 int nsAttsSize = (int)1 << parser->m_nsAttsPower;
3417 unsigned char oldNsAttsPower = parser->m_nsAttsPower;
Fred Drake08317ae2003-10-21 15:38:55 +00003418 /* size of hash table must be at least 2 * (# of prefixed attributes) */
Benjamin Peterson52b94082019-09-25 21:33:58 -07003419 if ((nPrefixes << 1)
3420 >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
Fred Drake08317ae2003-10-21 15:38:55 +00003421 NS_ATT *temp;
3422 /* hash table size must also be a power of 2 and >= 8 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07003423 while (nPrefixes >> parser->m_nsAttsPower++)
3424 ;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003425 if (parser->m_nsAttsPower < 3)
3426 parser->m_nsAttsPower = 3;
3427 nsAttsSize = (int)1 << parser->m_nsAttsPower;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003428 temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
3429 nsAttsSize * sizeof(NS_ATT));
3430 if (! temp) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003431 /* Restore actual size of memory in m_nsAtts */
3432 parser->m_nsAttsPower = oldNsAttsPower;
Fred Drake08317ae2003-10-21 15:38:55 +00003433 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003434 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003435 parser->m_nsAtts = temp;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003436 version = 0; /* force re-initialization of m_nsAtts hash table */
Fred Drake08317ae2003-10-21 15:38:55 +00003437 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003438 /* using a version flag saves us from initializing m_nsAtts every time */
Benjamin Peterson52b94082019-09-25 21:33:58 -07003439 if (! version) { /* initialize version flags when version wraps around */
Fred Drake08317ae2003-10-21 15:38:55 +00003440 version = INIT_ATTS_VERSION;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003441 for (j = nsAttsSize; j != 0;)
Benjamin Peterson4e211002018-06-26 19:25:45 -07003442 parser->m_nsAtts[--j].version = version;
Fred Drake08317ae2003-10-21 15:38:55 +00003443 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003444 parser->m_nsAttsVersion = --version;
Fred Drake08317ae2003-10-21 15:38:55 +00003445
3446 /* expand prefixed names and check for duplicates */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003447 for (; i < attIndex; i += 2) {
Fred Drake08317ae2003-10-21 15:38:55 +00003448 const XML_Char *s = appAtts[i];
Benjamin Peterson52b94082019-09-25 21:33:58 -07003449 if (s[-1] == 2) { /* prefixed */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003450 ATTRIBUTE_ID *id;
Fred Drake08317ae2003-10-21 15:38:55 +00003451 const BINDING *b;
Victor Stinner5ff71322017-06-21 14:39:22 +02003452 unsigned long uriHash;
3453 struct siphash sip_state;
3454 struct sipkey sip_key;
3455
3456 copy_salt_to_sipkey(parser, &sip_key);
3457 sip24_init(&sip_state, &sip_key);
3458
Benjamin Peterson52b94082019-09-25 21:33:58 -07003459 ((XML_Char *)s)[-1] = 0; /* clear flag */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003460 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
Benjamin Peterson52b94082019-09-25 21:33:58 -07003461 if (! id || ! id->prefix) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02003462 /* This code is walking through the appAtts array, dealing
3463 * with (in this case) a prefixed attribute name. To be in
3464 * the array, the attribute must have already been bound, so
3465 * has to have passed through the hash table lookup once
3466 * already. That implies that an entry for it already
3467 * exists, so the lookup above will return a pointer to
Benjamin Peterson52b94082019-09-25 21:33:58 -07003468 * already allocated memory. There is no opportunity for
Victor Stinner93d0cb52017-08-18 23:43:54 +02003469 * the allocator to fail, so the condition above cannot be
3470 * fulfilled.
3471 *
3472 * Since it is difficult to be certain that the above
3473 * analysis is complete, we retain the test and merely
3474 * remove the code from coverage tests.
3475 */
3476 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3477 }
Fred Drake08317ae2003-10-21 15:38:55 +00003478 b = id->prefix->binding;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003479 if (! b)
Fred Drake08317ae2003-10-21 15:38:55 +00003480 return XML_ERROR_UNBOUND_PREFIX;
3481
Fred Drake08317ae2003-10-21 15:38:55 +00003482 for (j = 0; j < b->uriLen; j++) {
3483 const XML_Char c = b->uri[j];
Benjamin Peterson52b94082019-09-25 21:33:58 -07003484 if (! poolAppendChar(&parser->m_tempPool, c))
Fred Drake08317ae2003-10-21 15:38:55 +00003485 return XML_ERROR_NO_MEMORY;
Fred Drake08317ae2003-10-21 15:38:55 +00003486 }
Victor Stinner5ff71322017-06-21 14:39:22 +02003487
3488 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3489
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003490 while (*s++ != XML_T(ASCII_COLON))
Fred Drake08317ae2003-10-21 15:38:55 +00003491 ;
Victor Stinner5ff71322017-06-21 14:39:22 +02003492
3493 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3494
Benjamin Peterson52b94082019-09-25 21:33:58 -07003495 do { /* copies null terminator */
3496 if (! poolAppendChar(&parser->m_tempPool, *s))
Fred Drake08317ae2003-10-21 15:38:55 +00003497 return XML_ERROR_NO_MEMORY;
Fred Drake08317ae2003-10-21 15:38:55 +00003498 } while (*s++);
3499
Victor Stinner5ff71322017-06-21 14:39:22 +02003500 uriHash = (unsigned long)sip24_final(&sip_state);
3501
Fred Drake08317ae2003-10-21 15:38:55 +00003502 { /* Check hash table for duplicate of expanded name (uriName).
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003503 Derived from code in lookup(parser, HASH_TABLE *table, ...).
Fred Drake08317ae2003-10-21 15:38:55 +00003504 */
3505 unsigned char step = 0;
3506 unsigned long mask = nsAttsSize - 1;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003507 j = uriHash & mask; /* index into hash table */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003508 while (parser->m_nsAtts[j].version == version) {
Fred Drake08317ae2003-10-21 15:38:55 +00003509 /* for speed we compare stored hash values first */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003510 if (uriHash == parser->m_nsAtts[j].hash) {
3511 const XML_Char *s1 = poolStart(&parser->m_tempPool);
3512 const XML_Char *s2 = parser->m_nsAtts[j].uriName;
Fred Drake08317ae2003-10-21 15:38:55 +00003513 /* s1 is null terminated, but not s2 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07003514 for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
3515 ;
Fred Drake08317ae2003-10-21 15:38:55 +00003516 if (*s1 == 0)
3517 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3518 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07003519 if (! step)
Benjamin Peterson4e211002018-06-26 19:25:45 -07003520 step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003521 j < step ? (j += nsAttsSize - step) : (j -= step);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003522 }
Fred Drake08317ae2003-10-21 15:38:55 +00003523 }
3524
Benjamin Peterson52b94082019-09-25 21:33:58 -07003525 if (parser->m_ns_triplets) { /* append namespace separator and prefix */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003526 parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
Fred Drake08317ae2003-10-21 15:38:55 +00003527 s = b->prefix->name;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003528 do {
Benjamin Peterson52b94082019-09-25 21:33:58 -07003529 if (! poolAppendChar(&parser->m_tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003530 return XML_ERROR_NO_MEMORY;
3531 } while (*s++);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003532 }
Fred Drake08317ae2003-10-21 15:38:55 +00003533
3534 /* store expanded name in attribute list */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003535 s = poolStart(&parser->m_tempPool);
3536 poolFinish(&parser->m_tempPool);
Fred Drake08317ae2003-10-21 15:38:55 +00003537 appAtts[i] = s;
3538
3539 /* fill empty slot with new version, uriName and hash value */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003540 parser->m_nsAtts[j].version = version;
3541 parser->m_nsAtts[j].hash = uriHash;
3542 parser->m_nsAtts[j].uriName = s;
Fred Drake08317ae2003-10-21 15:38:55 +00003543
Benjamin Peterson52b94082019-09-25 21:33:58 -07003544 if (! --nPrefixes) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003545 i += 2;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003546 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003547 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07003548 } else /* not prefixed */
3549 ((XML_Char *)s)[-1] = 0; /* clear flag */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003550 }
3551 }
Fred Drake08317ae2003-10-21 15:38:55 +00003552 /* clear flags for the remaining attributes */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003553 for (; i < attIndex; i += 2)
3554 ((XML_Char *)(appAtts[i]))[-1] = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003555 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3556 binding->attId->name[-1] = 0;
Fred Drake4faea012003-01-28 06:42:40 +00003557
Benjamin Peterson52b94082019-09-25 21:33:58 -07003558 if (! parser->m_ns)
Fred Drake08317ae2003-10-21 15:38:55 +00003559 return XML_ERROR_NONE;
3560
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003561 /* expand the element type name */
3562 if (elementType->prefix) {
3563 binding = elementType->prefix->binding;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003564 if (! binding)
Fred Drake08317ae2003-10-21 15:38:55 +00003565 return XML_ERROR_UNBOUND_PREFIX;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003566 localPart = tagNamePtr->str;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003567 while (*localPart++ != XML_T(ASCII_COLON))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003568 ;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003569 } else if (dtd->defaultPrefix.binding) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003570 binding = dtd->defaultPrefix.binding;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003571 localPart = tagNamePtr->str;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003572 } else
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003573 return XML_ERROR_NONE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003574 prefixLen = 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003575 if (parser->m_ns_triplets && binding->prefix->name) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003576 for (; binding->prefix->name[prefixLen++];)
Benjamin Peterson52b94082019-09-25 21:33:58 -07003577 ; /* prefixLen includes null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003578 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003579 tagNamePtr->localPart = localPart;
3580 tagNamePtr->uriLen = binding->uriLen;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003581 tagNamePtr->prefix = binding->prefix->name;
3582 tagNamePtr->prefixLen = prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003583 for (i = 0; localPart[i++];)
Benjamin Peterson52b94082019-09-25 21:33:58 -07003584 ; /* i includes null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003585 n = i + binding->uriLen + prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003586 if (n > binding->uriAlloc) {
3587 TAG *p;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003588 uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
Benjamin Peterson52b94082019-09-25 21:33:58 -07003589 if (! uri)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003590 return XML_ERROR_NO_MEMORY;
3591 binding->uriAlloc = n + EXPAND_SPARE;
3592 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
Benjamin Peterson4e211002018-06-26 19:25:45 -07003593 for (p = parser->m_tagStack; p; p = p->parent)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003594 if (p->name.str == binding->uri)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003595 p->name.str = uri;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003596 FREE(parser, binding->uri);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003597 binding->uri = uri;
3598 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003599 /* if m_namespaceSeparator != '\0' then uri includes it already */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003600 uri = binding->uri + binding->uriLen;
3601 memcpy(uri, localPart, i * sizeof(XML_Char));
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003602 /* we always have a namespace separator between localPart and prefix */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003603 if (prefixLen) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003604 uri += i - 1;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003605 *uri = parser->m_namespaceSeparator; /* replace null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003606 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3607 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003608 tagNamePtr->str = binding->uri;
3609 return XML_ERROR_NONE;
3610}
3611
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003612/* addBinding() overwrites the value of prefix->binding without checking.
3613 Therefore one must keep track of the old value outside of addBinding().
3614*/
3615static enum XML_Error
3616addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
Benjamin Peterson52b94082019-09-25 21:33:58 -07003617 const XML_Char *uri, BINDING **bindingsPtr) {
3618 static const XML_Char xmlNamespace[]
3619 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
3620 ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
3621 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o,
3622 ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M,
3623 ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9,
3624 ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m,
3625 ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3626 ASCII_e, '\0'};
3627 static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
3628 static const XML_Char xmlnsNamespace[]
3629 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
3630 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
3631 ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH,
3632 ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x,
3633 ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'};
3634 static const int xmlnsLen
3635 = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003636
3637 XML_Bool mustBeXML = XML_FALSE;
3638 XML_Bool isXML = XML_TRUE;
3639 XML_Bool isXMLNS = XML_TRUE;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003640
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003641 BINDING *b;
3642 int len;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003643
Fred Drake31d485c2004-08-03 07:06:22 +00003644 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003645 if (*uri == XML_T('\0') && prefix->name)
Fred Drake31d485c2004-08-03 07:06:22 +00003646 return XML_ERROR_UNDECLARING_PREFIX;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003647
Benjamin Peterson52b94082019-09-25 21:33:58 -07003648 if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003649 && prefix->name[1] == XML_T(ASCII_m)
3650 && prefix->name[2] == XML_T(ASCII_l)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003651 /* Not allowed to bind xmlns */
Benjamin Peterson52b94082019-09-25 21:33:58 -07003652 if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003653 && prefix->name[5] == XML_T('\0'))
3654 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3655
3656 if (prefix->name[3] == XML_T('\0'))
3657 mustBeXML = XML_TRUE;
3658 }
3659
3660 for (len = 0; uri[len]; len++) {
3661 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3662 isXML = XML_FALSE;
3663
Benjamin Peterson52b94082019-09-25 21:33:58 -07003664 if (! mustBeXML && isXMLNS
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003665 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3666 isXMLNS = XML_FALSE;
3667 }
3668 isXML = isXML && len == xmlLen;
3669 isXMLNS = isXMLNS && len == xmlnsLen;
3670
3671 if (mustBeXML != isXML)
3672 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3673 : XML_ERROR_RESERVED_NAMESPACE_URI;
3674
3675 if (isXMLNS)
3676 return XML_ERROR_RESERVED_NAMESPACE_URI;
3677
Benjamin Peterson4e211002018-06-26 19:25:45 -07003678 if (parser->m_namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003679 len++;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003680 if (parser->m_freeBindingList) {
3681 b = parser->m_freeBindingList;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003682 if (len > b->uriAlloc) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07003683 XML_Char *temp = (XML_Char *)REALLOC(
3684 parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003685 if (temp == NULL)
3686 return XML_ERROR_NO_MEMORY;
3687 b->uri = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003688 b->uriAlloc = len + EXPAND_SPARE;
3689 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003690 parser->m_freeBindingList = b->nextTagBinding;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003691 } else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003692 b = (BINDING *)MALLOC(parser, sizeof(BINDING));
Benjamin Peterson52b94082019-09-25 21:33:58 -07003693 if (! b)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003694 return XML_ERROR_NO_MEMORY;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003695 b->uri
3696 = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
3697 if (! b->uri) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003698 FREE(parser, b);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003699 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003700 }
3701 b->uriAlloc = len + EXPAND_SPARE;
3702 }
3703 b->uriLen = len;
3704 memcpy(b->uri, uri, len * sizeof(XML_Char));
Benjamin Peterson4e211002018-06-26 19:25:45 -07003705 if (parser->m_namespaceSeparator)
3706 b->uri[len - 1] = parser->m_namespaceSeparator;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003707 b->prefix = prefix;
3708 b->attId = attId;
3709 b->prevPrefixBinding = prefix->binding;
Fred Drake08317ae2003-10-21 15:38:55 +00003710 /* NULL binding when default namespace undeclared */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003711 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003712 prefix->binding = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003713 else
3714 prefix->binding = b;
3715 b->nextTagBinding = *bindingsPtr;
3716 *bindingsPtr = b;
Fred Drake31d485c2004-08-03 07:06:22 +00003717 /* if attId == NULL then we are not starting a namespace scope */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003718 if (attId && parser->m_startNamespaceDeclHandler)
3719 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
Benjamin Peterson52b94082019-09-25 21:33:58 -07003720 prefix->binding ? uri : 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003721 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003722}
3723
3724/* The idea here is to avoid using stack for each CDATA section when
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003725 the whole file is parsed with one call.
3726*/
3727static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07003728cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
3729 const char **endPtr) {
Miss Islington (bot)27067852021-08-29 07:32:50 -07003730 enum XML_Error result = doCdataSection(
3731 parser, parser->m_encoding, &start, end, endPtr,
3732 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
Fred Drake31d485c2004-08-03 07:06:22 +00003733 if (result != XML_ERROR_NONE)
3734 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003735 if (start) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07003736 if (parser->m_parentParser) { /* we are parsing an external entity */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003737 parser->m_processor = externalEntityContentProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003738 return externalEntityContentProcessor(parser, start, end, endPtr);
Benjamin Peterson52b94082019-09-25 21:33:58 -07003739 } else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003740 parser->m_processor = contentProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003741 return contentProcessor(parser, start, end, endPtr);
3742 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003743 }
3744 return result;
3745}
3746
Fred Drake31d485c2004-08-03 07:06:22 +00003747/* startPtr gets set to non-null if the section is closed, and to null if
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003748 the section is not yet closed.
3749*/
3750static enum XML_Error
Benjamin Peterson52b94082019-09-25 21:33:58 -07003751doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
Miss Islington (bot)27067852021-08-29 07:32:50 -07003752 const char *end, const char **nextPtr, XML_Bool haveMore,
3753 enum XML_Account account) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003754 const char *s = *startPtr;
3755 const char **eventPP;
3756 const char **eventEndPP;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003757 if (enc == parser->m_encoding) {
3758 eventPP = &parser->m_eventPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003759 *eventPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003760 eventEndPP = &parser->m_eventEndPtr;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003761 } else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003762 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3763 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003764 }
3765 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003766 *startPtr = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00003767
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003768 for (;;) {
Miss Islington (bot)27067852021-08-29 07:32:50 -07003769 const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003770 int tok = XmlCdataSectionTok(enc, s, end, &next);
Miss Islington (bot)27067852021-08-29 07:32:50 -07003771#ifdef XML_DTD
3772 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
3773 accountingOnAbort(parser);
3774 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
3775 }
3776#else
3777 UNUSED_P(account);
3778#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003779 *eventEndPP = next;
3780 switch (tok) {
3781 case XML_TOK_CDATA_SECT_CLOSE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07003782 if (parser->m_endCdataSectionHandler)
3783 parser->m_endCdataSectionHandler(parser->m_handlerArg);
Benjamin Peterson52b94082019-09-25 21:33:58 -07003784 /* BEGIN disabled code */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003785 /* see comment under XML_TOK_CDATA_SECT_OPEN */
Benjamin Peterson3b03b092019-06-27 20:54:44 -07003786 else if (0 && parser->m_characterDataHandler)
Benjamin Peterson52b94082019-09-25 21:33:58 -07003787 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3788 0);
3789 /* END disabled code */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003790 else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003791 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003792 *startPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00003793 *nextPtr = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003794 if (parser->m_parsingStatus.parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00003795 return XML_ERROR_ABORTED;
3796 else
3797 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003798 case XML_TOK_DATA_NEWLINE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07003799 if (parser->m_characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003800 XML_Char c = 0xA;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003801 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
Benjamin Peterson52b94082019-09-25 21:33:58 -07003802 } else if (parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003803 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003804 break;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003805 case XML_TOK_DATA_CHARS: {
3806 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3807 if (charDataHandler) {
3808 if (MUST_CONVERT(enc, s)) {
3809 for (;;) {
3810 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3811 const enum XML_Convert_Result convert_res = XmlConvert(
3812 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3813 *eventEndPP = next;
3814 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3815 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3816 if ((convert_res == XML_CONVERT_COMPLETED)
3817 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3818 break;
3819 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003820 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07003821 } else
3822 charDataHandler(parser->m_handlerArg, (XML_Char *)s,
3823 (int)((XML_Char *)next - (XML_Char *)s));
3824 } else if (parser->m_defaultHandler)
3825 reportDefault(parser, enc, s, next);
3826 } break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003827 case XML_TOK_INVALID:
3828 *eventPP = next;
3829 return XML_ERROR_INVALID_TOKEN;
3830 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003831 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003832 *nextPtr = s;
3833 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003834 }
3835 return XML_ERROR_PARTIAL_CHAR;
3836 case XML_TOK_PARTIAL:
3837 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00003838 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003839 *nextPtr = s;
3840 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003841 }
3842 return XML_ERROR_UNCLOSED_CDATA_SECTION;
3843 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02003844 /* Every token returned by XmlCdataSectionTok() has its own
3845 * explicit case, so this default case will never be executed.
3846 * We retain it as a safety net and exclude it from the coverage
3847 * statistics.
3848 *
3849 * LCOV_EXCL_START
Benjamin Peterson52b94082019-09-25 21:33:58 -07003850 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003851 *eventPP = next;
3852 return XML_ERROR_UNEXPECTED_STATE;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003853 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003854 }
Fred Drake31d485c2004-08-03 07:06:22 +00003855
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003856 *eventPP = s = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003857 switch (parser->m_parsingStatus.parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00003858 case XML_SUSPENDED:
3859 *nextPtr = next;
3860 return XML_ERROR_NONE;
3861 case XML_FINISHED:
3862 return XML_ERROR_ABORTED;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003863 default:;
Fred Drake31d485c2004-08-03 07:06:22 +00003864 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003865 }
3866 /* not reached */
3867}
3868
3869#ifdef XML_DTD
3870
3871/* The idea here is to avoid using stack for each IGNORE section when
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003872 the whole file is parsed with one call.
3873*/
3874static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07003875ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
3876 const char **endPtr) {
3877 enum XML_Error result
3878 = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
3879 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00003880 if (result != XML_ERROR_NONE)
3881 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003882 if (start) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07003883 parser->m_processor = prologProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003884 return prologProcessor(parser, start, end, endPtr);
3885 }
3886 return result;
3887}
3888
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003889/* startPtr gets set to non-null is the section is closed, and to null
3890 if the section is not yet closed.
3891*/
3892static enum XML_Error
Benjamin Peterson52b94082019-09-25 21:33:58 -07003893doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
3894 const char *end, const char **nextPtr, XML_Bool haveMore) {
Miss Islington (bot)27067852021-08-29 07:32:50 -07003895 const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003896 int tok;
3897 const char *s = *startPtr;
3898 const char **eventPP;
3899 const char **eventEndPP;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003900 if (enc == parser->m_encoding) {
3901 eventPP = &parser->m_eventPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003902 *eventPP = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003903 eventEndPP = &parser->m_eventEndPtr;
Benjamin Peterson52b94082019-09-25 21:33:58 -07003904 } else {
Victor Stinner93d0cb52017-08-18 23:43:54 +02003905 /* It's not entirely clear, but it seems the following two lines
3906 * of code cannot be executed. The only occasions on which 'enc'
Benjamin Peterson4e211002018-06-26 19:25:45 -07003907 * is not 'encoding' are when this function is called
Victor Stinner93d0cb52017-08-18 23:43:54 +02003908 * from the internal entity processing, and IGNORE sections are an
3909 * error in internal entities.
3910 *
3911 * Since it really isn't clear that this is true, we keep the code
3912 * and just remove it from our coverage tests.
3913 *
3914 * LCOV_EXCL_START
3915 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07003916 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
3917 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Victor Stinner93d0cb52017-08-18 23:43:54 +02003918 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003919 }
3920 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003921 *startPtr = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003922 tok = XmlIgnoreSectionTok(enc, s, end, &next);
Miss Islington (bot)27067852021-08-29 07:32:50 -07003923# ifdef XML_DTD
3924 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
3925 XML_ACCOUNT_DIRECT)) {
3926 accountingOnAbort(parser);
3927 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
3928 }
3929# endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003930 *eventEndPP = next;
3931 switch (tok) {
3932 case XML_TOK_IGNORE_SECT:
Benjamin Peterson4e211002018-06-26 19:25:45 -07003933 if (parser->m_defaultHandler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003934 reportDefault(parser, enc, s, next);
3935 *startPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00003936 *nextPtr = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003937 if (parser->m_parsingStatus.parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00003938 return XML_ERROR_ABORTED;
3939 else
3940 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003941 case XML_TOK_INVALID:
3942 *eventPP = next;
3943 return XML_ERROR_INVALID_TOKEN;
3944 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003945 if (haveMore) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003946 *nextPtr = s;
3947 return XML_ERROR_NONE;
3948 }
3949 return XML_ERROR_PARTIAL_CHAR;
3950 case XML_TOK_PARTIAL:
3951 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00003952 if (haveMore) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003953 *nextPtr = s;
3954 return XML_ERROR_NONE;
3955 }
3956 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
3957 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02003958 /* All of the tokens that XmlIgnoreSectionTok() returns have
3959 * explicit cases to handle them, so this default case is never
3960 * executed. We keep it as a safety net anyway, and remove it
3961 * from our test coverage statistics.
3962 *
3963 * LCOV_EXCL_START
3964 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003965 *eventPP = next;
3966 return XML_ERROR_UNEXPECTED_STATE;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003967 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003968 }
3969 /* not reached */
3970}
3971
3972#endif /* XML_DTD */
3973
3974static enum XML_Error
Benjamin Peterson52b94082019-09-25 21:33:58 -07003975initializeEncoding(XML_Parser parser) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003976 const char *s;
3977#ifdef XML_UNICODE
3978 char encodingBuf[128];
Victor Stinner93d0cb52017-08-18 23:43:54 +02003979 /* See comments abount `protoclEncodingName` in parserInit() */
Benjamin Peterson52b94082019-09-25 21:33:58 -07003980 if (! parser->m_protocolEncodingName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003981 s = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003982 else {
3983 int i;
Benjamin Peterson4e211002018-06-26 19:25:45 -07003984 for (i = 0; parser->m_protocolEncodingName[i]; i++) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003985 if (i == sizeof(encodingBuf) - 1
Benjamin Peterson4e211002018-06-26 19:25:45 -07003986 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003987 encodingBuf[0] = '\0';
3988 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003989 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07003990 encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003991 }
3992 encodingBuf[i] = '\0';
3993 s = encodingBuf;
3994 }
3995#else
Benjamin Peterson4e211002018-06-26 19:25:45 -07003996 s = parser->m_protocolEncodingName;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003997#endif
Benjamin Peterson52b94082019-09-25 21:33:58 -07003998 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
3999 &parser->m_initEncoding, &parser->m_encoding, s))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004000 return XML_ERROR_NONE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004001 return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004002}
4003
4004static enum XML_Error
Benjamin Peterson52b94082019-09-25 21:33:58 -07004005processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
4006 const char *next) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004007 const char *encodingName = NULL;
4008 const XML_Char *storedEncName = NULL;
4009 const ENCODING *newEncoding = NULL;
4010 const char *version = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004011 const char *versionend;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004012 const XML_Char *storedversion = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004013 int standalone = -1;
Miss Islington (bot)27067852021-08-29 07:32:50 -07004014
4015#ifdef XML_DTD
4016 if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
4017 XML_ACCOUNT_DIRECT)) {
4018 accountingOnAbort(parser);
4019 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4020 }
4021#endif
4022
Benjamin Peterson52b94082019-09-25 21:33:58 -07004023 if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
4024 isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
4025 &version, &versionend, &encodingName, &newEncoding, &standalone)) {
Fred Drake31d485c2004-08-03 07:06:22 +00004026 if (isGeneralTextEntity)
4027 return XML_ERROR_TEXT_DECL;
4028 else
4029 return XML_ERROR_XML_DECL;
4030 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07004031 if (! isGeneralTextEntity && standalone == 1) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004032 parser->m_dtd->standalone = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004033#ifdef XML_DTD
Benjamin Peterson52b94082019-09-25 21:33:58 -07004034 if (parser->m_paramEntityParsing
4035 == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
Benjamin Peterson4e211002018-06-26 19:25:45 -07004036 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004037#endif /* XML_DTD */
4038 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004039 if (parser->m_xmlDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004040 if (encodingName != NULL) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004041 storedEncName = poolStoreString(
4042 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4043 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4044 if (! storedEncName)
4045 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004046 poolFinish(&parser->m_temp2Pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004047 }
4048 if (version) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004049 storedversion
4050 = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
4051 versionend - parser->m_encoding->minBytesPerChar);
4052 if (! storedversion)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004053 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004054 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07004055 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
4056 standalone);
4057 } else if (parser->m_defaultHandler)
Benjamin Peterson4e211002018-06-26 19:25:45 -07004058 reportDefault(parser, parser->m_encoding, s, next);
4059 if (parser->m_protocolEncodingName == NULL) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004060 if (newEncoding) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02004061 /* Check that the specified encoding does not conflict with what
4062 * the parser has already deduced. Do we have the same number
4063 * of bytes in the smallest representation of a character? If
4064 * this is UTF-16, is it the same endianness?
4065 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004066 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
Benjamin Peterson52b94082019-09-25 21:33:58 -07004067 || (newEncoding->minBytesPerChar == 2
4068 && newEncoding != parser->m_encoding)) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004069 parser->m_eventPtr = encodingName;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004070 return XML_ERROR_INCORRECT_ENCODING;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004071 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004072 parser->m_encoding = newEncoding;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004073 } else if (encodingName) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004074 enum XML_Error result;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004075 if (! storedEncName) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004076 storedEncName = poolStoreString(
Benjamin Peterson52b94082019-09-25 21:33:58 -07004077 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4078 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4079 if (! storedEncName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004080 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004081 }
4082 result = handleUnknownEncoding(parser, storedEncName);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004083 poolClear(&parser->m_temp2Pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004084 if (result == XML_ERROR_UNKNOWN_ENCODING)
Benjamin Peterson4e211002018-06-26 19:25:45 -07004085 parser->m_eventPtr = encodingName;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004086 return result;
4087 }
4088 }
4089
4090 if (storedEncName || storedversion)
Benjamin Peterson4e211002018-06-26 19:25:45 -07004091 poolClear(&parser->m_temp2Pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004092
4093 return XML_ERROR_NONE;
4094}
4095
4096static enum XML_Error
Benjamin Peterson52b94082019-09-25 21:33:58 -07004097handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004098 if (parser->m_unknownEncodingHandler) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004099 XML_Encoding info;
4100 int i;
4101 for (i = 0; i < 256; i++)
4102 info.map[i] = -1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004103 info.convert = NULL;
4104 info.data = NULL;
4105 info.release = NULL;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004106 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
4107 encodingName, &info)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004108 ENCODING *enc;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004109 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
Benjamin Peterson52b94082019-09-25 21:33:58 -07004110 if (! parser->m_unknownEncodingMem) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004111 if (info.release)
4112 info.release(info.data);
4113 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004114 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07004115 enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
4116 parser->m_unknownEncodingMem, info.map, info.convert, info.data);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004117 if (enc) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004118 parser->m_unknownEncodingData = info.data;
4119 parser->m_unknownEncodingRelease = info.release;
4120 parser->m_encoding = enc;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004121 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004122 }
4123 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004124 if (info.release != NULL)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004125 info.release(info.data);
4126 }
4127 return XML_ERROR_UNKNOWN_ENCODING;
4128}
4129
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004130static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07004131prologInitProcessor(XML_Parser parser, const char *s, const char *end,
4132 const char **nextPtr) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004133 enum XML_Error result = initializeEncoding(parser);
4134 if (result != XML_ERROR_NONE)
4135 return result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004136 parser->m_processor = prologProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004137 return prologProcessor(parser, s, end, nextPtr);
4138}
4139
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004140#ifdef XML_DTD
4141
4142static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07004143externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
4144 const char **nextPtr) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004145 enum XML_Error result = initializeEncoding(parser);
4146 if (result != XML_ERROR_NONE)
4147 return result;
4148
4149 /* we know now that XML_Parse(Buffer) has been called,
4150 so we consider the external parameter entity read */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004151 parser->m_dtd->paramEntityRead = XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004152
Benjamin Peterson4e211002018-06-26 19:25:45 -07004153 if (parser->m_prologState.inEntityValue) {
4154 parser->m_processor = entityValueInitProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004155 return entityValueInitProcessor(parser, s, end, nextPtr);
Benjamin Peterson52b94082019-09-25 21:33:58 -07004156 } else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004157 parser->m_processor = externalParEntProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004158 return externalParEntProcessor(parser, s, end, nextPtr);
4159 }
4160}
4161
4162static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07004163entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
4164 const char **nextPtr) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004165 int tok;
Fred Drake31d485c2004-08-03 07:06:22 +00004166 const char *start = s;
4167 const char *next = start;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004168 parser->m_eventPtr = start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004169
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004170 for (;;) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004171 tok = XmlPrologTok(parser->m_encoding, start, end, &next);
Miss Islington (bot)27067852021-08-29 07:32:50 -07004172 /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
4173 - storeEntityValue
4174 - processXmlDecl
4175 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004176 parser->m_eventEndPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004177 if (tok <= 0) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004178 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
Fred Drake31d485c2004-08-03 07:06:22 +00004179 *nextPtr = s;
4180 return XML_ERROR_NONE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004181 }
4182 switch (tok) {
4183 case XML_TOK_INVALID:
Fred Drake31d485c2004-08-03 07:06:22 +00004184 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004185 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00004186 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004187 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00004188 return XML_ERROR_PARTIAL_CHAR;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004189 case XML_TOK_NONE: /* start == end */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004190 default:
4191 break;
4192 }
Fred Drake31d485c2004-08-03 07:06:22 +00004193 /* found end of entity value - can store it now */
Miss Islington (bot)27067852021-08-29 07:32:50 -07004194 return storeEntityValue(parser, parser->m_encoding, s, end,
4195 XML_ACCOUNT_DIRECT);
Benjamin Peterson52b94082019-09-25 21:33:58 -07004196 } else if (tok == XML_TOK_XML_DECL) {
Fred Drake31d485c2004-08-03 07:06:22 +00004197 enum XML_Error result;
4198 result = processXmlDecl(parser, 0, start, next);
4199 if (result != XML_ERROR_NONE)
4200 return result;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004201 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For
4202 * that to happen, a parameter entity parsing handler must have attempted
4203 * to suspend the parser, which fails and raises an error. The parser can
4204 * be aborted, but can't be suspended.
Victor Stinner93d0cb52017-08-18 23:43:54 +02004205 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004206 if (parser->m_parsingStatus.parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00004207 return XML_ERROR_ABORTED;
Victor Stinner93d0cb52017-08-18 23:43:54 +02004208 *nextPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004209 /* stop scanning for text declaration - we found one */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004210 parser->m_processor = entityValueProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004211 return entityValueProcessor(parser, next, end, nextPtr);
4212 }
4213 /* If we are at the end of the buffer, this would cause XmlPrologTok to
4214 return XML_TOK_NONE on the next call, which would then cause the
4215 function to exit with *nextPtr set to s - that is what we want for other
4216 tokens, but not for the BOM - we would rather like to skip it;
4217 then, when this routine is entered the next time, XmlPrologTok will
4218 return XML_TOK_INVALID, since the BOM is still in the buffer
4219 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07004220 else if (tok == XML_TOK_BOM && next == end
4221 && ! parser->m_parsingStatus.finalBuffer) {
Miss Islington (bot)27067852021-08-29 07:32:50 -07004222# ifdef XML_DTD
4223 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4224 XML_ACCOUNT_DIRECT)) {
4225 accountingOnAbort(parser);
4226 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4227 }
4228# endif
4229
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004230 *nextPtr = next;
4231 return XML_ERROR_NONE;
4232 }
Victor Stinner5ff71322017-06-21 14:39:22 +02004233 /* If we get this token, we have the start of what might be a
4234 normal tag, but not a declaration (i.e. it doesn't begin with
4235 "<!"). In a DTD context, that isn't legal.
4236 */
4237 else if (tok == XML_TOK_INSTANCE_START) {
4238 *nextPtr = next;
4239 return XML_ERROR_SYNTAX;
4240 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004241 start = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004242 parser->m_eventPtr = start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004243 }
4244}
4245
4246static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07004247externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
4248 const char **nextPtr) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004249 const char *next = s;
4250 int tok;
4251
Benjamin Peterson4e211002018-06-26 19:25:45 -07004252 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004253 if (tok <= 0) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004254 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004255 *nextPtr = s;
4256 return XML_ERROR_NONE;
4257 }
4258 switch (tok) {
4259 case XML_TOK_INVALID:
4260 return XML_ERROR_INVALID_TOKEN;
4261 case XML_TOK_PARTIAL:
4262 return XML_ERROR_UNCLOSED_TOKEN;
4263 case XML_TOK_PARTIAL_CHAR:
4264 return XML_ERROR_PARTIAL_CHAR;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004265 case XML_TOK_NONE: /* start == end */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004266 default:
4267 break;
4268 }
4269 }
4270 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4271 However, when parsing an external subset, doProlog will not accept a BOM
Miss Islington (bot)27067852021-08-29 07:32:50 -07004272 as valid, and report a syntax error, so we have to skip the BOM, and
4273 account for the BOM bytes.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004274 */
4275 else if (tok == XML_TOK_BOM) {
Miss Islington (bot)27067852021-08-29 07:32:50 -07004276 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4277 XML_ACCOUNT_DIRECT)) {
4278 accountingOnAbort(parser);
4279 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4280 }
4281
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004282 s = next;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004283 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004284 }
4285
Benjamin Peterson4e211002018-06-26 19:25:45 -07004286 parser->m_processor = prologProcessor;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004287 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
Miss Islington (bot)27067852021-08-29 07:32:50 -07004288 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4289 XML_ACCOUNT_DIRECT);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004290}
4291
4292static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07004293entityValueProcessor(XML_Parser parser, const char *s, const char *end,
4294 const char **nextPtr) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004295 const char *start = s;
4296 const char *next = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004297 const ENCODING *enc = parser->m_encoding;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004298 int tok;
4299
4300 for (;;) {
4301 tok = XmlPrologTok(enc, start, end, &next);
Miss Islington (bot)27067852021-08-29 07:32:50 -07004302 /* Note: These bytes are accounted later in:
4303 - storeEntityValue
4304 */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004305 if (tok <= 0) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004306 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004307 *nextPtr = s;
4308 return XML_ERROR_NONE;
4309 }
4310 switch (tok) {
4311 case XML_TOK_INVALID:
Fred Drake31d485c2004-08-03 07:06:22 +00004312 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004313 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00004314 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004315 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00004316 return XML_ERROR_PARTIAL_CHAR;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004317 case XML_TOK_NONE: /* start == end */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004318 default:
4319 break;
4320 }
Fred Drake31d485c2004-08-03 07:06:22 +00004321 /* found end of entity value - can store it now */
Miss Islington (bot)27067852021-08-29 07:32:50 -07004322 return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004323 }
4324 start = next;
4325 }
4326}
4327
4328#endif /* XML_DTD */
4329
4330static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07004331prologProcessor(XML_Parser parser, const char *s, const char *end,
4332 const char **nextPtr) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004333 const char *next = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004334 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
Benjamin Peterson52b94082019-09-25 21:33:58 -07004335 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
Miss Islington (bot)27067852021-08-29 07:32:50 -07004336 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4337 XML_ACCOUNT_DIRECT);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004338}
4339
4340static enum XML_Error
Benjamin Peterson52b94082019-09-25 21:33:58 -07004341doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
4342 int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
Miss Islington (bot)27067852021-08-29 07:32:50 -07004343 XML_Bool allowClosingDoctype, enum XML_Account account) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004344#ifdef XML_DTD
Benjamin Peterson52b94082019-09-25 21:33:58 -07004345 static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004346#endif /* XML_DTD */
Benjamin Peterson52b94082019-09-25 21:33:58 -07004347 static const XML_Char atypeCDATA[]
4348 = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
4349 static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
4350 static const XML_Char atypeIDREF[]
4351 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
4352 static const XML_Char atypeIDREFS[]
4353 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
4354 static const XML_Char atypeENTITY[]
4355 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
4356 static const XML_Char atypeENTITIES[]
4357 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
4358 ASCII_I, ASCII_E, ASCII_S, '\0'};
4359 static const XML_Char atypeNMTOKEN[]
4360 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
4361 static const XML_Char atypeNMTOKENS[]
4362 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
4363 ASCII_E, ASCII_N, ASCII_S, '\0'};
4364 static const XML_Char notationPrefix[]
4365 = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
4366 ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
4367 static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
4368 static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004369
Miss Islington (bot)27067852021-08-29 07:32:50 -07004370#ifndef XML_DTD
4371 UNUSED_P(account);
4372#endif
4373
Fred Drake31d485c2004-08-03 07:06:22 +00004374 /* save one level of indirection */
Benjamin Peterson52b94082019-09-25 21:33:58 -07004375 DTD *const dtd = parser->m_dtd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004376
4377 const char **eventPP;
4378 const char **eventEndPP;
4379 enum XML_Content_Quant quant;
4380
Benjamin Peterson4e211002018-06-26 19:25:45 -07004381 if (enc == parser->m_encoding) {
4382 eventPP = &parser->m_eventPtr;
4383 eventEndPP = &parser->m_eventEndPtr;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004384 } else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07004385 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4386 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004387 }
Fred Drake31d485c2004-08-03 07:06:22 +00004388
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004389 for (;;) {
4390 int role;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004391 XML_Bool handleDefault = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004392 *eventPP = s;
4393 *eventEndPP = next;
4394 if (tok <= 0) {
Fred Drake31d485c2004-08-03 07:06:22 +00004395 if (haveMore && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004396 *nextPtr = s;
4397 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004398 }
4399 switch (tok) {
4400 case XML_TOK_INVALID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004401 *eventPP = next;
4402 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004403 case XML_TOK_PARTIAL:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004404 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004405 case XML_TOK_PARTIAL_CHAR:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004406 return XML_ERROR_PARTIAL_CHAR;
Matthias Klose865e33b2010-01-22 01:13:15 +00004407 case -XML_TOK_PROLOG_S:
4408 tok = -tok;
4409 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004410 case XML_TOK_NONE:
4411#ifdef XML_DTD
Fred Drake31d485c2004-08-03 07:06:22 +00004412 /* for internal PE NOT referenced between declarations */
Benjamin Peterson52b94082019-09-25 21:33:58 -07004413 if (enc != parser->m_encoding
4414 && ! parser->m_openInternalEntities->betweenDecl) {
Fred Drake31d485c2004-08-03 07:06:22 +00004415 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004416 return XML_ERROR_NONE;
Fred Drake31d485c2004-08-03 07:06:22 +00004417 }
4418 /* WFC: PE Between Declarations - must check that PE contains
4419 complete markup, not only for external PEs, but also for
4420 internal PEs if the reference occurs between declarations.
4421 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004422 if (parser->m_isParamEntity || enc != parser->m_encoding) {
4423 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004424 == XML_ROLE_ERROR)
Fred Drake31d485c2004-08-03 07:06:22 +00004425 return XML_ERROR_INCOMPLETE_PE;
4426 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004427 return XML_ERROR_NONE;
4428 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004429#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004430 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004431 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004432 tok = -tok;
4433 next = end;
4434 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004435 }
4436 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004437 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
Miss Islington (bot)27067852021-08-29 07:32:50 -07004438#ifdef XML_DTD
4439 switch (role) {
4440 case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
4441 case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl
4442 case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
4443 break;
4444 default:
4445 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4446 accountingOnAbort(parser);
4447 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4448 }
4449 }
4450#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004451 switch (role) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004452 case XML_ROLE_XML_DECL: {
4453 enum XML_Error result = processXmlDecl(parser, 0, s, next);
4454 if (result != XML_ERROR_NONE)
4455 return result;
4456 enc = parser->m_encoding;
4457 handleDefault = XML_FALSE;
4458 } break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004459 case XML_ROLE_DOCTYPE_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004460 if (parser->m_startDoctypeDeclHandler) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004461 parser->m_doctypeName
4462 = poolStoreString(&parser->m_tempPool, enc, s, next);
4463 if (! parser->m_doctypeName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004464 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004465 poolFinish(&parser->m_tempPool);
4466 parser->m_doctypePubid = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004467 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004468 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004469 parser->m_doctypeSysid = NULL; /* always initialize to NULL */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004470 break;
4471 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004472 if (parser->m_startDoctypeDeclHandler) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004473 parser->m_startDoctypeDeclHandler(
4474 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4475 parser->m_doctypePubid, 1);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004476 parser->m_doctypeName = NULL;
4477 poolClear(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004478 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004479 }
4480 break;
4481#ifdef XML_DTD
Benjamin Peterson52b94082019-09-25 21:33:58 -07004482 case XML_ROLE_TEXT_DECL: {
4483 enum XML_Error result = processXmlDecl(parser, 1, s, next);
4484 if (result != XML_ERROR_NONE)
4485 return result;
4486 enc = parser->m_encoding;
4487 handleDefault = XML_FALSE;
4488 } break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004489#endif /* XML_DTD */
4490 case XML_ROLE_DOCTYPE_PUBLIC_ID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004491#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07004492 parser->m_useForeignDTD = XML_FALSE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004493 parser->m_declEntity = (ENTITY *)lookup(
4494 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4495 if (! parser->m_declEntity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004496 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004497#endif /* XML_DTD */
Fred Drake31d485c2004-08-03 07:06:22 +00004498 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004499 if (parser->m_startDoctypeDeclHandler) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004500 XML_Char *pubId;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004501 if (! XmlIsPublicId(enc, s, next, eventPP))
Fred Drake31d485c2004-08-03 07:06:22 +00004502 return XML_ERROR_PUBLICID;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004503 pubId = poolStoreString(&parser->m_tempPool, enc,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004504 s + enc->minBytesPerChar,
4505 next - enc->minBytesPerChar);
Benjamin Peterson52b94082019-09-25 21:33:58 -07004506 if (! pubId)
Fred Drake31d485c2004-08-03 07:06:22 +00004507 return XML_ERROR_NO_MEMORY;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004508 normalizePublicId(pubId);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004509 poolFinish(&parser->m_tempPool);
4510 parser->m_doctypePubid = pubId;
Fred Drake31d485c2004-08-03 07:06:22 +00004511 handleDefault = XML_FALSE;
4512 goto alreadyChecked;
4513 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004514 /* fall through */
4515 case XML_ROLE_ENTITY_PUBLIC_ID:
Benjamin Peterson52b94082019-09-25 21:33:58 -07004516 if (! XmlIsPublicId(enc, s, next, eventPP))
Fred Drake31d485c2004-08-03 07:06:22 +00004517 return XML_ERROR_PUBLICID;
4518 alreadyChecked:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004519 if (dtd->keepProcessing && parser->m_declEntity) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004520 XML_Char *tem
4521 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4522 next - enc->minBytesPerChar);
4523 if (! tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004524 return XML_ERROR_NO_MEMORY;
4525 normalizePublicId(tem);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004526 parser->m_declEntity->publicId = tem;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004527 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004528 /* Don't suppress the default handler if we fell through from
4529 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
4530 */
4531 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004532 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004533 }
4534 break;
4535 case XML_ROLE_DOCTYPE_CLOSE:
Benjamin Peterson52b94082019-09-25 21:33:58 -07004536 if (allowClosingDoctype != XML_TRUE) {
4537 /* Must not close doctype from within expanded parameter entities */
4538 return XML_ERROR_INVALID_TOKEN;
4539 }
4540
Benjamin Peterson4e211002018-06-26 19:25:45 -07004541 if (parser->m_doctypeName) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004542 parser->m_startDoctypeDeclHandler(
4543 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4544 parser->m_doctypePubid, 0);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004545 poolClear(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004546 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004547 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004548 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
4549 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004550 was not set, indicating an external subset
4551 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004552#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07004553 if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004554 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4555 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004556 if (parser->m_paramEntityParsing
4557 && parser->m_externalEntityRefHandler) {
4558 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4559 externalSubsetName, sizeof(ENTITY));
4560 if (! entity) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02004561 /* The external subset name "#" will have already been
4562 * inserted into the hash table at the start of the
4563 * external entity parsing, so no allocation will happen
4564 * and lookup() cannot fail.
4565 */
4566 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4567 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004568 if (parser->m_useForeignDTD)
4569 entity->base = parser->m_curBase;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004570 dtd->paramEntityRead = XML_FALSE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004571 if (! parser->m_externalEntityRefHandler(
4572 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4573 entity->systemId, entity->publicId))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004574 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004575 if (dtd->paramEntityRead) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004576 if (! dtd->standalone && parser->m_notStandaloneHandler
4577 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004578 return XML_ERROR_NOT_STANDALONE;
4579 }
4580 /* if we didn't read the foreign DTD then this means that there
4581 is no external subset and we must reset dtd->hasParamEntityRefs
4582 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07004583 else if (! parser->m_doctypeSysid)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004584 dtd->hasParamEntityRefs = hadParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004585 /* end of DTD - no need to update dtd->keepProcessing */
4586 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004587 parser->m_useForeignDTD = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004588 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004589#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004590 if (parser->m_endDoctypeDeclHandler) {
4591 parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004592 handleDefault = XML_FALSE;
4593 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004594 break;
4595 case XML_ROLE_INSTANCE_START:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004596#ifdef XML_DTD
4597 /* if there is no DOCTYPE declaration then now is the
4598 last chance to read the foreign DTD
4599 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004600 if (parser->m_useForeignDTD) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004601 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004602 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004603 if (parser->m_paramEntityParsing
4604 && parser->m_externalEntityRefHandler) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004605 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
Benjamin Peterson52b94082019-09-25 21:33:58 -07004606 externalSubsetName, sizeof(ENTITY));
4607 if (! entity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004608 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004609 entity->base = parser->m_curBase;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004610 dtd->paramEntityRead = XML_FALSE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004611 if (! parser->m_externalEntityRefHandler(
4612 parser->m_externalEntityRefHandlerArg, 0, entity->base,
4613 entity->systemId, entity->publicId))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004614 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004615 if (dtd->paramEntityRead) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004616 if (! dtd->standalone && parser->m_notStandaloneHandler
4617 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004618 return XML_ERROR_NOT_STANDALONE;
4619 }
4620 /* if we didn't read the foreign DTD then this means that there
4621 is no external subset and we must reset dtd->hasParamEntityRefs
4622 */
4623 else
4624 dtd->hasParamEntityRefs = hadParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004625 /* end of DTD - no need to update dtd->keepProcessing */
4626 }
4627 }
4628#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004629 parser->m_processor = contentProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004630 return contentProcessor(parser, s, end, nextPtr);
4631 case XML_ROLE_ATTLIST_ELEMENT_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004632 parser->m_declElementType = getElementType(parser, enc, s, next);
Benjamin Peterson52b94082019-09-25 21:33:58 -07004633 if (! parser->m_declElementType)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004634 return XML_ERROR_NO_MEMORY;
4635 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004636 case XML_ROLE_ATTRIBUTE_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004637 parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
Benjamin Peterson52b94082019-09-25 21:33:58 -07004638 if (! parser->m_declAttributeId)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004639 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004640 parser->m_declAttributeIsCdata = XML_FALSE;
4641 parser->m_declAttributeType = NULL;
4642 parser->m_declAttributeIsId = XML_FALSE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004643 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004644 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004645 parser->m_declAttributeIsCdata = XML_TRUE;
4646 parser->m_declAttributeType = atypeCDATA;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004647 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004648 case XML_ROLE_ATTRIBUTE_TYPE_ID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004649 parser->m_declAttributeIsId = XML_TRUE;
4650 parser->m_declAttributeType = atypeID;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004651 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004652 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004653 parser->m_declAttributeType = atypeIDREF;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004654 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004655 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004656 parser->m_declAttributeType = atypeIDREFS;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004657 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004658 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004659 parser->m_declAttributeType = atypeENTITY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004660 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004661 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004662 parser->m_declAttributeType = atypeENTITIES;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004663 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004664 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004665 parser->m_declAttributeType = atypeNMTOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004666 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004667 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004668 parser->m_declAttributeType = atypeNMTOKENS;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004669 checkAttListDeclHandler:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004670 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004671 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004672 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004673 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
4674 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004675 if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004676 const XML_Char *prefix;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004677 if (parser->m_declAttributeType) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004678 prefix = enumValueSep;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004679 } else {
4680 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
4681 : enumValueStart);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004682 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07004683 if (! poolAppendString(&parser->m_tempPool, prefix))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004684 return XML_ERROR_NO_MEMORY;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004685 if (! poolAppend(&parser->m_tempPool, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004686 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004687 parser->m_declAttributeType = parser->m_tempPool.start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004688 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004689 }
4690 break;
4691 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
4692 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004693 if (dtd->keepProcessing) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004694 if (! defineAttribute(parser->m_declElementType,
4695 parser->m_declAttributeId,
4696 parser->m_declAttributeIsCdata,
4697 parser->m_declAttributeIsId, 0, parser))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004698 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004699 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4700 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4701 || (*parser->m_declAttributeType == XML_T(ASCII_N)
4702 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004703 /* Enumerated or Notation type */
Benjamin Peterson52b94082019-09-25 21:33:58 -07004704 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4705 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004706 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004707 parser->m_declAttributeType = parser->m_tempPool.start;
4708 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004709 }
4710 *eventEndPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004711 parser->m_attlistDeclHandler(
4712 parser->m_handlerArg, parser->m_declElementType->name,
4713 parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
4714 role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004715 poolClear(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004716 handleDefault = XML_FALSE;
4717 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004718 }
4719 break;
4720 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
4721 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004722 if (dtd->keepProcessing) {
4723 const XML_Char *attVal;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004724 enum XML_Error result = storeAttributeValue(
4725 parser, enc, parser->m_declAttributeIsCdata,
Miss Islington (bot)27067852021-08-29 07:32:50 -07004726 s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
4727 XML_ACCOUNT_NONE);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004728 if (result)
4729 return result;
4730 attVal = poolStart(&dtd->pool);
4731 poolFinish(&dtd->pool);
4732 /* ID attributes aren't allowed to have a default */
Benjamin Peterson52b94082019-09-25 21:33:58 -07004733 if (! defineAttribute(
4734 parser->m_declElementType, parser->m_declAttributeId,
4735 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004736 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004737 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
4738 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
4739 || (*parser->m_declAttributeType == XML_T(ASCII_N)
4740 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004741 /* Enumerated or Notation type */
Benjamin Peterson52b94082019-09-25 21:33:58 -07004742 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
4743 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004744 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004745 parser->m_declAttributeType = parser->m_tempPool.start;
4746 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004747 }
4748 *eventEndPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004749 parser->m_attlistDeclHandler(
4750 parser->m_handlerArg, parser->m_declElementType->name,
4751 parser->m_declAttributeId->name, parser->m_declAttributeType,
4752 attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004753 poolClear(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004754 handleDefault = XML_FALSE;
4755 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004756 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004757 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004758 case XML_ROLE_ENTITY_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004759 if (dtd->keepProcessing) {
Miss Islington (bot)27067852021-08-29 07:32:50 -07004760 enum XML_Error result
4761 = storeEntityValue(parser, enc, s + enc->minBytesPerChar,
4762 next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004763 if (parser->m_declEntity) {
4764 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
Benjamin Peterson52b94082019-09-25 21:33:58 -07004765 parser->m_declEntity->textLen
4766 = (int)(poolLength(&dtd->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004767 poolFinish(&dtd->entityValuePool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004768 if (parser->m_entityDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004769 *eventEndPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004770 parser->m_entityDeclHandler(
4771 parser->m_handlerArg, parser->m_declEntity->name,
4772 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
4773 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004774 handleDefault = XML_FALSE;
4775 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07004776 } else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004777 poolDiscard(&dtd->entityValuePool);
4778 if (result != XML_ERROR_NONE)
4779 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004780 }
4781 break;
4782 case XML_ROLE_DOCTYPE_SYSTEM_ID:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004783#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07004784 parser->m_useForeignDTD = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004785#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004786 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004787 if (parser->m_startDoctypeDeclHandler) {
4788 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
Benjamin Peterson52b94082019-09-25 21:33:58 -07004789 s + enc->minBytesPerChar,
4790 next - enc->minBytesPerChar);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004791 if (parser->m_doctypeSysid == NULL)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004792 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004793 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004794 handleDefault = XML_FALSE;
4795 }
4796#ifdef XML_DTD
4797 else
Benjamin Peterson4e211002018-06-26 19:25:45 -07004798 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
4799 for the case where no parser->m_startDoctypeDeclHandler is set */
4800 parser->m_doctypeSysid = externalSubsetName;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004801#endif /* XML_DTD */
Benjamin Peterson52b94082019-09-25 21:33:58 -07004802 if (! dtd->standalone
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004803#ifdef XML_DTD
Benjamin Peterson52b94082019-09-25 21:33:58 -07004804 && ! parser->m_paramEntityParsing
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004805#endif /* XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004806 && parser->m_notStandaloneHandler
Benjamin Peterson52b94082019-09-25 21:33:58 -07004807 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004808 return XML_ERROR_NOT_STANDALONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004809#ifndef XML_DTD
4810 break;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004811#else /* XML_DTD */
4812 if (! parser->m_declEntity) {
4813 parser->m_declEntity = (ENTITY *)lookup(
4814 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4815 if (! parser->m_declEntity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004816 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004817 parser->m_declEntity->publicId = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004818 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004819#endif /* XML_DTD */
Benjamin Peterson5033aa72018-09-10 21:04:00 -07004820 /* fall through */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004821 case XML_ROLE_ENTITY_SYSTEM_ID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004822 if (dtd->keepProcessing && parser->m_declEntity) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004823 parser->m_declEntity->systemId
4824 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
4825 next - enc->minBytesPerChar);
4826 if (! parser->m_declEntity->systemId)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004827 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004828 parser->m_declEntity->base = parser->m_curBase;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004829 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004830 /* Don't suppress the default handler if we fell through from
4831 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
4832 */
4833 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004834 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004835 }
4836 break;
4837 case XML_ROLE_ENTITY_COMPLETE:
Benjamin Peterson52b94082019-09-25 21:33:58 -07004838 if (dtd->keepProcessing && parser->m_declEntity
4839 && parser->m_entityDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004840 *eventEndPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004841 parser->m_entityDeclHandler(
4842 parser->m_handlerArg, parser->m_declEntity->name,
4843 parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
4844 parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004845 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004846 }
4847 break;
4848 case XML_ROLE_ENTITY_NOTATION_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004849 if (dtd->keepProcessing && parser->m_declEntity) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004850 parser->m_declEntity->notation
4851 = poolStoreString(&dtd->pool, enc, s, next);
4852 if (! parser->m_declEntity->notation)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004853 return XML_ERROR_NO_MEMORY;
4854 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004855 if (parser->m_unparsedEntityDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004856 *eventEndPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004857 parser->m_unparsedEntityDeclHandler(
4858 parser->m_handlerArg, parser->m_declEntity->name,
4859 parser->m_declEntity->base, parser->m_declEntity->systemId,
4860 parser->m_declEntity->publicId, parser->m_declEntity->notation);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004861 handleDefault = XML_FALSE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004862 } else if (parser->m_entityDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004863 *eventEndPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004864 parser->m_entityDeclHandler(
4865 parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
4866 parser->m_declEntity->base, parser->m_declEntity->systemId,
4867 parser->m_declEntity->publicId, parser->m_declEntity->notation);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004868 handleDefault = XML_FALSE;
4869 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004870 }
4871 break;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004872 case XML_ROLE_GENERAL_ENTITY_NAME: {
4873 if (XmlPredefinedEntityName(enc, s, next)) {
4874 parser->m_declEntity = NULL;
4875 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004876 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004877 if (dtd->keepProcessing) {
4878 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
Benjamin Peterson52b94082019-09-25 21:33:58 -07004879 if (! name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004880 return XML_ERROR_NO_MEMORY;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004881 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
4882 name, sizeof(ENTITY));
4883 if (! parser->m_declEntity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004884 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004885 if (parser->m_declEntity->name != name) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004886 poolDiscard(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004887 parser->m_declEntity = NULL;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004888 } else {
4889 poolFinish(&dtd->pool);
4890 parser->m_declEntity->publicId = NULL;
4891 parser->m_declEntity->is_param = XML_FALSE;
4892 /* if we have a parent parser or are reading an internal parameter
4893 entity, then the entity declaration is not considered "internal"
4894 */
4895 parser->m_declEntity->is_internal
4896 = ! (parser->m_parentParser || parser->m_openInternalEntities);
4897 if (parser->m_entityDeclHandler)
4898 handleDefault = XML_FALSE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004899 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07004900 } else {
4901 poolDiscard(&dtd->pool);
4902 parser->m_declEntity = NULL;
4903 }
4904 } break;
4905 case XML_ROLE_PARAM_ENTITY_NAME:
4906#ifdef XML_DTD
4907 if (dtd->keepProcessing) {
4908 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4909 if (! name)
4910 return XML_ERROR_NO_MEMORY;
4911 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
4912 name, sizeof(ENTITY));
4913 if (! parser->m_declEntity)
4914 return XML_ERROR_NO_MEMORY;
4915 if (parser->m_declEntity->name != name) {
4916 poolDiscard(&dtd->pool);
4917 parser->m_declEntity = NULL;
4918 } else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004919 poolFinish(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004920 parser->m_declEntity->publicId = NULL;
4921 parser->m_declEntity->is_param = XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004922 /* if we have a parent parser or are reading an internal parameter
4923 entity, then the entity declaration is not considered "internal"
4924 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07004925 parser->m_declEntity->is_internal
4926 = ! (parser->m_parentParser || parser->m_openInternalEntities);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004927 if (parser->m_entityDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004928 handleDefault = XML_FALSE;
4929 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07004930 } else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004931 poolDiscard(&dtd->pool);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004932 parser->m_declEntity = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004933 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07004934#else /* not XML_DTD */
Benjamin Peterson4e211002018-06-26 19:25:45 -07004935 parser->m_declEntity = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004936#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004937 break;
4938 case XML_ROLE_NOTATION_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004939 parser->m_declNotationPublicId = NULL;
4940 parser->m_declNotationName = NULL;
4941 if (parser->m_notationDeclHandler) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004942 parser->m_declNotationName
4943 = poolStoreString(&parser->m_tempPool, enc, s, next);
4944 if (! parser->m_declNotationName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004945 return XML_ERROR_NO_MEMORY;
Benjamin Peterson4e211002018-06-26 19:25:45 -07004946 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004947 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004948 }
4949 break;
4950 case XML_ROLE_NOTATION_PUBLIC_ID:
Benjamin Peterson52b94082019-09-25 21:33:58 -07004951 if (! XmlIsPublicId(enc, s, next, eventPP))
Fred Drake31d485c2004-08-03 07:06:22 +00004952 return XML_ERROR_PUBLICID;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004953 if (parser
4954 ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
4955 XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004956 s + enc->minBytesPerChar,
4957 next - enc->minBytesPerChar);
Benjamin Peterson52b94082019-09-25 21:33:58 -07004958 if (! tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004959 return XML_ERROR_NO_MEMORY;
4960 normalizePublicId(tem);
Benjamin Peterson4e211002018-06-26 19:25:45 -07004961 parser->m_declNotationPublicId = tem;
4962 poolFinish(&parser->m_tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004963 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004964 }
4965 break;
4966 case XML_ROLE_NOTATION_SYSTEM_ID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004967 if (parser->m_declNotationName && parser->m_notationDeclHandler) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07004968 const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
4969 s + enc->minBytesPerChar,
4970 next - enc->minBytesPerChar);
4971 if (! systemId)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004972 return XML_ERROR_NO_MEMORY;
4973 *eventEndPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004974 parser->m_notationDeclHandler(
4975 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
4976 systemId, parser->m_declNotationPublicId);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004977 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004978 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004979 poolClear(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004980 break;
4981 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07004982 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004983 *eventEndPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07004984 parser->m_notationDeclHandler(
4985 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
4986 0, parser->m_declNotationPublicId);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004987 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004988 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07004989 poolClear(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004990 break;
4991 case XML_ROLE_ERROR:
4992 switch (tok) {
4993 case XML_TOK_PARAM_ENTITY_REF:
Fred Drake31d485c2004-08-03 07:06:22 +00004994 /* PE references in internal subset are
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004995 not allowed within declarations. */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004996 return XML_ERROR_PARAM_ENTITY_REF;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004997 case XML_TOK_XML_DECL:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004998 return XML_ERROR_MISPLACED_XML_PI;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004999 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005000 return XML_ERROR_SYNTAX;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005001 }
5002#ifdef XML_DTD
Benjamin Peterson52b94082019-09-25 21:33:58 -07005003 case XML_ROLE_IGNORE_SECT: {
5004 enum XML_Error result;
5005 if (parser->m_defaultHandler)
5006 reportDefault(parser, enc, s, next);
5007 handleDefault = XML_FALSE;
5008 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5009 if (result != XML_ERROR_NONE)
5010 return result;
5011 else if (! next) {
5012 parser->m_processor = ignoreSectionProcessor;
5013 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005014 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07005015 } break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005016#endif /* XML_DTD */
5017 case XML_ROLE_GROUP_OPEN:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005018 if (parser->m_prologState.level >= parser->m_groupSize) {
5019 if (parser->m_groupSize) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07005020 {
5021 char *const new_connector = (char *)REALLOC(
5022 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
5023 if (new_connector == NULL) {
5024 parser->m_groupSize /= 2;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005025 return XML_ERROR_NO_MEMORY;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005026 }
5027 parser->m_groupConnector = new_connector;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005028 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07005029
5030 if (dtd->scaffIndex) {
5031 int *const new_scaff_index = (int *)REALLOC(
5032 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
5033 if (new_scaff_index == NULL)
5034 return XML_ERROR_NO_MEMORY;
5035 dtd->scaffIndex = new_scaff_index;
5036 }
5037 } else {
5038 parser->m_groupConnector
5039 = (char *)MALLOC(parser, parser->m_groupSize = 32);
5040 if (! parser->m_groupConnector) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005041 parser->m_groupSize = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005042 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005043 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005044 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005045 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07005046 parser->m_groupConnector[parser->m_prologState.level] = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005047 if (dtd->in_eldecl) {
5048 int myindex = nextScaffoldPart(parser);
5049 if (myindex < 0)
5050 return XML_ERROR_NO_MEMORY;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005051 assert(dtd->scaffIndex != NULL);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005052 dtd->scaffIndex[dtd->scaffLevel] = myindex;
5053 dtd->scaffLevel++;
5054 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005055 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005056 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005057 }
5058 break;
5059 case XML_ROLE_GROUP_SEQUENCE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005060 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005061 return XML_ERROR_SYNTAX;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005062 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5063 if (dtd->in_eldecl && parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005064 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005065 break;
5066 case XML_ROLE_GROUP_CHOICE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005067 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005068 return XML_ERROR_SYNTAX;
5069 if (dtd->in_eldecl
Benjamin Peterson52b94082019-09-25 21:33:58 -07005070 && ! parser->m_groupConnector[parser->m_prologState.level]
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005071 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
Benjamin Peterson52b94082019-09-25 21:33:58 -07005072 != XML_CTYPE_MIXED)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005073 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5074 = XML_CTYPE_CHOICE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005075 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005076 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005077 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07005078 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005079 break;
5080 case XML_ROLE_PARAM_ENTITY_REF:
5081#ifdef XML_DTD
5082 case XML_ROLE_INNER_PARAM_ENTITY_REF:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005083 dtd->hasParamEntityRefs = XML_TRUE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005084 if (! parser->m_paramEntityParsing)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005085 dtd->keepProcessing = dtd->standalone;
5086 else {
5087 const XML_Char *name;
5088 ENTITY *entity;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005089 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5090 next - enc->minBytesPerChar);
5091 if (! name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005092 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005093 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005094 poolDiscard(&dtd->pool);
5095 /* first, determine if a check for an existing declaration is needed;
5096 if yes, check that the entity exists, and that it is internal,
5097 otherwise call the skipped entity handler
5098 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07005099 if (parser->m_prologState.documentEntity
5100 && (dtd->standalone ? ! parser->m_openInternalEntities
5101 : ! dtd->hasParamEntityRefs)) {
5102 if (! entity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005103 return XML_ERROR_UNDEFINED_ENTITY;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005104 else if (! entity->is_internal) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02005105 /* It's hard to exhaustively search the code to be sure,
5106 * but there doesn't seem to be a way of executing the
5107 * following line. There are two cases:
5108 *
5109 * If 'standalone' is false, the DTD must have no
5110 * parameter entities or we wouldn't have passed the outer
5111 * 'if' statement. That means the only entity in the hash
5112 * table is the external subset name "#" which cannot be
5113 * given as a parameter entity name in XML syntax, so the
5114 * lookup must have returned NULL and we don't even reach
5115 * the test for an internal entity.
5116 *
5117 * If 'standalone' is true, it does not seem to be
5118 * possible to create entities taking this code path that
5119 * are not internal entities, so fail the test above.
5120 *
5121 * Because this analysis is very uncertain, the code is
5122 * being left in place and merely removed from the
5123 * coverage test statistics.
5124 */
5125 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5126 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07005127 } else if (! entity) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005128 dtd->keepProcessing = dtd->standalone;
5129 /* cannot report skipped entities in declarations */
Benjamin Peterson52b94082019-09-25 21:33:58 -07005130 if ((role == XML_ROLE_PARAM_ENTITY_REF)
5131 && parser->m_skippedEntityHandler) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005132 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005133 handleDefault = XML_FALSE;
5134 }
5135 break;
5136 }
5137 if (entity->open)
5138 return XML_ERROR_RECURSIVE_ENTITY_REF;
5139 if (entity->textPtr) {
5140 enum XML_Error result;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005141 XML_Bool betweenDecl
5142 = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
Fred Drake31d485c2004-08-03 07:06:22 +00005143 result = processInternalEntity(parser, entity, betweenDecl);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005144 if (result != XML_ERROR_NONE)
5145 return result;
5146 handleDefault = XML_FALSE;
5147 break;
5148 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07005149 if (parser->m_externalEntityRefHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005150 dtd->paramEntityRead = XML_FALSE;
5151 entity->open = XML_TRUE;
Miss Islington (bot)27067852021-08-29 07:32:50 -07005152 entityTrackingOnOpen(parser, entity, __LINE__);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005153 if (! parser->m_externalEntityRefHandler(
5154 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5155 entity->systemId, entity->publicId)) {
Miss Islington (bot)27067852021-08-29 07:32:50 -07005156 entityTrackingOnClose(parser, entity, __LINE__);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005157 entity->open = XML_FALSE;
5158 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5159 }
Miss Islington (bot)27067852021-08-29 07:32:50 -07005160 entityTrackingOnClose(parser, entity, __LINE__);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005161 entity->open = XML_FALSE;
5162 handleDefault = XML_FALSE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005163 if (! dtd->paramEntityRead) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005164 dtd->keepProcessing = dtd->standalone;
5165 break;
5166 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07005167 } else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005168 dtd->keepProcessing = dtd->standalone;
5169 break;
5170 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005171 }
5172#endif /* XML_DTD */
Benjamin Peterson52b94082019-09-25 21:33:58 -07005173 if (! dtd->standalone && parser->m_notStandaloneHandler
5174 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005175 return XML_ERROR_NOT_STANDALONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005176 break;
5177
Benjamin Peterson52b94082019-09-25 21:33:58 -07005178 /* Element declaration stuff */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005179
5180 case XML_ROLE_ELEMENT_NAME:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005181 if (parser->m_elementDeclHandler) {
5182 parser->m_declElementType = getElementType(parser, enc, s, next);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005183 if (! parser->m_declElementType)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005184 return XML_ERROR_NO_MEMORY;
5185 dtd->scaffLevel = 0;
5186 dtd->scaffCount = 0;
5187 dtd->in_eldecl = XML_TRUE;
5188 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005189 }
5190 break;
5191
5192 case XML_ROLE_CONTENT_ANY:
5193 case XML_ROLE_CONTENT_EMPTY:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005194 if (dtd->in_eldecl) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005195 if (parser->m_elementDeclHandler) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07005196 XML_Content *content
5197 = (XML_Content *)MALLOC(parser, sizeof(XML_Content));
5198 if (! content)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005199 return XML_ERROR_NO_MEMORY;
5200 content->quant = XML_CQUANT_NONE;
5201 content->name = NULL;
5202 content->numchildren = 0;
5203 content->children = NULL;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005204 content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
5205 : XML_CTYPE_EMPTY);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005206 *eventEndPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005207 parser->m_elementDeclHandler(
5208 parser->m_handlerArg, parser->m_declElementType->name, content);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005209 handleDefault = XML_FALSE;
5210 }
5211 dtd->in_eldecl = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005212 }
5213 break;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005214
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005215 case XML_ROLE_CONTENT_PCDATA:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005216 if (dtd->in_eldecl) {
5217 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5218 = XML_CTYPE_MIXED;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005219 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005220 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005221 }
5222 break;
5223
5224 case XML_ROLE_CONTENT_ELEMENT:
5225 quant = XML_CQUANT_NONE;
5226 goto elementContent;
5227 case XML_ROLE_CONTENT_ELEMENT_OPT:
5228 quant = XML_CQUANT_OPT;
5229 goto elementContent;
5230 case XML_ROLE_CONTENT_ELEMENT_REP:
5231 quant = XML_CQUANT_REP;
5232 goto elementContent;
5233 case XML_ROLE_CONTENT_ELEMENT_PLUS:
5234 quant = XML_CQUANT_PLUS;
5235 elementContent:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005236 if (dtd->in_eldecl) {
5237 ELEMENT_TYPE *el;
5238 const XML_Char *name;
5239 int nameLen;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005240 const char *nxt
5241 = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005242 int myindex = nextScaffoldPart(parser);
5243 if (myindex < 0)
5244 return XML_ERROR_NO_MEMORY;
5245 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5246 dtd->scaffold[myindex].quant = quant;
5247 el = getElementType(parser, enc, s, nxt);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005248 if (! el)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005249 return XML_ERROR_NO_MEMORY;
5250 name = el->name;
5251 dtd->scaffold[myindex].name = name;
5252 nameLen = 0;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005253 for (; name[nameLen++];)
5254 ;
5255 dtd->contentStringLen += nameLen;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005256 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005257 handleDefault = XML_FALSE;
5258 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005259 break;
5260
5261 case XML_ROLE_GROUP_CLOSE:
5262 quant = XML_CQUANT_NONE;
5263 goto closeGroup;
5264 case XML_ROLE_GROUP_CLOSE_OPT:
5265 quant = XML_CQUANT_OPT;
5266 goto closeGroup;
5267 case XML_ROLE_GROUP_CLOSE_REP:
5268 quant = XML_CQUANT_REP;
5269 goto closeGroup;
5270 case XML_ROLE_GROUP_CLOSE_PLUS:
5271 quant = XML_CQUANT_PLUS;
5272 closeGroup:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005273 if (dtd->in_eldecl) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005274 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005275 handleDefault = XML_FALSE;
5276 dtd->scaffLevel--;
5277 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5278 if (dtd->scaffLevel == 0) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07005279 if (! handleDefault) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005280 XML_Content *model = build_model(parser);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005281 if (! model)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005282 return XML_ERROR_NO_MEMORY;
5283 *eventEndPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005284 parser->m_elementDeclHandler(
5285 parser->m_handlerArg, parser->m_declElementType->name, model);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005286 }
5287 dtd->in_eldecl = XML_FALSE;
5288 dtd->contentStringLen = 0;
5289 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005290 }
5291 break;
5292 /* End element declaration stuff */
5293
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005294 case XML_ROLE_PI:
Benjamin Peterson52b94082019-09-25 21:33:58 -07005295 if (! reportProcessingInstruction(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005296 return XML_ERROR_NO_MEMORY;
5297 handleDefault = XML_FALSE;
5298 break;
5299 case XML_ROLE_COMMENT:
Benjamin Peterson52b94082019-09-25 21:33:58 -07005300 if (! reportComment(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005301 return XML_ERROR_NO_MEMORY;
5302 handleDefault = XML_FALSE;
5303 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005304 case XML_ROLE_NONE:
5305 switch (tok) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005306 case XML_TOK_BOM:
5307 handleDefault = XML_FALSE;
5308 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005309 }
5310 break;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005311 case XML_ROLE_DOCTYPE_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005312 if (parser->m_startDoctypeDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005313 handleDefault = XML_FALSE;
5314 break;
5315 case XML_ROLE_ENTITY_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005316 if (dtd->keepProcessing && parser->m_entityDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005317 handleDefault = XML_FALSE;
5318 break;
5319 case XML_ROLE_NOTATION_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005320 if (parser->m_notationDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005321 handleDefault = XML_FALSE;
5322 break;
5323 case XML_ROLE_ATTLIST_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005324 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005325 handleDefault = XML_FALSE;
5326 break;
5327 case XML_ROLE_ELEMENT_NONE:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005328 if (parser->m_elementDeclHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005329 handleDefault = XML_FALSE;
5330 break;
5331 } /* end of big switch */
5332
Benjamin Peterson4e211002018-06-26 19:25:45 -07005333 if (handleDefault && parser->m_defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005334 reportDefault(parser, enc, s, next);
5335
Benjamin Peterson4e211002018-06-26 19:25:45 -07005336 switch (parser->m_parsingStatus.parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005337 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00005338 *nextPtr = next;
5339 return XML_ERROR_NONE;
5340 case XML_FINISHED:
5341 return XML_ERROR_ABORTED;
5342 default:
5343 s = next;
5344 tok = XmlPrologTok(enc, s, end, &next);
5345 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005346 }
5347 /* not reached */
5348}
5349
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005350static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07005351epilogProcessor(XML_Parser parser, const char *s, const char *end,
5352 const char **nextPtr) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005353 parser->m_processor = epilogProcessor;
5354 parser->m_eventPtr = s;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005355 for (;;) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005356 const char *next = NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005357 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
Miss Islington (bot)27067852021-08-29 07:32:50 -07005358#ifdef XML_DTD
5359 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5360 XML_ACCOUNT_DIRECT)) {
5361 accountingOnAbort(parser);
5362 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5363 }
5364#endif
Benjamin Peterson4e211002018-06-26 19:25:45 -07005365 parser->m_eventEndPtr = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005366 switch (tok) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005367 /* report partial linebreak - it might be the last token */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005368 case -XML_TOK_PROLOG_S:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005369 if (parser->m_defaultHandler) {
5370 reportDefault(parser, parser->m_encoding, s, next);
5371 if (parser->m_parsingStatus.parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00005372 return XML_ERROR_ABORTED;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005373 }
Fred Drake31d485c2004-08-03 07:06:22 +00005374 *nextPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005375 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005376 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00005377 *nextPtr = s;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005378 return XML_ERROR_NONE;
5379 case XML_TOK_PROLOG_S:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005380 if (parser->m_defaultHandler)
5381 reportDefault(parser, parser->m_encoding, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005382 break;
5383 case XML_TOK_PI:
Benjamin Peterson52b94082019-09-25 21:33:58 -07005384 if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005385 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005386 break;
5387 case XML_TOK_COMMENT:
Benjamin Peterson52b94082019-09-25 21:33:58 -07005388 if (! reportComment(parser, parser->m_encoding, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005389 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005390 break;
5391 case XML_TOK_INVALID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005392 parser->m_eventPtr = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005393 return XML_ERROR_INVALID_TOKEN;
5394 case XML_TOK_PARTIAL:
Benjamin Peterson52b94082019-09-25 21:33:58 -07005395 if (! parser->m_parsingStatus.finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005396 *nextPtr = s;
5397 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005398 }
5399 return XML_ERROR_UNCLOSED_TOKEN;
5400 case XML_TOK_PARTIAL_CHAR:
Benjamin Peterson52b94082019-09-25 21:33:58 -07005401 if (! parser->m_parsingStatus.finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005402 *nextPtr = s;
5403 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005404 }
5405 return XML_ERROR_PARTIAL_CHAR;
5406 default:
5407 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5408 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07005409 parser->m_eventPtr = s = next;
5410 switch (parser->m_parsingStatus.parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005411 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00005412 *nextPtr = next;
5413 return XML_ERROR_NONE;
5414 case XML_FINISHED:
5415 return XML_ERROR_ABORTED;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005416 default:;
Fred Drake31d485c2004-08-03 07:06:22 +00005417 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005418 }
5419}
5420
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005421static enum XML_Error
Benjamin Peterson52b94082019-09-25 21:33:58 -07005422processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
Fred Drake31d485c2004-08-03 07:06:22 +00005423 const char *textStart, *textEnd;
5424 const char *next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005425 enum XML_Error result;
Fred Drake31d485c2004-08-03 07:06:22 +00005426 OPEN_INTERNAL_ENTITY *openEntity;
5427
Benjamin Peterson4e211002018-06-26 19:25:45 -07005428 if (parser->m_freeInternalEntities) {
5429 openEntity = parser->m_freeInternalEntities;
5430 parser->m_freeInternalEntities = openEntity->next;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005431 } else {
5432 openEntity
5433 = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
5434 if (! openEntity)
Fred Drake31d485c2004-08-03 07:06:22 +00005435 return XML_ERROR_NO_MEMORY;
5436 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005437 entity->open = XML_TRUE;
Miss Islington (bot)27067852021-08-29 07:32:50 -07005438#ifdef XML_DTD
5439 entityTrackingOnOpen(parser, entity, __LINE__);
5440#endif
Fred Drake31d485c2004-08-03 07:06:22 +00005441 entity->processed = 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005442 openEntity->next = parser->m_openInternalEntities;
5443 parser->m_openInternalEntities = openEntity;
Fred Drake31d485c2004-08-03 07:06:22 +00005444 openEntity->entity = entity;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005445 openEntity->startTagLevel = parser->m_tagLevel;
Fred Drake31d485c2004-08-03 07:06:22 +00005446 openEntity->betweenDecl = betweenDecl;
5447 openEntity->internalEventPtr = NULL;
5448 openEntity->internalEventEndPtr = NULL;
Miss Islington (bot)27067852021-08-29 07:32:50 -07005449 textStart = (const char *)entity->textPtr;
5450 textEnd = (const char *)(entity->textPtr + entity->textLen);
Victor Stinner5ff71322017-06-21 14:39:22 +02005451 /* Set a safe default value in case 'next' does not get set */
5452 next = textStart;
Fred Drake31d485c2004-08-03 07:06:22 +00005453
5454#ifdef XML_DTD
5455 if (entity->is_param) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07005456 int tok
5457 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5458 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
Miss Islington (bot)27067852021-08-29 07:32:50 -07005459 tok, next, &next, XML_FALSE, XML_FALSE,
5460 XML_ACCOUNT_ENTITY_EXPANSION);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005461 } else
Fred Drake31d485c2004-08-03 07:06:22 +00005462#endif /* XML_DTD */
Benjamin Peterson52b94082019-09-25 21:33:58 -07005463 result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
Miss Islington (bot)27067852021-08-29 07:32:50 -07005464 textStart, textEnd, &next, XML_FALSE,
5465 XML_ACCOUNT_ENTITY_EXPANSION);
Fred Drake31d485c2004-08-03 07:06:22 +00005466
5467 if (result == XML_ERROR_NONE) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005468 if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005469 entity->processed = (int)(next - textStart);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005470 parser->m_processor = internalEntityProcessor;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005471 } else {
Miss Islington (bot)27067852021-08-29 07:32:50 -07005472#ifdef XML_DTD
5473 entityTrackingOnClose(parser, entity, __LINE__);
5474#endif /* XML_DTD */
Fred Drake31d485c2004-08-03 07:06:22 +00005475 entity->open = XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005476 parser->m_openInternalEntities = openEntity->next;
Fred Drake31d485c2004-08-03 07:06:22 +00005477 /* put openEntity back in list of free instances */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005478 openEntity->next = parser->m_freeInternalEntities;
5479 parser->m_freeInternalEntities = openEntity;
Fred Drake31d485c2004-08-03 07:06:22 +00005480 }
5481 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005482 return result;
5483}
5484
Fred Drake31d485c2004-08-03 07:06:22 +00005485static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07005486internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
5487 const char **nextPtr) {
Fred Drake31d485c2004-08-03 07:06:22 +00005488 ENTITY *entity;
5489 const char *textStart, *textEnd;
5490 const char *next;
5491 enum XML_Error result;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005492 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005493 if (! openEntity)
Fred Drake31d485c2004-08-03 07:06:22 +00005494 return XML_ERROR_UNEXPECTED_STATE;
5495
5496 entity = openEntity->entity;
Miss Islington (bot)27067852021-08-29 07:32:50 -07005497 textStart = ((const char *)entity->textPtr) + entity->processed;
5498 textEnd = (const char *)(entity->textPtr + entity->textLen);
Victor Stinner5ff71322017-06-21 14:39:22 +02005499 /* Set a safe default value in case 'next' does not get set */
5500 next = textStart;
Fred Drake31d485c2004-08-03 07:06:22 +00005501
5502#ifdef XML_DTD
5503 if (entity->is_param) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07005504 int tok
5505 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
5506 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
Miss Islington (bot)27067852021-08-29 07:32:50 -07005507 tok, next, &next, XML_FALSE, XML_TRUE,
5508 XML_ACCOUNT_ENTITY_EXPANSION);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005509 } else
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005510#endif /* XML_DTD */
Benjamin Peterson52b94082019-09-25 21:33:58 -07005511 result = doContent(parser, openEntity->startTagLevel,
5512 parser->m_internalEncoding, textStart, textEnd, &next,
Miss Islington (bot)27067852021-08-29 07:32:50 -07005513 XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
Fred Drake31d485c2004-08-03 07:06:22 +00005514
5515 if (result != XML_ERROR_NONE)
5516 return result;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005517 else if (textEnd != next
5518 && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
Miss Islington (bot)27067852021-08-29 07:32:50 -07005519 entity->processed = (int)(next - (const char *)entity->textPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00005520 return result;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005521 } else {
Miss Islington (bot)27067852021-08-29 07:32:50 -07005522#ifdef XML_DTD
5523 entityTrackingOnClose(parser, entity, __LINE__);
5524#endif
Fred Drake31d485c2004-08-03 07:06:22 +00005525 entity->open = XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005526 parser->m_openInternalEntities = openEntity->next;
Fred Drake31d485c2004-08-03 07:06:22 +00005527 /* put openEntity back in list of free instances */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005528 openEntity->next = parser->m_freeInternalEntities;
5529 parser->m_freeInternalEntities = openEntity;
Fred Drake31d485c2004-08-03 07:06:22 +00005530 }
5531
5532#ifdef XML_DTD
5533 if (entity->is_param) {
5534 int tok;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005535 parser->m_processor = prologProcessor;
5536 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5537 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
Miss Islington (bot)27067852021-08-29 07:32:50 -07005538 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
5539 XML_ACCOUNT_DIRECT);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005540 } else
Fred Drake31d485c2004-08-03 07:06:22 +00005541#endif /* XML_DTD */
5542 {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005543 parser->m_processor = contentProcessor;
Fred Drake31d485c2004-08-03 07:06:22 +00005544 /* see externalEntityContentProcessor vs contentProcessor */
Benjamin Peterson52b94082019-09-25 21:33:58 -07005545 return doContent(parser, parser->m_parentParser ? 1 : 0, parser->m_encoding,
5546 s, end, nextPtr,
Miss Islington (bot)27067852021-08-29 07:32:50 -07005547 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
5548 XML_ACCOUNT_DIRECT);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005549 }
Fred Drake31d485c2004-08-03 07:06:22 +00005550}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005551
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005552static enum XML_Error PTRCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07005553errorProcessor(XML_Parser parser, const char *s, const char *end,
5554 const char **nextPtr) {
5555 UNUSED_P(s);
5556 UNUSED_P(end);
5557 UNUSED_P(nextPtr);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005558 return parser->m_errorCode;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005559}
5560
5561static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005562storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
Miss Islington (bot)27067852021-08-29 07:32:50 -07005563 const char *ptr, const char *end, STRING_POOL *pool,
5564 enum XML_Account account) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07005565 enum XML_Error result
Miss Islington (bot)27067852021-08-29 07:32:50 -07005566 = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005567 if (result)
5568 return result;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005569 if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005570 poolChop(pool);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005571 if (! poolAppendChar(pool, XML_T('\0')))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005572 return XML_ERROR_NO_MEMORY;
5573 return XML_ERROR_NONE;
5574}
5575
5576static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005577appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
Miss Islington (bot)27067852021-08-29 07:32:50 -07005578 const char *ptr, const char *end, STRING_POOL *pool,
5579 enum XML_Account account) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07005580 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
Miss Islington (bot)27067852021-08-29 07:32:50 -07005581#ifndef XML_DTD
5582 UNUSED_P(account);
5583#endif
5584
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005585 for (;;) {
Miss Islington (bot)27067852021-08-29 07:32:50 -07005586 const char *next
5587 = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005588 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
Miss Islington (bot)27067852021-08-29 07:32:50 -07005589#ifdef XML_DTD
5590 if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
5591 accountingOnAbort(parser);
5592 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5593 }
5594#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005595 switch (tok) {
5596 case XML_TOK_NONE:
5597 return XML_ERROR_NONE;
5598 case XML_TOK_INVALID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005599 if (enc == parser->m_encoding)
5600 parser->m_eventPtr = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005601 return XML_ERROR_INVALID_TOKEN;
5602 case XML_TOK_PARTIAL:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005603 if (enc == parser->m_encoding)
5604 parser->m_eventPtr = ptr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005605 return XML_ERROR_INVALID_TOKEN;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005606 case XML_TOK_CHAR_REF: {
5607 XML_Char buf[XML_ENCODE_MAX];
5608 int i;
5609 int n = XmlCharRefNumber(enc, ptr);
5610 if (n < 0) {
5611 if (enc == parser->m_encoding)
5612 parser->m_eventPtr = ptr;
5613 return XML_ERROR_BAD_CHAR_REF;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005614 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07005615 if (! isCdata && n == 0x20 /* space */
5616 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5617 break;
5618 n = XmlEncode(n, (ICHAR *)buf);
5619 /* The XmlEncode() functions can never return 0 here. That
5620 * error return happens if the code point passed in is either
5621 * negative or greater than or equal to 0x110000. The
5622 * XmlCharRefNumber() functions will all return a number
5623 * strictly less than 0x110000 or a negative value if an error
5624 * occurred. The negative value is intercepted above, so
5625 * XmlEncode() is never passed a value it might return an
5626 * error for.
5627 */
5628 for (i = 0; i < n; i++) {
5629 if (! poolAppendChar(pool, buf[i]))
5630 return XML_ERROR_NO_MEMORY;
5631 }
5632 } break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005633 case XML_TOK_DATA_CHARS:
Benjamin Peterson52b94082019-09-25 21:33:58 -07005634 if (! poolAppend(pool, enc, ptr, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005635 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005636 break;
5637 case XML_TOK_TRAILING_CR:
5638 next = ptr + enc->minBytesPerChar;
5639 /* fall through */
5640 case XML_TOK_ATTRIBUTE_VALUE_S:
5641 case XML_TOK_DATA_NEWLINE:
Benjamin Peterson52b94082019-09-25 21:33:58 -07005642 if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005643 break;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005644 if (! poolAppendChar(pool, 0x20))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005645 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005646 break;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005647 case XML_TOK_ENTITY_REF: {
5648 const XML_Char *name;
5649 ENTITY *entity;
5650 char checkEntityDecl;
5651 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
5652 enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
5653 if (ch) {
Miss Islington (bot)27067852021-08-29 07:32:50 -07005654#ifdef XML_DTD
5655 /* NOTE: We are replacing 4-6 characters original input for 1 character
5656 * so there is no amplification and hence recording without
5657 * protection. */
5658 accountingDiffTolerated(parser, tok, (char *)&ch,
5659 ((char *)&ch) + sizeof(XML_Char), __LINE__,
5660 XML_ACCOUNT_ENTITY_EXPANSION);
5661#endif /* XML_DTD */
Benjamin Peterson52b94082019-09-25 21:33:58 -07005662 if (! poolAppendChar(pool, ch))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005663 return XML_ERROR_NO_MEMORY;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005664 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005665 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07005666 name = poolStoreString(&parser->m_temp2Pool, enc,
5667 ptr + enc->minBytesPerChar,
5668 next - enc->minBytesPerChar);
5669 if (! name)
5670 return XML_ERROR_NO_MEMORY;
5671 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
5672 poolDiscard(&parser->m_temp2Pool);
5673 /* First, determine if a check for an existing declaration is needed;
5674 if yes, check that the entity exists, and that it is internal.
5675 */
5676 if (pool == &dtd->pool) /* are we called from prolog? */
5677 checkEntityDecl =
5678#ifdef XML_DTD
5679 parser->m_prologState.documentEntity &&
5680#endif /* XML_DTD */
5681 (dtd->standalone ? ! parser->m_openInternalEntities
5682 : ! dtd->hasParamEntityRefs);
5683 else /* if (pool == &parser->m_tempPool): we are called from content */
5684 checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
5685 if (checkEntityDecl) {
5686 if (! entity)
5687 return XML_ERROR_UNDEFINED_ENTITY;
5688 else if (! entity->is_internal)
5689 return XML_ERROR_ENTITY_DECLARED_IN_PE;
5690 } else if (! entity) {
5691 /* Cannot report skipped entity here - see comments on
5692 parser->m_skippedEntityHandler.
5693 if (parser->m_skippedEntityHandler)
5694 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
5695 */
5696 /* Cannot call the default handler because this would be
5697 out of sync with the call to the startElementHandler.
5698 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
5699 reportDefault(parser, enc, ptr, next);
5700 */
5701 break;
5702 }
5703 if (entity->open) {
5704 if (enc == parser->m_encoding) {
5705 /* It does not appear that this line can be executed.
5706 *
5707 * The "if (entity->open)" check catches recursive entity
5708 * definitions. In order to be called with an open
5709 * entity, it must have gone through this code before and
5710 * been through the recursive call to
5711 * appendAttributeValue() some lines below. That call
5712 * sets the local encoding ("enc") to the parser's
5713 * internal encoding (internal_utf8 or internal_utf16),
5714 * which can never be the same as the principle encoding.
5715 * It doesn't appear there is another code path that gets
5716 * here with entity->open being TRUE.
5717 *
5718 * Since it is not certain that this logic is watertight,
5719 * we keep the line and merely exclude it from coverage
5720 * tests.
5721 */
5722 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
5723 }
5724 return XML_ERROR_RECURSIVE_ENTITY_REF;
5725 }
5726 if (entity->notation) {
5727 if (enc == parser->m_encoding)
5728 parser->m_eventPtr = ptr;
5729 return XML_ERROR_BINARY_ENTITY_REF;
5730 }
5731 if (! entity->textPtr) {
5732 if (enc == parser->m_encoding)
5733 parser->m_eventPtr = ptr;
5734 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
5735 } else {
5736 enum XML_Error result;
5737 const XML_Char *textEnd = entity->textPtr + entity->textLen;
5738 entity->open = XML_TRUE;
Miss Islington (bot)27067852021-08-29 07:32:50 -07005739#ifdef XML_DTD
5740 entityTrackingOnOpen(parser, entity, __LINE__);
5741#endif
Benjamin Peterson52b94082019-09-25 21:33:58 -07005742 result = appendAttributeValue(parser, parser->m_internalEncoding,
Miss Islington (bot)27067852021-08-29 07:32:50 -07005743 isCdata, (const char *)entity->textPtr,
5744 (const char *)textEnd, pool,
5745 XML_ACCOUNT_ENTITY_EXPANSION);
5746#ifdef XML_DTD
5747 entityTrackingOnClose(parser, entity, __LINE__);
5748#endif
Benjamin Peterson52b94082019-09-25 21:33:58 -07005749 entity->open = XML_FALSE;
5750 if (result)
5751 return result;
5752 }
5753 } break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005754 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02005755 /* The only token returned by XmlAttributeValueTok() that does
5756 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
5757 * Getting that would require an entity name to contain an
5758 * incomplete XML character (e.g. \xE2\x82); however previous
5759 * tokenisers will have already recognised and rejected such
5760 * names before XmlAttributeValueTok() gets a look-in. This
5761 * default case should be retained as a safety net, but the code
5762 * excluded from coverage tests.
5763 *
5764 * LCOV_EXCL_START
5765 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005766 if (enc == parser->m_encoding)
5767 parser->m_eventPtr = ptr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005768 return XML_ERROR_UNEXPECTED_STATE;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005769 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005770 }
5771 ptr = next;
5772 }
5773 /* not reached */
5774}
5775
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005776static enum XML_Error
Benjamin Peterson52b94082019-09-25 21:33:58 -07005777storeEntityValue(XML_Parser parser, const ENCODING *enc,
Miss Islington (bot)27067852021-08-29 07:32:50 -07005778 const char *entityTextPtr, const char *entityTextEnd,
5779 enum XML_Account account) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07005780 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005781 STRING_POOL *pool = &(dtd->entityValuePool);
5782 enum XML_Error result = XML_ERROR_NONE;
5783#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07005784 int oldInEntityValue = parser->m_prologState.inEntityValue;
5785 parser->m_prologState.inEntityValue = 1;
Miss Islington (bot)27067852021-08-29 07:32:50 -07005786#else
5787 UNUSED_P(account);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005788#endif /* XML_DTD */
5789 /* never return Null for the value argument in EntityDeclHandler,
5790 since this would indicate an external entity; therefore we
5791 have to make sure that entityValuePool.start is not null */
Benjamin Peterson52b94082019-09-25 21:33:58 -07005792 if (! pool->blocks) {
5793 if (! poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005794 return XML_ERROR_NO_MEMORY;
5795 }
5796
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005797 for (;;) {
Miss Islington (bot)27067852021-08-29 07:32:50 -07005798 const char *next
5799 = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005800 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
Miss Islington (bot)27067852021-08-29 07:32:50 -07005801
5802#ifdef XML_DTD
5803 if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
5804 account)) {
5805 accountingOnAbort(parser);
5806 result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5807 goto endEntityValue;
5808 }
5809#endif
5810
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005811 switch (tok) {
5812 case XML_TOK_PARAM_ENTITY_REF:
5813#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07005814 if (parser->m_isParamEntity || enc != parser->m_encoding) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005815 const XML_Char *name;
5816 ENTITY *entity;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005817 name = poolStoreString(&parser->m_tempPool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005818 entityTextPtr + enc->minBytesPerChar,
5819 next - enc->minBytesPerChar);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005820 if (! name) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005821 result = XML_ERROR_NO_MEMORY;
5822 goto endEntityValue;
5823 }
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005824 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005825 poolDiscard(&parser->m_tempPool);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005826 if (! entity) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005827 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
5828 /* cannot report skipped entity here - see comments on
Benjamin Peterson4e211002018-06-26 19:25:45 -07005829 parser->m_skippedEntityHandler
5830 if (parser->m_skippedEntityHandler)
5831 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005832 */
5833 dtd->keepProcessing = dtd->standalone;
5834 goto endEntityValue;
5835 }
5836 if (entity->open) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005837 if (enc == parser->m_encoding)
5838 parser->m_eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005839 result = XML_ERROR_RECURSIVE_ENTITY_REF;
5840 goto endEntityValue;
5841 }
5842 if (entity->systemId) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005843 if (parser->m_externalEntityRefHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005844 dtd->paramEntityRead = XML_FALSE;
5845 entity->open = XML_TRUE;
Miss Islington (bot)27067852021-08-29 07:32:50 -07005846 entityTrackingOnOpen(parser, entity, __LINE__);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005847 if (! parser->m_externalEntityRefHandler(
5848 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5849 entity->systemId, entity->publicId)) {
Miss Islington (bot)27067852021-08-29 07:32:50 -07005850 entityTrackingOnClose(parser, entity, __LINE__);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005851 entity->open = XML_FALSE;
5852 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5853 goto endEntityValue;
5854 }
Miss Islington (bot)27067852021-08-29 07:32:50 -07005855 entityTrackingOnClose(parser, entity, __LINE__);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005856 entity->open = XML_FALSE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005857 if (! dtd->paramEntityRead)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005858 dtd->keepProcessing = dtd->standalone;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005859 } else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005860 dtd->keepProcessing = dtd->standalone;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005861 } else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005862 entity->open = XML_TRUE;
Miss Islington (bot)27067852021-08-29 07:32:50 -07005863 entityTrackingOnOpen(parser, entity, __LINE__);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005864 result = storeEntityValue(
Miss Islington (bot)27067852021-08-29 07:32:50 -07005865 parser, parser->m_internalEncoding, (const char *)entity->textPtr,
5866 (const char *)(entity->textPtr + entity->textLen),
5867 XML_ACCOUNT_ENTITY_EXPANSION);
5868 entityTrackingOnClose(parser, entity, __LINE__);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005869 entity->open = XML_FALSE;
5870 if (result)
5871 goto endEntityValue;
5872 }
5873 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005874 }
5875#endif /* XML_DTD */
Fred Drake31d485c2004-08-03 07:06:22 +00005876 /* In the internal subset, PE references are not legal
5877 within markup declarations, e.g entity values in this case. */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005878 parser->m_eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005879 result = XML_ERROR_PARAM_ENTITY_REF;
5880 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005881 case XML_TOK_NONE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005882 result = XML_ERROR_NONE;
5883 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005884 case XML_TOK_ENTITY_REF:
5885 case XML_TOK_DATA_CHARS:
Benjamin Peterson52b94082019-09-25 21:33:58 -07005886 if (! poolAppend(pool, enc, entityTextPtr, next)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005887 result = XML_ERROR_NO_MEMORY;
5888 goto endEntityValue;
5889 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005890 break;
5891 case XML_TOK_TRAILING_CR:
5892 next = entityTextPtr + enc->minBytesPerChar;
5893 /* fall through */
5894 case XML_TOK_DATA_NEWLINE:
Benjamin Peterson52b94082019-09-25 21:33:58 -07005895 if (pool->end == pool->ptr && ! poolGrow(pool)) {
5896 result = XML_ERROR_NO_MEMORY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005897 goto endEntityValue;
5898 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005899 *(pool->ptr)++ = 0xA;
5900 break;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005901 case XML_TOK_CHAR_REF: {
5902 XML_Char buf[XML_ENCODE_MAX];
5903 int i;
5904 int n = XmlCharRefNumber(enc, entityTextPtr);
5905 if (n < 0) {
5906 if (enc == parser->m_encoding)
5907 parser->m_eventPtr = entityTextPtr;
5908 result = XML_ERROR_BAD_CHAR_REF;
5909 goto endEntityValue;
5910 }
5911 n = XmlEncode(n, (ICHAR *)buf);
5912 /* The XmlEncode() functions can never return 0 here. That
5913 * error return happens if the code point passed in is either
5914 * negative or greater than or equal to 0x110000. The
5915 * XmlCharRefNumber() functions will all return a number
5916 * strictly less than 0x110000 or a negative value if an error
5917 * occurred. The negative value is intercepted above, so
5918 * XmlEncode() is never passed a value it might return an
5919 * error for.
5920 */
5921 for (i = 0; i < n; i++) {
5922 if (pool->end == pool->ptr && ! poolGrow(pool)) {
5923 result = XML_ERROR_NO_MEMORY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005924 goto endEntityValue;
5925 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07005926 *(pool->ptr)++ = buf[i];
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005927 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07005928 } break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005929 case XML_TOK_PARTIAL:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005930 if (enc == parser->m_encoding)
5931 parser->m_eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005932 result = XML_ERROR_INVALID_TOKEN;
5933 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005934 case XML_TOK_INVALID:
Benjamin Peterson4e211002018-06-26 19:25:45 -07005935 if (enc == parser->m_encoding)
5936 parser->m_eventPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005937 result = XML_ERROR_INVALID_TOKEN;
5938 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005939 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02005940 /* This default case should be unnecessary -- all the tokens
5941 * that XmlEntityValueTok() can return have their own explicit
5942 * cases -- but should be retained for safety. We do however
5943 * exclude it from the coverage statistics.
5944 *
5945 * LCOV_EXCL_START
5946 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07005947 if (enc == parser->m_encoding)
5948 parser->m_eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005949 result = XML_ERROR_UNEXPECTED_STATE;
5950 goto endEntityValue;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005951 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005952 }
5953 entityTextPtr = next;
5954 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005955endEntityValue:
5956#ifdef XML_DTD
Benjamin Peterson4e211002018-06-26 19:25:45 -07005957 parser->m_prologState.inEntityValue = oldInEntityValue;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005958#endif /* XML_DTD */
5959 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005960}
5961
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005962static void FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07005963normalizeLines(XML_Char *s) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005964 XML_Char *p;
5965 for (;; s++) {
5966 if (*s == XML_T('\0'))
5967 return;
5968 if (*s == 0xD)
5969 break;
5970 }
5971 p = s;
5972 do {
5973 if (*s == 0xD) {
5974 *p++ = 0xA;
5975 if (*++s == 0xA)
5976 s++;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005977 } else
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005978 *p++ = *s++;
5979 } while (*s);
5980 *p = XML_T('\0');
5981}
5982
5983static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005984reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
Benjamin Peterson52b94082019-09-25 21:33:58 -07005985 const char *start, const char *end) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005986 const XML_Char *target;
5987 XML_Char *data;
5988 const char *tem;
Benjamin Peterson52b94082019-09-25 21:33:58 -07005989 if (! parser->m_processingInstructionHandler) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07005990 if (parser->m_defaultHandler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005991 reportDefault(parser, enc, start, end);
5992 return 1;
5993 }
5994 start += enc->minBytesPerChar * 2;
5995 tem = start + XmlNameLength(enc, start);
Benjamin Peterson4e211002018-06-26 19:25:45 -07005996 target = poolStoreString(&parser->m_tempPool, enc, start, tem);
Benjamin Peterson52b94082019-09-25 21:33:58 -07005997 if (! target)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005998 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07005999 poolFinish(&parser->m_tempPool);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006000 data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
6001 end - enc->minBytesPerChar * 2);
6002 if (! data)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006003 return 0;
6004 normalizeLines(data);
Benjamin Peterson4e211002018-06-26 19:25:45 -07006005 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
6006 poolClear(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006007 return 1;
6008}
6009
6010static int
Benjamin Peterson52b94082019-09-25 21:33:58 -07006011reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
6012 const char *end) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006013 XML_Char *data;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006014 if (! parser->m_commentHandler) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07006015 if (parser->m_defaultHandler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006016 reportDefault(parser, enc, start, end);
6017 return 1;
6018 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07006019 data = poolStoreString(&parser->m_tempPool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006020 start + enc->minBytesPerChar * 4,
6021 end - enc->minBytesPerChar * 3);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006022 if (! data)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006023 return 0;
6024 normalizeLines(data);
Benjamin Peterson4e211002018-06-26 19:25:45 -07006025 parser->m_commentHandler(parser->m_handlerArg, data);
6026 poolClear(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006027 return 1;
6028}
6029
6030static void
Benjamin Peterson52b94082019-09-25 21:33:58 -07006031reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
6032 const char *end) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006033 if (MUST_CONVERT(enc, s)) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02006034 enum XML_Convert_Result convert_res;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006035 const char **eventPP;
6036 const char **eventEndPP;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006037 if (enc == parser->m_encoding) {
6038 eventPP = &parser->m_eventPtr;
6039 eventEndPP = &parser->m_eventEndPtr;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006040 } else {
Victor Stinner93d0cb52017-08-18 23:43:54 +02006041 /* To get here, two things must be true; the parser must be
6042 * using a character encoding that is not the same as the
6043 * encoding passed in, and the encoding passed in must need
6044 * conversion to the internal format (UTF-8 unless XML_UNICODE
6045 * is defined). The only occasions on which the encoding passed
6046 * in is not the same as the parser's encoding are when it is
6047 * the internal encoding (e.g. a previously defined parameter
6048 * entity, already converted to internal format). This by
6049 * definition doesn't need conversion, so the whole branch never
6050 * gets executed.
6051 *
6052 * For safety's sake we don't delete these lines and merely
6053 * exclude them from coverage statistics.
6054 *
6055 * LCOV_EXCL_START
6056 */
Benjamin Peterson4e211002018-06-26 19:25:45 -07006057 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
6058 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
Victor Stinner93d0cb52017-08-18 23:43:54 +02006059 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006060 }
6061 do {
Benjamin Peterson4e211002018-06-26 19:25:45 -07006062 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006063 convert_res
6064 = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006065 *eventEndPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006066 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
6067 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006068 *eventPP = s;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006069 } while ((convert_res != XML_CONVERT_COMPLETED)
6070 && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
6071 } else
6072 parser->m_defaultHandler(parser->m_handlerArg, (XML_Char *)s,
6073 (int)((XML_Char *)end - (XML_Char *)s));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006074}
6075
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006076static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006077defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
Benjamin Peterson52b94082019-09-25 21:33:58 -07006078 XML_Bool isId, const XML_Char *value, XML_Parser parser) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006079 DEFAULT_ATTRIBUTE *att;
6080 if (value || isId) {
6081 /* The handling of default attributes gets messed up if we have
6082 a default which duplicates a non-default. */
6083 int i;
6084 for (i = 0; i < type->nDefaultAtts; i++)
6085 if (attId == type->defaultAtts[i].id)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006086 return 1;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006087 if (isId && ! type->idAtt && ! attId->xmlns)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006088 type->idAtt = attId;
6089 }
6090 if (type->nDefaultAtts == type->allocDefaultAtts) {
6091 if (type->allocDefaultAtts == 0) {
6092 type->allocDefaultAtts = 8;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006093 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(
6094 parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6095 if (! type->defaultAtts) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07006096 type->allocDefaultAtts = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006097 return 0;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006098 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07006099 } else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006100 DEFAULT_ATTRIBUTE *temp;
6101 int count = type->allocDefaultAtts * 2;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006102 temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
6103 (count * sizeof(DEFAULT_ATTRIBUTE)));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006104 if (temp == NULL)
6105 return 0;
6106 type->allocDefaultAtts = count;
6107 type->defaultAtts = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006108 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006109 }
6110 att = type->defaultAtts + type->nDefaultAtts;
6111 att->id = attId;
6112 att->value = value;
6113 att->isCdata = isCdata;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006114 if (! isCdata)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006115 attId->maybeTokenized = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006116 type->nDefaultAtts += 1;
6117 return 1;
6118}
6119
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006120static int
Benjamin Peterson52b94082019-09-25 21:33:58 -07006121setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
6122 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006123 const XML_Char *name;
6124 for (name = elementType->name; *name; name++) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006125 if (*name == XML_T(ASCII_COLON)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006126 PREFIX *prefix;
6127 const XML_Char *s;
6128 for (s = elementType->name; s != name; s++) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07006129 if (! poolAppendChar(&dtd->pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006130 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006131 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07006132 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006133 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006134 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006135 sizeof(PREFIX));
Benjamin Peterson52b94082019-09-25 21:33:58 -07006136 if (! prefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006137 return 0;
6138 if (prefix->name == poolStart(&dtd->pool))
6139 poolFinish(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006140 else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006141 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006142 elementType->prefix = prefix;
Benjamin Peterson3b03b092019-06-27 20:54:44 -07006143 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006144 }
6145 }
6146 return 1;
6147}
6148
6149static ATTRIBUTE_ID *
Benjamin Peterson52b94082019-09-25 21:33:58 -07006150getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
6151 const char *end) {
6152 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006153 ATTRIBUTE_ID *id;
6154 const XML_Char *name;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006155 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006156 return NULL;
6157 name = poolStoreString(&dtd->pool, enc, start, end);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006158 if (! name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006159 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006160 /* skip quotation mark - its storage will be re-used (like in name[-1]) */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006161 ++name;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006162 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
6163 sizeof(ATTRIBUTE_ID));
6164 if (! id)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006165 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006166 if (id->name != name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006167 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006168 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006169 poolFinish(&dtd->pool);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006170 if (! parser->m_ns)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006171 ;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006172 else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
6173 && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
6174 && name[4] == XML_T(ASCII_s)
6175 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006176 if (name[5] == XML_T('\0'))
6177 id->prefix = &dtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006178 else
Benjamin Peterson52b94082019-09-25 21:33:58 -07006179 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
6180 sizeof(PREFIX));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006181 id->xmlns = XML_TRUE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006182 } else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006183 int i;
6184 for (i = 0; name[i]; i++) {
Fred Drake08317ae2003-10-21 15:38:55 +00006185 /* attributes without prefix are *not* in the default namespace */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006186 if (name[i] == XML_T(ASCII_COLON)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006187 int j;
6188 for (j = 0; j < i; j++) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07006189 if (! poolAppendChar(&dtd->pool, name[j]))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006190 return NULL;
6191 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07006192 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006193 return NULL;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006194 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
6195 poolStart(&dtd->pool), sizeof(PREFIX));
6196 if (! id->prefix)
Benjamin Peterson196d7db2016-06-11 13:28:56 -07006197 return NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006198 if (id->prefix->name == poolStart(&dtd->pool))
6199 poolFinish(&dtd->pool);
6200 else
6201 poolDiscard(&dtd->pool);
6202 break;
6203 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006204 }
6205 }
6206 }
6207 return id;
6208}
6209
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006210#define CONTEXT_SEP XML_T(ASCII_FF)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006211
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006212static const XML_Char *
Benjamin Peterson52b94082019-09-25 21:33:58 -07006213getContext(XML_Parser parser) {
6214 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006215 HASH_TABLE_ITER iter;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006216 XML_Bool needSep = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006217
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006218 if (dtd->defaultPrefix.binding) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006219 int i;
6220 int len;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006221 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006222 return NULL;
6223 len = dtd->defaultPrefix.binding->uriLen;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006224 if (parser->m_namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006225 len--;
Victor Stinner93d0cb52017-08-18 23:43:54 +02006226 for (i = 0; i < len; i++) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07006227 if (! poolAppendChar(&parser->m_tempPool,
6228 dtd->defaultPrefix.binding->uri[i])) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02006229 /* Because of memory caching, I don't believe this line can be
6230 * executed.
6231 *
6232 * This is part of a loop copying the default prefix binding
6233 * URI into the parser's temporary string pool. Previously,
6234 * that URI was copied into the same string pool, with a
6235 * terminating NUL character, as part of setContext(). When
6236 * the pool was cleared, that leaves a block definitely big
6237 * enough to hold the URI on the free block list of the pool.
6238 * The URI copy in getContext() therefore cannot run out of
6239 * memory.
6240 *
6241 * If the pool is used between the setContext() and
6242 * getContext() calls, the worst it can do is leave a bigger
6243 * block on the front of the free list. Given that this is
6244 * all somewhat inobvious and program logic can be changed, we
6245 * don't delete the line but we do exclude it from the test
6246 * coverage statistics.
6247 */
6248 return NULL; /* LCOV_EXCL_LINE */
6249 }
6250 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006251 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006252 }
6253
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006254 hashTableIterInit(&iter, &(dtd->prefixes));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006255 for (;;) {
6256 int i;
6257 int len;
6258 const XML_Char *s;
6259 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006260 if (! prefix)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006261 break;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006262 if (! prefix->binding) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02006263 /* This test appears to be (justifiable) paranoia. There does
6264 * not seem to be a way of injecting a prefix without a binding
6265 * that doesn't get errored long before this function is called.
6266 * The test should remain for safety's sake, so we instead
6267 * exclude the following line from the coverage statistics.
6268 */
6269 continue; /* LCOV_EXCL_LINE */
6270 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07006271 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006272 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006273 for (s = prefix->name; *s; s++)
Benjamin Peterson52b94082019-09-25 21:33:58 -07006274 if (! poolAppendChar(&parser->m_tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006275 return NULL;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006276 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006277 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006278 len = prefix->binding->uriLen;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006279 if (parser->m_namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006280 len--;
6281 for (i = 0; i < len; i++)
Benjamin Peterson52b94082019-09-25 21:33:58 -07006282 if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006283 return NULL;
6284 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006285 }
6286
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006287 hashTableIterInit(&iter, &(dtd->generalEntities));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006288 for (;;) {
6289 const XML_Char *s;
6290 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006291 if (! e)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006292 break;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006293 if (! e->open)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006294 continue;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006295 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006296 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006297 for (s = e->name; *s; s++)
Benjamin Peterson52b94082019-09-25 21:33:58 -07006298 if (! poolAppendChar(&parser->m_tempPool, *s))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006299 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006300 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006301 }
6302
Benjamin Peterson52b94082019-09-25 21:33:58 -07006303 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006304 return NULL;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006305 return parser->m_tempPool.start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006306}
6307
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006308static XML_Bool
Benjamin Peterson52b94082019-09-25 21:33:58 -07006309setContext(XML_Parser parser, const XML_Char *context) {
6310 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006311 const XML_Char *s = context;
6312
6313 while (*context != XML_T('\0')) {
6314 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
6315 ENTITY *e;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006316 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006317 return XML_FALSE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006318 e = (ENTITY *)lookup(parser, &dtd->generalEntities,
6319 poolStart(&parser->m_tempPool), 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006320 if (e)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006321 e->open = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006322 if (*s != XML_T('\0'))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006323 s++;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006324 context = s;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006325 poolDiscard(&parser->m_tempPool);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006326 } else if (*s == XML_T(ASCII_EQUALS)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006327 PREFIX *prefix;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006328 if (poolLength(&parser->m_tempPool) == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006329 prefix = &dtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006330 else {
Benjamin Peterson52b94082019-09-25 21:33:58 -07006331 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006332 return XML_FALSE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006333 prefix
6334 = (PREFIX *)lookup(parser, &dtd->prefixes,
6335 poolStart(&parser->m_tempPool), sizeof(PREFIX));
6336 if (! prefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006337 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006338 if (prefix->name == poolStart(&parser->m_tempPool)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006339 prefix->name = poolCopyString(&dtd->pool, prefix->name);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006340 if (! prefix->name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006341 return XML_FALSE;
6342 }
Benjamin Peterson4e211002018-06-26 19:25:45 -07006343 poolDiscard(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006344 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07006345 for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006346 context++)
Benjamin Peterson52b94082019-09-25 21:33:58 -07006347 if (! poolAppendChar(&parser->m_tempPool, *context))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006348 return XML_FALSE;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006349 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006350 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006351 if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
Benjamin Peterson52b94082019-09-25 21:33:58 -07006352 &parser->m_inheritedBindings)
6353 != XML_ERROR_NONE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006354 return XML_FALSE;
Benjamin Peterson4e211002018-06-26 19:25:45 -07006355 poolDiscard(&parser->m_tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006356 if (*context != XML_T('\0'))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006357 ++context;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006358 s = context;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006359 } else {
6360 if (! poolAppendChar(&parser->m_tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006361 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006362 s++;
6363 }
6364 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006365 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006366}
6367
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006368static void FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07006369normalizePublicId(XML_Char *publicId) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006370 XML_Char *p = publicId;
6371 XML_Char *s;
6372 for (s = publicId; *s; s++) {
6373 switch (*s) {
6374 case 0x20:
6375 case 0xD:
6376 case 0xA:
6377 if (p != publicId && p[-1] != 0x20)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006378 *p++ = 0x20;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006379 break;
6380 default:
6381 *p++ = *s;
6382 }
6383 }
6384 if (p != publicId && p[-1] == 0x20)
6385 --p;
6386 *p = XML_T('\0');
6387}
6388
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006389static DTD *
Benjamin Peterson52b94082019-09-25 21:33:58 -07006390dtdCreate(const XML_Memory_Handling_Suite *ms) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006391 DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD));
6392 if (p == NULL)
6393 return p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006394 poolInit(&(p->pool), ms);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006395 poolInit(&(p->entityValuePool), ms);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006396 hashTableInit(&(p->generalEntities), ms);
6397 hashTableInit(&(p->elementTypes), ms);
6398 hashTableInit(&(p->attributeIds), ms);
6399 hashTableInit(&(p->prefixes), ms);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006400#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006401 p->paramEntityRead = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006402 hashTableInit(&(p->paramEntities), ms);
6403#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006404 p->defaultPrefix.name = NULL;
6405 p->defaultPrefix.binding = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006406
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006407 p->in_eldecl = XML_FALSE;
6408 p->scaffIndex = NULL;
6409 p->scaffold = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006410 p->scaffLevel = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006411 p->scaffSize = 0;
6412 p->scaffCount = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006413 p->contentStringLen = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006414
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006415 p->keepProcessing = XML_TRUE;
6416 p->hasParamEntityRefs = XML_FALSE;
6417 p->standalone = XML_FALSE;
6418 return p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006419}
6420
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006421static void
Benjamin Peterson52b94082019-09-25 21:33:58 -07006422dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006423 HASH_TABLE_ITER iter;
6424 hashTableIterInit(&iter, &(p->elementTypes));
6425 for (;;) {
6426 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006427 if (! e)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006428 break;
6429 if (e->allocDefaultAtts != 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006430 ms->free_fcn(e->defaultAtts);
6431 }
6432 hashTableClear(&(p->generalEntities));
6433#ifdef XML_DTD
6434 p->paramEntityRead = XML_FALSE;
6435 hashTableClear(&(p->paramEntities));
6436#endif /* XML_DTD */
6437 hashTableClear(&(p->elementTypes));
6438 hashTableClear(&(p->attributeIds));
6439 hashTableClear(&(p->prefixes));
6440 poolClear(&(p->pool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006441 poolClear(&(p->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006442 p->defaultPrefix.name = NULL;
6443 p->defaultPrefix.binding = NULL;
6444
6445 p->in_eldecl = XML_FALSE;
Fred Drake08317ae2003-10-21 15:38:55 +00006446
6447 ms->free_fcn(p->scaffIndex);
6448 p->scaffIndex = NULL;
6449 ms->free_fcn(p->scaffold);
6450 p->scaffold = NULL;
6451
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006452 p->scaffLevel = 0;
6453 p->scaffSize = 0;
6454 p->scaffCount = 0;
6455 p->contentStringLen = 0;
6456
6457 p->keepProcessing = XML_TRUE;
6458 p->hasParamEntityRefs = XML_FALSE;
6459 p->standalone = XML_FALSE;
6460}
6461
6462static void
Benjamin Peterson52b94082019-09-25 21:33:58 -07006463dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006464 HASH_TABLE_ITER iter;
6465 hashTableIterInit(&iter, &(p->elementTypes));
6466 for (;;) {
6467 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006468 if (! e)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006469 break;
6470 if (e->allocDefaultAtts != 0)
6471 ms->free_fcn(e->defaultAtts);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006472 }
6473 hashTableDestroy(&(p->generalEntities));
6474#ifdef XML_DTD
6475 hashTableDestroy(&(p->paramEntities));
6476#endif /* XML_DTD */
6477 hashTableDestroy(&(p->elementTypes));
6478 hashTableDestroy(&(p->attributeIds));
6479 hashTableDestroy(&(p->prefixes));
6480 poolDestroy(&(p->pool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006481 poolDestroy(&(p->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006482 if (isDocEntity) {
Fred Drake08317ae2003-10-21 15:38:55 +00006483 ms->free_fcn(p->scaffIndex);
6484 ms->free_fcn(p->scaffold);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006485 }
6486 ms->free_fcn(p);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006487}
6488
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006489/* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6490 The new DTD has already been initialized.
6491*/
6492static int
Benjamin Peterson52b94082019-09-25 21:33:58 -07006493dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
6494 const XML_Memory_Handling_Suite *ms) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006495 HASH_TABLE_ITER iter;
6496
6497 /* Copy the prefix table. */
6498
6499 hashTableIterInit(&iter, &(oldDtd->prefixes));
6500 for (;;) {
6501 const XML_Char *name;
6502 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006503 if (! oldP)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006504 break;
6505 name = poolCopyString(&(newDtd->pool), oldP->name);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006506 if (! name)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006507 return 0;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006508 if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006509 return 0;
6510 }
6511
6512 hashTableIterInit(&iter, &(oldDtd->attributeIds));
6513
6514 /* Copy the attribute id table. */
6515
6516 for (;;) {
6517 ATTRIBUTE_ID *newA;
6518 const XML_Char *name;
6519 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
6520
Benjamin Peterson52b94082019-09-25 21:33:58 -07006521 if (! oldA)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006522 break;
6523 /* Remember to allocate the scratch byte before the name. */
Benjamin Peterson52b94082019-09-25 21:33:58 -07006524 if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006525 return 0;
6526 name = poolCopyString(&(newDtd->pool), oldA->name);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006527 if (! name)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006528 return 0;
6529 ++name;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006530 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006531 sizeof(ATTRIBUTE_ID));
Benjamin Peterson52b94082019-09-25 21:33:58 -07006532 if (! newA)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006533 return 0;
6534 newA->maybeTokenized = oldA->maybeTokenized;
6535 if (oldA->prefix) {
6536 newA->xmlns = oldA->xmlns;
6537 if (oldA->prefix == &oldDtd->defaultPrefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006538 newA->prefix = &newDtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006539 else
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006540 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006541 oldA->prefix->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006542 }
6543 }
6544
6545 /* Copy the element type table. */
6546
6547 hashTableIterInit(&iter, &(oldDtd->elementTypes));
6548
6549 for (;;) {
6550 int i;
6551 ELEMENT_TYPE *newE;
6552 const XML_Char *name;
6553 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006554 if (! oldE)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006555 break;
6556 name = poolCopyString(&(newDtd->pool), oldE->name);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006557 if (! name)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006558 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006559 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006560 sizeof(ELEMENT_TYPE));
Benjamin Peterson52b94082019-09-25 21:33:58 -07006561 if (! newE)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006562 return 0;
6563 if (oldE->nDefaultAtts) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07006564 newE->defaultAtts = (DEFAULT_ATTRIBUTE *)ms->malloc_fcn(
6565 oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6566 if (! newE->defaultAtts) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006567 return 0;
6568 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006569 }
6570 if (oldE->idAtt)
Benjamin Peterson52b94082019-09-25 21:33:58 -07006571 newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
6572 oldE->idAtt->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006573 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
6574 if (oldE->prefix)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006575 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006576 oldE->prefix->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006577 for (i = 0; i < newE->nDefaultAtts; i++) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07006578 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
6579 oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006580 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
6581 if (oldE->defaultAtts[i].value) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006582 newE->defaultAtts[i].value
6583 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006584 if (! newE->defaultAtts[i].value)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006585 return 0;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006586 } else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006587 newE->defaultAtts[i].value = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006588 }
6589 }
6590
6591 /* Copy the entity tables. */
Benjamin Peterson52b94082019-09-25 21:33:58 -07006592 if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
6593 &(oldDtd->generalEntities)))
6594 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006595
6596#ifdef XML_DTD
Benjamin Peterson52b94082019-09-25 21:33:58 -07006597 if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
6598 &(oldDtd->paramEntities)))
6599 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006600 newDtd->paramEntityRead = oldDtd->paramEntityRead;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006601#endif /* XML_DTD */
6602
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006603 newDtd->keepProcessing = oldDtd->keepProcessing;
6604 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006605 newDtd->standalone = oldDtd->standalone;
6606
6607 /* Don't want deep copying for scaffolding */
6608 newDtd->in_eldecl = oldDtd->in_eldecl;
6609 newDtd->scaffold = oldDtd->scaffold;
6610 newDtd->contentStringLen = oldDtd->contentStringLen;
6611 newDtd->scaffSize = oldDtd->scaffSize;
6612 newDtd->scaffLevel = oldDtd->scaffLevel;
6613 newDtd->scaffIndex = oldDtd->scaffIndex;
6614
6615 return 1;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006616} /* End dtdCopy */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006617
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006618static int
Benjamin Peterson52b94082019-09-25 21:33:58 -07006619copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
6620 STRING_POOL *newPool, const HASH_TABLE *oldTable) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006621 HASH_TABLE_ITER iter;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006622 const XML_Char *cachedOldBase = NULL;
6623 const XML_Char *cachedNewBase = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006624
6625 hashTableIterInit(&iter, oldTable);
6626
6627 for (;;) {
6628 ENTITY *newE;
6629 const XML_Char *name;
6630 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006631 if (! oldE)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006632 break;
6633 name = poolCopyString(newPool, oldE->name);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006634 if (! name)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006635 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006636 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
Benjamin Peterson52b94082019-09-25 21:33:58 -07006637 if (! newE)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006638 return 0;
6639 if (oldE->systemId) {
6640 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006641 if (! tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006642 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006643 newE->systemId = tem;
6644 if (oldE->base) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006645 if (oldE->base == cachedOldBase)
6646 newE->base = cachedNewBase;
6647 else {
6648 cachedOldBase = oldE->base;
6649 tem = poolCopyString(newPool, cachedOldBase);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006650 if (! tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006651 return 0;
6652 cachedNewBase = newE->base = tem;
6653 }
6654 }
6655 if (oldE->publicId) {
6656 tem = poolCopyString(newPool, oldE->publicId);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006657 if (! tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006658 return 0;
6659 newE->publicId = tem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006660 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07006661 } else {
6662 const XML_Char *tem
6663 = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
6664 if (! tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006665 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006666 newE->textPtr = tem;
6667 newE->textLen = oldE->textLen;
6668 }
6669 if (oldE->notation) {
6670 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006671 if (! tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006672 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006673 newE->notation = tem;
6674 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006675 newE->is_param = oldE->is_param;
6676 newE->is_internal = oldE->is_internal;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006677 }
6678 return 1;
6679}
6680
/* log2 of the slot count a hash table gets on its first insertion:
   lookup() sets table->power to this and table->size to 1 << INIT_POWER. */
#define INIT_POWER 6
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006682
Fred Drake08317ae2003-10-21 15:38:55 +00006683static XML_Bool FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07006684keyeq(KEY s1, KEY s2) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006685 for (; *s1 == *s2; s1++, s2++)
6686 if (*s1 == 0)
Fred Drake08317ae2003-10-21 15:38:55 +00006687 return XML_TRUE;
6688 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006689}
6690
Victor Stinner5ff71322017-06-21 14:39:22 +02006691static size_t
Benjamin Peterson52b94082019-09-25 21:33:58 -07006692keylen(KEY s) {
Victor Stinner5ff71322017-06-21 14:39:22 +02006693 size_t len = 0;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006694 for (; *s; s++, len++)
6695 ;
Victor Stinner5ff71322017-06-21 14:39:22 +02006696 return len;
6697}
6698
6699static void
Benjamin Peterson52b94082019-09-25 21:33:58 -07006700copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
Victor Stinner5ff71322017-06-21 14:39:22 +02006701 key->k[0] = 0;
6702 key->k[1] = get_hash_secret_salt(parser);
6703}
6704
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006705static unsigned long FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07006706hash(XML_Parser parser, KEY s) {
Victor Stinner5ff71322017-06-21 14:39:22 +02006707 struct siphash state;
6708 struct sipkey key;
Victor Stinner5ff71322017-06-21 14:39:22 +02006709 (void)sip24_valid;
6710 copy_salt_to_sipkey(parser, &key);
6711 sip24_init(&state, &key);
6712 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
6713 return (unsigned long)sip24_final(&state);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006714}
6715
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006716static NAMED *
Benjamin Peterson52b94082019-09-25 21:33:58 -07006717lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006718 size_t i;
6719 if (table->size == 0) {
6720 size_t tsize;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006721 if (! createSize)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006722 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006723 table->power = INIT_POWER;
6724 /* table->size is a power of 2 */
6725 table->size = (size_t)1 << INIT_POWER;
6726 tsize = table->size * sizeof(NAMED *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006727 table->v = (NAMED **)table->mem->malloc_fcn(tsize);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006728 if (! table->v) {
Fred Drake31d485c2004-08-03 07:06:22 +00006729 table->size = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006730 return NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00006731 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006732 memset(table->v, 0, tsize);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006733 i = hash(parser, name) & ((unsigned long)table->size - 1);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006734 } else {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006735 unsigned long h = hash(parser, name);
Fred Drake08317ae2003-10-21 15:38:55 +00006736 unsigned long mask = (unsigned long)table->size - 1;
6737 unsigned char step = 0;
6738 i = h & mask;
6739 while (table->v[i]) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006740 if (keyeq(name, table->v[i]->name))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006741 return table->v[i];
Benjamin Peterson52b94082019-09-25 21:33:58 -07006742 if (! step)
Fred Drake08317ae2003-10-21 15:38:55 +00006743 step = PROBE_STEP(h, mask, table->power);
6744 i < step ? (i += table->size - step) : (i -= step);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006745 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07006746 if (! createSize)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006747 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006748
6749 /* check for overflow (table is half full) */
6750 if (table->used >> (table->power - 1)) {
6751 unsigned char newPower = table->power + 1;
6752 size_t newSize = (size_t)1 << newPower;
6753 unsigned long newMask = (unsigned long)newSize - 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006754 size_t tsize = newSize * sizeof(NAMED *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006755 NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006756 if (! newV)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006757 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006758 memset(newV, 0, tsize);
6759 for (i = 0; i < table->size; i++)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006760 if (table->v[i]) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006761 unsigned long newHash = hash(parser, table->v[i]->name);
Fred Drake08317ae2003-10-21 15:38:55 +00006762 size_t j = newHash & newMask;
6763 step = 0;
6764 while (newV[j]) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07006765 if (! step)
Fred Drake08317ae2003-10-21 15:38:55 +00006766 step = PROBE_STEP(newHash, newMask, newPower);
6767 j < step ? (j += newSize - step) : (j -= step);
6768 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006769 newV[j] = table->v[i];
6770 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006771 table->mem->free_fcn(table->v);
6772 table->v = newV;
Fred Drake08317ae2003-10-21 15:38:55 +00006773 table->power = newPower;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006774 table->size = newSize;
Fred Drake08317ae2003-10-21 15:38:55 +00006775 i = h & newMask;
6776 step = 0;
6777 while (table->v[i]) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07006778 if (! step)
Fred Drake08317ae2003-10-21 15:38:55 +00006779 step = PROBE_STEP(h, newMask, newPower);
6780 i < step ? (i += newSize - step) : (i -= step);
6781 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006782 }
6783 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006784 table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize);
Benjamin Peterson52b94082019-09-25 21:33:58 -07006785 if (! table->v[i])
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006786 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006787 memset(table->v[i], 0, createSize);
6788 table->v[i]->name = name;
6789 (table->used)++;
6790 return table->v[i];
6791}
6792
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006793static void FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07006794hashTableClear(HASH_TABLE *table) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006795 size_t i;
6796 for (i = 0; i < table->size; i++) {
Fred Drake08317ae2003-10-21 15:38:55 +00006797 table->mem->free_fcn(table->v[i]);
6798 table->v[i] = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006799 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006800 table->used = 0;
6801}
6802
6803static void FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07006804hashTableDestroy(HASH_TABLE *table) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006805 size_t i;
Fred Drake08317ae2003-10-21 15:38:55 +00006806 for (i = 0; i < table->size; i++)
6807 table->mem->free_fcn(table->v[i]);
6808 table->mem->free_fcn(table->v);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006809}
6810
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006811static void FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07006812hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) {
Fred Drake08317ae2003-10-21 15:38:55 +00006813 p->power = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006814 p->size = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006815 p->used = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006816 p->v = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006817 p->mem = ms;
6818}
6819
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006820static void FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07006821hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006822 iter->p = table->v;
Miss Islington (bot)27067852021-08-29 07:32:50 -07006823 iter->end = iter->p ? iter->p + table->size : NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006824}
6825
Benjamin Peterson52b94082019-09-25 21:33:58 -07006826static NAMED *FASTCALL
6827hashTableIterNext(HASH_TABLE_ITER *iter) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006828 while (iter->p != iter->end) {
6829 NAMED *tem = *(iter->p)++;
6830 if (tem)
6831 return tem;
6832 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006833 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006834}
6835
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006836static void FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07006837poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006838 pool->blocks = NULL;
6839 pool->freeBlocks = NULL;
6840 pool->start = NULL;
6841 pool->ptr = NULL;
6842 pool->end = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006843 pool->mem = ms;
6844}
6845
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006846static void FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07006847poolClear(STRING_POOL *pool) {
6848 if (! pool->freeBlocks)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006849 pool->freeBlocks = pool->blocks;
6850 else {
6851 BLOCK *p = pool->blocks;
6852 while (p) {
6853 BLOCK *tem = p->next;
6854 p->next = pool->freeBlocks;
6855 pool->freeBlocks = p;
6856 p = tem;
6857 }
6858 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006859 pool->blocks = NULL;
6860 pool->start = NULL;
6861 pool->ptr = NULL;
6862 pool->end = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006863}
6864
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006865static void FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07006866poolDestroy(STRING_POOL *pool) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006867 BLOCK *p = pool->blocks;
6868 while (p) {
6869 BLOCK *tem = p->next;
6870 pool->mem->free_fcn(p);
6871 p = tem;
6872 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006873 p = pool->freeBlocks;
6874 while (p) {
6875 BLOCK *tem = p->next;
6876 pool->mem->free_fcn(p);
6877 p = tem;
6878 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006879}
6880
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006881static XML_Char *
Benjamin Peterson52b94082019-09-25 21:33:58 -07006882poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
6883 const char *end) {
6884 if (! pool->ptr && ! poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006885 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006886 for (;;) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07006887 const enum XML_Convert_Result convert_res = XmlConvert(
6888 enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
6889 if ((convert_res == XML_CONVERT_COMPLETED)
6890 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006891 break;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006892 if (! poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006893 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006894 }
6895 return pool->start;
6896}
6897
Benjamin Peterson52b94082019-09-25 21:33:58 -07006898static const XML_Char *FASTCALL
6899poolCopyString(STRING_POOL *pool, const XML_Char *s) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006900 do {
Benjamin Peterson52b94082019-09-25 21:33:58 -07006901 if (! poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006902 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006903 } while (*s++);
6904 s = pool->start;
6905 poolFinish(pool);
6906 return s;
6907}
6908
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006909static const XML_Char *
Benjamin Peterson52b94082019-09-25 21:33:58 -07006910poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
6911 if (! pool->ptr && ! poolGrow(pool)) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02006912 /* The following line is unreachable given the current usage of
6913 * poolCopyStringN(). Currently it is called from exactly one
6914 * place to copy the text of a simple general entity. By that
6915 * point, the name of the entity is already stored in the pool, so
6916 * pool->ptr cannot be NULL.
6917 *
6918 * If poolCopyStringN() is used elsewhere as it well might be,
6919 * this line may well become executable again. Regardless, this
6920 * sort of check shouldn't be removed lightly, so we just exclude
6921 * it from the coverage statistics.
6922 */
6923 return NULL; /* LCOV_EXCL_LINE */
6924 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006925 for (; n > 0; --n, s++) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07006926 if (! poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006927 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006928 }
6929 s = pool->start;
6930 poolFinish(pool);
6931 return s;
6932}
6933
Benjamin Peterson52b94082019-09-25 21:33:58 -07006934static const XML_Char *FASTCALL
6935poolAppendString(STRING_POOL *pool, const XML_Char *s) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006936 while (*s) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07006937 if (! poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006938 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006939 s++;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006940 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006941 return pool->start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006942}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006943
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006944static XML_Char *
Benjamin Peterson52b94082019-09-25 21:33:58 -07006945poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
6946 const char *end) {
6947 if (! poolAppend(pool, enc, ptr, end))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006948 return NULL;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006949 if (pool->ptr == pool->end && ! poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006950 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006951 *(pool->ptr)++ = 0;
6952 return pool->start;
6953}
6954
Victor Stinner5ff71322017-06-21 14:39:22 +02006955static size_t
Benjamin Peterson52b94082019-09-25 21:33:58 -07006956poolBytesToAllocateFor(int blockSize) {
Victor Stinner5ff71322017-06-21 14:39:22 +02006957 /* Unprotected math would be:
6958 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
6959 **
6960 ** Detect overflow, avoiding _signed_ overflow undefined behavior
6961 ** For a + b * c we check b * c in isolation first, so that addition of a
6962 ** on top has no chance of making us accept a small non-negative number
6963 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07006964 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
Victor Stinner5ff71322017-06-21 14:39:22 +02006965
6966 if (blockSize <= 0)
6967 return 0;
6968
6969 if (blockSize > (int)(INT_MAX / stretch))
6970 return 0;
6971
6972 {
6973 const int stretchedBlockSize = blockSize * (int)stretch;
Benjamin Peterson52b94082019-09-25 21:33:58 -07006974 const int bytesToAllocate
6975 = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
Victor Stinner5ff71322017-06-21 14:39:22 +02006976 if (bytesToAllocate < 0)
6977 return 0;
6978
6979 return (size_t)bytesToAllocate;
6980 }
6981}
6982
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006983static XML_Bool FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07006984poolGrow(STRING_POOL *pool) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006985 if (pool->freeBlocks) {
6986 if (pool->start == 0) {
6987 pool->blocks = pool->freeBlocks;
6988 pool->freeBlocks = pool->freeBlocks->next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006989 pool->blocks->next = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006990 pool->start = pool->blocks->s;
6991 pool->end = pool->start + pool->blocks->size;
6992 pool->ptr = pool->start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006993 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006994 }
6995 if (pool->end - pool->start < pool->freeBlocks->size) {
6996 BLOCK *tem = pool->freeBlocks->next;
6997 pool->freeBlocks->next = pool->blocks;
6998 pool->blocks = pool->freeBlocks;
6999 pool->freeBlocks = tem;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007000 memcpy(pool->blocks->s, pool->start,
7001 (pool->end - pool->start) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007002 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
7003 pool->start = pool->blocks->s;
7004 pool->end = pool->start + pool->blocks->size;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007005 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007006 }
7007 }
7008 if (pool->blocks && pool->start == pool->blocks->s) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02007009 BLOCK *temp;
Benjamin Peterson52b94082019-09-25 21:33:58 -07007010 int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
Victor Stinner5ff71322017-06-21 14:39:22 +02007011 size_t bytesToAllocate;
Victor Stinner23ec4b52017-06-15 00:54:36 +02007012
Benjamin Peterson4e211002018-06-26 19:25:45 -07007013 /* NOTE: Needs to be calculated prior to calling `realloc`
7014 to avoid dangling pointers: */
Victor Stinner93d0cb52017-08-18 23:43:54 +02007015 const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
7016
7017 if (blockSize < 0) {
7018 /* This condition traps a situation where either more than
7019 * INT_MAX/2 bytes have already been allocated. This isn't
7020 * readily testable, since it is unlikely that an average
7021 * machine will have that much memory, so we exclude it from the
7022 * coverage statistics.
7023 */
7024 return XML_FALSE; /* LCOV_EXCL_LINE */
7025 }
Victor Stinner23ec4b52017-06-15 00:54:36 +02007026
Victor Stinner5ff71322017-06-21 14:39:22 +02007027 bytesToAllocate = poolBytesToAllocateFor(blockSize);
7028 if (bytesToAllocate == 0)
7029 return XML_FALSE;
7030
Benjamin Peterson52b94082019-09-25 21:33:58 -07007031 temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks,
7032 (unsigned)bytesToAllocate);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07007033 if (temp == NULL)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007034 return XML_FALSE;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07007035 pool->blocks = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007036 pool->blocks->size = blockSize;
Victor Stinner93d0cb52017-08-18 23:43:54 +02007037 pool->ptr = pool->blocks->s + offsetInsideBlock;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007038 pool->start = pool->blocks->s;
7039 pool->end = pool->start + blockSize;
Benjamin Peterson52b94082019-09-25 21:33:58 -07007040 } else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007041 BLOCK *tem;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00007042 int blockSize = (int)(pool->end - pool->start);
Victor Stinner5ff71322017-06-21 14:39:22 +02007043 size_t bytesToAllocate;
Victor Stinner23ec4b52017-06-15 00:54:36 +02007044
Victor Stinner93d0cb52017-08-18 23:43:54 +02007045 if (blockSize < 0) {
7046 /* This condition traps a situation where either more than
7047 * INT_MAX bytes have already been allocated (which is prevented
7048 * by various pieces of program logic, not least this one, never
7049 * mind the unlikelihood of actually having that much memory) or
7050 * the pool control fields have been corrupted (which could
7051 * conceivably happen in an extremely buggy user handler
7052 * function). Either way it isn't readily testable, so we
7053 * exclude it from the coverage statistics.
7054 */
Benjamin Peterson52b94082019-09-25 21:33:58 -07007055 return XML_FALSE; /* LCOV_EXCL_LINE */
Victor Stinner93d0cb52017-08-18 23:43:54 +02007056 }
Victor Stinner23ec4b52017-06-15 00:54:36 +02007057
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007058 if (blockSize < INIT_BLOCK_SIZE)
7059 blockSize = INIT_BLOCK_SIZE;
Victor Stinner5ff71322017-06-21 14:39:22 +02007060 else {
7061 /* Detect overflow, avoiding _signed_ overflow undefined behavior */
7062 if ((int)((unsigned)blockSize * 2U) < 0) {
7063 return XML_FALSE;
7064 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007065 blockSize *= 2;
Victor Stinner5ff71322017-06-21 14:39:22 +02007066 }
7067
7068 bytesToAllocate = poolBytesToAllocateFor(blockSize);
7069 if (bytesToAllocate == 0)
7070 return XML_FALSE;
7071
7072 tem = (BLOCK *)pool->mem->malloc_fcn(bytesToAllocate);
Benjamin Peterson52b94082019-09-25 21:33:58 -07007073 if (! tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007074 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007075 tem->size = blockSize;
7076 tem->next = pool->blocks;
7077 pool->blocks = tem;
7078 if (pool->ptr != pool->start)
Benjamin Peterson52b94082019-09-25 21:33:58 -07007079 memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007080 pool->ptr = tem->s + (pool->ptr - pool->start);
7081 pool->start = tem->s;
7082 pool->end = tem->s + blockSize;
7083 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007084 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007085}
7086
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007087static int FASTCALL
Benjamin Peterson52b94082019-09-25 21:33:58 -07007088nextScaffoldPart(XML_Parser parser) {
7089 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7090 CONTENT_SCAFFOLD *me;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007091 int next;
7092
Benjamin Peterson52b94082019-09-25 21:33:58 -07007093 if (! dtd->scaffIndex) {
Benjamin Peterson4e211002018-06-26 19:25:45 -07007094 dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
Benjamin Peterson52b94082019-09-25 21:33:58 -07007095 if (! dtd->scaffIndex)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007096 return -1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007097 dtd->scaffIndex[0] = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007098 }
7099
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007100 if (dtd->scaffCount >= dtd->scaffSize) {
7101 CONTENT_SCAFFOLD *temp;
7102 if (dtd->scaffold) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07007103 temp = (CONTENT_SCAFFOLD *)REALLOC(
7104 parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007105 if (temp == NULL)
7106 return -1;
7107 dtd->scaffSize *= 2;
Benjamin Peterson52b94082019-09-25 21:33:58 -07007108 } else {
Benjamin Peterson4e211002018-06-26 19:25:45 -07007109 temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
Benjamin Peterson52b94082019-09-25 21:33:58 -07007110 * sizeof(CONTENT_SCAFFOLD));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007111 if (temp == NULL)
7112 return -1;
7113 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007114 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007115 dtd->scaffold = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007116 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007117 next = dtd->scaffCount++;
7118 me = &dtd->scaffold[next];
7119 if (dtd->scaffLevel) {
Benjamin Peterson52b94082019-09-25 21:33:58 -07007120 CONTENT_SCAFFOLD *parent
7121 = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007122 if (parent->lastchild) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007123 dtd->scaffold[parent->lastchild].nextsib = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007124 }
Benjamin Peterson52b94082019-09-25 21:33:58 -07007125 if (! parent->childcnt)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007126 parent->firstchild = next;
7127 parent->lastchild = next;
7128 parent->childcnt++;
7129 }
7130 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
7131 return next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007132}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007133
7134static void
Benjamin Peterson52b94082019-09-25 21:33:58 -07007135build_node(XML_Parser parser, int src_node, XML_Content *dest,
7136 XML_Content **contpos, XML_Char **strpos) {
7137 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007138 dest->type = dtd->scaffold[src_node].type;
7139 dest->quant = dtd->scaffold[src_node].quant;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007140 if (dest->type == XML_CTYPE_NAME) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007141 const XML_Char *src;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007142 dest->name = *strpos;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007143 src = dtd->scaffold[src_node].name;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007144 for (;;) {
7145 *(*strpos)++ = *src;
Benjamin Peterson52b94082019-09-25 21:33:58 -07007146 if (! *src)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007147 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007148 src++;
7149 }
7150 dest->numchildren = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007151 dest->children = NULL;
Benjamin Peterson52b94082019-09-25 21:33:58 -07007152 } else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007153 unsigned int i;
7154 int cn;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007155 dest->numchildren = dtd->scaffold[src_node].childcnt;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007156 dest->children = *contpos;
7157 *contpos += dest->numchildren;
Benjamin Peterson52b94082019-09-25 21:33:58 -07007158 for (i = 0, cn = dtd->scaffold[src_node].firstchild; i < dest->numchildren;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007159 i++, cn = dtd->scaffold[cn].nextsib) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007160 build_node(parser, cn, &(dest->children[i]), contpos, strpos);
7161 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007162 dest->name = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007163 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007164}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007165
7166static XML_Content *
Benjamin Peterson52b94082019-09-25 21:33:58 -07007167build_model(XML_Parser parser) {
7168 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007169 XML_Content *ret;
7170 XML_Content *cpos;
Benjamin Peterson52b94082019-09-25 21:33:58 -07007171 XML_Char *str;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007172 int allocsize = (dtd->scaffCount * sizeof(XML_Content)
7173 + (dtd->contentStringLen * sizeof(XML_Char)));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007174
Benjamin Peterson4e211002018-06-26 19:25:45 -07007175 ret = (XML_Content *)MALLOC(parser, allocsize);
Benjamin Peterson52b94082019-09-25 21:33:58 -07007176 if (! ret)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007177 return NULL;
7178
Benjamin Peterson52b94082019-09-25 21:33:58 -07007179 str = (XML_Char *)(&ret[dtd->scaffCount]);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007180 cpos = &ret[1];
7181
7182 build_node(parser, 0, ret, &cpos, &str);
7183 return ret;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007184}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007185
7186static ELEMENT_TYPE *
Benjamin Peterson52b94082019-09-25 21:33:58 -07007187getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
7188 const char *end) {
7189 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007190 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007191 ELEMENT_TYPE *ret;
7192
Benjamin Peterson52b94082019-09-25 21:33:58 -07007193 if (! name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007194 return NULL;
Benjamin Peterson52b94082019-09-25 21:33:58 -07007195 ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
7196 sizeof(ELEMENT_TYPE));
7197 if (! ret)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007198 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007199 if (ret->name != name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007200 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007201 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007202 poolFinish(&dtd->pool);
Benjamin Peterson52b94082019-09-25 21:33:58 -07007203 if (! setElementTypePrefix(parser, ret))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007204 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007205 }
7206 return ret;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007207}
Victor Stinner93d0cb52017-08-18 23:43:54 +02007208
7209static XML_Char *
Benjamin Peterson52b94082019-09-25 21:33:58 -07007210copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
7211 int charsRequired = 0;
7212 XML_Char *result;
Victor Stinner93d0cb52017-08-18 23:43:54 +02007213
Benjamin Peterson52b94082019-09-25 21:33:58 -07007214 /* First determine how long the string is */
7215 while (s[charsRequired] != 0) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02007216 charsRequired++;
Benjamin Peterson52b94082019-09-25 21:33:58 -07007217 }
7218 /* Include the terminator */
7219 charsRequired++;
Victor Stinner93d0cb52017-08-18 23:43:54 +02007220
Benjamin Peterson52b94082019-09-25 21:33:58 -07007221 /* Now allocate space for the copy */
7222 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
7223 if (result == NULL)
7224 return NULL;
7225 /* Copy the original into place */
7226 memcpy(result, s, charsRequired * sizeof(XML_Char));
7227 return result;
Victor Stinner93d0cb52017-08-18 23:43:54 +02007228}
Miss Islington (bot)27067852021-08-29 07:32:50 -07007229
7230#ifdef XML_DTD
7231
7232static float
7233accountingGetCurrentAmplification(XML_Parser rootParser) {
7234 const XmlBigCount countBytesOutput
7235 = rootParser->m_accounting.countBytesDirect
7236 + rootParser->m_accounting.countBytesIndirect;
7237 const float amplificationFactor
7238 = rootParser->m_accounting.countBytesDirect
7239 ? (countBytesOutput
7240 / (float)(rootParser->m_accounting.countBytesDirect))
7241 : 1.0f;
7242 assert(! rootParser->m_parentParser);
7243 return amplificationFactor;
7244}
7245
7246static void
7247accountingReportStats(XML_Parser originParser, const char *epilog) {
7248 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7249 assert(! rootParser->m_parentParser);
7250
7251 if (rootParser->m_accounting.debugLevel < 1) {
7252 return;
7253 }
7254
7255 const float amplificationFactor
7256 = accountingGetCurrentAmplification(rootParser);
7257 fprintf(stderr,
7258 "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
7259 "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
7260 (void *)rootParser, rootParser->m_accounting.countBytesDirect,
7261 rootParser->m_accounting.countBytesIndirect,
7262 (double)amplificationFactor, epilog);
7263}
7264
7265static void
7266accountingOnAbort(XML_Parser originParser) {
7267 accountingReportStats(originParser, " ABORTING\n");
7268}
7269
7270static void
7271accountingReportDiff(XML_Parser rootParser,
7272 unsigned int levelsAwayFromRootParser, const char *before,
7273 const char *after, ptrdiff_t bytesMore, int source_line,
7274 enum XML_Account account) {
7275 assert(! rootParser->m_parentParser);
7276
7277 fprintf(stderr,
7278 " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"",
7279 bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
7280 levelsAwayFromRootParser, source_line, 10, "");
7281
7282 const char ellipis[] = "[..]";
7283 const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
7284 const unsigned int contextLength = 10;
7285
7286 /* Note: Performance is of no concern here */
7287 const char *walker = before;
7288 if ((rootParser->m_accounting.debugLevel >= 3)
7289 || (after - before)
7290 <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
7291 for (; walker < after; walker++) {
7292 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7293 }
7294 } else {
7295 for (; walker < before + contextLength; walker++) {
7296 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7297 }
7298 fprintf(stderr, ellipis);
7299 walker = after - contextLength;
7300 for (; walker < after; walker++) {
7301 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
7302 }
7303 }
7304 fprintf(stderr, "\"\n");
7305}
7306
7307static XML_Bool
7308accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
7309 const char *after, int source_line,
7310 enum XML_Account account) {
7311 /* Note: We need to check the token type *first* to be sure that
7312 * we can even access variable <after>, safely.
7313 * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
7314 switch (tok) {
7315 case XML_TOK_INVALID:
7316 case XML_TOK_PARTIAL:
7317 case XML_TOK_PARTIAL_CHAR:
7318 case XML_TOK_NONE:
7319 return XML_TRUE;
7320 }
7321
7322 if (account == XML_ACCOUNT_NONE)
7323 return XML_TRUE; /* because these bytes have been accounted for, already */
7324
7325 unsigned int levelsAwayFromRootParser;
7326 const XML_Parser rootParser
7327 = getRootParserOf(originParser, &levelsAwayFromRootParser);
7328 assert(! rootParser->m_parentParser);
7329
7330 const int isDirect
7331 = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
7332 const ptrdiff_t bytesMore = after - before;
7333
7334 XmlBigCount *const additionTarget
7335 = isDirect ? &rootParser->m_accounting.countBytesDirect
7336 : &rootParser->m_accounting.countBytesIndirect;
7337
7338 /* Detect and avoid integer overflow */
7339 if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
7340 return XML_FALSE;
7341 *additionTarget += bytesMore;
7342
7343 const XmlBigCount countBytesOutput
7344 = rootParser->m_accounting.countBytesDirect
7345 + rootParser->m_accounting.countBytesIndirect;
7346 const float amplificationFactor
7347 = accountingGetCurrentAmplification(rootParser);
7348 const XML_Bool tolerated
7349 = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
7350 || (amplificationFactor
7351 <= rootParser->m_accounting.maximumAmplificationFactor);
7352
7353 if (rootParser->m_accounting.debugLevel >= 2) {
7354 accountingReportStats(rootParser, "");
7355 accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
7356 bytesMore, source_line, account);
7357 }
7358
7359 return tolerated;
7360}
7361
7362unsigned long long
7363testingAccountingGetCountBytesDirect(XML_Parser parser) {
7364 if (! parser)
7365 return 0;
7366 return parser->m_accounting.countBytesDirect;
7367}
7368
7369unsigned long long
7370testingAccountingGetCountBytesIndirect(XML_Parser parser) {
7371 if (! parser)
7372 return 0;
7373 return parser->m_accounting.countBytesIndirect;
7374}
7375
7376static void
7377entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
7378 const char *action, int sourceLine) {
7379 assert(! rootParser->m_parentParser);
7380 if (rootParser->m_entity_stats.debugLevel < 1)
7381 return;
7382
7383# if defined(XML_UNICODE)
7384 const char *const entityName = "[..]";
7385# else
7386 const char *const entityName = entity->name;
7387# endif
7388
7389 fprintf(
7390 stderr,
7391 "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n",
7392 (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
7393 rootParser->m_entity_stats.currentDepth,
7394 rootParser->m_entity_stats.maximumDepthSeen,
7395 (rootParser->m_entity_stats.currentDepth - 1) * 2, "",
7396 entity->is_param ? "%" : "&", entityName, action, entity->textLen,
7397 sourceLine);
7398}
7399
7400static void
7401entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7402 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7403 assert(! rootParser->m_parentParser);
7404
7405 rootParser->m_entity_stats.countEverOpened++;
7406 rootParser->m_entity_stats.currentDepth++;
7407 if (rootParser->m_entity_stats.currentDepth
7408 > rootParser->m_entity_stats.maximumDepthSeen) {
7409 rootParser->m_entity_stats.maximumDepthSeen++;
7410 }
7411
7412 entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
7413}
7414
7415static void
7416entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
7417 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
7418 assert(! rootParser->m_parentParser);
7419
7420 entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
7421 rootParser->m_entity_stats.currentDepth--;
7422}
7423
7424static XML_Parser
7425getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
7426 XML_Parser rootParser = parser;
7427 unsigned int stepsTakenUpwards = 0;
7428 while (rootParser->m_parentParser) {
7429 rootParser = rootParser->m_parentParser;
7430 stepsTakenUpwards++;
7431 }
7432 assert(! rootParser->m_parentParser);
7433 if (outLevelDiff != NULL) {
7434 *outLevelDiff = stepsTakenUpwards;
7435 }
7436 return rootParser;
7437}
7438
7439const char *
7440unsignedCharToPrintable(unsigned char c) {
7441 switch (c) {
7442 case 0:
7443 return "\\0";
7444 case 1:
7445 return "\\x1";
7446 case 2:
7447 return "\\x2";
7448 case 3:
7449 return "\\x3";
7450 case 4:
7451 return "\\x4";
7452 case 5:
7453 return "\\x5";
7454 case 6:
7455 return "\\x6";
7456 case 7:
7457 return "\\x7";
7458 case 8:
7459 return "\\x8";
7460 case 9:
7461 return "\\t";
7462 case 10:
7463 return "\\n";
7464 case 11:
7465 return "\\xB";
7466 case 12:
7467 return "\\xC";
7468 case 13:
7469 return "\\r";
7470 case 14:
7471 return "\\xE";
7472 case 15:
7473 return "\\xF";
7474 case 16:
7475 return "\\x10";
7476 case 17:
7477 return "\\x11";
7478 case 18:
7479 return "\\x12";
7480 case 19:
7481 return "\\x13";
7482 case 20:
7483 return "\\x14";
7484 case 21:
7485 return "\\x15";
7486 case 22:
7487 return "\\x16";
7488 case 23:
7489 return "\\x17";
7490 case 24:
7491 return "\\x18";
7492 case 25:
7493 return "\\x19";
7494 case 26:
7495 return "\\x1A";
7496 case 27:
7497 return "\\x1B";
7498 case 28:
7499 return "\\x1C";
7500 case 29:
7501 return "\\x1D";
7502 case 30:
7503 return "\\x1E";
7504 case 31:
7505 return "\\x1F";
7506 case 32:
7507 return " ";
7508 case 33:
7509 return "!";
7510 case 34:
7511 return "\\\"";
7512 case 35:
7513 return "#";
7514 case 36:
7515 return "$";
7516 case 37:
7517 return "%";
7518 case 38:
7519 return "&";
7520 case 39:
7521 return "'";
7522 case 40:
7523 return "(";
7524 case 41:
7525 return ")";
7526 case 42:
7527 return "*";
7528 case 43:
7529 return "+";
7530 case 44:
7531 return ",";
7532 case 45:
7533 return "-";
7534 case 46:
7535 return ".";
7536 case 47:
7537 return "/";
7538 case 48:
7539 return "0";
7540 case 49:
7541 return "1";
7542 case 50:
7543 return "2";
7544 case 51:
7545 return "3";
7546 case 52:
7547 return "4";
7548 case 53:
7549 return "5";
7550 case 54:
7551 return "6";
7552 case 55:
7553 return "7";
7554 case 56:
7555 return "8";
7556 case 57:
7557 return "9";
7558 case 58:
7559 return ":";
7560 case 59:
7561 return ";";
7562 case 60:
7563 return "<";
7564 case 61:
7565 return "=";
7566 case 62:
7567 return ">";
7568 case 63:
7569 return "?";
7570 case 64:
7571 return "@";
7572 case 65:
7573 return "A";
7574 case 66:
7575 return "B";
7576 case 67:
7577 return "C";
7578 case 68:
7579 return "D";
7580 case 69:
7581 return "E";
7582 case 70:
7583 return "F";
7584 case 71:
7585 return "G";
7586 case 72:
7587 return "H";
7588 case 73:
7589 return "I";
7590 case 74:
7591 return "J";
7592 case 75:
7593 return "K";
7594 case 76:
7595 return "L";
7596 case 77:
7597 return "M";
7598 case 78:
7599 return "N";
7600 case 79:
7601 return "O";
7602 case 80:
7603 return "P";
7604 case 81:
7605 return "Q";
7606 case 82:
7607 return "R";
7608 case 83:
7609 return "S";
7610 case 84:
7611 return "T";
7612 case 85:
7613 return "U";
7614 case 86:
7615 return "V";
7616 case 87:
7617 return "W";
7618 case 88:
7619 return "X";
7620 case 89:
7621 return "Y";
7622 case 90:
7623 return "Z";
7624 case 91:
7625 return "[";
7626 case 92:
7627 return "\\\\";
7628 case 93:
7629 return "]";
7630 case 94:
7631 return "^";
7632 case 95:
7633 return "_";
7634 case 96:
7635 return "`";
7636 case 97:
7637 return "a";
7638 case 98:
7639 return "b";
7640 case 99:
7641 return "c";
7642 case 100:
7643 return "d";
7644 case 101:
7645 return "e";
7646 case 102:
7647 return "f";
7648 case 103:
7649 return "g";
7650 case 104:
7651 return "h";
7652 case 105:
7653 return "i";
7654 case 106:
7655 return "j";
7656 case 107:
7657 return "k";
7658 case 108:
7659 return "l";
7660 case 109:
7661 return "m";
7662 case 110:
7663 return "n";
7664 case 111:
7665 return "o";
7666 case 112:
7667 return "p";
7668 case 113:
7669 return "q";
7670 case 114:
7671 return "r";
7672 case 115:
7673 return "s";
7674 case 116:
7675 return "t";
7676 case 117:
7677 return "u";
7678 case 118:
7679 return "v";
7680 case 119:
7681 return "w";
7682 case 120:
7683 return "x";
7684 case 121:
7685 return "y";
7686 case 122:
7687 return "z";
7688 case 123:
7689 return "{";
7690 case 124:
7691 return "|";
7692 case 125:
7693 return "}";
7694 case 126:
7695 return "~";
7696 case 127:
7697 return "\\x7F";
7698 case 128:
7699 return "\\x80";
7700 case 129:
7701 return "\\x81";
7702 case 130:
7703 return "\\x82";
7704 case 131:
7705 return "\\x83";
7706 case 132:
7707 return "\\x84";
7708 case 133:
7709 return "\\x85";
7710 case 134:
7711 return "\\x86";
7712 case 135:
7713 return "\\x87";
7714 case 136:
7715 return "\\x88";
7716 case 137:
7717 return "\\x89";
7718 case 138:
7719 return "\\x8A";
7720 case 139:
7721 return "\\x8B";
7722 case 140:
7723 return "\\x8C";
7724 case 141:
7725 return "\\x8D";
7726 case 142:
7727 return "\\x8E";
7728 case 143:
7729 return "\\x8F";
7730 case 144:
7731 return "\\x90";
7732 case 145:
7733 return "\\x91";
7734 case 146:
7735 return "\\x92";
7736 case 147:
7737 return "\\x93";
7738 case 148:
7739 return "\\x94";
7740 case 149:
7741 return "\\x95";
7742 case 150:
7743 return "\\x96";
7744 case 151:
7745 return "\\x97";
7746 case 152:
7747 return "\\x98";
7748 case 153:
7749 return "\\x99";
7750 case 154:
7751 return "\\x9A";
7752 case 155:
7753 return "\\x9B";
7754 case 156:
7755 return "\\x9C";
7756 case 157:
7757 return "\\x9D";
7758 case 158:
7759 return "\\x9E";
7760 case 159:
7761 return "\\x9F";
7762 case 160:
7763 return "\\xA0";
7764 case 161:
7765 return "\\xA1";
7766 case 162:
7767 return "\\xA2";
7768 case 163:
7769 return "\\xA3";
7770 case 164:
7771 return "\\xA4";
7772 case 165:
7773 return "\\xA5";
7774 case 166:
7775 return "\\xA6";
7776 case 167:
7777 return "\\xA7";
7778 case 168:
7779 return "\\xA8";
7780 case 169:
7781 return "\\xA9";
7782 case 170:
7783 return "\\xAA";
7784 case 171:
7785 return "\\xAB";
7786 case 172:
7787 return "\\xAC";
7788 case 173:
7789 return "\\xAD";
7790 case 174:
7791 return "\\xAE";
7792 case 175:
7793 return "\\xAF";
7794 case 176:
7795 return "\\xB0";
7796 case 177:
7797 return "\\xB1";
7798 case 178:
7799 return "\\xB2";
7800 case 179:
7801 return "\\xB3";
7802 case 180:
7803 return "\\xB4";
7804 case 181:
7805 return "\\xB5";
7806 case 182:
7807 return "\\xB6";
7808 case 183:
7809 return "\\xB7";
7810 case 184:
7811 return "\\xB8";
7812 case 185:
7813 return "\\xB9";
7814 case 186:
7815 return "\\xBA";
7816 case 187:
7817 return "\\xBB";
7818 case 188:
7819 return "\\xBC";
7820 case 189:
7821 return "\\xBD";
7822 case 190:
7823 return "\\xBE";
7824 case 191:
7825 return "\\xBF";
7826 case 192:
7827 return "\\xC0";
7828 case 193:
7829 return "\\xC1";
7830 case 194:
7831 return "\\xC2";
7832 case 195:
7833 return "\\xC3";
7834 case 196:
7835 return "\\xC4";
7836 case 197:
7837 return "\\xC5";
7838 case 198:
7839 return "\\xC6";
7840 case 199:
7841 return "\\xC7";
7842 case 200:
7843 return "\\xC8";
7844 case 201:
7845 return "\\xC9";
7846 case 202:
7847 return "\\xCA";
7848 case 203:
7849 return "\\xCB";
7850 case 204:
7851 return "\\xCC";
7852 case 205:
7853 return "\\xCD";
7854 case 206:
7855 return "\\xCE";
7856 case 207:
7857 return "\\xCF";
7858 case 208:
7859 return "\\xD0";
7860 case 209:
7861 return "\\xD1";
7862 case 210:
7863 return "\\xD2";
7864 case 211:
7865 return "\\xD3";
7866 case 212:
7867 return "\\xD4";
7868 case 213:
7869 return "\\xD5";
7870 case 214:
7871 return "\\xD6";
7872 case 215:
7873 return "\\xD7";
7874 case 216:
7875 return "\\xD8";
7876 case 217:
7877 return "\\xD9";
7878 case 218:
7879 return "\\xDA";
7880 case 219:
7881 return "\\xDB";
7882 case 220:
7883 return "\\xDC";
7884 case 221:
7885 return "\\xDD";
7886 case 222:
7887 return "\\xDE";
7888 case 223:
7889 return "\\xDF";
7890 case 224:
7891 return "\\xE0";
7892 case 225:
7893 return "\\xE1";
7894 case 226:
7895 return "\\xE2";
7896 case 227:
7897 return "\\xE3";
7898 case 228:
7899 return "\\xE4";
7900 case 229:
7901 return "\\xE5";
7902 case 230:
7903 return "\\xE6";
7904 case 231:
7905 return "\\xE7";
7906 case 232:
7907 return "\\xE8";
7908 case 233:
7909 return "\\xE9";
7910 case 234:
7911 return "\\xEA";
7912 case 235:
7913 return "\\xEB";
7914 case 236:
7915 return "\\xEC";
7916 case 237:
7917 return "\\xED";
7918 case 238:
7919 return "\\xEE";
7920 case 239:
7921 return "\\xEF";
7922 case 240:
7923 return "\\xF0";
7924 case 241:
7925 return "\\xF1";
7926 case 242:
7927 return "\\xF2";
7928 case 243:
7929 return "\\xF3";
7930 case 244:
7931 return "\\xF4";
7932 case 245:
7933 return "\\xF5";
7934 case 246:
7935 return "\\xF6";
7936 case 247:
7937 return "\\xF7";
7938 case 248:
7939 return "\\xF8";
7940 case 249:
7941 return "\\xF9";
7942 case 250:
7943 return "\\xFA";
7944 case 251:
7945 return "\\xFB";
7946 case 252:
7947 return "\\xFC";
7948 case 253:
7949 return "\\xFD";
7950 case 254:
7951 return "\\xFE";
7952 case 255:
7953 return "\\xFF";
7954 default:
7955 assert(0); /* never gets here */
7956 return "dead code";
7957 }
7958 assert(0); /* never gets here */
7959}
7960
7961#endif /* XML_DTD */
7962
/* Reads a debug level from the environment.

   Looks up the environment variable named `variableName` and parses its
   value with strtoul in base 10.

   Returns the parsed value on success.  Returns `defaultDebugLevel` when
   - the variable is not set,
   - the value contains no convertible digits (e.g. it is empty),
   - the value has trailing characters after the number, or
   - strtoul reports an error through errno (e.g. out of range).

   errno is cleared before parsing and cleared again when a value is
   rejected, so a bad environment value does not leak an error code to
   the caller. */
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
  const char *const valueOrNull = getenv(variableName);
  if (valueOrNull == NULL) {
    return defaultDebugLevel;
  }
  const char *const value = valueOrNull;

  errno = 0;
  char *afterValue = NULL;
  unsigned long debugLevel = strtoul(value, &afterValue, 10);
  /* strtoul signals "nothing converted" only by leaving the end pointer
     equal to the input; setting errno in that case is optional.  Without
     the (afterValue == value) check an empty value would be accepted as
     debug level 0 instead of falling back to the default. */
  if ((errno != 0) || (afterValue == value) || (afterValue[0] != '\0')) {
    errno = 0;
    return defaultDebugLevel;
  }

  return debugLevel;
}