blob: 0df68830f05e3a9c6e5f2e97af2d5ec6681a6e6f [file] [log] [blame]
Victor Stinner759e30e2017-09-05 01:58:08 +02001/* 8c6b2be7c6281da65ce05218fc15c339f02a811706340824ab596aa86e1fd51a (2.2.4+)
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
Victor Stinner5ff71322017-06-21 14:39:22 +02008
Victor Stinner759e30e2017-09-05 01:58:08 +02009 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000-2017 Expat development team
11 Licensed under the MIT license:
12
13 Permission is hereby granted, free of charge, to any person obtaining
14 a copy of this software and associated documentation files (the
15 "Software"), to deal in the Software without restriction, including
16 without limitation the rights to use, copy, modify, merge, publish,
17 distribute, sublicense, and/or sell copies of the Software, and to permit
18 persons to whom the Software is furnished to do so, subject to the
19 following conditions:
20
21 The above copyright notice and this permission notice shall be included
22 in all copies or substantial portions of the Software.
23
24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
29 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30 USE OR OTHER DEALINGS IN THE SOFTWARE.
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +000031*/
32
Victor Stinner93d0cb52017-08-18 23:43:54 +020033#if !defined(_GNU_SOURCE)
34# define _GNU_SOURCE 1 /* syscall prototype */
35#endif
Victor Stinner5ff71322017-06-21 14:39:22 +020036
#include <stddef.h>
#include <stdint.h>  /* uintptr_t */
#include <string.h>  /* memset(), memcpy() */
#include <assert.h>
#include <limits.h>  /* UINT_MAX */
#include <stdio.h>   /* fprintf */
#include <stdlib.h>  /* getenv */
Victor Stinner23ec4b52017-06-15 00:54:36 +020043
Victor Stinner5ff71322017-06-21 14:39:22 +020044#ifdef _WIN32
Victor Stinner23ec4b52017-06-15 00:54:36 +020045#define getpid GetCurrentProcessId
46#else
47#include <sys/time.h> /* gettimeofday() */
48#include <sys/types.h> /* getpid() */
49#include <unistd.h> /* getpid() */
Victor Stinner93d0cb52017-08-18 23:43:54 +020050#include <fcntl.h> /* O_RDONLY */
51#include <errno.h>
Victor Stinner23ec4b52017-06-15 00:54:36 +020052#endif
53
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070054#define XML_BUILDING_EXPAT 1
55
Victor Stinner5ff71322017-06-21 14:39:22 +020056#ifdef _WIN32
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070057#include "winconfig.h"
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070058#elif defined(HAVE_EXPAT_CONFIG_H)
59#include <expat_config.h>
Victor Stinner5ff71322017-06-21 14:39:22 +020060#endif /* ndef _WIN32 */
Christian Heimesaa152762013-12-06 23:43:50 +010061
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070062#include "ascii.h"
Fred Drake08317ae2003-10-21 15:38:55 +000063#include "expat.h"
Victor Stinner5ff71322017-06-21 14:39:22 +020064#include "siphash.h"
Fred Drake08317ae2003-10-21 15:38:55 +000065
Victor Stinner93d0cb52017-08-18 23:43:54 +020066#if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
67# if defined(HAVE_GETRANDOM)
68# include <sys/random.h> /* getrandom */
69# else
70# include <unistd.h> /* syscall */
71# include <sys/syscall.h> /* SYS_getrandom */
72# endif
73# if ! defined(GRND_NONBLOCK)
74# define GRND_NONBLOCK 0x0001
# endif /* ! defined(GRND_NONBLOCK) */
76#endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
77
78#if defined(HAVE_LIBBSD) \
79 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
80# include <bsd/stdlib.h>
81#endif
82
83#if defined(_WIN32) && !defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
84# define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
85#endif
86
87#if !defined(HAVE_GETRANDOM) && !defined(HAVE_SYSCALL_GETRANDOM) \
88 && !defined(HAVE_ARC4RANDOM_BUF) && !defined(HAVE_ARC4RANDOM) \
89 && !defined(XML_DEV_URANDOM) \
90 && !defined(_WIN32) \
91 && !defined(XML_POOR_ENTROPY)
92# error \
93 You do not have support for any sources of high quality entropy \
94 enabled. For end user security, that is probably not what you want. \
95 \
96 Your options include: \
97 * Linux + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
98 * Linux + glibc <2.25 (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
99 * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
100 * BSD / macOS <10.7 (arc4random): HAVE_ARC4RANDOM, \
101 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
102 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
    * Linux / BSD / macOS (/dev/urandom): XML_DEV_URANDOM, \
104 * Windows (RtlGenRandom): _WIN32. \
105 \
  If you insist on not using any of these, bypass this error by defining \
107 XML_POOR_ENTROPY; you have been warned. \
108 \
Victor Stinner93d0cb52017-08-18 23:43:54 +0200109 If you have reasons to patch this detection code away or need changes \
110 to the build system, please open a bug. Thank you!
111#endif
112
113
/* Bind the generic Xml* names, XML_ENCODE_MAX, MUST_CONVERT and ICHAR to
   their UTF-16 variants when XML_UNICODE is defined, and to the UTF-8
   variants otherwise. */
#ifdef XML_UNICODE
#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#define XmlConvert XmlUtf16Convert
#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
#define XmlEncode XmlUtf16Encode
/* True unless the encoding is UTF-16 and s sits at an even address.
   The address parity is taken through uintptr_t; subtracting a null
   pointer from s, as was done previously, is undefined behavior. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((uintptr_t)(s)) & 1))
typedef unsigned short ICHAR;
#else
#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#define XmlConvert XmlUtf8Convert
#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
#define XmlEncode XmlUtf8Encode
/* True unless the encoding is flagged as UTF-8. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
typedef char ICHAR;
#endif
132
133
/* Without namespace support (XML_NS undefined) the *NS entry points are
   simply aliases for their plain counterparts. */
#ifndef XML_NS

#define XmlInitEncodingNS XmlInitEncoding
#define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
#undef XmlGetInternalEncodingNS
#define XmlGetInternalEncodingNS XmlGetInternalEncoding
#define XmlParseXmlDeclNS XmlParseXmlDecl

#endif
143
/* XML_T(x) casts a character constant to the XML_Char flavour selected at
   build time; XML_L(x) adds the wide-literal prefix when XML_Char is
   wchar_t.  In the non-Unicode build both are the identity. */
#ifdef XML_UNICODE

#ifdef XML_UNICODE_WCHAR_T
#define XML_T(x) (const wchar_t)x
#define XML_L(x) L ## x
#else
#define XML_T(x) (const unsigned short)x
#define XML_L(x) x
#endif

#else

#define XML_T(x) x
#define XML_L(x) x

#endif
160
/* Round up n to be a multiple of sz, where sz is a power of 2.
   Both arguments are evaluated more than once. */
#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
163
Fred Drake08317ae2003-10-21 15:38:55 +0000164/* Handle the case where memmove() doesn't exist. */
165#ifndef HAVE_MEMMOVE
166#ifdef HAVE_BCOPY
167#define memmove(d,s,l) bcopy((s),(d),(l))
168#else
169#error memmove does not exist on this platform, nor is a substitute available
170#endif /* HAVE_BCOPY */
171#endif /* HAVE_MEMMOVE */
172
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000173#include "internal.h"
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000174#include "xmltok.h"
175#include "xmlrole.h"
176
177typedef const XML_Char *KEY;
178
179typedef struct {
180 KEY name;
181} NAMED;
182
183typedef struct {
184 NAMED **v;
Fred Drake08317ae2003-10-21 15:38:55 +0000185 unsigned char power;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000186 size_t size;
187 size_t used;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000188 const XML_Memory_Handling_Suite *mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000189} HASH_TABLE;
190
Victor Stinner5ff71322017-06-21 14:39:22 +0200191static size_t
192keylen(KEY s);
Fred Drake08317ae2003-10-21 15:38:55 +0000193
Victor Stinner5ff71322017-06-21 14:39:22 +0200194static void
195copy_salt_to_sipkey(XML_Parser parser, struct sipkey * key);
Fred Drake08317ae2003-10-21 15:38:55 +0000196
/* For probing (after a collision) we need a step size relatively prime
   to the hash table size, which is a power of 2. We use double-hashing,
   since we can calculate a second hash value cheaply by taking those bits
   of the first hash value that were discarded (masked out) when the table
   index was calculated: index = hash & mask, where mask = table->size - 1.
   We limit the maximum step size to table->size / 4 (mask >> 2) and make
   it odd, since odd numbers are always relatively prime to a power of 2.
*/
#define SECOND_HASH(hash, mask, power) \
  ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power) \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
209
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000210typedef struct {
211 NAMED **p;
212 NAMED **end;
213} HASH_TABLE_ITER;
214
/* Initial sizes and sentinel values used by the parser. */
#define INIT_TAG_BUF_SIZE 32  /* must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE 1024
#define INIT_ATTS_SIZE 16
#define INIT_ATTS_VERSION 0xFFFFFFFF
#define INIT_BLOCK_SIZE 1024
#define INIT_BUFFER_SIZE 1024

#define EXPAND_SPARE 24
223
224typedef struct binding {
225 struct prefix *prefix;
226 struct binding *nextTagBinding;
227 struct binding *prevPrefixBinding;
228 const struct attribute_id *attId;
229 XML_Char *uri;
230 int uriLen;
231 int uriAlloc;
232} BINDING;
233
234typedef struct prefix {
235 const XML_Char *name;
236 BINDING *binding;
237} PREFIX;
238
239typedef struct {
240 const XML_Char *str;
241 const XML_Char *localPart;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000242 const XML_Char *prefix;
243 int strLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000244 int uriLen;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000245 int prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000246} TAG_NAME;
247
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000248/* TAG represents an open element.
249 The name of the element is stored in both the document and API
250 encodings. The memory buffer 'buf' is a separately-allocated
251 memory area which stores the name. During the XML_Parse()/
252 XMLParseBuffer() when the element is open, the memory for the 'raw'
253 version of the name (in the document encoding) is shared with the
254 document buffer. If the element is open across calls to
255 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
256 contain the 'raw' name as well.
257
258 A parser re-uses these structures, maintaining a list of allocated
259 TAG objects in a free list.
260*/
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000261typedef struct tag {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000262 struct tag *parent; /* parent of this element */
263 const char *rawName; /* tagName in the original encoding */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000264 int rawNameLength;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000265 TAG_NAME name; /* tagName in the API encoding */
266 char *buf; /* buffer for name components */
267 char *bufEnd; /* end of the buffer */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000268 BINDING *bindings;
269} TAG;
270
271typedef struct {
272 const XML_Char *name;
273 const XML_Char *textPtr;
Fred Drake31d485c2004-08-03 07:06:22 +0000274 int textLen; /* length in XML_Chars */
275 int processed; /* # of processed bytes - when suspended */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000276 const XML_Char *systemId;
277 const XML_Char *base;
278 const XML_Char *publicId;
279 const XML_Char *notation;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000280 XML_Bool open;
281 XML_Bool is_param;
282 XML_Bool is_internal; /* true if declared in internal subset outside PE */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000283} ENTITY;
284
285typedef struct {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000286 enum XML_Content_Type type;
287 enum XML_Content_Quant quant;
288 const XML_Char * name;
289 int firstchild;
290 int lastchild;
291 int childcnt;
292 int nextsib;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000293} CONTENT_SCAFFOLD;
294
/* NOTE(review): presumably the initial number of CONTENT_SCAFFOLD slots -
   confirm against nextScaffoldPart(). */
#define INIT_SCAFFOLD_ELEMENTS 32
296
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000297typedef struct block {
298 struct block *next;
299 int size;
300 XML_Char s[1];
301} BLOCK;
302
303typedef struct {
304 BLOCK *blocks;
305 BLOCK *freeBlocks;
306 const XML_Char *end;
307 XML_Char *ptr;
308 XML_Char *start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000309 const XML_Memory_Handling_Suite *mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000310} STRING_POOL;
311
312/* The XML_Char before the name is used to determine whether
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000313 an attribute has been specified. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000314typedef struct attribute_id {
315 XML_Char *name;
316 PREFIX *prefix;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000317 XML_Bool maybeTokenized;
318 XML_Bool xmlns;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000319} ATTRIBUTE_ID;
320
321typedef struct {
322 const ATTRIBUTE_ID *id;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000323 XML_Bool isCdata;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000324 const XML_Char *value;
325} DEFAULT_ATTRIBUTE;
326
327typedef struct {
Fred Drake08317ae2003-10-21 15:38:55 +0000328 unsigned long version;
329 unsigned long hash;
330 const XML_Char *uriName;
331} NS_ATT;
332
333typedef struct {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000334 const XML_Char *name;
335 PREFIX *prefix;
336 const ATTRIBUTE_ID *idAtt;
337 int nDefaultAtts;
338 int allocDefaultAtts;
339 DEFAULT_ATTRIBUTE *defaultAtts;
340} ELEMENT_TYPE;
341
342typedef struct {
343 HASH_TABLE generalEntities;
344 HASH_TABLE elementTypes;
345 HASH_TABLE attributeIds;
346 HASH_TABLE prefixes;
347 STRING_POOL pool;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000348 STRING_POOL entityValuePool;
349 /* false once a parameter entity reference has been skipped */
350 XML_Bool keepProcessing;
351 /* true once an internal or external PE reference has been encountered;
352 this includes the reference to an external subset */
353 XML_Bool hasParamEntityRefs;
354 XML_Bool standalone;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000355#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000356 /* indicates if external PE has been read */
357 XML_Bool paramEntityRead;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000358 HASH_TABLE paramEntities;
359#endif /* XML_DTD */
360 PREFIX defaultPrefix;
361 /* === scaffolding for building content model === */
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000362 XML_Bool in_eldecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000363 CONTENT_SCAFFOLD *scaffold;
364 unsigned contentStringLen;
365 unsigned scaffSize;
366 unsigned scaffCount;
367 int scaffLevel;
368 int *scaffIndex;
369} DTD;
370
371typedef struct open_internal_entity {
372 const char *internalEventPtr;
373 const char *internalEventEndPtr;
374 struct open_internal_entity *next;
375 ENTITY *entity;
Fred Drake31d485c2004-08-03 07:06:22 +0000376 int startTagLevel;
377 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000378} OPEN_INTERNAL_ENTITY;
379
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000380typedef enum XML_Error PTRCALL Processor(XML_Parser parser,
381 const char *start,
382 const char *end,
383 const char **endPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000384
385static Processor prologProcessor;
386static Processor prologInitProcessor;
387static Processor contentProcessor;
388static Processor cdataSectionProcessor;
389#ifdef XML_DTD
390static Processor ignoreSectionProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000391static Processor externalParEntProcessor;
392static Processor externalParEntInitProcessor;
393static Processor entityValueProcessor;
394static Processor entityValueInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000395#endif /* XML_DTD */
396static Processor epilogProcessor;
397static Processor errorProcessor;
398static Processor externalEntityInitProcessor;
399static Processor externalEntityInitProcessor2;
400static Processor externalEntityInitProcessor3;
401static Processor externalEntityContentProcessor;
Fred Drake31d485c2004-08-03 07:06:22 +0000402static Processor internalEntityProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000403
404static enum XML_Error
405handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
406static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000407processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
Fred Drake31d485c2004-08-03 07:06:22 +0000408 const char *s, const char *next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000409static enum XML_Error
410initializeEncoding(XML_Parser parser);
411static enum XML_Error
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700412doProlog(XML_Parser parser, const ENCODING *enc, const char *s,
413 const char *end, int tok, const char *next, const char **nextPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000414 XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000415static enum XML_Error
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700416processInternalEntity(XML_Parser parser, ENTITY *entity,
Fred Drake31d485c2004-08-03 07:06:22 +0000417 XML_Bool betweenDecl);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000418static enum XML_Error
419doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700420 const char *start, const char *end, const char **endPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000421 XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000422static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000423doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000424 const char *end, const char **nextPtr, XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000425#ifdef XML_DTD
426static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000427doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000428 const char *end, const char **nextPtr, XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000429#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000430
Victor Stinner5ff71322017-06-21 14:39:22 +0200431static void
432freeBindings(XML_Parser parser, BINDING *bindings);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000433static enum XML_Error
Fred Drake4faea012003-01-28 06:42:40 +0000434storeAtts(XML_Parser parser, const ENCODING *, const char *s,
435 TAG_NAME *tagNamePtr, BINDING **bindingsPtr);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000436static enum XML_Error
437addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
438 const XML_Char *uri, BINDING **bindingsPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000439static int
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700440defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
Fred Drake31d485c2004-08-03 07:06:22 +0000441 XML_Bool isId, const XML_Char *dfltValue, XML_Parser parser);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000442static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000443storeAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
444 const char *, const char *, STRING_POOL *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000445static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000446appendAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
447 const char *, const char *, STRING_POOL *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000448static ATTRIBUTE_ID *
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000449getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
450 const char *end);
451static int
452setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000453static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000454storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start,
455 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000456static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000457reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
458 const char *start, const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000459static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000460reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
461 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000462static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000463reportDefault(XML_Parser parser, const ENCODING *enc, const char *start,
464 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000465
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000466static const XML_Char * getContext(XML_Parser parser);
467static XML_Bool
468setContext(XML_Parser parser, const XML_Char *context);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000469
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000470static void FASTCALL normalizePublicId(XML_Char *s);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000471
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000472static DTD * dtdCreate(const XML_Memory_Handling_Suite *ms);
473/* do not call if parentParser != NULL */
474static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
475static void
476dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms);
477static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700478dtdCopy(XML_Parser oldParser,
479 DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000480static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700481copyEntityTable(XML_Parser oldParser,
482 HASH_TABLE *, STRING_POOL *, const HASH_TABLE *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000483static NAMED *
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700484lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000485static void FASTCALL
486hashTableInit(HASH_TABLE *, const XML_Memory_Handling_Suite *ms);
487static void FASTCALL hashTableClear(HASH_TABLE *);
488static void FASTCALL hashTableDestroy(HASH_TABLE *);
489static void FASTCALL
490hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
491static NAMED * FASTCALL hashTableIterNext(HASH_TABLE_ITER *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000492
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000493static void FASTCALL
494poolInit(STRING_POOL *, const XML_Memory_Handling_Suite *ms);
495static void FASTCALL poolClear(STRING_POOL *);
496static void FASTCALL poolDestroy(STRING_POOL *);
497static XML_Char *
498poolAppend(STRING_POOL *pool, const ENCODING *enc,
499 const char *ptr, const char *end);
500static XML_Char *
501poolStoreString(STRING_POOL *pool, const ENCODING *enc,
502 const char *ptr, const char *end);
503static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
504static const XML_Char * FASTCALL
505poolCopyString(STRING_POOL *pool, const XML_Char *s);
506static const XML_Char *
507poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
508static const XML_Char * FASTCALL
509poolAppendString(STRING_POOL *pool, const XML_Char *s);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000510
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000511static int FASTCALL nextScaffoldPart(XML_Parser parser);
512static XML_Content * build_model(XML_Parser parser);
513static ELEMENT_TYPE *
514getElementType(XML_Parser parser, const ENCODING *enc,
515 const char *ptr, const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000516
Victor Stinner93d0cb52017-08-18 23:43:54 +0200517static XML_Char *copyString(const XML_Char *s,
518 const XML_Memory_Handling_Suite *memsuite);
519
Victor Stinner23ec4b52017-06-15 00:54:36 +0200520static unsigned long generate_hash_secret_salt(XML_Parser parser);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700521static XML_Bool startParsing(XML_Parser parser);
522
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000523static XML_Parser
524parserCreate(const XML_Char *encodingName,
525 const XML_Memory_Handling_Suite *memsuite,
526 const XML_Char *nameSep,
527 DTD *dtd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700528
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000529static void
530parserInit(XML_Parser parser, const XML_Char *encodingName);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000531
/* Accessors and cursor operations on a STRING_POOL (pool is a pointer).
   The macro argument is parenthesized everywhere so that expressions
   such as &localPool or pools + i expand correctly. */
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Step the write cursor back by one element. */
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
/* Move the write cursor back to 'start'. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Advance 'start' to the write cursor. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Append c, calling poolGrow() first when ptr has reached end.
   Evaluates to 0 if poolGrow() returns false, otherwise 1. */
#define poolAppendChar(pool, c) \
  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
   ? 0 \
   : ((*((pool)->ptr)++ = (c)), 1))
543
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000544struct XML_ParserStruct {
545 /* The first member must be userData so that the XML_GetUserData
546 macro works. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000547 void *m_userData;
548 void *m_handlerArg;
549 char *m_buffer;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000550 const XML_Memory_Handling_Suite m_mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000551 /* first character to be parsed */
552 const char *m_bufferPtr;
553 /* past last character to be parsed */
554 char *m_bufferEnd;
555 /* allocated end of buffer */
556 const char *m_bufferLim;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000557 XML_Index m_parseEndByteIndex;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000558 const char *m_parseEndPtr;
559 XML_Char *m_dataBuf;
560 XML_Char *m_dataBufEnd;
561 XML_StartElementHandler m_startElementHandler;
562 XML_EndElementHandler m_endElementHandler;
563 XML_CharacterDataHandler m_characterDataHandler;
564 XML_ProcessingInstructionHandler m_processingInstructionHandler;
565 XML_CommentHandler m_commentHandler;
566 XML_StartCdataSectionHandler m_startCdataSectionHandler;
567 XML_EndCdataSectionHandler m_endCdataSectionHandler;
568 XML_DefaultHandler m_defaultHandler;
569 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
570 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
571 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
572 XML_NotationDeclHandler m_notationDeclHandler;
573 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
574 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
575 XML_NotStandaloneHandler m_notStandaloneHandler;
576 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000577 XML_Parser m_externalEntityRefHandlerArg;
578 XML_SkippedEntityHandler m_skippedEntityHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000579 XML_UnknownEncodingHandler m_unknownEncodingHandler;
580 XML_ElementDeclHandler m_elementDeclHandler;
581 XML_AttlistDeclHandler m_attlistDeclHandler;
582 XML_EntityDeclHandler m_entityDeclHandler;
583 XML_XmlDeclHandler m_xmlDeclHandler;
584 const ENCODING *m_encoding;
585 INIT_ENCODING m_initEncoding;
586 const ENCODING *m_internalEncoding;
587 const XML_Char *m_protocolEncodingName;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000588 XML_Bool m_ns;
589 XML_Bool m_ns_triplets;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000590 void *m_unknownEncodingMem;
591 void *m_unknownEncodingData;
592 void *m_unknownEncodingHandlerData;
Fred Drake31d485c2004-08-03 07:06:22 +0000593 void (XMLCALL *m_unknownEncodingRelease)(void *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000594 PROLOG_STATE m_prologState;
595 Processor *m_processor;
596 enum XML_Error m_errorCode;
597 const char *m_eventPtr;
598 const char *m_eventEndPtr;
599 const char *m_positionPtr;
600 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
Fred Drake31d485c2004-08-03 07:06:22 +0000601 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000602 XML_Bool m_defaultExpandInternalEntities;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000603 int m_tagLevel;
604 ENTITY *m_declEntity;
605 const XML_Char *m_doctypeName;
606 const XML_Char *m_doctypeSysid;
607 const XML_Char *m_doctypePubid;
608 const XML_Char *m_declAttributeType;
609 const XML_Char *m_declNotationName;
610 const XML_Char *m_declNotationPublicId;
611 ELEMENT_TYPE *m_declElementType;
612 ATTRIBUTE_ID *m_declAttributeId;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000613 XML_Bool m_declAttributeIsCdata;
614 XML_Bool m_declAttributeIsId;
615 DTD *m_dtd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000616 const XML_Char *m_curBase;
617 TAG *m_tagStack;
618 TAG *m_freeTagList;
619 BINDING *m_inheritedBindings;
620 BINDING *m_freeBindingList;
621 int m_attsSize;
622 int m_nSpecifiedAtts;
623 int m_idAttIndex;
624 ATTRIBUTE *m_atts;
Fred Drake08317ae2003-10-21 15:38:55 +0000625 NS_ATT *m_nsAtts;
626 unsigned long m_nsAttsVersion;
627 unsigned char m_nsAttsPower;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700628#ifdef XML_ATTR_INFO
629 XML_AttrInfo *m_attInfo;
630#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000631 POSITION m_position;
632 STRING_POOL m_tempPool;
633 STRING_POOL m_temp2Pool;
634 char *m_groupConnector;
Fred Drake08317ae2003-10-21 15:38:55 +0000635 unsigned int m_groupSize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000636 XML_Char m_namespaceSeparator;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000637 XML_Parser m_parentParser;
Fred Drake31d485c2004-08-03 07:06:22 +0000638 XML_ParsingStatus m_parsingStatus;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000639#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000640 XML_Bool m_isParamEntity;
641 XML_Bool m_useForeignDTD;
642 enum XML_ParamEntityParsing m_paramEntityParsing;
643#endif
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700644 unsigned long m_hash_secret_salt;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000645};
646
/* Allocation through the parser's own memory suite.  These expand to
   calls on parser->m_mem, so a variable named 'parser' must be in scope
   at the point of use. */
#define MALLOC(s) (parser->m_mem.malloc_fcn((s)))
#define REALLOC(p,s) (parser->m_mem.realloc_fcn((p),(s)))
#define FREE(p) (parser->m_mem.free_fcn((p)))
650
/* Field shorthands.  Every macro below expands to a member of the object
   pointed to by a variable named `parser', which must therefore be in
   scope at the point of use. */

/* user data and handlers */
#define userData (parser->m_userData)
#define handlerArg (parser->m_handlerArg)
#define startElementHandler (parser->m_startElementHandler)
#define endElementHandler (parser->m_endElementHandler)
#define characterDataHandler (parser->m_characterDataHandler)
#define processingInstructionHandler (parser->m_processingInstructionHandler)
#define commentHandler (parser->m_commentHandler)
#define startCdataSectionHandler (parser->m_startCdataSectionHandler)
#define endCdataSectionHandler (parser->m_endCdataSectionHandler)
#define defaultHandler (parser->m_defaultHandler)
#define startDoctypeDeclHandler (parser->m_startDoctypeDeclHandler)
#define endDoctypeDeclHandler (parser->m_endDoctypeDeclHandler)
#define unparsedEntityDeclHandler (parser->m_unparsedEntityDeclHandler)
#define notationDeclHandler (parser->m_notationDeclHandler)
#define startNamespaceDeclHandler (parser->m_startNamespaceDeclHandler)
#define endNamespaceDeclHandler (parser->m_endNamespaceDeclHandler)
#define notStandaloneHandler (parser->m_notStandaloneHandler)
#define externalEntityRefHandler (parser->m_externalEntityRefHandler)
#define externalEntityRefHandlerArg (parser->m_externalEntityRefHandlerArg)
#define internalEntityRefHandler (parser->m_internalEntityRefHandler)
#define skippedEntityHandler (parser->m_skippedEntityHandler)
#define unknownEncodingHandler (parser->m_unknownEncodingHandler)
#define elementDeclHandler (parser->m_elementDeclHandler)
#define attlistDeclHandler (parser->m_attlistDeclHandler)
#define entityDeclHandler (parser->m_entityDeclHandler)
#define xmlDeclHandler (parser->m_xmlDeclHandler)

/* encodings */
#define encoding (parser->m_encoding)
#define initEncoding (parser->m_initEncoding)
#define internalEncoding (parser->m_internalEncoding)
#define unknownEncodingMem (parser->m_unknownEncodingMem)
#define unknownEncodingData (parser->m_unknownEncodingData)
#define unknownEncodingHandlerData (parser->m_unknownEncodingHandlerData)
#define unknownEncodingRelease (parser->m_unknownEncodingRelease)
#define protocolEncodingName (parser->m_protocolEncodingName)

/* namespace flags, processor and event/position tracking */
#define ns (parser->m_ns)
#define ns_triplets (parser->m_ns_triplets)
#define prologState (parser->m_prologState)
#define processor (parser->m_processor)
#define errorCode (parser->m_errorCode)
#define eventPtr (parser->m_eventPtr)
#define eventEndPtr (parser->m_eventEndPtr)
#define positionPtr (parser->m_positionPtr)
#define position (parser->m_position)
#define openInternalEntities (parser->m_openInternalEntities)
#define freeInternalEntities (parser->m_freeInternalEntities)
#define defaultExpandInternalEntities (parser->m_defaultExpandInternalEntities)
#define tagLevel (parser->m_tagLevel)

/* input and data buffers */
#define buffer (parser->m_buffer)
#define bufferPtr (parser->m_bufferPtr)
#define bufferEnd (parser->m_bufferEnd)
#define parseEndByteIndex (parser->m_parseEndByteIndex)
#define parseEndPtr (parser->m_parseEndPtr)
#define bufferLim (parser->m_bufferLim)
#define dataBuf (parser->m_dataBuf)
#define dataBufEnd (parser->m_dataBufEnd)

/* DTD and declaration state */
#define _dtd (parser->m_dtd)
#define curBase (parser->m_curBase)
#define declEntity (parser->m_declEntity)
#define doctypeName (parser->m_doctypeName)
#define doctypeSysid (parser->m_doctypeSysid)
#define doctypePubid (parser->m_doctypePubid)
#define declAttributeType (parser->m_declAttributeType)
#define declNotationName (parser->m_declNotationName)
#define declNotationPublicId (parser->m_declNotationPublicId)
#define declElementType (parser->m_declElementType)
#define declAttributeId (parser->m_declAttributeId)
#define declAttributeIsCdata (parser->m_declAttributeIsCdata)
#define declAttributeIsId (parser->m_declAttributeIsId)

/* tags, bindings and attributes */
#define freeTagList (parser->m_freeTagList)
#define freeBindingList (parser->m_freeBindingList)
#define inheritedBindings (parser->m_inheritedBindings)
#define tagStack (parser->m_tagStack)
#define atts (parser->m_atts)
#define attsSize (parser->m_attsSize)
#define nSpecifiedAtts (parser->m_nSpecifiedAtts)
#define idAttIndex (parser->m_idAttIndex)
#define nsAtts (parser->m_nsAtts)
#define nsAttsVersion (parser->m_nsAttsVersion)
#define nsAttsPower (parser->m_nsAttsPower)
#define attInfo (parser->m_attInfo)

/* pools, groups and miscellaneous */
#define tempPool (parser->m_tempPool)
#define temp2Pool (parser->m_temp2Pool)
#define groupConnector (parser->m_groupConnector)
#define groupSize (parser->m_groupSize)
#define namespaceSeparator (parser->m_namespaceSeparator)
#define parentParser (parser->m_parentParser)
#define ps_parsing (parser->m_parsingStatus.parsing)
#define ps_finalBuffer (parser->m_parsingStatus.finalBuffer)
#ifdef XML_DTD
#define isParamEntity (parser->m_isParamEntity)
#define useForeignDTD (parser->m_useForeignDTD)
#define paramEntityParsing (parser->m_paramEntityParsing)
#endif /* XML_DTD */
#define hash_secret_salt (parser->m_hash_secret_salt)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000754
Fred Drake08317ae2003-10-21 15:38:55 +0000755XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000756XML_ParserCreate(const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000757{
758 return XML_ParserCreate_MM(encodingName, NULL, NULL);
759}
760
Fred Drake08317ae2003-10-21 15:38:55 +0000761XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000762XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000763{
764 XML_Char tmp[2];
765 *tmp = nsSep;
766 return XML_ParserCreate_MM(encodingName, NULL, tmp);
767}
768
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000769static const XML_Char implicitContext[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700770 ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, ASCII_t, ASCII_t, ASCII_p,
771 ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
772 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g,
773 ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9,
774 ASCII_9, ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
775 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0'
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000776};
777
Victor Stinner5ff71322017-06-21 14:39:22 +0200778
779#if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
Victor Stinner5ff71322017-06-21 14:39:22 +0200780
/* Obtain entropy on Linux 3.17+

   Fills `count' bytes at `target' using getrandom() (or the raw syscall)
   with GRND_NONBLOCK.

   Returns 1 when all `count' bytes were written, 0 otherwise.

   errno is consulted only directly after a call that reported failure:
   a successful or short call does not set errno, so a stale value must
   not decide whether the loop continues.  A short write is followed by
   another request for the remainder; a failure other than EINTR (or a
   zero-byte result) gives up.
*/
static int
writeRandomBytes_getrandom_nonblock(void * target, size_t count) {
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = GRND_NONBLOCK;

  while (bytesWrittenTotal < count) {
    void * const currentTarget = (void*)((char*)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    /* long is wide enough for both ssize_t and the syscall() result */
    const long bytesWrittenMore =
#if defined(HAVE_GETRANDOM)
        (long)getrandom(currentTarget, bytesToWrite, getrandomFlags);
#else
        (long)syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
#endif

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
    } else if (bytesWrittenMore == 0 || errno != EINTR) {
      /* no progress possible, or a real error: report failure */
      return 0;
    }
    /* otherwise the call was interrupted (EINTR): retry */
  }

  return 1;
}
808
809#endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
810
811
Victor Stinner93d0cb52017-08-18 23:43:54 +0200812#if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
813
/* Extract entropy from /dev/urandom

   Fills `count' bytes at `target' by reading /dev/urandom.

   Returns 1 when all `count' bytes were read, 0 when the device could
   not be opened or reading failed.

   errno is consulted only directly after read() reported failure: a
   successful or short read does not set errno, so a stale value must not
   decide whether the loop continues.  A short read is followed by a read
   for the remainder; end-of-file or an error other than EINTR gives up.
*/
static int
writeRandomBytes_dev_urandom(void * target, size_t count) {
  int success = 1;  /* cleared as soon as reading cannot continue */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  while (bytesWrittenTotal < count) {
    void * const currentTarget = (void*)((char*)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
    } else if (bytesWrittenMore == 0 || errno != EINTR) {
      /* end-of-file or a real error: stop instead of spinning */
      success = 0;
      break;
    }
    /* otherwise read() was interrupted (EINTR): retry */
  }

  close(fd);
  return success;
}
841
842#endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
843
844
845#if defined(HAVE_ARC4RANDOM)
846
/* Fill `count' bytes at `target' from arc4random().  Each 32-bit result
   is consumed least significant byte first; a fresh value is drawn once
   all four bytes are used. */
static void
writeRandomBytes_arc4random(void * target, size_t count) {
  uint8_t * const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    const uint32_t word = arc4random();
    size_t byteIndex;

    for (byteIndex = 0;
         (byteIndex < sizeof(word)) && (filled < count);
         byteIndex++) {
      out[filled] = (uint8_t)(word >> (byteIndex * 8));
      filled++;
    }
  }
}
862
863#endif /* defined(HAVE_ARC4RANDOM) */
864
865
Victor Stinner5ff71322017-06-21 14:39:22 +0200866#ifdef _WIN32
867
868typedef BOOLEAN (APIENTRY *RTLGENRANDOM_FUNC)(PVOID, ULONG);
Victor Stinner93d0cb52017-08-18 23:43:54 +0200869HMODULE _Expat_LoadLibrary(LPCTSTR filename); /* see loadlibrary.c */
Victor Stinner5ff71322017-06-21 14:39:22 +0200870
871/* Obtain entropy on Windows XP / Windows Server 2003 and later.
Victor Stinner93d0cb52017-08-18 23:43:54 +0200872 * Hint on RtlGenRandom and the following article from libsodium.
Victor Stinner5ff71322017-06-21 14:39:22 +0200873 *
874 * Michael Howard: Cryptographically Secure Random number on Windows without using CryptoAPI
875 * https://blogs.msdn.microsoft.com/michael_howard/2005/01/14/cryptographically-secure-random-number-on-windows-without-using-cryptoapi/
876 */
877static int
878writeRandomBytes_RtlGenRandom(void * target, size_t count) {
879 int success = 0; /* full count bytes written? */
Victor Stinner93d0cb52017-08-18 23:43:54 +0200880 const HMODULE advapi32 = _Expat_LoadLibrary(TEXT("ADVAPI32.DLL"));
Victor Stinner5ff71322017-06-21 14:39:22 +0200881
882 if (advapi32) {
883 const RTLGENRANDOM_FUNC RtlGenRandom
884 = (RTLGENRANDOM_FUNC)GetProcAddress(advapi32, "SystemFunction036");
885 if (RtlGenRandom) {
886 if (RtlGenRandom((PVOID)target, (ULONG)count) == TRUE) {
887 success = 1;
888 }
889 }
890 FreeLibrary(advapi32);
891 }
892
893 return success;
894}
895
896#endif /* _WIN32 */
897
898
Victor Stinner93d0cb52017-08-18 23:43:54 +0200899#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
900
/* Low-quality, time-derived value used by the fallback salt generator.
   On Windows: the two halves of the current FILETIME XOR-ed together.
   Elsewhere: the microseconds field reported by gettimeofday(). */
static unsigned long
gather_time_entropy(void)
{
#ifdef _WIN32
  FILETIME fileTime;
  GetSystemTimeAsFileTime(&fileTime); /* never fails */
  return fileTime.dwHighDateTime ^ fileTime.dwLowDateTime;
#else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  /* checked in debug builds only; the cast keeps NDEBUG builds free of
     an unused-variable warning */
  assert (rc == 0);
  (void)rc;

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#endif
}
924
Victor Stinner93d0cb52017-08-18 23:43:54 +0200925#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
926
Victor Stinner5ff71322017-06-21 14:39:22 +0200927
/* Pass-through helper: returns `entropy' unchanged.  When the environment
   variable EXPAT_ENTROPY_DEBUG is exactly "1", it first prints the label
   and the value (zero-padded hex) to stderr. */
static unsigned long
ENTROPY_DEBUG(const char * label, unsigned long entropy) {
  const char * const debugSetting = getenv("EXPAT_ENTROPY_DEBUG");
  const int enabled = (debugSetting != NULL)
      && (strcmp(debugSetting, "1") == 0);

  if (enabled) {
    const int hexDigits = (int)sizeof(entropy) * 2;
    fprintf(stderr, "Entropy: %s --> 0x%0*lx (%lu bytes)\n",
        label,
        hexDigits, entropy,
        (unsigned long)sizeof(entropy));
  }
  return entropy;
}
939
Victor Stinner23ec4b52017-06-15 00:54:36 +0200940static unsigned long
941generate_hash_secret_salt(XML_Parser parser)
942{
Victor Stinner5ff71322017-06-21 14:39:22 +0200943 unsigned long entropy;
944 (void)parser;
Victor Stinner93d0cb52017-08-18 23:43:54 +0200945#if defined(HAVE_ARC4RANDOM_BUF)
Victor Stinner5ff71322017-06-21 14:39:22 +0200946 arc4random_buf(&entropy, sizeof(entropy));
947 return ENTROPY_DEBUG("arc4random_buf", entropy);
Victor Stinner93d0cb52017-08-18 23:43:54 +0200948#elif defined(HAVE_ARC4RANDOM)
949 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
950 return ENTROPY_DEBUG("arc4random", entropy);
Victor Stinner5ff71322017-06-21 14:39:22 +0200951#else
952 /* Try high quality providers first .. */
953#ifdef _WIN32
954 if (writeRandomBytes_RtlGenRandom((void *)&entropy, sizeof(entropy))) {
955 return ENTROPY_DEBUG("RtlGenRandom", entropy);
956 }
957#elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
Victor Stinner93d0cb52017-08-18 23:43:54 +0200958 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
Victor Stinner5ff71322017-06-21 14:39:22 +0200959 return ENTROPY_DEBUG("getrandom", entropy);
960 }
961#endif
Victor Stinner93d0cb52017-08-18 23:43:54 +0200962#if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
963 if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
964 return ENTROPY_DEBUG("/dev/urandom", entropy);
965 }
966#endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
Victor Stinner5ff71322017-06-21 14:39:22 +0200967 /* .. and self-made low quality for backup: */
968
969 /* Process ID is 0 bits entropy if attacker has local access */
970 entropy = gather_time_entropy() ^ getpid();
Victor Stinner23ec4b52017-06-15 00:54:36 +0200971
972 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
973 if (sizeof(unsigned long) == 4) {
Victor Stinner5ff71322017-06-21 14:39:22 +0200974 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200975 } else {
Victor Stinner5ff71322017-06-21 14:39:22 +0200976 return ENTROPY_DEBUG("fallback(8)",
Victor Stinner93d0cb52017-08-18 23:43:54 +0200977 entropy * (unsigned long)2305843009213693951ULL);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200978 }
Victor Stinner5ff71322017-06-21 14:39:22 +0200979#endif
980}
981
982static unsigned long
983get_hash_secret_salt(XML_Parser parser) {
984 if (parser->m_parentParser != NULL)
985 return get_hash_secret_salt(parser->m_parentParser);
986 return parser->m_hash_secret_salt;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700987}
988
989static XML_Bool /* only valid for root parser */
990startParsing(XML_Parser parser)
991{
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700992 /* hash functions must be initialized before setContext() is called */
993 if (hash_secret_salt == 0)
Victor Stinner23ec4b52017-06-15 00:54:36 +0200994 hash_secret_salt = generate_hash_secret_salt(parser);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700995 if (ns) {
996 /* implicit context only set for root parser, since child
997 parsers (i.e. external entity parsers) will inherit it
998 */
999 return setContext(parser, implicitContext);
1000 }
1001 return XML_TRUE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001002}
1003
1004XML_Parser XMLCALL
1005XML_ParserCreate_MM(const XML_Char *encodingName,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001006 const XML_Memory_Handling_Suite *memsuite,
1007 const XML_Char *nameSep)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001008{
1009 return parserCreate(encodingName, memsuite, nameSep, NULL);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001010}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001011
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001012static XML_Parser
1013parserCreate(const XML_Char *encodingName,
1014 const XML_Memory_Handling_Suite *memsuite,
1015 const XML_Char *nameSep,
1016 DTD *dtd)
1017{
1018 XML_Parser parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001019
1020 if (memsuite) {
1021 XML_Memory_Handling_Suite *mtemp;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001022 parser = (XML_Parser)
1023 memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
1024 if (parser != NULL) {
1025 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1026 mtemp->malloc_fcn = memsuite->malloc_fcn;
1027 mtemp->realloc_fcn = memsuite->realloc_fcn;
1028 mtemp->free_fcn = memsuite->free_fcn;
1029 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001030 }
1031 else {
1032 XML_Memory_Handling_Suite *mtemp;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001033 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
1034 if (parser != NULL) {
1035 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1036 mtemp->malloc_fcn = malloc;
1037 mtemp->realloc_fcn = realloc;
1038 mtemp->free_fcn = free;
1039 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001040 }
1041
1042 if (!parser)
1043 return parser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001044
1045 buffer = NULL;
1046 bufferLim = NULL;
1047
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001048 attsSize = INIT_ATTS_SIZE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001049 atts = (ATTRIBUTE *)MALLOC(attsSize * sizeof(ATTRIBUTE));
1050 if (atts == NULL) {
1051 FREE(parser);
1052 return NULL;
1053 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001054#ifdef XML_ATTR_INFO
1055 attInfo = (XML_AttrInfo*)MALLOC(attsSize * sizeof(XML_AttrInfo));
1056 if (attInfo == NULL) {
1057 FREE(atts);
1058 FREE(parser);
1059 return NULL;
1060 }
1061#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001062 dataBuf = (XML_Char *)MALLOC(INIT_DATA_BUF_SIZE * sizeof(XML_Char));
1063 if (dataBuf == NULL) {
1064 FREE(atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001065#ifdef XML_ATTR_INFO
1066 FREE(attInfo);
1067#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001068 FREE(parser);
1069 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001070 }
1071 dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE;
1072
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001073 if (dtd)
1074 _dtd = dtd;
1075 else {
1076 _dtd = dtdCreate(&parser->m_mem);
1077 if (_dtd == NULL) {
1078 FREE(dataBuf);
1079 FREE(atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001080#ifdef XML_ATTR_INFO
1081 FREE(attInfo);
1082#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001083 FREE(parser);
1084 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001085 }
1086 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001087
1088 freeBindingList = NULL;
1089 freeTagList = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00001090 freeInternalEntities = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001091
1092 groupSize = 0;
1093 groupConnector = NULL;
1094
1095 unknownEncodingHandler = NULL;
1096 unknownEncodingHandlerData = NULL;
1097
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001098 namespaceSeparator = ASCII_EXCL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001099 ns = XML_FALSE;
1100 ns_triplets = XML_FALSE;
1101
Fred Drake08317ae2003-10-21 15:38:55 +00001102 nsAtts = NULL;
1103 nsAttsVersion = 0;
1104 nsAttsPower = 0;
1105
Victor Stinner93d0cb52017-08-18 23:43:54 +02001106 protocolEncodingName = NULL;
1107
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001108 poolInit(&tempPool, &(parser->m_mem));
1109 poolInit(&temp2Pool, &(parser->m_mem));
1110 parserInit(parser, encodingName);
1111
1112 if (encodingName && !protocolEncodingName) {
1113 XML_ParserFree(parser);
1114 return NULL;
1115 }
1116
1117 if (nameSep) {
1118 ns = XML_TRUE;
1119 internalEncoding = XmlGetInternalEncodingNS();
1120 namespaceSeparator = *nameSep;
1121 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001122 else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001123 internalEncoding = XmlGetInternalEncoding();
1124 }
1125
1126 return parser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001127}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001128
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001129static void
1130parserInit(XML_Parser parser, const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001131{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001132 processor = prologInitProcessor;
1133 XmlPrologStateInit(&prologState);
Victor Stinner93d0cb52017-08-18 23:43:54 +02001134 if (encodingName != NULL) {
1135 protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1136 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001137 curBase = NULL;
1138 XmlInitEncoding(&initEncoding, &encoding, 0);
1139 userData = NULL;
1140 handlerArg = NULL;
1141 startElementHandler = NULL;
1142 endElementHandler = NULL;
1143 characterDataHandler = NULL;
1144 processingInstructionHandler = NULL;
1145 commentHandler = NULL;
1146 startCdataSectionHandler = NULL;
1147 endCdataSectionHandler = NULL;
1148 defaultHandler = NULL;
1149 startDoctypeDeclHandler = NULL;
1150 endDoctypeDeclHandler = NULL;
1151 unparsedEntityDeclHandler = NULL;
1152 notationDeclHandler = NULL;
1153 startNamespaceDeclHandler = NULL;
1154 endNamespaceDeclHandler = NULL;
1155 notStandaloneHandler = NULL;
1156 externalEntityRefHandler = NULL;
1157 externalEntityRefHandlerArg = parser;
1158 skippedEntityHandler = NULL;
1159 elementDeclHandler = NULL;
1160 attlistDeclHandler = NULL;
1161 entityDeclHandler = NULL;
1162 xmlDeclHandler = NULL;
1163 bufferPtr = buffer;
1164 bufferEnd = buffer;
1165 parseEndByteIndex = 0;
1166 parseEndPtr = NULL;
1167 declElementType = NULL;
1168 declAttributeId = NULL;
1169 declEntity = NULL;
1170 doctypeName = NULL;
1171 doctypeSysid = NULL;
1172 doctypePubid = NULL;
1173 declAttributeType = NULL;
1174 declNotationName = NULL;
1175 declNotationPublicId = NULL;
1176 declAttributeIsCdata = XML_FALSE;
1177 declAttributeIsId = XML_FALSE;
1178 memset(&position, 0, sizeof(POSITION));
1179 errorCode = XML_ERROR_NONE;
1180 eventPtr = NULL;
1181 eventEndPtr = NULL;
1182 positionPtr = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00001183 openInternalEntities = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001184 defaultExpandInternalEntities = XML_TRUE;
1185 tagLevel = 0;
1186 tagStack = NULL;
1187 inheritedBindings = NULL;
1188 nSpecifiedAtts = 0;
1189 unknownEncodingMem = NULL;
1190 unknownEncodingRelease = NULL;
1191 unknownEncodingData = NULL;
1192 parentParser = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001193 ps_parsing = XML_INITIALIZED;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001194#ifdef XML_DTD
1195 isParamEntity = XML_FALSE;
1196 useForeignDTD = XML_FALSE;
1197 paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1198#endif
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001199 hash_secret_salt = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001200}
1201
1202/* moves list of bindings to freeBindingList */
1203static void FASTCALL
1204moveToFreeBindingList(XML_Parser parser, BINDING *bindings)
1205{
1206 while (bindings) {
1207 BINDING *b = bindings;
1208 bindings = bindings->nextTagBinding;
1209 b->nextTagBinding = freeBindingList;
1210 freeBindingList = b;
1211 }
1212}
1213
Fred Drake08317ae2003-10-21 15:38:55 +00001214XML_Bool XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001215XML_ParserReset(XML_Parser parser, const XML_Char *encodingName)
1216{
1217 TAG *tStk;
Fred Drake31d485c2004-08-03 07:06:22 +00001218 OPEN_INTERNAL_ENTITY *openEntityList;
Victor Stinner5ff71322017-06-21 14:39:22 +02001219
1220 if (parser == NULL)
1221 return XML_FALSE;
1222
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001223 if (parentParser)
1224 return XML_FALSE;
1225 /* move tagStack to freeTagList */
1226 tStk = tagStack;
1227 while (tStk) {
1228 TAG *tag = tStk;
1229 tStk = tStk->parent;
1230 tag->parent = freeTagList;
1231 moveToFreeBindingList(parser, tag->bindings);
1232 tag->bindings = NULL;
1233 freeTagList = tag;
1234 }
Fred Drake31d485c2004-08-03 07:06:22 +00001235 /* move openInternalEntities to freeInternalEntities */
1236 openEntityList = openInternalEntities;
1237 while (openEntityList) {
1238 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1239 openEntityList = openEntity->next;
1240 openEntity->next = freeInternalEntities;
1241 freeInternalEntities = openEntity;
1242 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001243 moveToFreeBindingList(parser, inheritedBindings);
Fred Drake08317ae2003-10-21 15:38:55 +00001244 FREE(unknownEncodingMem);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001245 if (unknownEncodingRelease)
1246 unknownEncodingRelease(unknownEncodingData);
1247 poolClear(&tempPool);
1248 poolClear(&temp2Pool);
Victor Stinner93d0cb52017-08-18 23:43:54 +02001249 FREE((void *)protocolEncodingName);
1250 protocolEncodingName = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001251 parserInit(parser, encodingName);
1252 dtdReset(_dtd, &parser->m_mem);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001253 return XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001254}
1255
Fred Drake08317ae2003-10-21 15:38:55 +00001256enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001257XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
1258{
Victor Stinner5ff71322017-06-21 14:39:22 +02001259 if (parser == NULL)
1260 return XML_STATUS_ERROR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001261 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1262 XXX There's no way for the caller to determine which of the
1263 XXX possible error cases caused the XML_STATUS_ERROR return.
1264 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001265 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001266 return XML_STATUS_ERROR;
Victor Stinner93d0cb52017-08-18 23:43:54 +02001267
1268 /* Get rid of any previous encoding name */
1269 FREE((void *)protocolEncodingName);
1270
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001271 if (encodingName == NULL)
Victor Stinner93d0cb52017-08-18 23:43:54 +02001272 /* No new encoding name */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001273 protocolEncodingName = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001274 else {
Victor Stinner93d0cb52017-08-18 23:43:54 +02001275 /* Copy the new encoding name into allocated memory */
1276 protocolEncodingName = copyString(encodingName, &(parser->m_mem));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001277 if (!protocolEncodingName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001278 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001279 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001280 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001281}
1282
Fred Drake08317ae2003-10-21 15:38:55 +00001283XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001284XML_ExternalEntityParserCreate(XML_Parser oldParser,
1285 const XML_Char *context,
1286 const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001287{
1288 XML_Parser parser = oldParser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001289 DTD *newDtd = NULL;
Victor Stinner5ff71322017-06-21 14:39:22 +02001290 DTD *oldDtd;
1291 XML_StartElementHandler oldStartElementHandler;
1292 XML_EndElementHandler oldEndElementHandler;
1293 XML_CharacterDataHandler oldCharacterDataHandler;
1294 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1295 XML_CommentHandler oldCommentHandler;
1296 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1297 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1298 XML_DefaultHandler oldDefaultHandler;
1299 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1300 XML_NotationDeclHandler oldNotationDeclHandler;
1301 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1302 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1303 XML_NotStandaloneHandler oldNotStandaloneHandler;
1304 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1305 XML_SkippedEntityHandler oldSkippedEntityHandler;
1306 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1307 XML_ElementDeclHandler oldElementDeclHandler;
1308 XML_AttlistDeclHandler oldAttlistDeclHandler;
1309 XML_EntityDeclHandler oldEntityDeclHandler;
1310 XML_XmlDeclHandler oldXmlDeclHandler;
1311 ELEMENT_TYPE * oldDeclElementType;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001312
Victor Stinner5ff71322017-06-21 14:39:22 +02001313 void *oldUserData;
1314 void *oldHandlerArg;
1315 XML_Bool oldDefaultExpandInternalEntities;
1316 XML_Parser oldExternalEntityRefHandlerArg;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001317#ifdef XML_DTD
Victor Stinner5ff71322017-06-21 14:39:22 +02001318 enum XML_ParamEntityParsing oldParamEntityParsing;
1319 int oldInEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001320#endif
Victor Stinner5ff71322017-06-21 14:39:22 +02001321 XML_Bool oldns_triplets;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001322 /* Note that the new parser shares the same hash secret as the old
1323 parser, so that dtdCopy and copyEntityTable can lookup values
1324 from hash tables associated with either parser without us having
1325 to worry which hash secrets each table has.
1326 */
Victor Stinner5ff71322017-06-21 14:39:22 +02001327 unsigned long oldhash_secret_salt;
1328
1329 /* Validate the oldParser parameter before we pull everything out of it */
1330 if (oldParser == NULL)
1331 return NULL;
1332
1333 /* Stash the original parser contents on the stack */
1334 oldDtd = _dtd;
1335 oldStartElementHandler = startElementHandler;
1336 oldEndElementHandler = endElementHandler;
1337 oldCharacterDataHandler = characterDataHandler;
1338 oldProcessingInstructionHandler = processingInstructionHandler;
1339 oldCommentHandler = commentHandler;
1340 oldStartCdataSectionHandler = startCdataSectionHandler;
1341 oldEndCdataSectionHandler = endCdataSectionHandler;
1342 oldDefaultHandler = defaultHandler;
1343 oldUnparsedEntityDeclHandler = unparsedEntityDeclHandler;
1344 oldNotationDeclHandler = notationDeclHandler;
1345 oldStartNamespaceDeclHandler = startNamespaceDeclHandler;
1346 oldEndNamespaceDeclHandler = endNamespaceDeclHandler;
1347 oldNotStandaloneHandler = notStandaloneHandler;
1348 oldExternalEntityRefHandler = externalEntityRefHandler;
1349 oldSkippedEntityHandler = skippedEntityHandler;
1350 oldUnknownEncodingHandler = unknownEncodingHandler;
1351 oldElementDeclHandler = elementDeclHandler;
1352 oldAttlistDeclHandler = attlistDeclHandler;
1353 oldEntityDeclHandler = entityDeclHandler;
1354 oldXmlDeclHandler = xmlDeclHandler;
1355 oldDeclElementType = declElementType;
1356
1357 oldUserData = userData;
1358 oldHandlerArg = handlerArg;
1359 oldDefaultExpandInternalEntities = defaultExpandInternalEntities;
1360 oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg;
1361#ifdef XML_DTD
1362 oldParamEntityParsing = paramEntityParsing;
1363 oldInEntityValue = prologState.inEntityValue;
1364#endif
1365 oldns_triplets = ns_triplets;
1366 /* Note that the new parser shares the same hash secret as the old
1367 parser, so that dtdCopy and copyEntityTable can lookup values
1368 from hash tables associated with either parser without us having
1369 to worry which hash secrets each table has.
1370 */
1371 oldhash_secret_salt = hash_secret_salt;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001372
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001373#ifdef XML_DTD
1374 if (!context)
1375 newDtd = oldDtd;
1376#endif /* XML_DTD */
1377
1378 /* Note that the magical uses of the pre-processor to make field
1379 access look more like C++ require that `parser' be overwritten
1380 here. This makes this function more painful to follow than it
1381 would be otherwise.
1382 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001383 if (ns) {
1384 XML_Char tmp[2];
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001385 *tmp = namespaceSeparator;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001386 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001387 }
1388 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001389 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001390 }
1391
1392 if (!parser)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001393 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001394
1395 startElementHandler = oldStartElementHandler;
1396 endElementHandler = oldEndElementHandler;
1397 characterDataHandler = oldCharacterDataHandler;
1398 processingInstructionHandler = oldProcessingInstructionHandler;
1399 commentHandler = oldCommentHandler;
1400 startCdataSectionHandler = oldStartCdataSectionHandler;
1401 endCdataSectionHandler = oldEndCdataSectionHandler;
1402 defaultHandler = oldDefaultHandler;
1403 unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1404 notationDeclHandler = oldNotationDeclHandler;
1405 startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1406 endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1407 notStandaloneHandler = oldNotStandaloneHandler;
1408 externalEntityRefHandler = oldExternalEntityRefHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001409 skippedEntityHandler = oldSkippedEntityHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001410 unknownEncodingHandler = oldUnknownEncodingHandler;
1411 elementDeclHandler = oldElementDeclHandler;
1412 attlistDeclHandler = oldAttlistDeclHandler;
1413 entityDeclHandler = oldEntityDeclHandler;
1414 xmlDeclHandler = oldXmlDeclHandler;
1415 declElementType = oldDeclElementType;
1416 userData = oldUserData;
1417 if (oldUserData == oldHandlerArg)
1418 handlerArg = userData;
1419 else
1420 handlerArg = parser;
1421 if (oldExternalEntityRefHandlerArg != oldParser)
1422 externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1423 defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1424 ns_triplets = oldns_triplets;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001425 hash_secret_salt = oldhash_secret_salt;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001426 parentParser = oldParser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001427#ifdef XML_DTD
1428 paramEntityParsing = oldParamEntityParsing;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001429 prologState.inEntityValue = oldInEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001430 if (context) {
1431#endif /* XML_DTD */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001432 if (!dtdCopy(oldParser, _dtd, oldDtd, &parser->m_mem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001433 || !setContext(parser, context)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001434 XML_ParserFree(parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001435 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001436 }
1437 processor = externalEntityInitProcessor;
1438#ifdef XML_DTD
1439 }
1440 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001441 /* The DTD instance referenced by _dtd is shared between the document's
1442 root parser and external PE parsers, therefore one does not need to
1443 call setContext. In addition, one also *must* not call setContext,
1444 because this would overwrite existing prefix->binding pointers in
1445 _dtd with ones that get destroyed with the external PE parser.
1446 This would leave those prefixes with dangling pointers.
1447 */
1448 isParamEntity = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001449 XmlPrologStateInitExternalEntity(&prologState);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001450 processor = externalParEntInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001451 }
1452#endif /* XML_DTD */
1453 return parser;
1454}
1455
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001456static void FASTCALL
1457destroyBindings(BINDING *bindings, XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001458{
1459 for (;;) {
1460 BINDING *b = bindings;
1461 if (!b)
1462 break;
1463 bindings = b->nextTagBinding;
1464 FREE(b->uri);
1465 FREE(b);
1466 }
1467}
1468
Fred Drake08317ae2003-10-21 15:38:55 +00001469void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001470XML_ParserFree(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001471{
Fred Drake31d485c2004-08-03 07:06:22 +00001472 TAG *tagList;
1473 OPEN_INTERNAL_ENTITY *entityList;
1474 if (parser == NULL)
1475 return;
1476 /* free tagStack and freeTagList */
1477 tagList = tagStack;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001478 for (;;) {
1479 TAG *p;
Fred Drake31d485c2004-08-03 07:06:22 +00001480 if (tagList == NULL) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001481 if (freeTagList == NULL)
1482 break;
Fred Drake31d485c2004-08-03 07:06:22 +00001483 tagList = freeTagList;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001484 freeTagList = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001485 }
Fred Drake31d485c2004-08-03 07:06:22 +00001486 p = tagList;
1487 tagList = tagList->parent;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001488 FREE(p->buf);
1489 destroyBindings(p->bindings, parser);
1490 FREE(p);
1491 }
Fred Drake31d485c2004-08-03 07:06:22 +00001492 /* free openInternalEntities and freeInternalEntities */
1493 entityList = openInternalEntities;
1494 for (;;) {
1495 OPEN_INTERNAL_ENTITY *openEntity;
1496 if (entityList == NULL) {
1497 if (freeInternalEntities == NULL)
1498 break;
1499 entityList = freeInternalEntities;
1500 freeInternalEntities = NULL;
1501 }
1502 openEntity = entityList;
1503 entityList = entityList->next;
1504 FREE(openEntity);
1505 }
1506
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001507 destroyBindings(freeBindingList, parser);
1508 destroyBindings(inheritedBindings, parser);
1509 poolDestroy(&tempPool);
1510 poolDestroy(&temp2Pool);
Victor Stinner93d0cb52017-08-18 23:43:54 +02001511 FREE((void *)protocolEncodingName);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001512#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001513 /* external parameter entity parsers share the DTD structure
1514 parser->m_dtd with the root parser, so we must not destroy it
1515 */
1516 if (!isParamEntity && _dtd)
1517#else
1518 if (_dtd)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001519#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001520 dtdDestroy(_dtd, (XML_Bool)!parentParser, &parser->m_mem);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001521 FREE((void *)atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001522#ifdef XML_ATTR_INFO
1523 FREE((void *)attInfo);
1524#endif
Fred Drake08317ae2003-10-21 15:38:55 +00001525 FREE(groupConnector);
1526 FREE(buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001527 FREE(dataBuf);
Fred Drake08317ae2003-10-21 15:38:55 +00001528 FREE(nsAtts);
1529 FREE(unknownEncodingMem);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001530 if (unknownEncodingRelease)
1531 unknownEncodingRelease(unknownEncodingData);
1532 FREE(parser);
1533}
1534
Fred Drake08317ae2003-10-21 15:38:55 +00001535void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001536XML_UseParserAsHandlerArg(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001537{
Victor Stinner5ff71322017-06-21 14:39:22 +02001538 if (parser != NULL)
1539 handlerArg = parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001540}
1541
Fred Drake08317ae2003-10-21 15:38:55 +00001542enum XML_Error XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001543XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD)
1544{
Victor Stinner5ff71322017-06-21 14:39:22 +02001545 if (parser == NULL)
1546 return XML_ERROR_INVALID_ARGUMENT;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001547#ifdef XML_DTD
1548 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001549 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001550 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1551 useForeignDTD = useDTD;
1552 return XML_ERROR_NONE;
1553#else
1554 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1555#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001556}
1557
Fred Drake08317ae2003-10-21 15:38:55 +00001558void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001559XML_SetReturnNSTriplet(XML_Parser parser, int do_nst)
1560{
Victor Stinner5ff71322017-06-21 14:39:22 +02001561 if (parser == NULL)
1562 return;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001563 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001564 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001565 return;
1566 ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1567}
1568
Fred Drake08317ae2003-10-21 15:38:55 +00001569void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001570XML_SetUserData(XML_Parser parser, void *p)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001571{
Victor Stinner5ff71322017-06-21 14:39:22 +02001572 if (parser == NULL)
1573 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001574 if (handlerArg == userData)
1575 handlerArg = userData = p;
1576 else
1577 userData = p;
1578}
1579
Fred Drake08317ae2003-10-21 15:38:55 +00001580enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001581XML_SetBase(XML_Parser parser, const XML_Char *p)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001582{
Victor Stinner5ff71322017-06-21 14:39:22 +02001583 if (parser == NULL)
1584 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001585 if (p) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001586 p = poolCopyString(&_dtd->pool, p);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001587 if (!p)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001588 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001589 curBase = p;
1590 }
1591 else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001592 curBase = NULL;
1593 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001594}
1595
Fred Drake08317ae2003-10-21 15:38:55 +00001596const XML_Char * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001597XML_GetBase(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001598{
Victor Stinner5ff71322017-06-21 14:39:22 +02001599 if (parser == NULL)
1600 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001601 return curBase;
1602}
1603
Fred Drake08317ae2003-10-21 15:38:55 +00001604int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001605XML_GetSpecifiedAttributeCount(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001606{
Victor Stinner5ff71322017-06-21 14:39:22 +02001607 if (parser == NULL)
1608 return -1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001609 return nSpecifiedAtts;
1610}
1611
Fred Drake08317ae2003-10-21 15:38:55 +00001612int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001613XML_GetIdAttributeIndex(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001614{
Victor Stinner5ff71322017-06-21 14:39:22 +02001615 if (parser == NULL)
1616 return -1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001617 return idAttIndex;
1618}
1619
#ifdef XML_ATTR_INFO
/* Return the parser's attInfo array, or NULL when `parser` is NULL.
   Only compiled when XML_ATTR_INFO is defined.
*/
const XML_AttrInfo * XMLCALL
XML_GetAttributeInfo(XML_Parser parser)
{
  if (parser != NULL)
    return attInfo;
  return NULL;
}
#endif
1629
Fred Drake08317ae2003-10-21 15:38:55 +00001630void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001631XML_SetElementHandler(XML_Parser parser,
1632 XML_StartElementHandler start,
1633 XML_EndElementHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001634{
Victor Stinner5ff71322017-06-21 14:39:22 +02001635 if (parser == NULL)
1636 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001637 startElementHandler = start;
1638 endElementHandler = end;
1639}
1640
Fred Drake08317ae2003-10-21 15:38:55 +00001641void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001642XML_SetStartElementHandler(XML_Parser parser,
1643 XML_StartElementHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001644 if (parser != NULL)
1645 startElementHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001646}
1647
Fred Drake08317ae2003-10-21 15:38:55 +00001648void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001649XML_SetEndElementHandler(XML_Parser parser,
1650 XML_EndElementHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001651 if (parser != NULL)
1652 endElementHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001653}
1654
Fred Drake08317ae2003-10-21 15:38:55 +00001655void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001656XML_SetCharacterDataHandler(XML_Parser parser,
1657 XML_CharacterDataHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001658{
Victor Stinner5ff71322017-06-21 14:39:22 +02001659 if (parser != NULL)
1660 characterDataHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001661}
1662
Fred Drake08317ae2003-10-21 15:38:55 +00001663void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001664XML_SetProcessingInstructionHandler(XML_Parser parser,
1665 XML_ProcessingInstructionHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001666{
Victor Stinner5ff71322017-06-21 14:39:22 +02001667 if (parser != NULL)
1668 processingInstructionHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001669}
1670
Fred Drake08317ae2003-10-21 15:38:55 +00001671void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001672XML_SetCommentHandler(XML_Parser parser,
1673 XML_CommentHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001674{
Victor Stinner5ff71322017-06-21 14:39:22 +02001675 if (parser != NULL)
1676 commentHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001677}
1678
Fred Drake08317ae2003-10-21 15:38:55 +00001679void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001680XML_SetCdataSectionHandler(XML_Parser parser,
1681 XML_StartCdataSectionHandler start,
1682 XML_EndCdataSectionHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001683{
Victor Stinner5ff71322017-06-21 14:39:22 +02001684 if (parser == NULL)
1685 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001686 startCdataSectionHandler = start;
1687 endCdataSectionHandler = end;
1688}
1689
Fred Drake08317ae2003-10-21 15:38:55 +00001690void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001691XML_SetStartCdataSectionHandler(XML_Parser parser,
1692 XML_StartCdataSectionHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001693 if (parser != NULL)
1694 startCdataSectionHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001695}
1696
Fred Drake08317ae2003-10-21 15:38:55 +00001697void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001698XML_SetEndCdataSectionHandler(XML_Parser parser,
1699 XML_EndCdataSectionHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001700 if (parser != NULL)
1701 endCdataSectionHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001702}
1703
Fred Drake08317ae2003-10-21 15:38:55 +00001704void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001705XML_SetDefaultHandler(XML_Parser parser,
1706 XML_DefaultHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001707{
Victor Stinner5ff71322017-06-21 14:39:22 +02001708 if (parser == NULL)
1709 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001710 defaultHandler = handler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001711 defaultExpandInternalEntities = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001712}
1713
Fred Drake08317ae2003-10-21 15:38:55 +00001714void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001715XML_SetDefaultHandlerExpand(XML_Parser parser,
1716 XML_DefaultHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001717{
Victor Stinner5ff71322017-06-21 14:39:22 +02001718 if (parser == NULL)
1719 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001720 defaultHandler = handler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001721 defaultExpandInternalEntities = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001722}
1723
Fred Drake08317ae2003-10-21 15:38:55 +00001724void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001725XML_SetDoctypeDeclHandler(XML_Parser parser,
1726 XML_StartDoctypeDeclHandler start,
1727 XML_EndDoctypeDeclHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001728{
Victor Stinner5ff71322017-06-21 14:39:22 +02001729 if (parser == NULL)
1730 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001731 startDoctypeDeclHandler = start;
1732 endDoctypeDeclHandler = end;
1733}
1734
Fred Drake08317ae2003-10-21 15:38:55 +00001735void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001736XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1737 XML_StartDoctypeDeclHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001738 if (parser != NULL)
1739 startDoctypeDeclHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001740}
1741
Fred Drake08317ae2003-10-21 15:38:55 +00001742void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001743XML_SetEndDoctypeDeclHandler(XML_Parser parser,
1744 XML_EndDoctypeDeclHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001745 if (parser != NULL)
1746 endDoctypeDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001747}
1748
Fred Drake08317ae2003-10-21 15:38:55 +00001749void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001750XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1751 XML_UnparsedEntityDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001752{
Victor Stinner5ff71322017-06-21 14:39:22 +02001753 if (parser != NULL)
1754 unparsedEntityDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001755}
1756
Fred Drake08317ae2003-10-21 15:38:55 +00001757void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001758XML_SetNotationDeclHandler(XML_Parser parser,
1759 XML_NotationDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001760{
Victor Stinner5ff71322017-06-21 14:39:22 +02001761 if (parser != NULL)
1762 notationDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001763}
1764
Fred Drake08317ae2003-10-21 15:38:55 +00001765void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001766XML_SetNamespaceDeclHandler(XML_Parser parser,
1767 XML_StartNamespaceDeclHandler start,
1768 XML_EndNamespaceDeclHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001769{
Victor Stinner5ff71322017-06-21 14:39:22 +02001770 if (parser == NULL)
1771 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001772 startNamespaceDeclHandler = start;
1773 endNamespaceDeclHandler = end;
1774}
1775
Fred Drake08317ae2003-10-21 15:38:55 +00001776void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001777XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1778 XML_StartNamespaceDeclHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001779 if (parser != NULL)
1780 startNamespaceDeclHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001781}
1782
Fred Drake08317ae2003-10-21 15:38:55 +00001783void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001784XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1785 XML_EndNamespaceDeclHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001786 if (parser != NULL)
1787 endNamespaceDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001788}
1789
Fred Drake08317ae2003-10-21 15:38:55 +00001790void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001791XML_SetNotStandaloneHandler(XML_Parser parser,
1792 XML_NotStandaloneHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001793{
Victor Stinner5ff71322017-06-21 14:39:22 +02001794 if (parser != NULL)
1795 notStandaloneHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001796}
1797
Fred Drake08317ae2003-10-21 15:38:55 +00001798void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001799XML_SetExternalEntityRefHandler(XML_Parser parser,
1800 XML_ExternalEntityRefHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001801{
Victor Stinner5ff71322017-06-21 14:39:22 +02001802 if (parser != NULL)
1803 externalEntityRefHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001804}
1805
Fred Drake08317ae2003-10-21 15:38:55 +00001806void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001807XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001808{
Victor Stinner5ff71322017-06-21 14:39:22 +02001809 if (parser == NULL)
1810 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001811 if (arg)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001812 externalEntityRefHandlerArg = (XML_Parser)arg;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001813 else
1814 externalEntityRefHandlerArg = parser;
1815}
1816
Fred Drake08317ae2003-10-21 15:38:55 +00001817void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001818XML_SetSkippedEntityHandler(XML_Parser parser,
1819 XML_SkippedEntityHandler handler)
1820{
Victor Stinner5ff71322017-06-21 14:39:22 +02001821 if (parser != NULL)
1822 skippedEntityHandler = handler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001823}
1824
Fred Drake08317ae2003-10-21 15:38:55 +00001825void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001826XML_SetUnknownEncodingHandler(XML_Parser parser,
1827 XML_UnknownEncodingHandler handler,
1828 void *data)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001829{
Victor Stinner5ff71322017-06-21 14:39:22 +02001830 if (parser == NULL)
1831 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001832 unknownEncodingHandler = handler;
1833 unknownEncodingHandlerData = data;
1834}
1835
Fred Drake08317ae2003-10-21 15:38:55 +00001836void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001837XML_SetElementDeclHandler(XML_Parser parser,
1838 XML_ElementDeclHandler eldecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001839{
Victor Stinner5ff71322017-06-21 14:39:22 +02001840 if (parser != NULL)
1841 elementDeclHandler = eldecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001842}
1843
Fred Drake08317ae2003-10-21 15:38:55 +00001844void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001845XML_SetAttlistDeclHandler(XML_Parser parser,
1846 XML_AttlistDeclHandler attdecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001847{
Victor Stinner5ff71322017-06-21 14:39:22 +02001848 if (parser != NULL)
1849 attlistDeclHandler = attdecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001850}
1851
Fred Drake08317ae2003-10-21 15:38:55 +00001852void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001853XML_SetEntityDeclHandler(XML_Parser parser,
1854 XML_EntityDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001855{
Victor Stinner5ff71322017-06-21 14:39:22 +02001856 if (parser != NULL)
1857 entityDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001858}
1859
Fred Drake08317ae2003-10-21 15:38:55 +00001860void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001861XML_SetXmlDeclHandler(XML_Parser parser,
1862 XML_XmlDeclHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001863 if (parser != NULL)
1864 xmlDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001865}
1866
Fred Drake08317ae2003-10-21 15:38:55 +00001867int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001868XML_SetParamEntityParsing(XML_Parser parser,
1869 enum XML_ParamEntityParsing peParsing)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001870{
Victor Stinner5ff71322017-06-21 14:39:22 +02001871 if (parser == NULL)
1872 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001873 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001874 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001875 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001876#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001877 paramEntityParsing = peParsing;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001878 return 1;
1879#else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001880 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001881#endif
1882}
1883
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001884int XMLCALL
1885XML_SetHashSalt(XML_Parser parser,
1886 unsigned long hash_salt)
1887{
Victor Stinner5ff71322017-06-21 14:39:22 +02001888 if (parser == NULL)
1889 return 0;
1890 if (parser->m_parentParser)
1891 return XML_SetHashSalt(parser->m_parentParser, hash_salt);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001892 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1893 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
1894 return 0;
1895 hash_secret_salt = hash_salt;
1896 return 1;
1897}
1898
Fred Drake08317ae2003-10-21 15:38:55 +00001899enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001900XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001901{
Victor Stinner5ff71322017-06-21 14:39:22 +02001902 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02001903 if (parser != NULL)
1904 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
Victor Stinner5ff71322017-06-21 14:39:22 +02001905 return XML_STATUS_ERROR;
1906 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001907 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001908 case XML_SUSPENDED:
1909 errorCode = XML_ERROR_SUSPENDED;
1910 return XML_STATUS_ERROR;
1911 case XML_FINISHED:
1912 errorCode = XML_ERROR_FINISHED;
1913 return XML_STATUS_ERROR;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001914 case XML_INITIALIZED:
1915 if (parentParser == NULL && !startParsing(parser)) {
1916 errorCode = XML_ERROR_NO_MEMORY;
1917 return XML_STATUS_ERROR;
1918 }
Fred Drake31d485c2004-08-03 07:06:22 +00001919 default:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001920 ps_parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00001921 }
1922
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001923 if (len == 0) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001924 ps_finalBuffer = (XML_Bool)isFinal;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001925 if (!isFinal)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001926 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001927 positionPtr = bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00001928 parseEndPtr = bufferEnd;
1929
1930 /* If data are left over from last buffer, and we now know that these
1931 data are the final chunk of input, then we have to check them again
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001932 to detect errors based on that fact.
Fred Drake31d485c2004-08-03 07:06:22 +00001933 */
1934 errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr);
1935
1936 if (errorCode == XML_ERROR_NONE) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001937 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001938 case XML_SUSPENDED:
Victor Stinner93d0cb52017-08-18 23:43:54 +02001939 /* It is hard to be certain, but it seems that this case
1940 * cannot occur. This code is cleaning up a previous parse
1941 * with no new data (since len == 0). Changing the parsing
1942 * state requires getting to execute a handler function, and
1943 * there doesn't seem to be an opportunity for that while in
1944 * this circumstance.
1945 *
1946 * Given the uncertainty, we retain the code but exclude it
1947 * from coverage tests.
1948 *
1949 * LCOV_EXCL_START
1950 */
Fred Drake31d485c2004-08-03 07:06:22 +00001951 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
1952 positionPtr = bufferPtr;
1953 return XML_STATUS_SUSPENDED;
Victor Stinner93d0cb52017-08-18 23:43:54 +02001954 /* LCOV_EXCL_STOP */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001955 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00001956 case XML_PARSING:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001957 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001958 /* fall through */
1959 default:
1960 return XML_STATUS_OK;
1961 }
1962 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001963 eventEndPtr = eventPtr;
1964 processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001965 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001966 }
1967#ifndef XML_CONTEXT_BYTES
1968 else if (bufferPtr == bufferEnd) {
1969 const char *end;
1970 int nLeftOver;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001971 enum XML_Status result;
Victor Stinner5ff71322017-06-21 14:39:22 +02001972 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
1973 if (len > ((XML_Size)-1) / 2 - parseEndByteIndex) {
1974 errorCode = XML_ERROR_NO_MEMORY;
1975 eventPtr = eventEndPtr = NULL;
1976 processor = errorProcessor;
1977 return XML_STATUS_ERROR;
1978 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001979 parseEndByteIndex += len;
1980 positionPtr = s;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001981 ps_finalBuffer = (XML_Bool)isFinal;
Fred Drake31d485c2004-08-03 07:06:22 +00001982
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001983 errorCode = processor(parser, s, parseEndPtr = s + len, &end);
Fred Drake31d485c2004-08-03 07:06:22 +00001984
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001985 if (errorCode != XML_ERROR_NONE) {
1986 eventEndPtr = eventPtr;
1987 processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001988 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001989 }
Fred Drake31d485c2004-08-03 07:06:22 +00001990 else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001991 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001992 case XML_SUSPENDED:
1993 result = XML_STATUS_SUSPENDED;
1994 break;
1995 case XML_INITIALIZED:
1996 case XML_PARSING:
Fred Drake31d485c2004-08-03 07:06:22 +00001997 if (isFinal) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001998 ps_parsing = XML_FINISHED;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001999 return XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00002000 }
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07002001 /* fall through */
2002 default:
2003 result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00002004 }
2005 }
2006
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002007 XmlUpdatePosition(encoding, positionPtr, end, &position);
2008 nLeftOver = s + len - end;
2009 if (nLeftOver) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002010 if (buffer == NULL || nLeftOver > bufferLim - buffer) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002011 /* avoid _signed_ integer overflow */
2012 char *temp = NULL;
2013 const int bytesToAllocate = (int)((unsigned)len * 2U);
2014 if (bytesToAllocate > 0) {
2015 temp = (buffer == NULL
2016 ? (char *)MALLOC(bytesToAllocate)
2017 : (char *)REALLOC(buffer, bytesToAllocate));
2018 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002019 if (temp == NULL) {
2020 errorCode = XML_ERROR_NO_MEMORY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002021 eventPtr = eventEndPtr = NULL;
2022 processor = errorProcessor;
2023 return XML_STATUS_ERROR;
2024 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002025 buffer = temp;
Victor Stinner5ff71322017-06-21 14:39:22 +02002026 bufferLim = buffer + bytesToAllocate;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002027 }
2028 memcpy(buffer, end, nLeftOver);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002029 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002030 bufferPtr = buffer;
2031 bufferEnd = buffer + nLeftOver;
2032 positionPtr = bufferPtr;
2033 parseEndPtr = bufferEnd;
2034 eventPtr = bufferPtr;
2035 eventEndPtr = bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00002036 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002037 }
2038#endif /* not defined XML_CONTEXT_BYTES */
2039 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002040 void *buff = XML_GetBuffer(parser, len);
2041 if (buff == NULL)
2042 return XML_STATUS_ERROR;
2043 else {
2044 memcpy(buff, s, len);
2045 return XML_ParseBuffer(parser, len, isFinal);
2046 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002047 }
2048}
2049
Fred Drake08317ae2003-10-21 15:38:55 +00002050enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002051XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002052{
Fred Drake31d485c2004-08-03 07:06:22 +00002053 const char *start;
Neal Norwitz52ca0dd2006-01-07 21:21:16 +00002054 enum XML_Status result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00002055
Victor Stinner5ff71322017-06-21 14:39:22 +02002056 if (parser == NULL)
2057 return XML_STATUS_ERROR;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002058 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002059 case XML_SUSPENDED:
2060 errorCode = XML_ERROR_SUSPENDED;
2061 return XML_STATUS_ERROR;
2062 case XML_FINISHED:
2063 errorCode = XML_ERROR_FINISHED;
2064 return XML_STATUS_ERROR;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07002065 case XML_INITIALIZED:
2066 if (parentParser == NULL && !startParsing(parser)) {
2067 errorCode = XML_ERROR_NO_MEMORY;
2068 return XML_STATUS_ERROR;
2069 }
Fred Drake31d485c2004-08-03 07:06:22 +00002070 default:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002071 ps_parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00002072 }
2073
2074 start = bufferPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002075 positionPtr = start;
2076 bufferEnd += len;
Fred Drake31d485c2004-08-03 07:06:22 +00002077 parseEndPtr = bufferEnd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002078 parseEndByteIndex += len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002079 ps_finalBuffer = (XML_Bool)isFinal;
Fred Drake31d485c2004-08-03 07:06:22 +00002080
2081 errorCode = processor(parser, start, parseEndPtr, &bufferPtr);
2082
2083 if (errorCode != XML_ERROR_NONE) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002084 eventEndPtr = eventPtr;
2085 processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002086 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002087 }
Fred Drake31d485c2004-08-03 07:06:22 +00002088 else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002089 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002090 case XML_SUSPENDED:
2091 result = XML_STATUS_SUSPENDED;
2092 break;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002093 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00002094 case XML_PARSING:
2095 if (isFinal) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002096 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002097 return result;
2098 }
2099 default: ; /* should not happen */
2100 }
2101 }
2102
2103 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
2104 positionPtr = bufferPtr;
2105 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002106}
2107
Fred Drake08317ae2003-10-21 15:38:55 +00002108void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002109XML_GetBuffer(XML_Parser parser, int len)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002110{
Victor Stinner5ff71322017-06-21 14:39:22 +02002111 if (parser == NULL)
2112 return NULL;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002113 if (len < 0) {
2114 errorCode = XML_ERROR_NO_MEMORY;
2115 return NULL;
2116 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002117 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002118 case XML_SUSPENDED:
2119 errorCode = XML_ERROR_SUSPENDED;
2120 return NULL;
2121 case XML_FINISHED:
2122 errorCode = XML_ERROR_FINISHED;
2123 return NULL;
2124 default: ;
2125 }
2126
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002127 if (len > bufferLim - bufferEnd) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002128#ifdef XML_CONTEXT_BYTES
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002129 int keep;
Victor Stinner23ec4b52017-06-15 00:54:36 +02002130#endif /* defined XML_CONTEXT_BYTES */
2131 /* Do not invoke signed arithmetic overflow: */
2132 int neededSize = (int) ((unsigned)len + (unsigned)(bufferEnd - bufferPtr));
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002133 if (neededSize < 0) {
2134 errorCode = XML_ERROR_NO_MEMORY;
2135 return NULL;
2136 }
2137#ifdef XML_CONTEXT_BYTES
2138 keep = (int)(bufferPtr - buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002139 if (keep > XML_CONTEXT_BYTES)
2140 keep = XML_CONTEXT_BYTES;
2141 neededSize += keep;
2142#endif /* defined XML_CONTEXT_BYTES */
2143 if (neededSize <= bufferLim - buffer) {
2144#ifdef XML_CONTEXT_BYTES
2145 if (keep < bufferPtr - buffer) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002146 int offset = (int)(bufferPtr - buffer) - keep;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002147 memmove(buffer, &buffer[offset], bufferEnd - bufferPtr + keep);
2148 bufferEnd -= offset;
2149 bufferPtr -= offset;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002150 }
2151#else
2152 memmove(buffer, bufferPtr, bufferEnd - bufferPtr);
2153 bufferEnd = buffer + (bufferEnd - bufferPtr);
2154 bufferPtr = buffer;
2155#endif /* not defined XML_CONTEXT_BYTES */
2156 }
2157 else {
2158 char *newBuf;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002159 int bufferSize = (int)(bufferLim - bufferPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002160 if (bufferSize == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002161 bufferSize = INIT_BUFFER_SIZE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002162 do {
Victor Stinner23ec4b52017-06-15 00:54:36 +02002163 /* Do not invoke signed arithmetic overflow: */
2164 bufferSize = (int) (2U * (unsigned) bufferSize);
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002165 } while (bufferSize < neededSize && bufferSize > 0);
2166 if (bufferSize <= 0) {
2167 errorCode = XML_ERROR_NO_MEMORY;
2168 return NULL;
2169 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002170 newBuf = (char *)MALLOC(bufferSize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002171 if (newBuf == 0) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002172 errorCode = XML_ERROR_NO_MEMORY;
2173 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002174 }
2175 bufferLim = newBuf + bufferSize;
2176#ifdef XML_CONTEXT_BYTES
2177 if (bufferPtr) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002178 int keep = (int)(bufferPtr - buffer);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002179 if (keep > XML_CONTEXT_BYTES)
2180 keep = XML_CONTEXT_BYTES;
2181 memcpy(newBuf, &bufferPtr[-keep], bufferEnd - bufferPtr + keep);
2182 FREE(buffer);
2183 buffer = newBuf;
2184 bufferEnd = buffer + (bufferEnd - bufferPtr) + keep;
2185 bufferPtr = buffer + keep;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002186 }
2187 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002188 bufferEnd = newBuf + (bufferEnd - bufferPtr);
2189 bufferPtr = buffer = newBuf;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002190 }
2191#else
2192 if (bufferPtr) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002193 memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr);
2194 FREE(buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002195 }
2196 bufferEnd = newBuf + (bufferEnd - bufferPtr);
2197 bufferPtr = buffer = newBuf;
2198#endif /* not defined XML_CONTEXT_BYTES */
2199 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002200 eventPtr = eventEndPtr = NULL;
2201 positionPtr = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002202 }
2203 return bufferEnd;
2204}
2205
Fred Drake31d485c2004-08-03 07:06:22 +00002206enum XML_Status XMLCALL
2207XML_StopParser(XML_Parser parser, XML_Bool resumable)
2208{
Victor Stinner5ff71322017-06-21 14:39:22 +02002209 if (parser == NULL)
2210 return XML_STATUS_ERROR;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002211 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002212 case XML_SUSPENDED:
2213 if (resumable) {
2214 errorCode = XML_ERROR_SUSPENDED;
2215 return XML_STATUS_ERROR;
2216 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002217 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002218 break;
2219 case XML_FINISHED:
2220 errorCode = XML_ERROR_FINISHED;
2221 return XML_STATUS_ERROR;
2222 default:
2223 if (resumable) {
2224#ifdef XML_DTD
2225 if (isParamEntity) {
2226 errorCode = XML_ERROR_SUSPEND_PE;
2227 return XML_STATUS_ERROR;
2228 }
2229#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002230 ps_parsing = XML_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00002231 }
2232 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002233 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002234 }
2235 return XML_STATUS_OK;
2236}
2237
2238enum XML_Status XMLCALL
2239XML_ResumeParser(XML_Parser parser)
2240{
Neal Norwitz52ca0dd2006-01-07 21:21:16 +00002241 enum XML_Status result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00002242
Victor Stinner5ff71322017-06-21 14:39:22 +02002243 if (parser == NULL)
2244 return XML_STATUS_ERROR;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002245 if (ps_parsing != XML_SUSPENDED) {
Fred Drake31d485c2004-08-03 07:06:22 +00002246 errorCode = XML_ERROR_NOT_SUSPENDED;
2247 return XML_STATUS_ERROR;
2248 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002249 ps_parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00002250
2251 errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr);
2252
2253 if (errorCode != XML_ERROR_NONE) {
2254 eventEndPtr = eventPtr;
2255 processor = errorProcessor;
2256 return XML_STATUS_ERROR;
2257 }
2258 else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002259 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002260 case XML_SUSPENDED:
2261 result = XML_STATUS_SUSPENDED;
2262 break;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002263 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00002264 case XML_PARSING:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002265 if (ps_finalBuffer) {
2266 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002267 return result;
2268 }
2269 default: ;
2270 }
2271 }
2272
2273 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
2274 positionPtr = bufferPtr;
2275 return result;
2276}
2277
2278void XMLCALL
2279XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status)
2280{
Victor Stinner5ff71322017-06-21 14:39:22 +02002281 if (parser == NULL)
2282 return;
Fred Drake31d485c2004-08-03 07:06:22 +00002283 assert(status != NULL);
2284 *status = parser->m_parsingStatus;
2285}
2286
Fred Drake08317ae2003-10-21 15:38:55 +00002287enum XML_Error XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002288XML_GetErrorCode(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002289{
Victor Stinner5ff71322017-06-21 14:39:22 +02002290 if (parser == NULL)
2291 return XML_ERROR_INVALID_ARGUMENT;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002292 return errorCode;
2293}
2294
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002295XML_Index XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002296XML_GetCurrentByteIndex(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002297{
Victor Stinner5ff71322017-06-21 14:39:22 +02002298 if (parser == NULL)
2299 return -1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002300 if (eventPtr)
Victor Stinner23ec4b52017-06-15 00:54:36 +02002301 return (XML_Index)(parseEndByteIndex - (parseEndPtr - eventPtr));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002302 return -1;
2303}
2304
Fred Drake08317ae2003-10-21 15:38:55 +00002305int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002306XML_GetCurrentByteCount(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002307{
Victor Stinner5ff71322017-06-21 14:39:22 +02002308 if (parser == NULL)
2309 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002310 if (eventEndPtr && eventPtr)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002311 return (int)(eventEndPtr - eventPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002312 return 0;
2313}
2314
Fred Drake08317ae2003-10-21 15:38:55 +00002315const char * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002316XML_GetInputContext(XML_Parser parser, int *offset, int *size)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002317{
2318#ifdef XML_CONTEXT_BYTES
Victor Stinner5ff71322017-06-21 14:39:22 +02002319 if (parser == NULL)
2320 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002321 if (eventPtr && buffer) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002322 if (offset != NULL)
2323 *offset = (int)(eventPtr - buffer);
2324 if (size != NULL)
2325 *size = (int)(bufferEnd - buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002326 return buffer;
2327 }
Victor Stinner5ff71322017-06-21 14:39:22 +02002328#else
2329 (void)parser;
2330 (void)offset;
2331 (void)size;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002332#endif /* defined XML_CONTEXT_BYTES */
2333 return (char *) 0;
2334}
2335
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002336XML_Size XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002337XML_GetCurrentLineNumber(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002338{
Victor Stinner5ff71322017-06-21 14:39:22 +02002339 if (parser == NULL)
2340 return 0;
Fred Drake31d485c2004-08-03 07:06:22 +00002341 if (eventPtr && eventPtr >= positionPtr) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002342 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
2343 positionPtr = eventPtr;
2344 }
2345 return position.lineNumber + 1;
2346}
2347
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002348XML_Size XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002349XML_GetCurrentColumnNumber(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002350{
Victor Stinner5ff71322017-06-21 14:39:22 +02002351 if (parser == NULL)
2352 return 0;
Fred Drake31d485c2004-08-03 07:06:22 +00002353 if (eventPtr && eventPtr >= positionPtr) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002354 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
2355 positionPtr = eventPtr;
2356 }
2357 return position.columnNumber;
2358}
2359
Fred Drake08317ae2003-10-21 15:38:55 +00002360void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002361XML_FreeContentModel(XML_Parser parser, XML_Content *model)
2362{
Victor Stinner5ff71322017-06-21 14:39:22 +02002363 if (parser != NULL)
2364 FREE(model);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002365}
2366
Fred Drake08317ae2003-10-21 15:38:55 +00002367void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002368XML_MemMalloc(XML_Parser parser, size_t size)
2369{
Victor Stinner5ff71322017-06-21 14:39:22 +02002370 if (parser == NULL)
2371 return NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002372 return MALLOC(size);
2373}
2374
Fred Drake08317ae2003-10-21 15:38:55 +00002375void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002376XML_MemRealloc(XML_Parser parser, void *ptr, size_t size)
2377{
Victor Stinner5ff71322017-06-21 14:39:22 +02002378 if (parser == NULL)
2379 return NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002380 return REALLOC(ptr, size);
2381}
2382
Fred Drake08317ae2003-10-21 15:38:55 +00002383void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002384XML_MemFree(XML_Parser parser, void *ptr)
2385{
Victor Stinner5ff71322017-06-21 14:39:22 +02002386 if (parser != NULL)
2387 FREE(ptr);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002388}
2389
Fred Drake08317ae2003-10-21 15:38:55 +00002390void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002391XML_DefaultCurrent(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002392{
Victor Stinner5ff71322017-06-21 14:39:22 +02002393 if (parser == NULL)
2394 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002395 if (defaultHandler) {
2396 if (openInternalEntities)
2397 reportDefault(parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002398 internalEncoding,
2399 openInternalEntities->internalEventPtr,
2400 openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002401 else
2402 reportDefault(parser, encoding, eventPtr, eventEndPtr);
2403 }
2404}
2405
Fred Drake08317ae2003-10-21 15:38:55 +00002406const XML_LChar * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002407XML_ErrorString(enum XML_Error code)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002408{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002409 static const XML_LChar* const message[] = {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002410 0,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002411 XML_L("out of memory"),
2412 XML_L("syntax error"),
2413 XML_L("no element found"),
2414 XML_L("not well-formed (invalid token)"),
2415 XML_L("unclosed token"),
2416 XML_L("partial character"),
2417 XML_L("mismatched tag"),
2418 XML_L("duplicate attribute"),
2419 XML_L("junk after document element"),
2420 XML_L("illegal parameter entity reference"),
2421 XML_L("undefined entity"),
2422 XML_L("recursive entity reference"),
2423 XML_L("asynchronous entity"),
2424 XML_L("reference to invalid character number"),
2425 XML_L("reference to binary entity"),
2426 XML_L("reference to external entity in attribute"),
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002427 XML_L("XML or text declaration not at start of entity"),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002428 XML_L("unknown encoding"),
2429 XML_L("encoding specified in XML declaration is incorrect"),
2430 XML_L("unclosed CDATA section"),
2431 XML_L("error in processing external entity reference"),
2432 XML_L("document is not standalone"),
2433 XML_L("unexpected parser state - please send a bug report"),
2434 XML_L("entity declared in parameter entity"),
2435 XML_L("requested feature requires XML_DTD support in Expat"),
Fred Drake08317ae2003-10-21 15:38:55 +00002436 XML_L("cannot change setting once parsing has begun"),
Fred Drake31d485c2004-08-03 07:06:22 +00002437 XML_L("unbound prefix"),
2438 XML_L("must not undeclare prefix"),
2439 XML_L("incomplete markup in parameter entity"),
2440 XML_L("XML declaration not well-formed"),
2441 XML_L("text declaration not well-formed"),
2442 XML_L("illegal character(s) in public id"),
2443 XML_L("parser suspended"),
2444 XML_L("parser not suspended"),
2445 XML_L("parsing aborted"),
2446 XML_L("parsing finished"),
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002447 XML_L("cannot suspend in external parameter entity"),
2448 XML_L("reserved prefix (xml) must not be undeclared or bound to another namespace name"),
2449 XML_L("reserved prefix (xmlns) must not be declared or undeclared"),
2450 XML_L("prefix must not be bound to one of the reserved namespace names")
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002451 };
2452 if (code > 0 && code < sizeof(message)/sizeof(message[0]))
2453 return message[code];
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002454 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002455}
2456
Fred Drake08317ae2003-10-21 15:38:55 +00002457const XML_LChar * XMLCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002458XML_ExpatVersion(void) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002459
2460 /* V1 is used to string-ize the version number. However, it would
2461 string-ize the actual version macro *names* unless we get them
2462 substituted before being passed to V1. CPP is defined to expand
2463 a macro, then rescan for more expansions. Thus, we use V2 to expand
2464 the version macros, then CPP will expand the resulting V1() macro
2465 with the correct numerals. */
2466 /* ### I'm assuming cpp is portable in this respect... */
2467
2468#define V1(a,b,c) XML_L(#a)XML_L(".")XML_L(#b)XML_L(".")XML_L(#c)
2469#define V2(a,b,c) XML_L("expat_")V1(a,b,c)
2470
2471 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2472
2473#undef V1
2474#undef V2
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002475}
2476
Fred Drake08317ae2003-10-21 15:38:55 +00002477XML_Expat_Version XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002478XML_ExpatVersionInfo(void)
2479{
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002480 XML_Expat_Version version;
2481
2482 version.major = XML_MAJOR_VERSION;
2483 version.minor = XML_MINOR_VERSION;
2484 version.micro = XML_MICRO_VERSION;
2485
2486 return version;
2487}
2488
Fred Drake08317ae2003-10-21 15:38:55 +00002489const XML_Feature * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002490XML_GetFeatureList(void)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002491{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002492 static const XML_Feature features[] = {
2493 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2494 sizeof(XML_Char)},
2495 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2496 sizeof(XML_LChar)},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002497#ifdef XML_UNICODE
Fred Drake08317ae2003-10-21 15:38:55 +00002498 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002499#endif
2500#ifdef XML_UNICODE_WCHAR_T
Fred Drake08317ae2003-10-21 15:38:55 +00002501 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002502#endif
2503#ifdef XML_DTD
Fred Drake08317ae2003-10-21 15:38:55 +00002504 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002505#endif
2506#ifdef XML_CONTEXT_BYTES
2507 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2508 XML_CONTEXT_BYTES},
2509#endif
2510#ifdef XML_MIN_SIZE
Fred Drake08317ae2003-10-21 15:38:55 +00002511 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002512#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002513#ifdef XML_NS
2514 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2515#endif
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002516#ifdef XML_LARGE_SIZE
2517 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2518#endif
2519#ifdef XML_ATTR_INFO
2520 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2521#endif
Fred Drake08317ae2003-10-21 15:38:55 +00002522 {XML_FEATURE_END, NULL, 0}
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002523 };
2524
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002525 return features;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002526}
2527
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002528/* Initially tag->rawName always points into the parse buffer;
2529 for those TAG instances opened while the current parse buffer was
2530 processed, and not yet closed, we need to store tag->rawName in a more
2531 permanent location, since the parse buffer is about to be discarded.
2532*/
2533static XML_Bool
2534storeRawNames(XML_Parser parser)
2535{
2536 TAG *tag = tagStack;
2537 while (tag) {
2538 int bufSize;
2539 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2540 char *rawNameBuf = tag->buf + nameLen;
2541 /* Stop if already stored. Since tagStack is a stack, we can stop
2542 at the first entry that has already been copied; everything
2543 below it in the stack is already been accounted for in a
2544 previous call to this function.
2545 */
2546 if (tag->rawName == rawNameBuf)
2547 break;
2548 /* For re-use purposes we need to ensure that the
2549 size of tag->buf is a multiple of sizeof(XML_Char).
2550 */
2551 bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2552 if (bufSize > tag->bufEnd - tag->buf) {
2553 char *temp = (char *)REALLOC(tag->buf, bufSize);
2554 if (temp == NULL)
2555 return XML_FALSE;
2556 /* if tag->name.str points to tag->buf (only when namespace
2557 processing is off) then we have to update it
2558 */
2559 if (tag->name.str == (XML_Char *)tag->buf)
2560 tag->name.str = (XML_Char *)temp;
2561 /* if tag->name.localPart is set (when namespace processing is on)
2562 then update it as well, since it will always point into tag->buf
2563 */
2564 if (tag->name.localPart)
2565 tag->name.localPart = (XML_Char *)temp + (tag->name.localPart -
2566 (XML_Char *)tag->buf);
2567 tag->buf = temp;
2568 tag->bufEnd = temp + bufSize;
2569 rawNameBuf = temp + nameLen;
2570 }
2571 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2572 tag->rawName = rawNameBuf;
2573 tag = tag->parent;
2574 }
2575 return XML_TRUE;
2576}
2577
2578static enum XML_Error PTRCALL
2579contentProcessor(XML_Parser parser,
2580 const char *start,
2581 const char *end,
2582 const char **endPtr)
2583{
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002584 enum XML_Error result = doContent(parser, 0, encoding, start, end,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002585 endPtr, (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00002586 if (result == XML_ERROR_NONE) {
2587 if (!storeRawNames(parser))
2588 return XML_ERROR_NO_MEMORY;
2589 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002590 return result;
2591}
2592
2593static enum XML_Error PTRCALL
2594externalEntityInitProcessor(XML_Parser parser,
2595 const char *start,
2596 const char *end,
2597 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002598{
2599 enum XML_Error result = initializeEncoding(parser);
2600 if (result != XML_ERROR_NONE)
2601 return result;
2602 processor = externalEntityInitProcessor2;
2603 return externalEntityInitProcessor2(parser, start, end, endPtr);
2604}
2605
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002606static enum XML_Error PTRCALL
2607externalEntityInitProcessor2(XML_Parser parser,
2608 const char *start,
2609 const char *end,
2610 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002611{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002612 const char *next = start; /* XmlContentTok doesn't always set the last arg */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002613 int tok = XmlContentTok(encoding, start, end, &next);
2614 switch (tok) {
2615 case XML_TOK_BOM:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002616 /* If we are at the end of the buffer, this would cause the next stage,
2617 i.e. externalEntityInitProcessor3, to pass control directly to
2618 doContent (by detecting XML_TOK_NONE) without processing any xml text
2619 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2620 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002621 if (next == end && !ps_finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002622 *endPtr = next;
2623 return XML_ERROR_NONE;
2624 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002625 start = next;
2626 break;
2627 case XML_TOK_PARTIAL:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002628 if (!ps_finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002629 *endPtr = start;
2630 return XML_ERROR_NONE;
2631 }
2632 eventPtr = start;
2633 return XML_ERROR_UNCLOSED_TOKEN;
2634 case XML_TOK_PARTIAL_CHAR:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002635 if (!ps_finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002636 *endPtr = start;
2637 return XML_ERROR_NONE;
2638 }
2639 eventPtr = start;
2640 return XML_ERROR_PARTIAL_CHAR;
2641 }
2642 processor = externalEntityInitProcessor3;
2643 return externalEntityInitProcessor3(parser, start, end, endPtr);
2644}
2645
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002646static enum XML_Error PTRCALL
2647externalEntityInitProcessor3(XML_Parser parser,
2648 const char *start,
2649 const char *end,
2650 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002651{
Fred Drake31d485c2004-08-03 07:06:22 +00002652 int tok;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002653 const char *next = start; /* XmlContentTok doesn't always set the last arg */
Fred Drake31d485c2004-08-03 07:06:22 +00002654 eventPtr = start;
2655 tok = XmlContentTok(encoding, start, end, &next);
2656 eventEndPtr = next;
2657
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002658 switch (tok) {
2659 case XML_TOK_XML_DECL:
2660 {
Fred Drake31d485c2004-08-03 07:06:22 +00002661 enum XML_Error result;
2662 result = processXmlDecl(parser, 1, start, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002663 if (result != XML_ERROR_NONE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002664 return result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002665 switch (ps_parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002666 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00002667 *endPtr = next;
2668 return XML_ERROR_NONE;
2669 case XML_FINISHED:
2670 return XML_ERROR_ABORTED;
2671 default:
2672 start = next;
2673 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002674 }
2675 break;
2676 case XML_TOK_PARTIAL:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002677 if (!ps_finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002678 *endPtr = start;
2679 return XML_ERROR_NONE;
2680 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002681 return XML_ERROR_UNCLOSED_TOKEN;
2682 case XML_TOK_PARTIAL_CHAR:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002683 if (!ps_finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002684 *endPtr = start;
2685 return XML_ERROR_NONE;
2686 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002687 return XML_ERROR_PARTIAL_CHAR;
2688 }
2689 processor = externalEntityContentProcessor;
2690 tagLevel = 1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002691 return externalEntityContentProcessor(parser, start, end, endPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002692}
2693
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002694static enum XML_Error PTRCALL
2695externalEntityContentProcessor(XML_Parser parser,
2696 const char *start,
2697 const char *end,
2698 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002699{
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002700 enum XML_Error result = doContent(parser, 1, encoding, start, end,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002701 endPtr, (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00002702 if (result == XML_ERROR_NONE) {
2703 if (!storeRawNames(parser))
2704 return XML_ERROR_NO_MEMORY;
2705 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002706 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002707}
2708
2709static enum XML_Error
2710doContent(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002711 int startTagLevel,
2712 const ENCODING *enc,
2713 const char *s,
2714 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00002715 const char **nextPtr,
2716 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002717{
Fred Drake31d485c2004-08-03 07:06:22 +00002718 /* save one level of indirection */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002719 DTD * const dtd = _dtd;
Fred Drake31d485c2004-08-03 07:06:22 +00002720
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002721 const char **eventPP;
2722 const char **eventEndPP;
2723 if (enc == encoding) {
2724 eventPP = &eventPtr;
2725 eventEndPP = &eventEndPtr;
2726 }
2727 else {
2728 eventPP = &(openInternalEntities->internalEventPtr);
2729 eventEndPP = &(openInternalEntities->internalEventEndPtr);
2730 }
2731 *eventPP = s;
Fred Drake31d485c2004-08-03 07:06:22 +00002732
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002733 for (;;) {
2734 const char *next = s; /* XmlContentTok doesn't always set the last arg */
2735 int tok = XmlContentTok(enc, s, end, &next);
2736 *eventEndPP = next;
2737 switch (tok) {
2738 case XML_TOK_TRAILING_CR:
Fred Drake31d485c2004-08-03 07:06:22 +00002739 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002740 *nextPtr = s;
2741 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002742 }
2743 *eventEndPP = end;
2744 if (characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002745 XML_Char c = 0xA;
2746 characterDataHandler(handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002747 }
2748 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002749 reportDefault(parser, enc, s, end);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002750 /* We are at the end of the final buffer, should we check for
2751 XML_SUSPENDED, XML_FINISHED?
Fred Drake31d485c2004-08-03 07:06:22 +00002752 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002753 if (startTagLevel == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002754 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002755 if (tagLevel != startTagLevel)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002756 return XML_ERROR_ASYNC_ENTITY;
Fred Drake31d485c2004-08-03 07:06:22 +00002757 *nextPtr = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002758 return XML_ERROR_NONE;
2759 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00002760 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002761 *nextPtr = s;
2762 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002763 }
2764 if (startTagLevel > 0) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002765 if (tagLevel != startTagLevel)
2766 return XML_ERROR_ASYNC_ENTITY;
Fred Drake31d485c2004-08-03 07:06:22 +00002767 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002768 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002769 }
2770 return XML_ERROR_NO_ELEMENTS;
2771 case XML_TOK_INVALID:
2772 *eventPP = next;
2773 return XML_ERROR_INVALID_TOKEN;
2774 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00002775 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002776 *nextPtr = s;
2777 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002778 }
2779 return XML_ERROR_UNCLOSED_TOKEN;
2780 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00002781 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002782 *nextPtr = s;
2783 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002784 }
2785 return XML_ERROR_PARTIAL_CHAR;
2786 case XML_TOK_ENTITY_REF:
2787 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002788 const XML_Char *name;
2789 ENTITY *entity;
2790 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
2791 s + enc->minBytesPerChar,
2792 next - enc->minBytesPerChar);
2793 if (ch) {
2794 if (characterDataHandler)
2795 characterDataHandler(handlerArg, &ch, 1);
2796 else if (defaultHandler)
2797 reportDefault(parser, enc, s, next);
2798 break;
2799 }
2800 name = poolStoreString(&dtd->pool, enc,
2801 s + enc->minBytesPerChar,
2802 next - enc->minBytesPerChar);
2803 if (!name)
2804 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07002805 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002806 poolDiscard(&dtd->pool);
2807 /* First, determine if a check for an existing declaration is needed;
2808 if yes, check that the entity exists, and that it is internal,
2809 otherwise call the skipped entity or default handler.
2810 */
2811 if (!dtd->hasParamEntityRefs || dtd->standalone) {
2812 if (!entity)
2813 return XML_ERROR_UNDEFINED_ENTITY;
2814 else if (!entity->is_internal)
2815 return XML_ERROR_ENTITY_DECLARED_IN_PE;
2816 }
2817 else if (!entity) {
2818 if (skippedEntityHandler)
2819 skippedEntityHandler(handlerArg, name, 0);
2820 else if (defaultHandler)
2821 reportDefault(parser, enc, s, next);
2822 break;
2823 }
2824 if (entity->open)
2825 return XML_ERROR_RECURSIVE_ENTITY_REF;
2826 if (entity->notation)
2827 return XML_ERROR_BINARY_ENTITY_REF;
2828 if (entity->textPtr) {
2829 enum XML_Error result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002830 if (!defaultExpandInternalEntities) {
2831 if (skippedEntityHandler)
2832 skippedEntityHandler(handlerArg, entity->name, 0);
2833 else if (defaultHandler)
2834 reportDefault(parser, enc, s, next);
2835 break;
2836 }
Fred Drake31d485c2004-08-03 07:06:22 +00002837 result = processInternalEntity(parser, entity, XML_FALSE);
2838 if (result != XML_ERROR_NONE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002839 return result;
2840 }
2841 else if (externalEntityRefHandler) {
2842 const XML_Char *context;
2843 entity->open = XML_TRUE;
2844 context = getContext(parser);
2845 entity->open = XML_FALSE;
2846 if (!context)
2847 return XML_ERROR_NO_MEMORY;
Fred Drake31d485c2004-08-03 07:06:22 +00002848 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002849 context,
2850 entity->base,
2851 entity->systemId,
2852 entity->publicId))
2853 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2854 poolDiscard(&tempPool);
2855 }
2856 else if (defaultHandler)
2857 reportDefault(parser, enc, s, next);
2858 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002859 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002860 case XML_TOK_START_TAG_NO_ATTS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002861 /* fall through */
2862 case XML_TOK_START_TAG_WITH_ATTS:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002863 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002864 TAG *tag;
2865 enum XML_Error result;
2866 XML_Char *toPtr;
2867 if (freeTagList) {
2868 tag = freeTagList;
2869 freeTagList = freeTagList->parent;
2870 }
2871 else {
2872 tag = (TAG *)MALLOC(sizeof(TAG));
2873 if (!tag)
2874 return XML_ERROR_NO_MEMORY;
2875 tag->buf = (char *)MALLOC(INIT_TAG_BUF_SIZE);
2876 if (!tag->buf) {
2877 FREE(tag);
2878 return XML_ERROR_NO_MEMORY;
2879 }
2880 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
2881 }
2882 tag->bindings = NULL;
2883 tag->parent = tagStack;
2884 tagStack = tag;
2885 tag->name.localPart = NULL;
2886 tag->name.prefix = NULL;
2887 tag->rawName = s + enc->minBytesPerChar;
2888 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
2889 ++tagLevel;
2890 {
2891 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
2892 const char *fromPtr = tag->rawName;
2893 toPtr = (XML_Char *)tag->buf;
2894 for (;;) {
2895 int bufSize;
2896 int convLen;
Victor Stinner23ec4b52017-06-15 00:54:36 +02002897 const enum XML_Convert_Result convert_res = XmlConvert(enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002898 &fromPtr, rawNameEnd,
2899 (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002900 convLen = (int)(toPtr - (XML_Char *)tag->buf);
Victor Stinner5ff71322017-06-21 14:39:22 +02002901 if ((fromPtr >= rawNameEnd) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002902 tag->name.strLen = convLen;
2903 break;
2904 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002905 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002906 {
2907 char *temp = (char *)REALLOC(tag->buf, bufSize);
2908 if (temp == NULL)
2909 return XML_ERROR_NO_MEMORY;
2910 tag->buf = temp;
2911 tag->bufEnd = temp + bufSize;
2912 toPtr = (XML_Char *)temp + convLen;
2913 }
2914 }
2915 }
2916 tag->name.str = (XML_Char *)tag->buf;
2917 *toPtr = XML_T('\0');
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002918 result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
2919 if (result)
2920 return result;
2921 if (startElementHandler)
2922 startElementHandler(handlerArg, tag->name.str,
2923 (const XML_Char **)atts);
2924 else if (defaultHandler)
2925 reportDefault(parser, enc, s, next);
2926 poolClear(&tempPool);
2927 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002928 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002929 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002930 /* fall through */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002931 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
2932 {
2933 const char *rawName = s + enc->minBytesPerChar;
2934 enum XML_Error result;
2935 BINDING *bindings = NULL;
2936 XML_Bool noElmHandlers = XML_TRUE;
2937 TAG_NAME name;
2938 name.str = poolStoreString(&tempPool, enc, rawName,
2939 rawName + XmlNameLength(enc, rawName));
2940 if (!name.str)
2941 return XML_ERROR_NO_MEMORY;
2942 poolFinish(&tempPool);
Fred Drake4faea012003-01-28 06:42:40 +00002943 result = storeAtts(parser, enc, s, &name, &bindings);
Victor Stinner5ff71322017-06-21 14:39:22 +02002944 if (result != XML_ERROR_NONE) {
2945 freeBindings(parser, bindings);
Fred Drake4faea012003-01-28 06:42:40 +00002946 return result;
Victor Stinner5ff71322017-06-21 14:39:22 +02002947 }
Fred Drake4faea012003-01-28 06:42:40 +00002948 poolFinish(&tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002949 if (startElementHandler) {
2950 startElementHandler(handlerArg, name.str, (const XML_Char **)atts);
2951 noElmHandlers = XML_FALSE;
2952 }
2953 if (endElementHandler) {
2954 if (startElementHandler)
2955 *eventPP = *eventEndPP;
2956 endElementHandler(handlerArg, name.str);
2957 noElmHandlers = XML_FALSE;
2958 }
2959 if (noElmHandlers && defaultHandler)
2960 reportDefault(parser, enc, s, next);
2961 poolClear(&tempPool);
Victor Stinner5ff71322017-06-21 14:39:22 +02002962 freeBindings(parser, bindings);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002963 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002964 if (tagLevel == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002965 return epilogProcessor(parser, next, end, nextPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002966 break;
2967 case XML_TOK_END_TAG:
2968 if (tagLevel == startTagLevel)
2969 return XML_ERROR_ASYNC_ENTITY;
2970 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002971 int len;
2972 const char *rawName;
2973 TAG *tag = tagStack;
2974 tagStack = tag->parent;
2975 tag->parent = freeTagList;
2976 freeTagList = tag;
2977 rawName = s + enc->minBytesPerChar*2;
2978 len = XmlNameLength(enc, rawName);
2979 if (len != tag->rawNameLength
2980 || memcmp(tag->rawName, rawName, len) != 0) {
2981 *eventPP = rawName;
2982 return XML_ERROR_TAG_MISMATCH;
2983 }
2984 --tagLevel;
2985 if (endElementHandler) {
2986 const XML_Char *localPart;
2987 const XML_Char *prefix;
2988 XML_Char *uri;
2989 localPart = tag->name.localPart;
2990 if (ns && localPart) {
2991 /* localPart and prefix may have been overwritten in
2992 tag->name.str, since this points to the binding->uri
2993 buffer which gets re-used; so we have to add them again
2994 */
2995 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
2996 /* don't need to check for space - already done in storeAtts() */
2997 while (*localPart) *uri++ = *localPart++;
2998 prefix = (XML_Char *)tag->name.prefix;
2999 if (ns_triplets && prefix) {
3000 *uri++ = namespaceSeparator;
3001 while (*prefix) *uri++ = *prefix++;
3002 }
3003 *uri = XML_T('\0');
3004 }
3005 endElementHandler(handlerArg, tag->name.str);
3006 }
3007 else if (defaultHandler)
3008 reportDefault(parser, enc, s, next);
3009 while (tag->bindings) {
3010 BINDING *b = tag->bindings;
3011 if (endNamespaceDeclHandler)
3012 endNamespaceDeclHandler(handlerArg, b->prefix->name);
3013 tag->bindings = tag->bindings->nextTagBinding;
3014 b->nextTagBinding = freeBindingList;
3015 freeBindingList = b;
3016 b->prefix->binding = b->prevPrefixBinding;
3017 }
3018 if (tagLevel == 0)
3019 return epilogProcessor(parser, next, end, nextPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003020 }
3021 break;
3022 case XML_TOK_CHAR_REF:
3023 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003024 int n = XmlCharRefNumber(enc, s);
3025 if (n < 0)
3026 return XML_ERROR_BAD_CHAR_REF;
3027 if (characterDataHandler) {
3028 XML_Char buf[XML_ENCODE_MAX];
3029 characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
3030 }
3031 else if (defaultHandler)
3032 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003033 }
3034 break;
3035 case XML_TOK_XML_DECL:
3036 return XML_ERROR_MISPLACED_XML_PI;
3037 case XML_TOK_DATA_NEWLINE:
3038 if (characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003039 XML_Char c = 0xA;
3040 characterDataHandler(handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003041 }
3042 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003043 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003044 break;
3045 case XML_TOK_CDATA_SECT_OPEN:
3046 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003047 enum XML_Error result;
3048 if (startCdataSectionHandler)
3049 startCdataSectionHandler(handlerArg);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003050#if 0
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003051 /* Suppose you are doing a transformation on a document that involves
3052 changing only the character data. You set up a defaultHandler
3053 and a characterDataHandler. The defaultHandler simply copies
3054 characters through. The characterDataHandler does the
3055 transformation and writes the characters out escaping them as
3056 necessary. This case will fail to work if we leave out the
3057 following two lines (because & and < inside CDATA sections will
3058 be incorrectly escaped).
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003059
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003060 However, now we have a start/endCdataSectionHandler, so it seems
3061 easier to let the user deal with this.
3062 */
3063 else if (characterDataHandler)
3064 characterDataHandler(handlerArg, dataBuf, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003065#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003066 else if (defaultHandler)
3067 reportDefault(parser, enc, s, next);
Fred Drake31d485c2004-08-03 07:06:22 +00003068 result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore);
3069 if (result != XML_ERROR_NONE)
3070 return result;
3071 else if (!next) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003072 processor = cdataSectionProcessor;
3073 return result;
3074 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003075 }
3076 break;
3077 case XML_TOK_TRAILING_RSQB:
Fred Drake31d485c2004-08-03 07:06:22 +00003078 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003079 *nextPtr = s;
3080 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003081 }
3082 if (characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003083 if (MUST_CONVERT(enc, s)) {
3084 ICHAR *dataPtr = (ICHAR *)dataBuf;
3085 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
3086 characterDataHandler(handlerArg, dataBuf,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003087 (int)(dataPtr - (ICHAR *)dataBuf));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003088 }
3089 else
3090 characterDataHandler(handlerArg,
3091 (XML_Char *)s,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003092 (int)((XML_Char *)end - (XML_Char *)s));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003093 }
3094 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003095 reportDefault(parser, enc, s, end);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003096 /* We are at the end of the final buffer, should we check for
3097 XML_SUSPENDED, XML_FINISHED?
Fred Drake31d485c2004-08-03 07:06:22 +00003098 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003099 if (startTagLevel == 0) {
3100 *eventPP = end;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003101 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003102 }
3103 if (tagLevel != startTagLevel) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003104 *eventPP = end;
3105 return XML_ERROR_ASYNC_ENTITY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003106 }
Fred Drake31d485c2004-08-03 07:06:22 +00003107 *nextPtr = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003108 return XML_ERROR_NONE;
3109 case XML_TOK_DATA_CHARS:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003110 {
3111 XML_CharacterDataHandler charDataHandler = characterDataHandler;
3112 if (charDataHandler) {
3113 if (MUST_CONVERT(enc, s)) {
3114 for (;;) {
3115 ICHAR *dataPtr = (ICHAR *)dataBuf;
Victor Stinner23ec4b52017-06-15 00:54:36 +02003116 const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003117 *eventEndPP = s;
3118 charDataHandler(handlerArg, dataBuf,
3119 (int)(dataPtr - (ICHAR *)dataBuf));
Victor Stinner23ec4b52017-06-15 00:54:36 +02003120 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003121 break;
3122 *eventPP = s;
3123 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003124 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003125 else
3126 charDataHandler(handlerArg,
3127 (XML_Char *)s,
3128 (int)((XML_Char *)next - (XML_Char *)s));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003129 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003130 else if (defaultHandler)
3131 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003132 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003133 break;
3134 case XML_TOK_PI:
3135 if (!reportProcessingInstruction(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003136 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003137 break;
3138 case XML_TOK_COMMENT:
3139 if (!reportComment(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003140 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003141 break;
3142 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02003143 /* All of the tokens produced by XmlContentTok() have their own
3144 * explicit cases, so this default is not strictly necessary.
3145 * However it is a useful safety net, so we retain the code and
3146 * simply exclude it from the coverage tests.
3147 *
3148 * LCOV_EXCL_START
3149 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003150 if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003151 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003152 break;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003153 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003154 }
3155 *eventPP = s = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003156 switch (ps_parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003157 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00003158 *nextPtr = next;
3159 return XML_ERROR_NONE;
3160 case XML_FINISHED:
3161 return XML_ERROR_ABORTED;
3162 default: ;
3163 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003164 }
3165 /* not reached */
3166}
3167
Victor Stinner5ff71322017-06-21 14:39:22 +02003168/* This function does not call free() on the allocated memory, merely
3169 * moving it to the parser's freeBindingList where it can be freed or
3170 * reused as appropriate.
3171 */
3172static void
3173freeBindings(XML_Parser parser, BINDING *bindings)
3174{
3175 while (bindings) {
3176 BINDING *b = bindings;
3177
3178 /* startNamespaceDeclHandler will have been called for this
3179 * binding in addBindings(), so call the end handler now.
3180 */
3181 if (endNamespaceDeclHandler)
3182 endNamespaceDeclHandler(handlerArg, b->prefix->name);
3183
3184 bindings = bindings->nextTagBinding;
3185 b->nextTagBinding = freeBindingList;
3186 freeBindingList = b;
3187 b->prefix->binding = b->prevPrefixBinding;
3188 }
3189}
3190
Fred Drake4faea012003-01-28 06:42:40 +00003191/* Precondition: all arguments must be non-NULL;
3192 Purpose:
3193 - normalize attributes
3194 - check attributes for well-formedness
3195 - generate namespace aware attribute names (URI, prefix)
3196 - build list of attributes for startElementHandler
3197 - default attributes
3198 - process namespace declarations (check and report them)
3199 - generate namespace aware element name (URI, prefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003200*/
3201static enum XML_Error
3202storeAtts(XML_Parser parser, const ENCODING *enc,
3203 const char *attStr, TAG_NAME *tagNamePtr,
3204 BINDING **bindingsPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003205{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003206 DTD * const dtd = _dtd; /* save one level of indirection */
Fred Drake08317ae2003-10-21 15:38:55 +00003207 ELEMENT_TYPE *elementType;
3208 int nDefaultAtts;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003209 const XML_Char **appAtts; /* the attribute list for the application */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003210 int attIndex = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003211 int prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003212 int i;
3213 int n;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003214 XML_Char *uri;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003215 int nPrefixes = 0;
3216 BINDING *binding;
3217 const XML_Char *localPart;
3218
3219 /* lookup the element type name */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003220 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str,0);
Fred Drake4faea012003-01-28 06:42:40 +00003221 if (!elementType) {
3222 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3223 if (!name)
3224 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003225 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
Fred Drake4faea012003-01-28 06:42:40 +00003226 sizeof(ELEMENT_TYPE));
3227 if (!elementType)
3228 return XML_ERROR_NO_MEMORY;
3229 if (ns && !setElementTypePrefix(parser, elementType))
3230 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003231 }
Fred Drake4faea012003-01-28 06:42:40 +00003232 nDefaultAtts = elementType->nDefaultAtts;
3233
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003234 /* get the attributes from the tokenizer */
3235 n = XmlGetAttributes(enc, attStr, attsSize, atts);
3236 if (n + nDefaultAtts > attsSize) {
3237 int oldAttsSize = attsSize;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003238 ATTRIBUTE *temp;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003239#ifdef XML_ATTR_INFO
3240 XML_AttrInfo *temp2;
3241#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003242 attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003243 temp = (ATTRIBUTE *)REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE));
Victor Stinner93d0cb52017-08-18 23:43:54 +02003244 if (temp == NULL) {
3245 attsSize = oldAttsSize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003246 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003247 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003248 atts = temp;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003249#ifdef XML_ATTR_INFO
3250 temp2 = (XML_AttrInfo *)REALLOC((void *)attInfo, attsSize * sizeof(XML_AttrInfo));
Victor Stinner93d0cb52017-08-18 23:43:54 +02003251 if (temp2 == NULL) {
3252 attsSize = oldAttsSize;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003253 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003254 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003255 attInfo = temp2;
3256#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003257 if (n > oldAttsSize)
3258 XmlGetAttributes(enc, attStr, n, atts);
3259 }
Fred Drake4faea012003-01-28 06:42:40 +00003260
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003261 appAtts = (const XML_Char **)atts;
3262 for (i = 0; i < n; i++) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003263 ATTRIBUTE *currAtt = &atts[i];
3264#ifdef XML_ATTR_INFO
3265 XML_AttrInfo *currAttInfo = &attInfo[i];
3266#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003267 /* add the name and value to the attribute list */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003268 ATTRIBUTE_ID *attId = getAttributeId(parser, enc, currAtt->name,
3269 currAtt->name
3270 + XmlNameLength(enc, currAtt->name));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003271 if (!attId)
3272 return XML_ERROR_NO_MEMORY;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003273#ifdef XML_ATTR_INFO
3274 currAttInfo->nameStart = parseEndByteIndex - (parseEndPtr - currAtt->name);
3275 currAttInfo->nameEnd = currAttInfo->nameStart +
3276 XmlNameLength(enc, currAtt->name);
3277 currAttInfo->valueStart = parseEndByteIndex -
3278 (parseEndPtr - currAtt->valuePtr);
3279 currAttInfo->valueEnd = parseEndByteIndex - (parseEndPtr - currAtt->valueEnd);
3280#endif
Fred Drake08317ae2003-10-21 15:38:55 +00003281 /* Detect duplicate attributes by their QNames. This does not work when
3282 namespace processing is turned on and different prefixes for the same
3283 namespace are used. For this case we have a check further down.
3284 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003285 if ((attId->name)[-1]) {
3286 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003287 eventPtr = atts[i].name;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003288 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3289 }
3290 (attId->name)[-1] = 1;
3291 appAtts[attIndex++] = attId->name;
3292 if (!atts[i].normalized) {
3293 enum XML_Error result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003294 XML_Bool isCdata = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003295
3296 /* figure out whether declared as other than CDATA */
3297 if (attId->maybeTokenized) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003298 int j;
3299 for (j = 0; j < nDefaultAtts; j++) {
3300 if (attId == elementType->defaultAtts[j].id) {
3301 isCdata = elementType->defaultAtts[j].isCdata;
3302 break;
3303 }
3304 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003305 }
3306
3307 /* normalize the attribute value */
3308 result = storeAttributeValue(parser, enc, isCdata,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003309 atts[i].valuePtr, atts[i].valueEnd,
3310 &tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003311 if (result)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003312 return result;
Fred Drake4faea012003-01-28 06:42:40 +00003313 appAtts[attIndex] = poolStart(&tempPool);
3314 poolFinish(&tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003315 }
Fred Drake4faea012003-01-28 06:42:40 +00003316 else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003317 /* the value did not need normalizing */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003318 appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr,
3319 atts[i].valueEnd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003320 if (appAtts[attIndex] == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003321 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003322 poolFinish(&tempPool);
3323 }
3324 /* handle prefixed attribute names */
Fred Drake4faea012003-01-28 06:42:40 +00003325 if (attId->prefix) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003326 if (attId->xmlns) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003327 /* deal with namespace declarations here */
3328 enum XML_Error result = addBinding(parser, attId->prefix, attId,
3329 appAtts[attIndex], bindingsPtr);
3330 if (result)
3331 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003332 --attIndex;
3333 }
3334 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003335 /* deal with other prefixed names later */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003336 attIndex++;
3337 nPrefixes++;
3338 (attId->name)[-1] = 2;
3339 }
3340 }
3341 else
3342 attIndex++;
3343 }
Fred Drake4faea012003-01-28 06:42:40 +00003344
3345 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3346 nSpecifiedAtts = attIndex;
3347 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3348 for (i = 0; i < attIndex; i += 2)
3349 if (appAtts[i] == elementType->idAtt->name) {
3350 idAttIndex = i;
3351 break;
3352 }
3353 }
3354 else
3355 idAttIndex = -1;
3356
3357 /* do attribute defaulting */
3358 for (i = 0; i < nDefaultAtts; i++) {
3359 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3360 if (!(da->id->name)[-1] && da->value) {
3361 if (da->id->prefix) {
3362 if (da->id->xmlns) {
3363 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3364 da->value, bindingsPtr);
3365 if (result)
3366 return result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003367 }
3368 else {
Fred Drake4faea012003-01-28 06:42:40 +00003369 (da->id->name)[-1] = 2;
3370 nPrefixes++;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003371 appAtts[attIndex++] = da->id->name;
3372 appAtts[attIndex++] = da->value;
3373 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003374 }
Fred Drake4faea012003-01-28 06:42:40 +00003375 else {
3376 (da->id->name)[-1] = 1;
3377 appAtts[attIndex++] = da->id->name;
3378 appAtts[attIndex++] = da->value;
3379 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003380 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003381 }
Fred Drake4faea012003-01-28 06:42:40 +00003382 appAtts[attIndex] = 0;
3383
Fred Drake08317ae2003-10-21 15:38:55 +00003384 /* expand prefixed attribute names, check for duplicates,
3385 and clear flags that say whether attributes were specified */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003386 i = 0;
3387 if (nPrefixes) {
Fred Drake08317ae2003-10-21 15:38:55 +00003388 int j; /* hash table index */
3389 unsigned long version = nsAttsVersion;
3390 int nsAttsSize = (int)1 << nsAttsPower;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003391 unsigned char oldNsAttsPower = nsAttsPower;
Fred Drake08317ae2003-10-21 15:38:55 +00003392 /* size of hash table must be at least 2 * (# of prefixed attributes) */
3393 if ((nPrefixes << 1) >> nsAttsPower) { /* true for nsAttsPower = 0 */
3394 NS_ATT *temp;
3395 /* hash table size must also be a power of 2 and >= 8 */
3396 while (nPrefixes >> nsAttsPower++);
3397 if (nsAttsPower < 3)
3398 nsAttsPower = 3;
3399 nsAttsSize = (int)1 << nsAttsPower;
3400 temp = (NS_ATT *)REALLOC(nsAtts, nsAttsSize * sizeof(NS_ATT));
Victor Stinner93d0cb52017-08-18 23:43:54 +02003401 if (!temp) {
3402 /* Restore actual size of memory in nsAtts */
3403 nsAttsPower = oldNsAttsPower;
Fred Drake08317ae2003-10-21 15:38:55 +00003404 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003405 }
Fred Drake08317ae2003-10-21 15:38:55 +00003406 nsAtts = temp;
3407 version = 0; /* force re-initialization of nsAtts hash table */
3408 }
3409 /* using a version flag saves us from initializing nsAtts every time */
3410 if (!version) { /* initialize version flags when version wraps around */
3411 version = INIT_ATTS_VERSION;
3412 for (j = nsAttsSize; j != 0; )
3413 nsAtts[--j].version = version;
3414 }
3415 nsAttsVersion = --version;
3416
3417 /* expand prefixed names and check for duplicates */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003418 for (; i < attIndex; i += 2) {
Fred Drake08317ae2003-10-21 15:38:55 +00003419 const XML_Char *s = appAtts[i];
3420 if (s[-1] == 2) { /* prefixed */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003421 ATTRIBUTE_ID *id;
Fred Drake08317ae2003-10-21 15:38:55 +00003422 const BINDING *b;
Victor Stinner5ff71322017-06-21 14:39:22 +02003423 unsigned long uriHash;
3424 struct siphash sip_state;
3425 struct sipkey sip_key;
3426
3427 copy_salt_to_sipkey(parser, &sip_key);
3428 sip24_init(&sip_state, &sip_key);
3429
Fred Drake08317ae2003-10-21 15:38:55 +00003430 ((XML_Char *)s)[-1] = 0; /* clear flag */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003431 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
Victor Stinner93d0cb52017-08-18 23:43:54 +02003432 if (!id || !id->prefix) {
3433 /* This code is walking through the appAtts array, dealing
3434 * with (in this case) a prefixed attribute name. To be in
3435 * the array, the attribute must have already been bound, so
3436 * has to have passed through the hash table lookup once
3437 * already. That implies that an entry for it already
3438 * exists, so the lookup above will return a pointer to
3439 * already allocated memory. There is no opportunity for
3440 * the allocator to fail, so the condition above cannot be
3441 * fulfilled.
3442 *
3443 * Since it is difficult to be certain that the above
3444 * analysis is complete, we retain the test and merely
3445 * remove the code from coverage tests.
3446 */
3447 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3448 }
Fred Drake08317ae2003-10-21 15:38:55 +00003449 b = id->prefix->binding;
3450 if (!b)
3451 return XML_ERROR_UNBOUND_PREFIX;
3452
Fred Drake08317ae2003-10-21 15:38:55 +00003453 for (j = 0; j < b->uriLen; j++) {
3454 const XML_Char c = b->uri[j];
3455 if (!poolAppendChar(&tempPool, c))
3456 return XML_ERROR_NO_MEMORY;
Fred Drake08317ae2003-10-21 15:38:55 +00003457 }
Victor Stinner5ff71322017-06-21 14:39:22 +02003458
3459 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3460
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003461 while (*s++ != XML_T(ASCII_COLON))
Fred Drake08317ae2003-10-21 15:38:55 +00003462 ;
Victor Stinner5ff71322017-06-21 14:39:22 +02003463
3464 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3465
Fred Drake08317ae2003-10-21 15:38:55 +00003466 do { /* copies null terminator */
Fred Drake08317ae2003-10-21 15:38:55 +00003467 if (!poolAppendChar(&tempPool, *s))
3468 return XML_ERROR_NO_MEMORY;
Fred Drake08317ae2003-10-21 15:38:55 +00003469 } while (*s++);
3470
Victor Stinner5ff71322017-06-21 14:39:22 +02003471 uriHash = (unsigned long)sip24_final(&sip_state);
3472
Fred Drake08317ae2003-10-21 15:38:55 +00003473 { /* Check hash table for duplicate of expanded name (uriName).
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003474 Derived from code in lookup(parser, HASH_TABLE *table, ...).
Fred Drake08317ae2003-10-21 15:38:55 +00003475 */
3476 unsigned char step = 0;
3477 unsigned long mask = nsAttsSize - 1;
3478 j = uriHash & mask; /* index into hash table */
3479 while (nsAtts[j].version == version) {
3480 /* for speed we compare stored hash values first */
3481 if (uriHash == nsAtts[j].hash) {
3482 const XML_Char *s1 = poolStart(&tempPool);
3483 const XML_Char *s2 = nsAtts[j].uriName;
3484 /* s1 is null terminated, but not s2 */
3485 for (; *s1 == *s2 && *s1 != 0; s1++, s2++);
3486 if (*s1 == 0)
3487 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3488 }
3489 if (!step)
3490 step = PROBE_STEP(uriHash, mask, nsAttsPower);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003491 j < step ? (j += nsAttsSize - step) : (j -= step);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003492 }
Fred Drake08317ae2003-10-21 15:38:55 +00003493 }
3494
3495 if (ns_triplets) { /* append namespace separator and prefix */
3496 tempPool.ptr[-1] = namespaceSeparator;
3497 s = b->prefix->name;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003498 do {
3499 if (!poolAppendChar(&tempPool, *s))
3500 return XML_ERROR_NO_MEMORY;
3501 } while (*s++);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003502 }
Fred Drake08317ae2003-10-21 15:38:55 +00003503
3504 /* store expanded name in attribute list */
3505 s = poolStart(&tempPool);
3506 poolFinish(&tempPool);
3507 appAtts[i] = s;
3508
3509 /* fill empty slot with new version, uriName and hash value */
3510 nsAtts[j].version = version;
3511 nsAtts[j].hash = uriHash;
3512 nsAtts[j].uriName = s;
3513
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003514 if (!--nPrefixes) {
3515 i += 2;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003516 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003517 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003518 }
Fred Drake08317ae2003-10-21 15:38:55 +00003519 else /* not prefixed */
3520 ((XML_Char *)s)[-1] = 0; /* clear flag */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003521 }
3522 }
Fred Drake08317ae2003-10-21 15:38:55 +00003523 /* clear flags for the remaining attributes */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003524 for (; i < attIndex; i += 2)
3525 ((XML_Char *)(appAtts[i]))[-1] = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003526 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3527 binding->attId->name[-1] = 0;
Fred Drake4faea012003-01-28 06:42:40 +00003528
Fred Drake08317ae2003-10-21 15:38:55 +00003529 if (!ns)
3530 return XML_ERROR_NONE;
3531
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003532 /* expand the element type name */
3533 if (elementType->prefix) {
3534 binding = elementType->prefix->binding;
3535 if (!binding)
Fred Drake08317ae2003-10-21 15:38:55 +00003536 return XML_ERROR_UNBOUND_PREFIX;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003537 localPart = tagNamePtr->str;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003538 while (*localPart++ != XML_T(ASCII_COLON))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003539 ;
3540 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003541 else if (dtd->defaultPrefix.binding) {
3542 binding = dtd->defaultPrefix.binding;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003543 localPart = tagNamePtr->str;
3544 }
3545 else
3546 return XML_ERROR_NONE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003547 prefixLen = 0;
Fred Drake08317ae2003-10-21 15:38:55 +00003548 if (ns_triplets && binding->prefix->name) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003549 for (; binding->prefix->name[prefixLen++];)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003550 ; /* prefixLen includes null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003551 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003552 tagNamePtr->localPart = localPart;
3553 tagNamePtr->uriLen = binding->uriLen;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003554 tagNamePtr->prefix = binding->prefix->name;
3555 tagNamePtr->prefixLen = prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003556 for (i = 0; localPart[i++];)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003557 ; /* i includes null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003558 n = i + binding->uriLen + prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003559 if (n > binding->uriAlloc) {
3560 TAG *p;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003561 uri = (XML_Char *)MALLOC((n + EXPAND_SPARE) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003562 if (!uri)
3563 return XML_ERROR_NO_MEMORY;
3564 binding->uriAlloc = n + EXPAND_SPARE;
3565 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3566 for (p = tagStack; p; p = p->parent)
3567 if (p->name.str == binding->uri)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003568 p->name.str = uri;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003569 FREE(binding->uri);
3570 binding->uri = uri;
3571 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003572 /* if namespaceSeparator != '\0' then uri includes it already */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003573 uri = binding->uri + binding->uriLen;
3574 memcpy(uri, localPart, i * sizeof(XML_Char));
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003575 /* we always have a namespace separator between localPart and prefix */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003576 if (prefixLen) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003577 uri += i - 1;
3578 *uri = namespaceSeparator; /* replace null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003579 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3580 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003581 tagNamePtr->str = binding->uri;
3582 return XML_ERROR_NONE;
3583}
3584
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003585/* addBinding() overwrites the value of prefix->binding without checking.
3586 Therefore one must keep track of the old value outside of addBinding().
3587*/
3588static enum XML_Error
3589addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3590 const XML_Char *uri, BINDING **bindingsPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003591{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003592 static const XML_Char xmlNamespace[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003593 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3594 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3595 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
3596 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, ASCII_SLASH,
3597 ASCII_n, ASCII_a, ASCII_m, ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3598 ASCII_e, '\0'
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003599 };
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003600 static const int xmlLen =
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003601 (int)sizeof(xmlNamespace)/sizeof(XML_Char) - 1;
3602 static const XML_Char xmlnsNamespace[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003603 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3604 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3605 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_2, ASCII_0, ASCII_0,
3606 ASCII_0, ASCII_SLASH, ASCII_x, ASCII_m, ASCII_l, ASCII_n, ASCII_s,
3607 ASCII_SLASH, '\0'
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003608 };
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003609 static const int xmlnsLen =
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003610 (int)sizeof(xmlnsNamespace)/sizeof(XML_Char) - 1;
3611
3612 XML_Bool mustBeXML = XML_FALSE;
3613 XML_Bool isXML = XML_TRUE;
3614 XML_Bool isXMLNS = XML_TRUE;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003615
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003616 BINDING *b;
3617 int len;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003618
Fred Drake31d485c2004-08-03 07:06:22 +00003619 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003620 if (*uri == XML_T('\0') && prefix->name)
Fred Drake31d485c2004-08-03 07:06:22 +00003621 return XML_ERROR_UNDECLARING_PREFIX;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003622
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003623 if (prefix->name
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003624 && prefix->name[0] == XML_T(ASCII_x)
3625 && prefix->name[1] == XML_T(ASCII_m)
3626 && prefix->name[2] == XML_T(ASCII_l)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003627
3628 /* Not allowed to bind xmlns */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003629 if (prefix->name[3] == XML_T(ASCII_n)
3630 && prefix->name[4] == XML_T(ASCII_s)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003631 && prefix->name[5] == XML_T('\0'))
3632 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3633
3634 if (prefix->name[3] == XML_T('\0'))
3635 mustBeXML = XML_TRUE;
3636 }
3637
3638 for (len = 0; uri[len]; len++) {
3639 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3640 isXML = XML_FALSE;
3641
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003642 if (!mustBeXML && isXMLNS
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003643 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3644 isXMLNS = XML_FALSE;
3645 }
3646 isXML = isXML && len == xmlLen;
3647 isXMLNS = isXMLNS && len == xmlnsLen;
3648
3649 if (mustBeXML != isXML)
3650 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3651 : XML_ERROR_RESERVED_NAMESPACE_URI;
3652
3653 if (isXMLNS)
3654 return XML_ERROR_RESERVED_NAMESPACE_URI;
3655
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003656 if (namespaceSeparator)
3657 len++;
3658 if (freeBindingList) {
3659 b = freeBindingList;
3660 if (len > b->uriAlloc) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003661 XML_Char *temp = (XML_Char *)REALLOC(b->uri,
3662 sizeof(XML_Char) * (len + EXPAND_SPARE));
3663 if (temp == NULL)
3664 return XML_ERROR_NO_MEMORY;
3665 b->uri = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003666 b->uriAlloc = len + EXPAND_SPARE;
3667 }
3668 freeBindingList = b->nextTagBinding;
3669 }
3670 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003671 b = (BINDING *)MALLOC(sizeof(BINDING));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003672 if (!b)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003673 return XML_ERROR_NO_MEMORY;
3674 b->uri = (XML_Char *)MALLOC(sizeof(XML_Char) * (len + EXPAND_SPARE));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003675 if (!b->uri) {
3676 FREE(b);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003677 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003678 }
3679 b->uriAlloc = len + EXPAND_SPARE;
3680 }
3681 b->uriLen = len;
3682 memcpy(b->uri, uri, len * sizeof(XML_Char));
3683 if (namespaceSeparator)
3684 b->uri[len - 1] = namespaceSeparator;
3685 b->prefix = prefix;
3686 b->attId = attId;
3687 b->prevPrefixBinding = prefix->binding;
Fred Drake08317ae2003-10-21 15:38:55 +00003688 /* NULL binding when default namespace undeclared */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003689 if (*uri == XML_T('\0') && prefix == &_dtd->defaultPrefix)
3690 prefix->binding = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003691 else
3692 prefix->binding = b;
3693 b->nextTagBinding = *bindingsPtr;
3694 *bindingsPtr = b;
Fred Drake31d485c2004-08-03 07:06:22 +00003695 /* if attId == NULL then we are not starting a namespace scope */
3696 if (attId && startNamespaceDeclHandler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003697 startNamespaceDeclHandler(handlerArg, prefix->name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003698 prefix->binding ? uri : 0);
3699 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003700}
3701
3702/* The idea here is to avoid using stack for each CDATA section when
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003703 the whole file is parsed with one call.
3704*/
3705static enum XML_Error PTRCALL
3706cdataSectionProcessor(XML_Parser parser,
3707 const char *start,
3708 const char *end,
3709 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003710{
Fred Drake31d485c2004-08-03 07:06:22 +00003711 enum XML_Error result = doCdataSection(parser, encoding, &start, end,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003712 endPtr, (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00003713 if (result != XML_ERROR_NONE)
3714 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003715 if (start) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003716 if (parentParser) { /* we are parsing an external entity */
3717 processor = externalEntityContentProcessor;
3718 return externalEntityContentProcessor(parser, start, end, endPtr);
3719 }
3720 else {
3721 processor = contentProcessor;
3722 return contentProcessor(parser, start, end, endPtr);
3723 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003724 }
3725 return result;
3726}
3727
Fred Drake31d485c2004-08-03 07:06:22 +00003728/* startPtr gets set to non-null if the section is closed, and to null if
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003729 the section is not yet closed.
3730*/
3731static enum XML_Error
3732doCdataSection(XML_Parser parser,
3733 const ENCODING *enc,
3734 const char **startPtr,
3735 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00003736 const char **nextPtr,
3737 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003738{
3739 const char *s = *startPtr;
3740 const char **eventPP;
3741 const char **eventEndPP;
3742 if (enc == encoding) {
3743 eventPP = &eventPtr;
3744 *eventPP = s;
3745 eventEndPP = &eventEndPtr;
3746 }
3747 else {
3748 eventPP = &(openInternalEntities->internalEventPtr);
3749 eventEndPP = &(openInternalEntities->internalEventEndPtr);
3750 }
3751 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003752 *startPtr = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00003753
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003754 for (;;) {
3755 const char *next;
3756 int tok = XmlCdataSectionTok(enc, s, end, &next);
3757 *eventEndPP = next;
3758 switch (tok) {
3759 case XML_TOK_CDATA_SECT_CLOSE:
3760 if (endCdataSectionHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003761 endCdataSectionHandler(handlerArg);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003762#if 0
3763 /* see comment under XML_TOK_CDATA_SECT_OPEN */
3764 else if (characterDataHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003765 characterDataHandler(handlerArg, dataBuf, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003766#endif
3767 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003768 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003769 *startPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00003770 *nextPtr = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003771 if (ps_parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00003772 return XML_ERROR_ABORTED;
3773 else
3774 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003775 case XML_TOK_DATA_NEWLINE:
3776 if (characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003777 XML_Char c = 0xA;
3778 characterDataHandler(handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003779 }
3780 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003781 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003782 break;
3783 case XML_TOK_DATA_CHARS:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003784 {
3785 XML_CharacterDataHandler charDataHandler = characterDataHandler;
3786 if (charDataHandler) {
3787 if (MUST_CONVERT(enc, s)) {
3788 for (;;) {
3789 ICHAR *dataPtr = (ICHAR *)dataBuf;
Victor Stinner23ec4b52017-06-15 00:54:36 +02003790 const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003791 *eventEndPP = next;
3792 charDataHandler(handlerArg, dataBuf,
3793 (int)(dataPtr - (ICHAR *)dataBuf));
Victor Stinner23ec4b52017-06-15 00:54:36 +02003794 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003795 break;
3796 *eventPP = s;
3797 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003798 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003799 else
3800 charDataHandler(handlerArg,
3801 (XML_Char *)s,
3802 (int)((XML_Char *)next - (XML_Char *)s));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003803 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003804 else if (defaultHandler)
3805 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003806 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003807 break;
3808 case XML_TOK_INVALID:
3809 *eventPP = next;
3810 return XML_ERROR_INVALID_TOKEN;
3811 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003812 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003813 *nextPtr = s;
3814 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003815 }
3816 return XML_ERROR_PARTIAL_CHAR;
3817 case XML_TOK_PARTIAL:
3818 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00003819 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003820 *nextPtr = s;
3821 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003822 }
3823 return XML_ERROR_UNCLOSED_CDATA_SECTION;
3824 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02003825 /* Every token returned by XmlCdataSectionTok() has its own
3826 * explicit case, so this default case will never be executed.
3827 * We retain it as a safety net and exclude it from the coverage
3828 * statistics.
3829 *
3830 * LCOV_EXCL_START
3831 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003832 *eventPP = next;
3833 return XML_ERROR_UNEXPECTED_STATE;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003834 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003835 }
Fred Drake31d485c2004-08-03 07:06:22 +00003836
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003837 *eventPP = s = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003838 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00003839 case XML_SUSPENDED:
3840 *nextPtr = next;
3841 return XML_ERROR_NONE;
3842 case XML_FINISHED:
3843 return XML_ERROR_ABORTED;
3844 default: ;
3845 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003846 }
3847 /* not reached */
3848}
3849
3850#ifdef XML_DTD
3851
3852/* The idea here is to avoid using stack for each IGNORE section when
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003853 the whole file is parsed with one call.
3854*/
3855static enum XML_Error PTRCALL
3856ignoreSectionProcessor(XML_Parser parser,
3857 const char *start,
3858 const char *end,
3859 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003860{
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003861 enum XML_Error result = doIgnoreSection(parser, encoding, &start, end,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003862 endPtr, (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00003863 if (result != XML_ERROR_NONE)
3864 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003865 if (start) {
3866 processor = prologProcessor;
3867 return prologProcessor(parser, start, end, endPtr);
3868 }
3869 return result;
3870}
3871
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003872/* startPtr gets set to non-null is the section is closed, and to null
3873 if the section is not yet closed.
3874*/
3875static enum XML_Error
3876doIgnoreSection(XML_Parser parser,
3877 const ENCODING *enc,
3878 const char **startPtr,
3879 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00003880 const char **nextPtr,
3881 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003882{
3883 const char *next;
3884 int tok;
3885 const char *s = *startPtr;
3886 const char **eventPP;
3887 const char **eventEndPP;
3888 if (enc == encoding) {
3889 eventPP = &eventPtr;
3890 *eventPP = s;
3891 eventEndPP = &eventEndPtr;
3892 }
3893 else {
Victor Stinner93d0cb52017-08-18 23:43:54 +02003894 /* It's not entirely clear, but it seems the following two lines
3895 * of code cannot be executed. The only occasions on which 'enc'
3896 * is not 'parser->m_encoding' are when this function is called
3897 * from the internal entity processing, and IGNORE sections are an
3898 * error in internal entities.
3899 *
3900 * Since it really isn't clear that this is true, we keep the code
3901 * and just remove it from our coverage tests.
3902 *
3903 * LCOV_EXCL_START
3904 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003905 eventPP = &(openInternalEntities->internalEventPtr);
3906 eventEndPP = &(openInternalEntities->internalEventEndPtr);
Victor Stinner93d0cb52017-08-18 23:43:54 +02003907 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003908 }
3909 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003910 *startPtr = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003911 tok = XmlIgnoreSectionTok(enc, s, end, &next);
3912 *eventEndPP = next;
3913 switch (tok) {
3914 case XML_TOK_IGNORE_SECT:
3915 if (defaultHandler)
3916 reportDefault(parser, enc, s, next);
3917 *startPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00003918 *nextPtr = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003919 if (ps_parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00003920 return XML_ERROR_ABORTED;
3921 else
3922 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003923 case XML_TOK_INVALID:
3924 *eventPP = next;
3925 return XML_ERROR_INVALID_TOKEN;
3926 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003927 if (haveMore) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003928 *nextPtr = s;
3929 return XML_ERROR_NONE;
3930 }
3931 return XML_ERROR_PARTIAL_CHAR;
3932 case XML_TOK_PARTIAL:
3933 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00003934 if (haveMore) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003935 *nextPtr = s;
3936 return XML_ERROR_NONE;
3937 }
3938 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
3939 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02003940 /* All of the tokens that XmlIgnoreSectionTok() returns have
3941 * explicit cases to handle them, so this default case is never
3942 * executed. We keep it as a safety net anyway, and remove it
3943 * from our test coverage statistics.
3944 *
3945 * LCOV_EXCL_START
3946 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003947 *eventPP = next;
3948 return XML_ERROR_UNEXPECTED_STATE;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003949 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003950 }
3951 /* not reached */
3952}
3953
3954#endif /* XML_DTD */
3955
3956static enum XML_Error
3957initializeEncoding(XML_Parser parser)
3958{
3959 const char *s;
3960#ifdef XML_UNICODE
3961 char encodingBuf[128];
Victor Stinner93d0cb52017-08-18 23:43:54 +02003962 /* See comments abount `protoclEncodingName` in parserInit() */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003963 if (!protocolEncodingName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003964 s = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003965 else {
3966 int i;
3967 for (i = 0; protocolEncodingName[i]; i++) {
3968 if (i == sizeof(encodingBuf) - 1
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003969 || (protocolEncodingName[i] & ~0x7f) != 0) {
3970 encodingBuf[0] = '\0';
3971 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003972 }
3973 encodingBuf[i] = (char)protocolEncodingName[i];
3974 }
3975 encodingBuf[i] = '\0';
3976 s = encodingBuf;
3977 }
3978#else
3979 s = protocolEncodingName;
3980#endif
3981 if ((ns ? XmlInitEncodingNS : XmlInitEncoding)(&initEncoding, &encoding, s))
3982 return XML_ERROR_NONE;
3983 return handleUnknownEncoding(parser, protocolEncodingName);
3984}
3985
3986static enum XML_Error
3987processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003988 const char *s, const char *next)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003989{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003990 const char *encodingName = NULL;
3991 const XML_Char *storedEncName = NULL;
3992 const ENCODING *newEncoding = NULL;
3993 const char *version = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003994 const char *versionend;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003995 const XML_Char *storedversion = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003996 int standalone = -1;
3997 if (!(ns
3998 ? XmlParseXmlDeclNS
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003999 : XmlParseXmlDecl)(isGeneralTextEntity,
4000 encoding,
4001 s,
4002 next,
4003 &eventPtr,
4004 &version,
4005 &versionend,
4006 &encodingName,
4007 &newEncoding,
Fred Drake31d485c2004-08-03 07:06:22 +00004008 &standalone)) {
4009 if (isGeneralTextEntity)
4010 return XML_ERROR_TEXT_DECL;
4011 else
4012 return XML_ERROR_XML_DECL;
4013 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004014 if (!isGeneralTextEntity && standalone == 1) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004015 _dtd->standalone = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004016#ifdef XML_DTD
4017 if (paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
4018 paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
4019#endif /* XML_DTD */
4020 }
4021 if (xmlDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004022 if (encodingName != NULL) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004023 storedEncName = poolStoreString(&temp2Pool,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004024 encoding,
4025 encodingName,
4026 encodingName
4027 + XmlNameLength(encoding, encodingName));
4028 if (!storedEncName)
4029 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004030 poolFinish(&temp2Pool);
4031 }
4032 if (version) {
4033 storedversion = poolStoreString(&temp2Pool,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004034 encoding,
4035 version,
4036 versionend - encoding->minBytesPerChar);
4037 if (!storedversion)
4038 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004039 }
4040 xmlDeclHandler(handlerArg, storedversion, storedEncName, standalone);
4041 }
4042 else if (defaultHandler)
4043 reportDefault(parser, encoding, s, next);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004044 if (protocolEncodingName == NULL) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004045 if (newEncoding) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02004046 /* Check that the specified encoding does not conflict with what
4047 * the parser has already deduced. Do we have the same number
4048 * of bytes in the smallest representation of a character? If
4049 * this is UTF-16, is it the same endianness?
4050 */
4051 if (newEncoding->minBytesPerChar != encoding->minBytesPerChar
4052 || (newEncoding->minBytesPerChar == 2 &&
4053 newEncoding != encoding)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004054 eventPtr = encodingName;
4055 return XML_ERROR_INCORRECT_ENCODING;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004056 }
4057 encoding = newEncoding;
4058 }
4059 else if (encodingName) {
4060 enum XML_Error result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004061 if (!storedEncName) {
4062 storedEncName = poolStoreString(
4063 &temp2Pool, encoding, encodingName,
4064 encodingName + XmlNameLength(encoding, encodingName));
4065 if (!storedEncName)
4066 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004067 }
4068 result = handleUnknownEncoding(parser, storedEncName);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004069 poolClear(&temp2Pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004070 if (result == XML_ERROR_UNKNOWN_ENCODING)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004071 eventPtr = encodingName;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004072 return result;
4073 }
4074 }
4075
4076 if (storedEncName || storedversion)
4077 poolClear(&temp2Pool);
4078
4079 return XML_ERROR_NONE;
4080}
4081
4082static enum XML_Error
4083handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
4084{
4085 if (unknownEncodingHandler) {
4086 XML_Encoding info;
4087 int i;
4088 for (i = 0; i < 256; i++)
4089 info.map[i] = -1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004090 info.convert = NULL;
4091 info.data = NULL;
4092 info.release = NULL;
4093 if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName,
4094 &info)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004095 ENCODING *enc;
4096 unknownEncodingMem = MALLOC(XmlSizeOfUnknownEncoding());
4097 if (!unknownEncodingMem) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004098 if (info.release)
4099 info.release(info.data);
4100 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004101 }
4102 enc = (ns
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004103 ? XmlInitUnknownEncodingNS
4104 : XmlInitUnknownEncoding)(unknownEncodingMem,
4105 info.map,
4106 info.convert,
4107 info.data);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004108 if (enc) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004109 unknownEncodingData = info.data;
4110 unknownEncodingRelease = info.release;
4111 encoding = enc;
4112 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004113 }
4114 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004115 if (info.release != NULL)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004116 info.release(info.data);
4117 }
4118 return XML_ERROR_UNKNOWN_ENCODING;
4119}
4120
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004121static enum XML_Error PTRCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004122prologInitProcessor(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004123 const char *s,
4124 const char *end,
4125 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004126{
4127 enum XML_Error result = initializeEncoding(parser);
4128 if (result != XML_ERROR_NONE)
4129 return result;
4130 processor = prologProcessor;
4131 return prologProcessor(parser, s, end, nextPtr);
4132}
4133
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004134#ifdef XML_DTD
4135
4136static enum XML_Error PTRCALL
4137externalParEntInitProcessor(XML_Parser parser,
4138 const char *s,
4139 const char *end,
4140 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004141{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004142 enum XML_Error result = initializeEncoding(parser);
4143 if (result != XML_ERROR_NONE)
4144 return result;
4145
4146 /* we know now that XML_Parse(Buffer) has been called,
4147 so we consider the external parameter entity read */
4148 _dtd->paramEntityRead = XML_TRUE;
4149
4150 if (prologState.inEntityValue) {
4151 processor = entityValueInitProcessor;
4152 return entityValueInitProcessor(parser, s, end, nextPtr);
4153 }
4154 else {
4155 processor = externalParEntProcessor;
4156 return externalParEntProcessor(parser, s, end, nextPtr);
4157 }
4158}
4159
4160static enum XML_Error PTRCALL
4161entityValueInitProcessor(XML_Parser parser,
4162 const char *s,
4163 const char *end,
4164 const char **nextPtr)
4165{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004166 int tok;
Fred Drake31d485c2004-08-03 07:06:22 +00004167 const char *start = s;
4168 const char *next = start;
4169 eventPtr = start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004170
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004171 for (;;) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004172 tok = XmlPrologTok(encoding, start, end, &next);
Fred Drake31d485c2004-08-03 07:06:22 +00004173 eventEndPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004174 if (tok <= 0) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004175 if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
Fred Drake31d485c2004-08-03 07:06:22 +00004176 *nextPtr = s;
4177 return XML_ERROR_NONE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004178 }
4179 switch (tok) {
4180 case XML_TOK_INVALID:
Fred Drake31d485c2004-08-03 07:06:22 +00004181 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004182 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00004183 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004184 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00004185 return XML_ERROR_PARTIAL_CHAR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004186 case XML_TOK_NONE: /* start == end */
4187 default:
4188 break;
4189 }
Fred Drake31d485c2004-08-03 07:06:22 +00004190 /* found end of entity value - can store it now */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004191 return storeEntityValue(parser, encoding, s, end);
4192 }
4193 else if (tok == XML_TOK_XML_DECL) {
Fred Drake31d485c2004-08-03 07:06:22 +00004194 enum XML_Error result;
4195 result = processXmlDecl(parser, 0, start, next);
4196 if (result != XML_ERROR_NONE)
4197 return result;
Victor Stinner93d0cb52017-08-18 23:43:54 +02004198 /* At this point, ps_parsing cannot be XML_SUSPENDED. For that
4199 * to happen, a parameter entity parsing handler must have
4200 * attempted to suspend the parser, which fails and raises an
4201 * error. The parser can be aborted, but can't be suspended.
4202 */
4203 if (ps_parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00004204 return XML_ERROR_ABORTED;
Victor Stinner93d0cb52017-08-18 23:43:54 +02004205 *nextPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004206 /* stop scanning for text declaration - we found one */
4207 processor = entityValueProcessor;
4208 return entityValueProcessor(parser, next, end, nextPtr);
4209 }
4210 /* If we are at the end of the buffer, this would cause XmlPrologTok to
4211 return XML_TOK_NONE on the next call, which would then cause the
4212 function to exit with *nextPtr set to s - that is what we want for other
4213 tokens, but not for the BOM - we would rather like to skip it;
4214 then, when this routine is entered the next time, XmlPrologTok will
4215 return XML_TOK_INVALID, since the BOM is still in the buffer
4216 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004217 else if (tok == XML_TOK_BOM && next == end && !ps_finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004218 *nextPtr = next;
4219 return XML_ERROR_NONE;
4220 }
Victor Stinner5ff71322017-06-21 14:39:22 +02004221 /* If we get this token, we have the start of what might be a
4222 normal tag, but not a declaration (i.e. it doesn't begin with
4223 "<!"). In a DTD context, that isn't legal.
4224 */
4225 else if (tok == XML_TOK_INSTANCE_START) {
4226 *nextPtr = next;
4227 return XML_ERROR_SYNTAX;
4228 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004229 start = next;
Fred Drake31d485c2004-08-03 07:06:22 +00004230 eventPtr = start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004231 }
4232}
4233
4234static enum XML_Error PTRCALL
4235externalParEntProcessor(XML_Parser parser,
4236 const char *s,
4237 const char *end,
4238 const char **nextPtr)
4239{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004240 const char *next = s;
4241 int tok;
4242
Fred Drake31d485c2004-08-03 07:06:22 +00004243 tok = XmlPrologTok(encoding, s, end, &next);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004244 if (tok <= 0) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004245 if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004246 *nextPtr = s;
4247 return XML_ERROR_NONE;
4248 }
4249 switch (tok) {
4250 case XML_TOK_INVALID:
4251 return XML_ERROR_INVALID_TOKEN;
4252 case XML_TOK_PARTIAL:
4253 return XML_ERROR_UNCLOSED_TOKEN;
4254 case XML_TOK_PARTIAL_CHAR:
4255 return XML_ERROR_PARTIAL_CHAR;
4256 case XML_TOK_NONE: /* start == end */
4257 default:
4258 break;
4259 }
4260 }
4261 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4262 However, when parsing an external subset, doProlog will not accept a BOM
4263 as valid, and report a syntax error, so we have to skip the BOM
4264 */
4265 else if (tok == XML_TOK_BOM) {
4266 s = next;
4267 tok = XmlPrologTok(encoding, s, end, &next);
4268 }
4269
4270 processor = prologProcessor;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004271 return doProlog(parser, encoding, s, end, tok, next,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004272 nextPtr, (XML_Bool)!ps_finalBuffer);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004273}
4274
4275static enum XML_Error PTRCALL
4276entityValueProcessor(XML_Parser parser,
4277 const char *s,
4278 const char *end,
4279 const char **nextPtr)
4280{
4281 const char *start = s;
4282 const char *next = s;
4283 const ENCODING *enc = encoding;
4284 int tok;
4285
4286 for (;;) {
4287 tok = XmlPrologTok(enc, start, end, &next);
4288 if (tok <= 0) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004289 if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004290 *nextPtr = s;
4291 return XML_ERROR_NONE;
4292 }
4293 switch (tok) {
4294 case XML_TOK_INVALID:
Fred Drake31d485c2004-08-03 07:06:22 +00004295 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004296 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00004297 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004298 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00004299 return XML_ERROR_PARTIAL_CHAR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004300 case XML_TOK_NONE: /* start == end */
4301 default:
4302 break;
4303 }
Fred Drake31d485c2004-08-03 07:06:22 +00004304 /* found end of entity value - can store it now */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004305 return storeEntityValue(parser, enc, s, end);
4306 }
4307 start = next;
4308 }
4309}
4310
4311#endif /* XML_DTD */
4312
4313static enum XML_Error PTRCALL
4314prologProcessor(XML_Parser parser,
4315 const char *s,
4316 const char *end,
4317 const char **nextPtr)
4318{
4319 const char *next = s;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004320 int tok = XmlPrologTok(encoding, s, end, &next);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004321 return doProlog(parser, encoding, s, end, tok, next,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004322 nextPtr, (XML_Bool)!ps_finalBuffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004323}
4324
4325static enum XML_Error
4326doProlog(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004327 const ENCODING *enc,
4328 const char *s,
4329 const char *end,
4330 int tok,
4331 const char *next,
Fred Drake31d485c2004-08-03 07:06:22 +00004332 const char **nextPtr,
4333 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004334{
4335#ifdef XML_DTD
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004336 static const XML_Char externalSubsetName[] = { ASCII_HASH , '\0' };
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004337#endif /* XML_DTD */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004338 static const XML_Char atypeCDATA[] =
4339 { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
4340 static const XML_Char atypeID[] = { ASCII_I, ASCII_D, '\0' };
4341 static const XML_Char atypeIDREF[] =
4342 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
4343 static const XML_Char atypeIDREFS[] =
4344 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
4345 static const XML_Char atypeENTITY[] =
4346 { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
4347 static const XML_Char atypeENTITIES[] = { ASCII_E, ASCII_N,
4348 ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' };
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004349 static const XML_Char atypeNMTOKEN[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004350 ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
4351 static const XML_Char atypeNMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T,
4352 ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' };
4353 static const XML_Char notationPrefix[] = { ASCII_N, ASCII_O, ASCII_T,
4354 ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0' };
4355 static const XML_Char enumValueSep[] = { ASCII_PIPE, '\0' };
4356 static const XML_Char enumValueStart[] = { ASCII_LPAREN, '\0' };
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004357
Fred Drake31d485c2004-08-03 07:06:22 +00004358 /* save one level of indirection */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004359 DTD * const dtd = _dtd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004360
4361 const char **eventPP;
4362 const char **eventEndPP;
4363 enum XML_Content_Quant quant;
4364
4365 if (enc == encoding) {
4366 eventPP = &eventPtr;
4367 eventEndPP = &eventEndPtr;
4368 }
4369 else {
4370 eventPP = &(openInternalEntities->internalEventPtr);
4371 eventEndPP = &(openInternalEntities->internalEventEndPtr);
4372 }
Fred Drake31d485c2004-08-03 07:06:22 +00004373
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004374 for (;;) {
4375 int role;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004376 XML_Bool handleDefault = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004377 *eventPP = s;
4378 *eventEndPP = next;
4379 if (tok <= 0) {
Fred Drake31d485c2004-08-03 07:06:22 +00004380 if (haveMore && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004381 *nextPtr = s;
4382 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004383 }
4384 switch (tok) {
4385 case XML_TOK_INVALID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004386 *eventPP = next;
4387 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004388 case XML_TOK_PARTIAL:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004389 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004390 case XML_TOK_PARTIAL_CHAR:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004391 return XML_ERROR_PARTIAL_CHAR;
Matthias Klose865e33b2010-01-22 01:13:15 +00004392 case -XML_TOK_PROLOG_S:
4393 tok = -tok;
4394 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004395 case XML_TOK_NONE:
4396#ifdef XML_DTD
Fred Drake31d485c2004-08-03 07:06:22 +00004397 /* for internal PE NOT referenced between declarations */
4398 if (enc != encoding && !openInternalEntities->betweenDecl) {
4399 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004400 return XML_ERROR_NONE;
Fred Drake31d485c2004-08-03 07:06:22 +00004401 }
4402 /* WFC: PE Between Declarations - must check that PE contains
4403 complete markup, not only for external PEs, but also for
4404 internal PEs if the reference occurs between declarations.
4405 */
4406 if (isParamEntity || enc != encoding) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004407 if (XmlTokenRole(&prologState, XML_TOK_NONE, end, end, enc)
4408 == XML_ROLE_ERROR)
Fred Drake31d485c2004-08-03 07:06:22 +00004409 return XML_ERROR_INCOMPLETE_PE;
4410 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004411 return XML_ERROR_NONE;
4412 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004413#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004414 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004415 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004416 tok = -tok;
4417 next = end;
4418 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004419 }
4420 }
4421 role = XmlTokenRole(&prologState, tok, s, next, enc);
4422 switch (role) {
4423 case XML_ROLE_XML_DECL:
4424 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004425 enum XML_Error result = processXmlDecl(parser, 0, s, next);
4426 if (result != XML_ERROR_NONE)
4427 return result;
4428 enc = encoding;
4429 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004430 }
4431 break;
4432 case XML_ROLE_DOCTYPE_NAME:
4433 if (startDoctypeDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004434 doctypeName = poolStoreString(&tempPool, enc, s, next);
4435 if (!doctypeName)
4436 return XML_ERROR_NO_MEMORY;
4437 poolFinish(&tempPool);
4438 doctypePubid = NULL;
4439 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004440 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004441 doctypeSysid = NULL; /* always initialize to NULL */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004442 break;
4443 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4444 if (startDoctypeDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004445 startDoctypeDeclHandler(handlerArg, doctypeName, doctypeSysid,
4446 doctypePubid, 1);
4447 doctypeName = NULL;
4448 poolClear(&tempPool);
4449 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004450 }
4451 break;
4452#ifdef XML_DTD
4453 case XML_ROLE_TEXT_DECL:
4454 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004455 enum XML_Error result = processXmlDecl(parser, 1, s, next);
4456 if (result != XML_ERROR_NONE)
4457 return result;
4458 enc = encoding;
4459 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004460 }
4461 break;
4462#endif /* XML_DTD */
4463 case XML_ROLE_DOCTYPE_PUBLIC_ID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004464#ifdef XML_DTD
4465 useForeignDTD = XML_FALSE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004466 declEntity = (ENTITY *)lookup(parser,
4467 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004468 externalSubsetName,
4469 sizeof(ENTITY));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004470 if (!declEntity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004471 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004472#endif /* XML_DTD */
Fred Drake31d485c2004-08-03 07:06:22 +00004473 dtd->hasParamEntityRefs = XML_TRUE;
4474 if (startDoctypeDeclHandler) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004475 XML_Char *pubId;
Fred Drake31d485c2004-08-03 07:06:22 +00004476 if (!XmlIsPublicId(enc, s, next, eventPP))
4477 return XML_ERROR_PUBLICID;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004478 pubId = poolStoreString(&tempPool, enc,
4479 s + enc->minBytesPerChar,
4480 next - enc->minBytesPerChar);
4481 if (!pubId)
Fred Drake31d485c2004-08-03 07:06:22 +00004482 return XML_ERROR_NO_MEMORY;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004483 normalizePublicId(pubId);
Fred Drake31d485c2004-08-03 07:06:22 +00004484 poolFinish(&tempPool);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004485 doctypePubid = pubId;
Fred Drake31d485c2004-08-03 07:06:22 +00004486 handleDefault = XML_FALSE;
4487 goto alreadyChecked;
4488 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004489 /* fall through */
4490 case XML_ROLE_ENTITY_PUBLIC_ID:
4491 if (!XmlIsPublicId(enc, s, next, eventPP))
Fred Drake31d485c2004-08-03 07:06:22 +00004492 return XML_ERROR_PUBLICID;
4493 alreadyChecked:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004494 if (dtd->keepProcessing && declEntity) {
4495 XML_Char *tem = poolStoreString(&dtd->pool,
4496 enc,
4497 s + enc->minBytesPerChar,
4498 next - enc->minBytesPerChar);
4499 if (!tem)
4500 return XML_ERROR_NO_MEMORY;
4501 normalizePublicId(tem);
4502 declEntity->publicId = tem;
4503 poolFinish(&dtd->pool);
4504 if (entityDeclHandler)
4505 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004506 }
4507 break;
4508 case XML_ROLE_DOCTYPE_CLOSE:
4509 if (doctypeName) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004510 startDoctypeDeclHandler(handlerArg, doctypeName,
4511 doctypeSysid, doctypePubid, 0);
4512 poolClear(&tempPool);
4513 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004514 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004515 /* doctypeSysid will be non-NULL in the case of a previous
4516 XML_ROLE_DOCTYPE_SYSTEM_ID, even if startDoctypeDeclHandler
4517 was not set, indicating an external subset
4518 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004519#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004520 if (doctypeSysid || useForeignDTD) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004521 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4522 dtd->hasParamEntityRefs = XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004523 if (paramEntityParsing && externalEntityRefHandler) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004524 ENTITY *entity = (ENTITY *)lookup(parser,
4525 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004526 externalSubsetName,
4527 sizeof(ENTITY));
Victor Stinner93d0cb52017-08-18 23:43:54 +02004528 if (!entity) {
4529 /* The external subset name "#" will have already been
4530 * inserted into the hash table at the start of the
4531 * external entity parsing, so no allocation will happen
4532 * and lookup() cannot fail.
4533 */
4534 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4535 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004536 if (useForeignDTD)
4537 entity->base = curBase;
4538 dtd->paramEntityRead = XML_FALSE;
4539 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
4540 0,
4541 entity->base,
4542 entity->systemId,
4543 entity->publicId))
4544 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004545 if (dtd->paramEntityRead) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004546 if (!dtd->standalone &&
4547 notStandaloneHandler &&
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004548 !notStandaloneHandler(handlerArg))
4549 return XML_ERROR_NOT_STANDALONE;
4550 }
4551 /* if we didn't read the foreign DTD then this means that there
4552 is no external subset and we must reset dtd->hasParamEntityRefs
4553 */
4554 else if (!doctypeSysid)
4555 dtd->hasParamEntityRefs = hadParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004556 /* end of DTD - no need to update dtd->keepProcessing */
4557 }
4558 useForeignDTD = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004559 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004560#endif /* XML_DTD */
4561 if (endDoctypeDeclHandler) {
4562 endDoctypeDeclHandler(handlerArg);
4563 handleDefault = XML_FALSE;
4564 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004565 break;
4566 case XML_ROLE_INSTANCE_START:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004567#ifdef XML_DTD
4568 /* if there is no DOCTYPE declaration then now is the
4569 last chance to read the foreign DTD
4570 */
4571 if (useForeignDTD) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004572 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004573 dtd->hasParamEntityRefs = XML_TRUE;
4574 if (paramEntityParsing && externalEntityRefHandler) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004575 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004576 externalSubsetName,
4577 sizeof(ENTITY));
4578 if (!entity)
4579 return XML_ERROR_NO_MEMORY;
4580 entity->base = curBase;
4581 dtd->paramEntityRead = XML_FALSE;
4582 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
4583 0,
4584 entity->base,
4585 entity->systemId,
4586 entity->publicId))
4587 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004588 if (dtd->paramEntityRead) {
4589 if (!dtd->standalone &&
4590 notStandaloneHandler &&
4591 !notStandaloneHandler(handlerArg))
4592 return XML_ERROR_NOT_STANDALONE;
4593 }
4594 /* if we didn't read the foreign DTD then this means that there
4595 is no external subset and we must reset dtd->hasParamEntityRefs
4596 */
4597 else
4598 dtd->hasParamEntityRefs = hadParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004599 /* end of DTD - no need to update dtd->keepProcessing */
4600 }
4601 }
4602#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004603 processor = contentProcessor;
4604 return contentProcessor(parser, s, end, nextPtr);
4605 case XML_ROLE_ATTLIST_ELEMENT_NAME:
4606 declElementType = getElementType(parser, enc, s, next);
4607 if (!declElementType)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004608 return XML_ERROR_NO_MEMORY;
4609 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004610 case XML_ROLE_ATTRIBUTE_NAME:
4611 declAttributeId = getAttributeId(parser, enc, s, next);
4612 if (!declAttributeId)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004613 return XML_ERROR_NO_MEMORY;
4614 declAttributeIsCdata = XML_FALSE;
4615 declAttributeType = NULL;
4616 declAttributeIsId = XML_FALSE;
4617 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004618 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004619 declAttributeIsCdata = XML_TRUE;
4620 declAttributeType = atypeCDATA;
4621 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004622 case XML_ROLE_ATTRIBUTE_TYPE_ID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004623 declAttributeIsId = XML_TRUE;
4624 declAttributeType = atypeID;
4625 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004626 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004627 declAttributeType = atypeIDREF;
4628 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004629 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004630 declAttributeType = atypeIDREFS;
4631 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004632 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004633 declAttributeType = atypeENTITY;
4634 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004635 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004636 declAttributeType = atypeENTITIES;
4637 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004638 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004639 declAttributeType = atypeNMTOKEN;
4640 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004641 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004642 declAttributeType = atypeNMTOKENS;
4643 checkAttListDeclHandler:
4644 if (dtd->keepProcessing && attlistDeclHandler)
4645 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004646 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004647 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
4648 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004649 if (dtd->keepProcessing && attlistDeclHandler) {
4650 const XML_Char *prefix;
4651 if (declAttributeType) {
4652 prefix = enumValueSep;
4653 }
4654 else {
4655 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE
4656 ? notationPrefix
4657 : enumValueStart);
4658 }
4659 if (!poolAppendString(&tempPool, prefix))
4660 return XML_ERROR_NO_MEMORY;
4661 if (!poolAppend(&tempPool, enc, s, next))
4662 return XML_ERROR_NO_MEMORY;
4663 declAttributeType = tempPool.start;
4664 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004665 }
4666 break;
4667 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
4668 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004669 if (dtd->keepProcessing) {
4670 if (!defineAttribute(declElementType, declAttributeId,
Fred Drake08317ae2003-10-21 15:38:55 +00004671 declAttributeIsCdata, declAttributeIsId,
4672 0, parser))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004673 return XML_ERROR_NO_MEMORY;
4674 if (attlistDeclHandler && declAttributeType) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004675 if (*declAttributeType == XML_T(ASCII_LPAREN)
4676 || (*declAttributeType == XML_T(ASCII_N)
4677 && declAttributeType[1] == XML_T(ASCII_O))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004678 /* Enumerated or Notation type */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004679 if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004680 || !poolAppendChar(&tempPool, XML_T('\0')))
4681 return XML_ERROR_NO_MEMORY;
4682 declAttributeType = tempPool.start;
4683 poolFinish(&tempPool);
4684 }
4685 *eventEndPP = s;
4686 attlistDeclHandler(handlerArg, declElementType->name,
4687 declAttributeId->name, declAttributeType,
4688 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
4689 poolClear(&tempPool);
4690 handleDefault = XML_FALSE;
4691 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004692 }
4693 break;
4694 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
4695 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004696 if (dtd->keepProcessing) {
4697 const XML_Char *attVal;
Fred Drake08317ae2003-10-21 15:38:55 +00004698 enum XML_Error result =
4699 storeAttributeValue(parser, enc, declAttributeIsCdata,
4700 s + enc->minBytesPerChar,
4701 next - enc->minBytesPerChar,
4702 &dtd->pool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004703 if (result)
4704 return result;
4705 attVal = poolStart(&dtd->pool);
4706 poolFinish(&dtd->pool);
4707 /* ID attributes aren't allowed to have a default */
4708 if (!defineAttribute(declElementType, declAttributeId,
4709 declAttributeIsCdata, XML_FALSE, attVal, parser))
4710 return XML_ERROR_NO_MEMORY;
4711 if (attlistDeclHandler && declAttributeType) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004712 if (*declAttributeType == XML_T(ASCII_LPAREN)
4713 || (*declAttributeType == XML_T(ASCII_N)
4714 && declAttributeType[1] == XML_T(ASCII_O))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004715 /* Enumerated or Notation type */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004716 if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004717 || !poolAppendChar(&tempPool, XML_T('\0')))
4718 return XML_ERROR_NO_MEMORY;
4719 declAttributeType = tempPool.start;
4720 poolFinish(&tempPool);
4721 }
4722 *eventEndPP = s;
4723 attlistDeclHandler(handlerArg, declElementType->name,
4724 declAttributeId->name, declAttributeType,
4725 attVal,
4726 role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
4727 poolClear(&tempPool);
4728 handleDefault = XML_FALSE;
4729 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004730 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004731 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004732 case XML_ROLE_ENTITY_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004733 if (dtd->keepProcessing) {
4734 enum XML_Error result = storeEntityValue(parser, enc,
4735 s + enc->minBytesPerChar,
4736 next - enc->minBytesPerChar);
4737 if (declEntity) {
4738 declEntity->textPtr = poolStart(&dtd->entityValuePool);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004739 declEntity->textLen = (int)(poolLength(&dtd->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004740 poolFinish(&dtd->entityValuePool);
4741 if (entityDeclHandler) {
4742 *eventEndPP = s;
4743 entityDeclHandler(handlerArg,
4744 declEntity->name,
4745 declEntity->is_param,
4746 declEntity->textPtr,
4747 declEntity->textLen,
4748 curBase, 0, 0, 0);
4749 handleDefault = XML_FALSE;
4750 }
4751 }
4752 else
4753 poolDiscard(&dtd->entityValuePool);
4754 if (result != XML_ERROR_NONE)
4755 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004756 }
4757 break;
4758 case XML_ROLE_DOCTYPE_SYSTEM_ID:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004759#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004760 useForeignDTD = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004761#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004762 dtd->hasParamEntityRefs = XML_TRUE;
4763 if (startDoctypeDeclHandler) {
4764 doctypeSysid = poolStoreString(&tempPool, enc,
4765 s + enc->minBytesPerChar,
4766 next - enc->minBytesPerChar);
4767 if (doctypeSysid == NULL)
4768 return XML_ERROR_NO_MEMORY;
4769 poolFinish(&tempPool);
4770 handleDefault = XML_FALSE;
4771 }
4772#ifdef XML_DTD
4773 else
4774 /* use externalSubsetName to make doctypeSysid non-NULL
4775 for the case where no startDoctypeDeclHandler is set */
4776 doctypeSysid = externalSubsetName;
4777#endif /* XML_DTD */
4778 if (!dtd->standalone
4779#ifdef XML_DTD
4780 && !paramEntityParsing
4781#endif /* XML_DTD */
4782 && notStandaloneHandler
4783 && !notStandaloneHandler(handlerArg))
4784 return XML_ERROR_NOT_STANDALONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004785#ifndef XML_DTD
4786 break;
4787#else /* XML_DTD */
4788 if (!declEntity) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004789 declEntity = (ENTITY *)lookup(parser,
4790 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004791 externalSubsetName,
4792 sizeof(ENTITY));
4793 if (!declEntity)
4794 return XML_ERROR_NO_MEMORY;
4795 declEntity->publicId = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004796 }
4797 /* fall through */
4798#endif /* XML_DTD */
4799 case XML_ROLE_ENTITY_SYSTEM_ID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004800 if (dtd->keepProcessing && declEntity) {
4801 declEntity->systemId = poolStoreString(&dtd->pool, enc,
4802 s + enc->minBytesPerChar,
4803 next - enc->minBytesPerChar);
4804 if (!declEntity->systemId)
4805 return XML_ERROR_NO_MEMORY;
4806 declEntity->base = curBase;
4807 poolFinish(&dtd->pool);
4808 if (entityDeclHandler)
4809 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004810 }
4811 break;
4812 case XML_ROLE_ENTITY_COMPLETE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004813 if (dtd->keepProcessing && declEntity && entityDeclHandler) {
4814 *eventEndPP = s;
4815 entityDeclHandler(handlerArg,
4816 declEntity->name,
4817 declEntity->is_param,
4818 0,0,
4819 declEntity->base,
4820 declEntity->systemId,
4821 declEntity->publicId,
4822 0);
4823 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004824 }
4825 break;
4826 case XML_ROLE_ENTITY_NOTATION_NAME:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004827 if (dtd->keepProcessing && declEntity) {
4828 declEntity->notation = poolStoreString(&dtd->pool, enc, s, next);
4829 if (!declEntity->notation)
4830 return XML_ERROR_NO_MEMORY;
4831 poolFinish(&dtd->pool);
4832 if (unparsedEntityDeclHandler) {
4833 *eventEndPP = s;
4834 unparsedEntityDeclHandler(handlerArg,
4835 declEntity->name,
4836 declEntity->base,
4837 declEntity->systemId,
4838 declEntity->publicId,
4839 declEntity->notation);
4840 handleDefault = XML_FALSE;
4841 }
4842 else if (entityDeclHandler) {
4843 *eventEndPP = s;
4844 entityDeclHandler(handlerArg,
4845 declEntity->name,
4846 0,0,0,
4847 declEntity->base,
4848 declEntity->systemId,
4849 declEntity->publicId,
4850 declEntity->notation);
4851 handleDefault = XML_FALSE;
4852 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004853 }
4854 break;
4855 case XML_ROLE_GENERAL_ENTITY_NAME:
4856 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004857 if (XmlPredefinedEntityName(enc, s, next)) {
4858 declEntity = NULL;
4859 break;
4860 }
4861 if (dtd->keepProcessing) {
4862 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4863 if (!name)
4864 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004865 declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004866 sizeof(ENTITY));
4867 if (!declEntity)
4868 return XML_ERROR_NO_MEMORY;
4869 if (declEntity->name != name) {
4870 poolDiscard(&dtd->pool);
4871 declEntity = NULL;
4872 }
4873 else {
4874 poolFinish(&dtd->pool);
4875 declEntity->publicId = NULL;
4876 declEntity->is_param = XML_FALSE;
4877 /* if we have a parent parser or are reading an internal parameter
4878 entity, then the entity declaration is not considered "internal"
4879 */
4880 declEntity->is_internal = !(parentParser || openInternalEntities);
4881 if (entityDeclHandler)
4882 handleDefault = XML_FALSE;
4883 }
4884 }
4885 else {
4886 poolDiscard(&dtd->pool);
4887 declEntity = NULL;
4888 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004889 }
4890 break;
4891 case XML_ROLE_PARAM_ENTITY_NAME:
4892#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004893 if (dtd->keepProcessing) {
4894 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4895 if (!name)
4896 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004897 declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004898 name, sizeof(ENTITY));
4899 if (!declEntity)
4900 return XML_ERROR_NO_MEMORY;
4901 if (declEntity->name != name) {
4902 poolDiscard(&dtd->pool);
4903 declEntity = NULL;
4904 }
4905 else {
4906 poolFinish(&dtd->pool);
4907 declEntity->publicId = NULL;
4908 declEntity->is_param = XML_TRUE;
4909 /* if we have a parent parser or are reading an internal parameter
4910 entity, then the entity declaration is not considered "internal"
4911 */
4912 declEntity->is_internal = !(parentParser || openInternalEntities);
4913 if (entityDeclHandler)
4914 handleDefault = XML_FALSE;
4915 }
4916 }
4917 else {
4918 poolDiscard(&dtd->pool);
4919 declEntity = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004920 }
4921#else /* not XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004922 declEntity = NULL;
4923#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004924 break;
4925 case XML_ROLE_NOTATION_NAME:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004926 declNotationPublicId = NULL;
4927 declNotationName = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004928 if (notationDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004929 declNotationName = poolStoreString(&tempPool, enc, s, next);
4930 if (!declNotationName)
4931 return XML_ERROR_NO_MEMORY;
4932 poolFinish(&tempPool);
4933 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004934 }
4935 break;
4936 case XML_ROLE_NOTATION_PUBLIC_ID:
4937 if (!XmlIsPublicId(enc, s, next, eventPP))
Fred Drake31d485c2004-08-03 07:06:22 +00004938 return XML_ERROR_PUBLICID;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004939 if (declNotationName) { /* means notationDeclHandler != NULL */
4940 XML_Char *tem = poolStoreString(&tempPool,
4941 enc,
4942 s + enc->minBytesPerChar,
4943 next - enc->minBytesPerChar);
4944 if (!tem)
4945 return XML_ERROR_NO_MEMORY;
4946 normalizePublicId(tem);
4947 declNotationPublicId = tem;
4948 poolFinish(&tempPool);
4949 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004950 }
4951 break;
4952 case XML_ROLE_NOTATION_SYSTEM_ID:
4953 if (declNotationName && notationDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004954 const XML_Char *systemId
4955 = poolStoreString(&tempPool, enc,
4956 s + enc->minBytesPerChar,
4957 next - enc->minBytesPerChar);
4958 if (!systemId)
4959 return XML_ERROR_NO_MEMORY;
4960 *eventEndPP = s;
4961 notationDeclHandler(handlerArg,
4962 declNotationName,
4963 curBase,
4964 systemId,
4965 declNotationPublicId);
4966 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004967 }
4968 poolClear(&tempPool);
4969 break;
4970 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
4971 if (declNotationPublicId && notationDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004972 *eventEndPP = s;
4973 notationDeclHandler(handlerArg,
4974 declNotationName,
4975 curBase,
4976 0,
4977 declNotationPublicId);
4978 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004979 }
4980 poolClear(&tempPool);
4981 break;
4982 case XML_ROLE_ERROR:
4983 switch (tok) {
4984 case XML_TOK_PARAM_ENTITY_REF:
Fred Drake31d485c2004-08-03 07:06:22 +00004985 /* PE references in internal subset are
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004986 not allowed within declarations. */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004987 return XML_ERROR_PARAM_ENTITY_REF;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004988 case XML_TOK_XML_DECL:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004989 return XML_ERROR_MISPLACED_XML_PI;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004990 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004991 return XML_ERROR_SYNTAX;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004992 }
4993#ifdef XML_DTD
4994 case XML_ROLE_IGNORE_SECT:
4995 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004996 enum XML_Error result;
4997 if (defaultHandler)
4998 reportDefault(parser, enc, s, next);
4999 handleDefault = XML_FALSE;
Fred Drake31d485c2004-08-03 07:06:22 +00005000 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5001 if (result != XML_ERROR_NONE)
5002 return result;
5003 else if (!next) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005004 processor = ignoreSectionProcessor;
5005 return result;
5006 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005007 }
5008 break;
5009#endif /* XML_DTD */
5010 case XML_ROLE_GROUP_OPEN:
5011 if (prologState.level >= groupSize) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005012 if (groupSize) {
5013 char *temp = (char *)REALLOC(groupConnector, groupSize *= 2);
Victor Stinner93d0cb52017-08-18 23:43:54 +02005014 if (temp == NULL) {
5015 groupSize /= 2;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005016 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005017 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005018 groupConnector = temp;
5019 if (dtd->scaffIndex) {
5020 int *temp = (int *)REALLOC(dtd->scaffIndex,
5021 groupSize * sizeof(int));
5022 if (temp == NULL)
5023 return XML_ERROR_NO_MEMORY;
5024 dtd->scaffIndex = temp;
5025 }
5026 }
5027 else {
5028 groupConnector = (char *)MALLOC(groupSize = 32);
Victor Stinner93d0cb52017-08-18 23:43:54 +02005029 if (!groupConnector) {
5030 groupSize = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005031 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005032 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005033 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005034 }
5035 groupConnector[prologState.level] = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005036 if (dtd->in_eldecl) {
5037 int myindex = nextScaffoldPart(parser);
5038 if (myindex < 0)
5039 return XML_ERROR_NO_MEMORY;
5040 dtd->scaffIndex[dtd->scaffLevel] = myindex;
5041 dtd->scaffLevel++;
5042 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5043 if (elementDeclHandler)
5044 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005045 }
5046 break;
5047 case XML_ROLE_GROUP_SEQUENCE:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005048 if (groupConnector[prologState.level] == ASCII_PIPE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005049 return XML_ERROR_SYNTAX;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005050 groupConnector[prologState.level] = ASCII_COMMA;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005051 if (dtd->in_eldecl && elementDeclHandler)
5052 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005053 break;
5054 case XML_ROLE_GROUP_CHOICE:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005055 if (groupConnector[prologState.level] == ASCII_COMMA)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005056 return XML_ERROR_SYNTAX;
5057 if (dtd->in_eldecl
5058 && !groupConnector[prologState.level]
5059 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5060 != XML_CTYPE_MIXED)
5061 ) {
5062 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5063 = XML_CTYPE_CHOICE;
5064 if (elementDeclHandler)
5065 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005066 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005067 groupConnector[prologState.level] = ASCII_PIPE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005068 break;
5069 case XML_ROLE_PARAM_ENTITY_REF:
5070#ifdef XML_DTD
5071 case XML_ROLE_INNER_PARAM_ENTITY_REF:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005072 dtd->hasParamEntityRefs = XML_TRUE;
5073 if (!paramEntityParsing)
5074 dtd->keepProcessing = dtd->standalone;
5075 else {
5076 const XML_Char *name;
5077 ENTITY *entity;
5078 name = poolStoreString(&dtd->pool, enc,
5079 s + enc->minBytesPerChar,
5080 next - enc->minBytesPerChar);
5081 if (!name)
5082 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005083 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005084 poolDiscard(&dtd->pool);
5085 /* first, determine if a check for an existing declaration is needed;
5086 if yes, check that the entity exists, and that it is internal,
5087 otherwise call the skipped entity handler
5088 */
5089 if (prologState.documentEntity &&
5090 (dtd->standalone
5091 ? !openInternalEntities
5092 : !dtd->hasParamEntityRefs)) {
5093 if (!entity)
5094 return XML_ERROR_UNDEFINED_ENTITY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005095 else if (!entity->is_internal) {
5096 /* It's hard to exhaustively search the code to be sure,
5097 * but there doesn't seem to be a way of executing the
5098 * following line. There are two cases:
5099 *
5100 * If 'standalone' is false, the DTD must have no
5101 * parameter entities or we wouldn't have passed the outer
5102 * 'if' statement. That means the only entity in the hash
5103 * table is the external subset name "#" which cannot be
5104 * given as a parameter entity name in XML syntax, so the
5105 * lookup must have returned NULL and we don't even reach
5106 * the test for an internal entity.
5107 *
5108 * If 'standalone' is true, it does not seem to be
5109 * possible to create entities taking this code path that
5110 * are not internal entities, so fail the test above.
5111 *
5112 * Because this analysis is very uncertain, the code is
5113 * being left in place and merely removed from the
5114 * coverage test statistics.
5115 */
5116 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5117 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005118 }
5119 else if (!entity) {
5120 dtd->keepProcessing = dtd->standalone;
5121 /* cannot report skipped entities in declarations */
5122 if ((role == XML_ROLE_PARAM_ENTITY_REF) && skippedEntityHandler) {
5123 skippedEntityHandler(handlerArg, name, 1);
5124 handleDefault = XML_FALSE;
5125 }
5126 break;
5127 }
5128 if (entity->open)
5129 return XML_ERROR_RECURSIVE_ENTITY_REF;
5130 if (entity->textPtr) {
5131 enum XML_Error result;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005132 XML_Bool betweenDecl =
Fred Drake31d485c2004-08-03 07:06:22 +00005133 (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5134 result = processInternalEntity(parser, entity, betweenDecl);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005135 if (result != XML_ERROR_NONE)
5136 return result;
5137 handleDefault = XML_FALSE;
5138 break;
5139 }
5140 if (externalEntityRefHandler) {
5141 dtd->paramEntityRead = XML_FALSE;
5142 entity->open = XML_TRUE;
5143 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
5144 0,
5145 entity->base,
5146 entity->systemId,
5147 entity->publicId)) {
5148 entity->open = XML_FALSE;
5149 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5150 }
5151 entity->open = XML_FALSE;
5152 handleDefault = XML_FALSE;
5153 if (!dtd->paramEntityRead) {
5154 dtd->keepProcessing = dtd->standalone;
5155 break;
5156 }
5157 }
5158 else {
5159 dtd->keepProcessing = dtd->standalone;
5160 break;
5161 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005162 }
5163#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005164 if (!dtd->standalone &&
5165 notStandaloneHandler &&
5166 !notStandaloneHandler(handlerArg))
5167 return XML_ERROR_NOT_STANDALONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005168 break;
5169
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005170 /* Element declaration stuff */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005171
5172 case XML_ROLE_ELEMENT_NAME:
5173 if (elementDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005174 declElementType = getElementType(parser, enc, s, next);
5175 if (!declElementType)
5176 return XML_ERROR_NO_MEMORY;
5177 dtd->scaffLevel = 0;
5178 dtd->scaffCount = 0;
5179 dtd->in_eldecl = XML_TRUE;
5180 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005181 }
5182 break;
5183
5184 case XML_ROLE_CONTENT_ANY:
5185 case XML_ROLE_CONTENT_EMPTY:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005186 if (dtd->in_eldecl) {
5187 if (elementDeclHandler) {
5188 XML_Content * content = (XML_Content *) MALLOC(sizeof(XML_Content));
5189 if (!content)
5190 return XML_ERROR_NO_MEMORY;
5191 content->quant = XML_CQUANT_NONE;
5192 content->name = NULL;
5193 content->numchildren = 0;
5194 content->children = NULL;
5195 content->type = ((role == XML_ROLE_CONTENT_ANY) ?
5196 XML_CTYPE_ANY :
5197 XML_CTYPE_EMPTY);
5198 *eventEndPP = s;
5199 elementDeclHandler(handlerArg, declElementType->name, content);
5200 handleDefault = XML_FALSE;
5201 }
5202 dtd->in_eldecl = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005203 }
5204 break;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005205
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005206 case XML_ROLE_CONTENT_PCDATA:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005207 if (dtd->in_eldecl) {
5208 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5209 = XML_CTYPE_MIXED;
5210 if (elementDeclHandler)
5211 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005212 }
5213 break;
5214
5215 case XML_ROLE_CONTENT_ELEMENT:
5216 quant = XML_CQUANT_NONE;
5217 goto elementContent;
5218 case XML_ROLE_CONTENT_ELEMENT_OPT:
5219 quant = XML_CQUANT_OPT;
5220 goto elementContent;
5221 case XML_ROLE_CONTENT_ELEMENT_REP:
5222 quant = XML_CQUANT_REP;
5223 goto elementContent;
5224 case XML_ROLE_CONTENT_ELEMENT_PLUS:
5225 quant = XML_CQUANT_PLUS;
5226 elementContent:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005227 if (dtd->in_eldecl) {
5228 ELEMENT_TYPE *el;
5229 const XML_Char *name;
5230 int nameLen;
5231 const char *nxt = (quant == XML_CQUANT_NONE
5232 ? next
5233 : next - enc->minBytesPerChar);
5234 int myindex = nextScaffoldPart(parser);
5235 if (myindex < 0)
5236 return XML_ERROR_NO_MEMORY;
5237 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5238 dtd->scaffold[myindex].quant = quant;
5239 el = getElementType(parser, enc, s, nxt);
5240 if (!el)
5241 return XML_ERROR_NO_MEMORY;
5242 name = el->name;
5243 dtd->scaffold[myindex].name = name;
5244 nameLen = 0;
5245 for (; name[nameLen++]; );
5246 dtd->contentStringLen += nameLen;
5247 if (elementDeclHandler)
5248 handleDefault = XML_FALSE;
5249 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005250 break;
5251
5252 case XML_ROLE_GROUP_CLOSE:
5253 quant = XML_CQUANT_NONE;
5254 goto closeGroup;
5255 case XML_ROLE_GROUP_CLOSE_OPT:
5256 quant = XML_CQUANT_OPT;
5257 goto closeGroup;
5258 case XML_ROLE_GROUP_CLOSE_REP:
5259 quant = XML_CQUANT_REP;
5260 goto closeGroup;
5261 case XML_ROLE_GROUP_CLOSE_PLUS:
5262 quant = XML_CQUANT_PLUS;
5263 closeGroup:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005264 if (dtd->in_eldecl) {
5265 if (elementDeclHandler)
5266 handleDefault = XML_FALSE;
5267 dtd->scaffLevel--;
5268 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5269 if (dtd->scaffLevel == 0) {
5270 if (!handleDefault) {
5271 XML_Content *model = build_model(parser);
5272 if (!model)
5273 return XML_ERROR_NO_MEMORY;
5274 *eventEndPP = s;
5275 elementDeclHandler(handlerArg, declElementType->name, model);
5276 }
5277 dtd->in_eldecl = XML_FALSE;
5278 dtd->contentStringLen = 0;
5279 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005280 }
5281 break;
5282 /* End element declaration stuff */
5283
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005284 case XML_ROLE_PI:
5285 if (!reportProcessingInstruction(parser, enc, s, next))
5286 return XML_ERROR_NO_MEMORY;
5287 handleDefault = XML_FALSE;
5288 break;
5289 case XML_ROLE_COMMENT:
5290 if (!reportComment(parser, enc, s, next))
5291 return XML_ERROR_NO_MEMORY;
5292 handleDefault = XML_FALSE;
5293 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005294 case XML_ROLE_NONE:
5295 switch (tok) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005296 case XML_TOK_BOM:
5297 handleDefault = XML_FALSE;
5298 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005299 }
5300 break;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005301 case XML_ROLE_DOCTYPE_NONE:
5302 if (startDoctypeDeclHandler)
5303 handleDefault = XML_FALSE;
5304 break;
5305 case XML_ROLE_ENTITY_NONE:
5306 if (dtd->keepProcessing && entityDeclHandler)
5307 handleDefault = XML_FALSE;
5308 break;
5309 case XML_ROLE_NOTATION_NONE:
5310 if (notationDeclHandler)
5311 handleDefault = XML_FALSE;
5312 break;
5313 case XML_ROLE_ATTLIST_NONE:
5314 if (dtd->keepProcessing && attlistDeclHandler)
5315 handleDefault = XML_FALSE;
5316 break;
5317 case XML_ROLE_ELEMENT_NONE:
5318 if (elementDeclHandler)
5319 handleDefault = XML_FALSE;
5320 break;
5321 } /* end of big switch */
5322
5323 if (handleDefault && defaultHandler)
5324 reportDefault(parser, enc, s, next);
5325
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005326 switch (ps_parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005327 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00005328 *nextPtr = next;
5329 return XML_ERROR_NONE;
5330 case XML_FINISHED:
5331 return XML_ERROR_ABORTED;
5332 default:
5333 s = next;
5334 tok = XmlPrologTok(enc, s, end, &next);
5335 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005336 }
5337 /* not reached */
5338}
5339
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005340static enum XML_Error PTRCALL
5341epilogProcessor(XML_Parser parser,
5342 const char *s,
5343 const char *end,
5344 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005345{
5346 processor = epilogProcessor;
5347 eventPtr = s;
5348 for (;;) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005349 const char *next = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005350 int tok = XmlPrologTok(encoding, s, end, &next);
5351 eventEndPtr = next;
5352 switch (tok) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005353 /* report partial linebreak - it might be the last token */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005354 case -XML_TOK_PROLOG_S:
5355 if (defaultHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005356 reportDefault(parser, encoding, s, next);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005357 if (ps_parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00005358 return XML_ERROR_ABORTED;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005359 }
Fred Drake31d485c2004-08-03 07:06:22 +00005360 *nextPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005361 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005362 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00005363 *nextPtr = s;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005364 return XML_ERROR_NONE;
5365 case XML_TOK_PROLOG_S:
5366 if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005367 reportDefault(parser, encoding, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005368 break;
5369 case XML_TOK_PI:
5370 if (!reportProcessingInstruction(parser, encoding, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005371 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005372 break;
5373 case XML_TOK_COMMENT:
5374 if (!reportComment(parser, encoding, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005375 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005376 break;
5377 case XML_TOK_INVALID:
5378 eventPtr = next;
5379 return XML_ERROR_INVALID_TOKEN;
5380 case XML_TOK_PARTIAL:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005381 if (!ps_finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005382 *nextPtr = s;
5383 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005384 }
5385 return XML_ERROR_UNCLOSED_TOKEN;
5386 case XML_TOK_PARTIAL_CHAR:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005387 if (!ps_finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005388 *nextPtr = s;
5389 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005390 }
5391 return XML_ERROR_PARTIAL_CHAR;
5392 default:
5393 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5394 }
5395 eventPtr = s = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005396 switch (ps_parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005397 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00005398 *nextPtr = next;
5399 return XML_ERROR_NONE;
5400 case XML_FINISHED:
5401 return XML_ERROR_ABORTED;
5402 default: ;
5403 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005404 }
5405}
5406
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005407static enum XML_Error
Fred Drake31d485c2004-08-03 07:06:22 +00005408processInternalEntity(XML_Parser parser, ENTITY *entity,
5409 XML_Bool betweenDecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005410{
Fred Drake31d485c2004-08-03 07:06:22 +00005411 const char *textStart, *textEnd;
5412 const char *next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005413 enum XML_Error result;
Fred Drake31d485c2004-08-03 07:06:22 +00005414 OPEN_INTERNAL_ENTITY *openEntity;
5415
5416 if (freeInternalEntities) {
5417 openEntity = freeInternalEntities;
5418 freeInternalEntities = openEntity->next;
5419 }
5420 else {
5421 openEntity = (OPEN_INTERNAL_ENTITY *)MALLOC(sizeof(OPEN_INTERNAL_ENTITY));
5422 if (!openEntity)
5423 return XML_ERROR_NO_MEMORY;
5424 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005425 entity->open = XML_TRUE;
Fred Drake31d485c2004-08-03 07:06:22 +00005426 entity->processed = 0;
5427 openEntity->next = openInternalEntities;
5428 openInternalEntities = openEntity;
5429 openEntity->entity = entity;
5430 openEntity->startTagLevel = tagLevel;
5431 openEntity->betweenDecl = betweenDecl;
5432 openEntity->internalEventPtr = NULL;
5433 openEntity->internalEventEndPtr = NULL;
5434 textStart = (char *)entity->textPtr;
5435 textEnd = (char *)(entity->textPtr + entity->textLen);
Victor Stinner5ff71322017-06-21 14:39:22 +02005436 /* Set a safe default value in case 'next' does not get set */
5437 next = textStart;
Fred Drake31d485c2004-08-03 07:06:22 +00005438
5439#ifdef XML_DTD
5440 if (entity->is_param) {
5441 int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005442 result = doProlog(parser, internalEncoding, textStart, textEnd, tok,
Fred Drake31d485c2004-08-03 07:06:22 +00005443 next, &next, XML_FALSE);
5444 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005445 else
Fred Drake31d485c2004-08-03 07:06:22 +00005446#endif /* XML_DTD */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005447 result = doContent(parser, tagLevel, internalEncoding, textStart,
Fred Drake31d485c2004-08-03 07:06:22 +00005448 textEnd, &next, XML_FALSE);
5449
5450 if (result == XML_ERROR_NONE) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005451 if (textEnd != next && ps_parsing == XML_SUSPENDED) {
5452 entity->processed = (int)(next - textStart);
Fred Drake31d485c2004-08-03 07:06:22 +00005453 processor = internalEntityProcessor;
5454 }
5455 else {
5456 entity->open = XML_FALSE;
5457 openInternalEntities = openEntity->next;
5458 /* put openEntity back in list of free instances */
5459 openEntity->next = freeInternalEntities;
5460 freeInternalEntities = openEntity;
5461 }
5462 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005463 return result;
5464}
5465
Fred Drake31d485c2004-08-03 07:06:22 +00005466static enum XML_Error PTRCALL
5467internalEntityProcessor(XML_Parser parser,
5468 const char *s,
5469 const char *end,
5470 const char **nextPtr)
5471{
5472 ENTITY *entity;
5473 const char *textStart, *textEnd;
5474 const char *next;
5475 enum XML_Error result;
5476 OPEN_INTERNAL_ENTITY *openEntity = openInternalEntities;
5477 if (!openEntity)
5478 return XML_ERROR_UNEXPECTED_STATE;
5479
5480 entity = openEntity->entity;
5481 textStart = ((char *)entity->textPtr) + entity->processed;
5482 textEnd = (char *)(entity->textPtr + entity->textLen);
Victor Stinner5ff71322017-06-21 14:39:22 +02005483 /* Set a safe default value in case 'next' does not get set */
5484 next = textStart;
Fred Drake31d485c2004-08-03 07:06:22 +00005485
5486#ifdef XML_DTD
5487 if (entity->is_param) {
5488 int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005489 result = doProlog(parser, internalEncoding, textStart, textEnd, tok,
Fred Drake31d485c2004-08-03 07:06:22 +00005490 next, &next, XML_FALSE);
5491 }
5492 else
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005493#endif /* XML_DTD */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005494 result = doContent(parser, openEntity->startTagLevel, internalEncoding,
5495 textStart, textEnd, &next, XML_FALSE);
Fred Drake31d485c2004-08-03 07:06:22 +00005496
5497 if (result != XML_ERROR_NONE)
5498 return result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005499 else if (textEnd != next && ps_parsing == XML_SUSPENDED) {
5500 entity->processed = (int)(next - (char *)entity->textPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00005501 return result;
5502 }
5503 else {
5504 entity->open = XML_FALSE;
5505 openInternalEntities = openEntity->next;
5506 /* put openEntity back in list of free instances */
5507 openEntity->next = freeInternalEntities;
5508 freeInternalEntities = openEntity;
5509 }
5510
5511#ifdef XML_DTD
5512 if (entity->is_param) {
5513 int tok;
5514 processor = prologProcessor;
5515 tok = XmlPrologTok(encoding, s, end, &next);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005516 return doProlog(parser, encoding, s, end, tok, next, nextPtr,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005517 (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00005518 }
5519 else
5520#endif /* XML_DTD */
5521 {
5522 processor = contentProcessor;
5523 /* see externalEntityContentProcessor vs contentProcessor */
5524 return doContent(parser, parentParser ? 1 : 0, encoding, s, end,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005525 nextPtr, (XML_Bool)!ps_finalBuffer);
5526 }
Fred Drake31d485c2004-08-03 07:06:22 +00005527}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005528
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005529static enum XML_Error PTRCALL
5530errorProcessor(XML_Parser parser,
Victor Stinner23ec4b52017-06-15 00:54:36 +02005531 const char *UNUSED_P(s),
5532 const char *UNUSED_P(end),
5533 const char **UNUSED_P(nextPtr))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005534{
5535 return errorCode;
5536}
5537
5538static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005539storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5540 const char *ptr, const char *end,
5541 STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005542{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005543 enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr,
5544 end, pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005545 if (result)
5546 return result;
5547 if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5548 poolChop(pool);
5549 if (!poolAppendChar(pool, XML_T('\0')))
5550 return XML_ERROR_NO_MEMORY;
5551 return XML_ERROR_NONE;
5552}
5553
5554static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005555appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5556 const char *ptr, const char *end,
5557 STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005558{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005559 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005560 for (;;) {
5561 const char *next;
5562 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
5563 switch (tok) {
5564 case XML_TOK_NONE:
5565 return XML_ERROR_NONE;
5566 case XML_TOK_INVALID:
5567 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005568 eventPtr = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005569 return XML_ERROR_INVALID_TOKEN;
5570 case XML_TOK_PARTIAL:
5571 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005572 eventPtr = ptr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005573 return XML_ERROR_INVALID_TOKEN;
5574 case XML_TOK_CHAR_REF:
5575 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005576 XML_Char buf[XML_ENCODE_MAX];
5577 int i;
5578 int n = XmlCharRefNumber(enc, ptr);
5579 if (n < 0) {
5580 if (enc == encoding)
5581 eventPtr = ptr;
5582 return XML_ERROR_BAD_CHAR_REF;
5583 }
5584 if (!isCdata
5585 && n == 0x20 /* space */
5586 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5587 break;
5588 n = XmlEncode(n, (ICHAR *)buf);
Victor Stinner93d0cb52017-08-18 23:43:54 +02005589 /* The XmlEncode() functions can never return 0 here. That
5590 * error return happens if the code point passed in is either
5591 * negative or greater than or equal to 0x110000. The
5592 * XmlCharRefNumber() functions will all return a number
5593 * strictly less than 0x110000 or a negative value if an error
5594 * occurred. The negative value is intercepted above, so
5595 * XmlEncode() is never passed a value it might return an
5596 * error for.
5597 */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005598 for (i = 0; i < n; i++) {
5599 if (!poolAppendChar(pool, buf[i]))
5600 return XML_ERROR_NO_MEMORY;
5601 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005602 }
5603 break;
5604 case XML_TOK_DATA_CHARS:
5605 if (!poolAppend(pool, enc, ptr, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005606 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005607 break;
5608 case XML_TOK_TRAILING_CR:
5609 next = ptr + enc->minBytesPerChar;
5610 /* fall through */
5611 case XML_TOK_ATTRIBUTE_VALUE_S:
5612 case XML_TOK_DATA_NEWLINE:
5613 if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005614 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005615 if (!poolAppendChar(pool, 0x20))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005616 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005617 break;
5618 case XML_TOK_ENTITY_REF:
5619 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005620 const XML_Char *name;
5621 ENTITY *entity;
5622 char checkEntityDecl;
5623 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
5624 ptr + enc->minBytesPerChar,
5625 next - enc->minBytesPerChar);
5626 if (ch) {
5627 if (!poolAppendChar(pool, ch))
5628 return XML_ERROR_NO_MEMORY;
5629 break;
5630 }
5631 name = poolStoreString(&temp2Pool, enc,
5632 ptr + enc->minBytesPerChar,
5633 next - enc->minBytesPerChar);
5634 if (!name)
5635 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005636 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005637 poolDiscard(&temp2Pool);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005638 /* First, determine if a check for an existing declaration is needed;
5639 if yes, check that the entity exists, and that it is internal.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005640 */
5641 if (pool == &dtd->pool) /* are we called from prolog? */
5642 checkEntityDecl =
5643#ifdef XML_DTD
5644 prologState.documentEntity &&
5645#endif /* XML_DTD */
5646 (dtd->standalone
5647 ? !openInternalEntities
5648 : !dtd->hasParamEntityRefs);
5649 else /* if (pool == &tempPool): we are called from content */
5650 checkEntityDecl = !dtd->hasParamEntityRefs || dtd->standalone;
5651 if (checkEntityDecl) {
5652 if (!entity)
5653 return XML_ERROR_UNDEFINED_ENTITY;
5654 else if (!entity->is_internal)
5655 return XML_ERROR_ENTITY_DECLARED_IN_PE;
5656 }
5657 else if (!entity) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005658 /* Cannot report skipped entity here - see comments on
5659 skippedEntityHandler.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005660 if (skippedEntityHandler)
5661 skippedEntityHandler(handlerArg, name, 0);
5662 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005663 /* Cannot call the default handler because this would be
5664 out of sync with the call to the startElementHandler.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005665 if ((pool == &tempPool) && defaultHandler)
5666 reportDefault(parser, enc, ptr, next);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005667 */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005668 break;
5669 }
5670 if (entity->open) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02005671 if (enc == encoding) {
5672 /* It does not appear that this line can be executed.
5673 *
5674 * The "if (entity->open)" check catches recursive entity
5675 * definitions. In order to be called with an open
5676 * entity, it must have gone through this code before and
5677 * been through the recursive call to
5678 * appendAttributeValue() some lines below. That call
5679 * sets the local encoding ("enc") to the parser's
5680 * internal encoding (internal_utf8 or internal_utf16),
5681 * which can never be the same as the principal encoding.
5682 * It doesn't appear there is another code path that gets
5683 * here with entity->open being TRUE.
5684 *
5685 * Since it is not certain that this logic is watertight,
5686 * we keep the line and merely exclude it from coverage
5687 * tests.
5688 */
5689 eventPtr = ptr; /* LCOV_EXCL_LINE */
5690 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005691 return XML_ERROR_RECURSIVE_ENTITY_REF;
5692 }
5693 if (entity->notation) {
5694 if (enc == encoding)
5695 eventPtr = ptr;
5696 return XML_ERROR_BINARY_ENTITY_REF;
5697 }
5698 if (!entity->textPtr) {
5699 if (enc == encoding)
5700 eventPtr = ptr;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005701 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005702 }
5703 else {
5704 enum XML_Error result;
5705 const XML_Char *textEnd = entity->textPtr + entity->textLen;
5706 entity->open = XML_TRUE;
5707 result = appendAttributeValue(parser, internalEncoding, isCdata,
5708 (char *)entity->textPtr,
5709 (char *)textEnd, pool);
5710 entity->open = XML_FALSE;
5711 if (result)
5712 return result;
5713 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005714 }
5715 break;
5716 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02005717 /* The only token returned by XmlAttributeValueTok() that does
5718 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
5719 * Getting that would require an entity name to contain an
5720 * incomplete XML character (e.g. \xE2\x82); however previous
5721 * tokenisers will have already recognised and rejected such
5722 * names before XmlAttributeValueTok() gets a look-in. This
5723 * default case should be retained as a safety net, but the code
5724 * excluded from coverage tests.
5725 *
5726 * LCOV_EXCL_START
5727 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005728 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005729 eventPtr = ptr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005730 return XML_ERROR_UNEXPECTED_STATE;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005731 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005732 }
5733 ptr = next;
5734 }
5735 /* not reached */
5736}
5737
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005738static enum XML_Error
5739storeEntityValue(XML_Parser parser,
5740 const ENCODING *enc,
5741 const char *entityTextPtr,
5742 const char *entityTextEnd)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005743{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005744 DTD * const dtd = _dtd; /* save one level of indirection */
5745 STRING_POOL *pool = &(dtd->entityValuePool);
5746 enum XML_Error result = XML_ERROR_NONE;
5747#ifdef XML_DTD
5748 int oldInEntityValue = prologState.inEntityValue;
5749 prologState.inEntityValue = 1;
5750#endif /* XML_DTD */
5751 /* never return Null for the value argument in EntityDeclHandler,
5752 since this would indicate an external entity; therefore we
5753 have to make sure that entityValuePool.start is not null */
5754 if (!pool->blocks) {
5755 if (!poolGrow(pool))
5756 return XML_ERROR_NO_MEMORY;
5757 }
5758
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005759 for (;;) {
5760 const char *next;
5761 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
5762 switch (tok) {
5763 case XML_TOK_PARAM_ENTITY_REF:
5764#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005765 if (isParamEntity || enc != encoding) {
5766 const XML_Char *name;
5767 ENTITY *entity;
5768 name = poolStoreString(&tempPool, enc,
5769 entityTextPtr + enc->minBytesPerChar,
5770 next - enc->minBytesPerChar);
5771 if (!name) {
5772 result = XML_ERROR_NO_MEMORY;
5773 goto endEntityValue;
5774 }
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005775 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005776 poolDiscard(&tempPool);
5777 if (!entity) {
5778 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
5779 /* cannot report skipped entity here - see comments on
5780 skippedEntityHandler
5781 if (skippedEntityHandler)
5782 skippedEntityHandler(handlerArg, name, 0);
5783 */
5784 dtd->keepProcessing = dtd->standalone;
5785 goto endEntityValue;
5786 }
5787 if (entity->open) {
5788 if (enc == encoding)
5789 eventPtr = entityTextPtr;
5790 result = XML_ERROR_RECURSIVE_ENTITY_REF;
5791 goto endEntityValue;
5792 }
5793 if (entity->systemId) {
5794 if (externalEntityRefHandler) {
5795 dtd->paramEntityRead = XML_FALSE;
5796 entity->open = XML_TRUE;
5797 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
5798 0,
5799 entity->base,
5800 entity->systemId,
5801 entity->publicId)) {
5802 entity->open = XML_FALSE;
5803 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5804 goto endEntityValue;
5805 }
5806 entity->open = XML_FALSE;
5807 if (!dtd->paramEntityRead)
5808 dtd->keepProcessing = dtd->standalone;
5809 }
5810 else
5811 dtd->keepProcessing = dtd->standalone;
5812 }
5813 else {
5814 entity->open = XML_TRUE;
5815 result = storeEntityValue(parser,
5816 internalEncoding,
5817 (char *)entity->textPtr,
5818 (char *)(entity->textPtr
5819 + entity->textLen));
5820 entity->open = XML_FALSE;
5821 if (result)
5822 goto endEntityValue;
5823 }
5824 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005825 }
5826#endif /* XML_DTD */
Fred Drake31d485c2004-08-03 07:06:22 +00005827 /* In the internal subset, PE references are not legal
5828 within markup declarations, e.g entity values in this case. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005829 eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005830 result = XML_ERROR_PARAM_ENTITY_REF;
5831 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005832 case XML_TOK_NONE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005833 result = XML_ERROR_NONE;
5834 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005835 case XML_TOK_ENTITY_REF:
5836 case XML_TOK_DATA_CHARS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005837 if (!poolAppend(pool, enc, entityTextPtr, next)) {
5838 result = XML_ERROR_NO_MEMORY;
5839 goto endEntityValue;
5840 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005841 break;
5842 case XML_TOK_TRAILING_CR:
5843 next = entityTextPtr + enc->minBytesPerChar;
5844 /* fall through */
5845 case XML_TOK_DATA_NEWLINE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005846 if (pool->end == pool->ptr && !poolGrow(pool)) {
5847 result = XML_ERROR_NO_MEMORY;
5848 goto endEntityValue;
5849 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005850 *(pool->ptr)++ = 0xA;
5851 break;
5852 case XML_TOK_CHAR_REF:
5853 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005854 XML_Char buf[XML_ENCODE_MAX];
5855 int i;
5856 int n = XmlCharRefNumber(enc, entityTextPtr);
5857 if (n < 0) {
5858 if (enc == encoding)
5859 eventPtr = entityTextPtr;
5860 result = XML_ERROR_BAD_CHAR_REF;
5861 goto endEntityValue;
5862 }
5863 n = XmlEncode(n, (ICHAR *)buf);
Victor Stinner93d0cb52017-08-18 23:43:54 +02005864 /* The XmlEncode() functions can never return 0 here. That
5865 * error return happens if the code point passed in is either
5866 * negative or greater than or equal to 0x110000. The
5867 * XmlCharRefNumber() functions will all return a number
5868 * strictly less than 0x110000 or a negative value if an error
5869 * occurred. The negative value is intercepted above, so
5870 * XmlEncode() is never passed a value it might return an
5871 * error for.
5872 */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005873 for (i = 0; i < n; i++) {
5874 if (pool->end == pool->ptr && !poolGrow(pool)) {
5875 result = XML_ERROR_NO_MEMORY;
5876 goto endEntityValue;
5877 }
5878 *(pool->ptr)++ = buf[i];
5879 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005880 }
5881 break;
5882 case XML_TOK_PARTIAL:
5883 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005884 eventPtr = entityTextPtr;
5885 result = XML_ERROR_INVALID_TOKEN;
5886 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005887 case XML_TOK_INVALID:
5888 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005889 eventPtr = next;
5890 result = XML_ERROR_INVALID_TOKEN;
5891 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005892 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02005893 /* This default case should be unnecessary -- all the tokens
5894 * that XmlEntityValueTok() can return have their own explicit
5895 * cases -- but should be retained for safety. We do however
5896 * exclude it from the coverage statistics.
5897 *
5898 * LCOV_EXCL_START
5899 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005900 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005901 eventPtr = entityTextPtr;
5902 result = XML_ERROR_UNEXPECTED_STATE;
5903 goto endEntityValue;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005904 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005905 }
5906 entityTextPtr = next;
5907 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005908endEntityValue:
5909#ifdef XML_DTD
5910 prologState.inEntityValue = oldInEntityValue;
5911#endif /* XML_DTD */
5912 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005913}
5914
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005915static void FASTCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005916normalizeLines(XML_Char *s)
5917{
5918 XML_Char *p;
5919 for (;; s++) {
5920 if (*s == XML_T('\0'))
5921 return;
5922 if (*s == 0xD)
5923 break;
5924 }
5925 p = s;
5926 do {
5927 if (*s == 0xD) {
5928 *p++ = 0xA;
5929 if (*++s == 0xA)
5930 s++;
5931 }
5932 else
5933 *p++ = *s++;
5934 } while (*s);
5935 *p = XML_T('\0');
5936}
5937
5938static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005939reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
5940 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005941{
5942 const XML_Char *target;
5943 XML_Char *data;
5944 const char *tem;
5945 if (!processingInstructionHandler) {
5946 if (defaultHandler)
5947 reportDefault(parser, enc, start, end);
5948 return 1;
5949 }
5950 start += enc->minBytesPerChar * 2;
5951 tem = start + XmlNameLength(enc, start);
5952 target = poolStoreString(&tempPool, enc, start, tem);
5953 if (!target)
5954 return 0;
5955 poolFinish(&tempPool);
5956 data = poolStoreString(&tempPool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005957 XmlSkipS(enc, tem),
5958 end - enc->minBytesPerChar*2);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005959 if (!data)
5960 return 0;
5961 normalizeLines(data);
5962 processingInstructionHandler(handlerArg, target, data);
5963 poolClear(&tempPool);
5964 return 1;
5965}
5966
5967static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005968reportComment(XML_Parser parser, const ENCODING *enc,
5969 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005970{
5971 XML_Char *data;
5972 if (!commentHandler) {
5973 if (defaultHandler)
5974 reportDefault(parser, enc, start, end);
5975 return 1;
5976 }
5977 data = poolStoreString(&tempPool,
5978 enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005979 start + enc->minBytesPerChar * 4,
5980 end - enc->minBytesPerChar * 3);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005981 if (!data)
5982 return 0;
5983 normalizeLines(data);
5984 commentHandler(handlerArg, data);
5985 poolClear(&tempPool);
5986 return 1;
5987}
5988
5989static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005990reportDefault(XML_Parser parser, const ENCODING *enc,
5991 const char *s, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005992{
5993 if (MUST_CONVERT(enc, s)) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02005994 enum XML_Convert_Result convert_res;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005995 const char **eventPP;
5996 const char **eventEndPP;
5997 if (enc == encoding) {
5998 eventPP = &eventPtr;
5999 eventEndPP = &eventEndPtr;
6000 }
6001 else {
Victor Stinner93d0cb52017-08-18 23:43:54 +02006002 /* To get here, two things must be true; the parser must be
6003 * using a character encoding that is not the same as the
6004 * encoding passed in, and the encoding passed in must need
6005 * conversion to the internal format (UTF-8 unless XML_UNICODE
6006 * is defined). The only occasions on which the encoding passed
6007 * in is not the same as the parser's encoding are when it is
6008 * the internal encoding (e.g. a previously defined parameter
6009 * entity, already converted to internal format). This by
6010 * definition doesn't need conversion, so the whole branch never
6011 * gets executed.
6012 *
6013 * For safety's sake we don't delete these lines and merely
6014 * exclude them from coverage statistics.
6015 *
6016 * LCOV_EXCL_START
6017 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006018 eventPP = &(openInternalEntities->internalEventPtr);
6019 eventEndPP = &(openInternalEntities->internalEventEndPtr);
Victor Stinner93d0cb52017-08-18 23:43:54 +02006020 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006021 }
6022 do {
6023 ICHAR *dataPtr = (ICHAR *)dataBuf;
Victor Stinner23ec4b52017-06-15 00:54:36 +02006024 convert_res = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006025 *eventEndPP = s;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00006026 defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006027 *eventPP = s;
Victor Stinner23ec4b52017-06-15 00:54:36 +02006028 } while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006029 }
6030 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00006031 defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006032}
6033
6034
6035static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006036defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
6037 XML_Bool isId, const XML_Char *value, XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006038{
6039 DEFAULT_ATTRIBUTE *att;
6040 if (value || isId) {
6041 /* The handling of default attributes gets messed up if we have
6042 a default which duplicates a non-default. */
6043 int i;
6044 for (i = 0; i < type->nDefaultAtts; i++)
6045 if (attId == type->defaultAtts[i].id)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006046 return 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006047 if (isId && !type->idAtt && !attId->xmlns)
6048 type->idAtt = attId;
6049 }
6050 if (type->nDefaultAtts == type->allocDefaultAtts) {
6051 if (type->allocDefaultAtts == 0) {
6052 type->allocDefaultAtts = 8;
Fred Drake08317ae2003-10-21 15:38:55 +00006053 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(type->allocDefaultAtts
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006054 * sizeof(DEFAULT_ATTRIBUTE));
6055 if (!type->defaultAtts)
6056 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006057 }
6058 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006059 DEFAULT_ATTRIBUTE *temp;
6060 int count = type->allocDefaultAtts * 2;
6061 temp = (DEFAULT_ATTRIBUTE *)
6062 REALLOC(type->defaultAtts, (count * sizeof(DEFAULT_ATTRIBUTE)));
6063 if (temp == NULL)
6064 return 0;
6065 type->allocDefaultAtts = count;
6066 type->defaultAtts = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006067 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006068 }
6069 att = type->defaultAtts + type->nDefaultAtts;
6070 att->id = attId;
6071 att->value = value;
6072 att->isCdata = isCdata;
6073 if (!isCdata)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006074 attId->maybeTokenized = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006075 type->nDefaultAtts += 1;
6076 return 1;
6077}
6078
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006079static int
6080setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006081{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006082 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006083 const XML_Char *name;
6084 for (name = elementType->name; *name; name++) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006085 if (*name == XML_T(ASCII_COLON)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006086 PREFIX *prefix;
6087 const XML_Char *s;
6088 for (s = elementType->name; s != name; s++) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006089 if (!poolAppendChar(&dtd->pool, *s))
6090 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006091 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006092 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6093 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006094 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006095 sizeof(PREFIX));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006096 if (!prefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006097 return 0;
6098 if (prefix->name == poolStart(&dtd->pool))
6099 poolFinish(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006100 else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006101 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006102 elementType->prefix = prefix;
6103
6104 }
6105 }
6106 return 1;
6107}
6108
6109static ATTRIBUTE_ID *
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006110getAttributeId(XML_Parser parser, const ENCODING *enc,
6111 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006112{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006113 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006114 ATTRIBUTE_ID *id;
6115 const XML_Char *name;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006116 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6117 return NULL;
6118 name = poolStoreString(&dtd->pool, enc, start, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006119 if (!name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006120 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006121 /* skip quotation mark - its storage will be re-used (like in name[-1]) */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006122 ++name;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006123 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006124 if (!id)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006125 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006126 if (id->name != name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006127 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006128 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006129 poolFinish(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006130 if (!ns)
6131 ;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006132 else if (name[0] == XML_T(ASCII_x)
6133 && name[1] == XML_T(ASCII_m)
6134 && name[2] == XML_T(ASCII_l)
6135 && name[3] == XML_T(ASCII_n)
6136 && name[4] == XML_T(ASCII_s)
6137 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006138 if (name[5] == XML_T('\0'))
6139 id->prefix = &dtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006140 else
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006141 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, sizeof(PREFIX));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006142 id->xmlns = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006143 }
6144 else {
6145 int i;
6146 for (i = 0; name[i]; i++) {
Fred Drake08317ae2003-10-21 15:38:55 +00006147 /* attributes without prefix are *not* in the default namespace */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006148 if (name[i] == XML_T(ASCII_COLON)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006149 int j;
6150 for (j = 0; j < i; j++) {
6151 if (!poolAppendChar(&dtd->pool, name[j]))
6152 return NULL;
6153 }
6154 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6155 return NULL;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006156 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006157 sizeof(PREFIX));
Benjamin Peterson196d7db2016-06-11 13:28:56 -07006158 if (!id->prefix)
6159 return NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006160 if (id->prefix->name == poolStart(&dtd->pool))
6161 poolFinish(&dtd->pool);
6162 else
6163 poolDiscard(&dtd->pool);
6164 break;
6165 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006166 }
6167 }
6168 }
6169 return id;
6170}
6171
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006172#define CONTEXT_SEP XML_T(ASCII_FF)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006173
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006174static const XML_Char *
6175getContext(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006176{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006177 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006178 HASH_TABLE_ITER iter;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006179 XML_Bool needSep = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006180
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006181 if (dtd->defaultPrefix.binding) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006182 int i;
6183 int len;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006184 if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS)))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006185 return NULL;
6186 len = dtd->defaultPrefix.binding->uriLen;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00006187 if (namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006188 len--;
Victor Stinner93d0cb52017-08-18 23:43:54 +02006189 for (i = 0; i < len; i++) {
6190 if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i])) {
6191 /* Because of memory caching, I don't believe this line can be
6192 * executed.
6193 *
6194 * This is part of a loop copying the default prefix binding
6195 * URI into the parser's temporary string pool. Previously,
6196 * that URI was copied into the same string pool, with a
6197 * terminating NUL character, as part of setContext(). When
6198 * the pool was cleared, that leaves a block definitely big
6199 * enough to hold the URI on the free block list of the pool.
6200 * The URI copy in getContext() therefore cannot run out of
6201 * memory.
6202 *
6203 * If the pool is used between the setContext() and
6204 * getContext() calls, the worst it can do is leave a bigger
6205 * block on the front of the free list. Given that this is
6206 * all somewhat inobvious and program logic can be changed, we
6207 * don't delete the line but we do exclude it from the test
6208 * coverage statistics.
6209 */
6210 return NULL; /* LCOV_EXCL_LINE */
6211 }
6212 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006213 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006214 }
6215
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006216 hashTableIterInit(&iter, &(dtd->prefixes));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006217 for (;;) {
6218 int i;
6219 int len;
6220 const XML_Char *s;
6221 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6222 if (!prefix)
6223 break;
Victor Stinner93d0cb52017-08-18 23:43:54 +02006224 if (!prefix->binding) {
6225 /* This test appears to be (justifiable) paranoia. There does
6226 * not seem to be a way of injecting a prefix without a binding
6227 * that doesn't get errored long before this function is called.
6228 * The test should remain for safety's sake, so we instead
6229 * exclude the following line from the coverage statistics.
6230 */
6231 continue; /* LCOV_EXCL_LINE */
6232 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006233 if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006234 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006235 for (s = prefix->name; *s; s++)
6236 if (!poolAppendChar(&tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006237 return NULL;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006238 if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS)))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006239 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006240 len = prefix->binding->uriLen;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00006241 if (namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006242 len--;
6243 for (i = 0; i < len; i++)
6244 if (!poolAppendChar(&tempPool, prefix->binding->uri[i]))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006245 return NULL;
6246 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006247 }
6248
6249
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006250 hashTableIterInit(&iter, &(dtd->generalEntities));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006251 for (;;) {
6252 const XML_Char *s;
6253 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
6254 if (!e)
6255 break;
6256 if (!e->open)
6257 continue;
6258 if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006259 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006260 for (s = e->name; *s; s++)
6261 if (!poolAppendChar(&tempPool, *s))
6262 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006263 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006264 }
6265
6266 if (!poolAppendChar(&tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006267 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006268 return tempPool.start;
6269}
6270
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006271static XML_Bool
6272setContext(XML_Parser parser, const XML_Char *context)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006273{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006274 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006275 const XML_Char *s = context;
6276
6277 while (*context != XML_T('\0')) {
6278 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
6279 ENTITY *e;
6280 if (!poolAppendChar(&tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006281 return XML_FALSE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006282 e = (ENTITY *)lookup(parser, &dtd->generalEntities, poolStart(&tempPool), 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006283 if (e)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006284 e->open = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006285 if (*s != XML_T('\0'))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006286 s++;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006287 context = s;
6288 poolDiscard(&tempPool);
6289 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006290 else if (*s == XML_T(ASCII_EQUALS)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006291 PREFIX *prefix;
6292 if (poolLength(&tempPool) == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006293 prefix = &dtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006294 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006295 if (!poolAppendChar(&tempPool, XML_T('\0')))
6296 return XML_FALSE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006297 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&tempPool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006298 sizeof(PREFIX));
6299 if (!prefix)
6300 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006301 if (prefix->name == poolStart(&tempPool)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006302 prefix->name = poolCopyString(&dtd->pool, prefix->name);
6303 if (!prefix->name)
6304 return XML_FALSE;
6305 }
6306 poolDiscard(&tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006307 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006308 for (context = s + 1;
6309 *context != CONTEXT_SEP && *context != XML_T('\0');
6310 context++)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006311 if (!poolAppendChar(&tempPool, *context))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006312 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006313 if (!poolAppendChar(&tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006314 return XML_FALSE;
Fred Drake31d485c2004-08-03 07:06:22 +00006315 if (addBinding(parser, prefix, NULL, poolStart(&tempPool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006316 &inheritedBindings) != XML_ERROR_NONE)
6317 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006318 poolDiscard(&tempPool);
6319 if (*context != XML_T('\0'))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006320 ++context;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006321 s = context;
6322 }
6323 else {
6324 if (!poolAppendChar(&tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006325 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006326 s++;
6327 }
6328 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006329 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006330}
6331
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006332static void FASTCALL
6333normalizePublicId(XML_Char *publicId)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006334{
6335 XML_Char *p = publicId;
6336 XML_Char *s;
6337 for (s = publicId; *s; s++) {
6338 switch (*s) {
6339 case 0x20:
6340 case 0xD:
6341 case 0xA:
6342 if (p != publicId && p[-1] != 0x20)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006343 *p++ = 0x20;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006344 break;
6345 default:
6346 *p++ = *s;
6347 }
6348 }
6349 if (p != publicId && p[-1] == 0x20)
6350 --p;
6351 *p = XML_T('\0');
6352}
6353
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006354static DTD *
6355dtdCreate(const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006356{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006357 DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD));
6358 if (p == NULL)
6359 return p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006360 poolInit(&(p->pool), ms);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006361 poolInit(&(p->entityValuePool), ms);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006362 hashTableInit(&(p->generalEntities), ms);
6363 hashTableInit(&(p->elementTypes), ms);
6364 hashTableInit(&(p->attributeIds), ms);
6365 hashTableInit(&(p->prefixes), ms);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006366#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006367 p->paramEntityRead = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006368 hashTableInit(&(p->paramEntities), ms);
6369#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006370 p->defaultPrefix.name = NULL;
6371 p->defaultPrefix.binding = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006372
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006373 p->in_eldecl = XML_FALSE;
6374 p->scaffIndex = NULL;
6375 p->scaffold = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006376 p->scaffLevel = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006377 p->scaffSize = 0;
6378 p->scaffCount = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006379 p->contentStringLen = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006380
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006381 p->keepProcessing = XML_TRUE;
6382 p->hasParamEntityRefs = XML_FALSE;
6383 p->standalone = XML_FALSE;
6384 return p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006385}
6386
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006387static void
6388dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006389{
6390 HASH_TABLE_ITER iter;
6391 hashTableIterInit(&iter, &(p->elementTypes));
6392 for (;;) {
6393 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6394 if (!e)
6395 break;
6396 if (e->allocDefaultAtts != 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006397 ms->free_fcn(e->defaultAtts);
6398 }
6399 hashTableClear(&(p->generalEntities));
6400#ifdef XML_DTD
6401 p->paramEntityRead = XML_FALSE;
6402 hashTableClear(&(p->paramEntities));
6403#endif /* XML_DTD */
6404 hashTableClear(&(p->elementTypes));
6405 hashTableClear(&(p->attributeIds));
6406 hashTableClear(&(p->prefixes));
6407 poolClear(&(p->pool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006408 poolClear(&(p->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006409 p->defaultPrefix.name = NULL;
6410 p->defaultPrefix.binding = NULL;
6411
6412 p->in_eldecl = XML_FALSE;
Fred Drake08317ae2003-10-21 15:38:55 +00006413
6414 ms->free_fcn(p->scaffIndex);
6415 p->scaffIndex = NULL;
6416 ms->free_fcn(p->scaffold);
6417 p->scaffold = NULL;
6418
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006419 p->scaffLevel = 0;
6420 p->scaffSize = 0;
6421 p->scaffCount = 0;
6422 p->contentStringLen = 0;
6423
6424 p->keepProcessing = XML_TRUE;
6425 p->hasParamEntityRefs = XML_FALSE;
6426 p->standalone = XML_FALSE;
6427}
6428
6429static void
6430dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms)
6431{
6432 HASH_TABLE_ITER iter;
6433 hashTableIterInit(&iter, &(p->elementTypes));
6434 for (;;) {
6435 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6436 if (!e)
6437 break;
6438 if (e->allocDefaultAtts != 0)
6439 ms->free_fcn(e->defaultAtts);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006440 }
6441 hashTableDestroy(&(p->generalEntities));
6442#ifdef XML_DTD
6443 hashTableDestroy(&(p->paramEntities));
6444#endif /* XML_DTD */
6445 hashTableDestroy(&(p->elementTypes));
6446 hashTableDestroy(&(p->attributeIds));
6447 hashTableDestroy(&(p->prefixes));
6448 poolDestroy(&(p->pool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006449 poolDestroy(&(p->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006450 if (isDocEntity) {
Fred Drake08317ae2003-10-21 15:38:55 +00006451 ms->free_fcn(p->scaffIndex);
6452 ms->free_fcn(p->scaffold);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006453 }
6454 ms->free_fcn(p);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006455}
6456
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006457/* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6458 The new DTD has already been initialized.
6459*/
6460static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006461dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006462{
6463 HASH_TABLE_ITER iter;
6464
6465 /* Copy the prefix table. */
6466
6467 hashTableIterInit(&iter, &(oldDtd->prefixes));
6468 for (;;) {
6469 const XML_Char *name;
6470 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
6471 if (!oldP)
6472 break;
6473 name = poolCopyString(&(newDtd->pool), oldP->name);
6474 if (!name)
6475 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006476 if (!lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006477 return 0;
6478 }
6479
6480 hashTableIterInit(&iter, &(oldDtd->attributeIds));
6481
6482 /* Copy the attribute id table. */
6483
6484 for (;;) {
6485 ATTRIBUTE_ID *newA;
6486 const XML_Char *name;
6487 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
6488
6489 if (!oldA)
6490 break;
6491 /* Remember to allocate the scratch byte before the name. */
6492 if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
6493 return 0;
6494 name = poolCopyString(&(newDtd->pool), oldA->name);
6495 if (!name)
6496 return 0;
6497 ++name;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006498 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006499 sizeof(ATTRIBUTE_ID));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006500 if (!newA)
6501 return 0;
6502 newA->maybeTokenized = oldA->maybeTokenized;
6503 if (oldA->prefix) {
6504 newA->xmlns = oldA->xmlns;
6505 if (oldA->prefix == &oldDtd->defaultPrefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006506 newA->prefix = &newDtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006507 else
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006508 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006509 oldA->prefix->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006510 }
6511 }
6512
6513 /* Copy the element type table. */
6514
6515 hashTableIterInit(&iter, &(oldDtd->elementTypes));
6516
6517 for (;;) {
6518 int i;
6519 ELEMENT_TYPE *newE;
6520 const XML_Char *name;
6521 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6522 if (!oldE)
6523 break;
6524 name = poolCopyString(&(newDtd->pool), oldE->name);
6525 if (!name)
6526 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006527 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006528 sizeof(ELEMENT_TYPE));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006529 if (!newE)
6530 return 0;
6531 if (oldE->nDefaultAtts) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006532 newE->defaultAtts = (DEFAULT_ATTRIBUTE *)
6533 ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6534 if (!newE->defaultAtts) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006535 return 0;
6536 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006537 }
6538 if (oldE->idAtt)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006539 newE->idAtt = (ATTRIBUTE_ID *)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006540 lookup(oldParser, &(newDtd->attributeIds), oldE->idAtt->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006541 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
6542 if (oldE->prefix)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006543 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006544 oldE->prefix->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006545 for (i = 0; i < newE->nDefaultAtts; i++) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006546 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006547 lookup(oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006548 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
6549 if (oldE->defaultAtts[i].value) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006550 newE->defaultAtts[i].value
6551 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
6552 if (!newE->defaultAtts[i].value)
6553 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006554 }
6555 else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006556 newE->defaultAtts[i].value = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006557 }
6558 }
6559
6560 /* Copy the entity tables. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006561 if (!copyEntityTable(oldParser,
6562 &(newDtd->generalEntities),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006563 &(newDtd->pool),
6564 &(oldDtd->generalEntities)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006565 return 0;
6566
6567#ifdef XML_DTD
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006568 if (!copyEntityTable(oldParser,
6569 &(newDtd->paramEntities),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006570 &(newDtd->pool),
6571 &(oldDtd->paramEntities)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006572 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006573 newDtd->paramEntityRead = oldDtd->paramEntityRead;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006574#endif /* XML_DTD */
6575
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006576 newDtd->keepProcessing = oldDtd->keepProcessing;
6577 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006578 newDtd->standalone = oldDtd->standalone;
6579
6580 /* Don't want deep copying for scaffolding */
6581 newDtd->in_eldecl = oldDtd->in_eldecl;
6582 newDtd->scaffold = oldDtd->scaffold;
6583 newDtd->contentStringLen = oldDtd->contentStringLen;
6584 newDtd->scaffSize = oldDtd->scaffSize;
6585 newDtd->scaffLevel = oldDtd->scaffLevel;
6586 newDtd->scaffIndex = oldDtd->scaffIndex;
6587
6588 return 1;
6589} /* End dtdCopy */
6590
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006591static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006592copyEntityTable(XML_Parser oldParser,
6593 HASH_TABLE *newTable,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006594 STRING_POOL *newPool,
6595 const HASH_TABLE *oldTable)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006596{
6597 HASH_TABLE_ITER iter;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006598 const XML_Char *cachedOldBase = NULL;
6599 const XML_Char *cachedNewBase = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006600
6601 hashTableIterInit(&iter, oldTable);
6602
6603 for (;;) {
6604 ENTITY *newE;
6605 const XML_Char *name;
6606 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
6607 if (!oldE)
6608 break;
6609 name = poolCopyString(newPool, oldE->name);
6610 if (!name)
6611 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006612 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006613 if (!newE)
6614 return 0;
6615 if (oldE->systemId) {
6616 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
6617 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006618 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006619 newE->systemId = tem;
6620 if (oldE->base) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006621 if (oldE->base == cachedOldBase)
6622 newE->base = cachedNewBase;
6623 else {
6624 cachedOldBase = oldE->base;
6625 tem = poolCopyString(newPool, cachedOldBase);
6626 if (!tem)
6627 return 0;
6628 cachedNewBase = newE->base = tem;
6629 }
6630 }
6631 if (oldE->publicId) {
6632 tem = poolCopyString(newPool, oldE->publicId);
6633 if (!tem)
6634 return 0;
6635 newE->publicId = tem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006636 }
6637 }
6638 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006639 const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr,
6640 oldE->textLen);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006641 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006642 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006643 newE->textPtr = tem;
6644 newE->textLen = oldE->textLen;
6645 }
6646 if (oldE->notation) {
6647 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
6648 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006649 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006650 newE->notation = tem;
6651 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006652 newE->is_param = oldE->is_param;
6653 newE->is_internal = oldE->is_internal;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006654 }
6655 return 1;
6656}
6657
Fred Drake08317ae2003-10-21 15:38:55 +00006658#define INIT_POWER 6
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006659
Fred Drake08317ae2003-10-21 15:38:55 +00006660static XML_Bool FASTCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006661keyeq(KEY s1, KEY s2)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006662{
6663 for (; *s1 == *s2; s1++, s2++)
6664 if (*s1 == 0)
Fred Drake08317ae2003-10-21 15:38:55 +00006665 return XML_TRUE;
6666 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006667}
6668
Victor Stinner5ff71322017-06-21 14:39:22 +02006669static size_t
6670keylen(KEY s)
6671{
6672 size_t len = 0;
6673 for (; *s; s++, len++);
6674 return len;
6675}
6676
6677static void
6678copy_salt_to_sipkey(XML_Parser parser, struct sipkey * key)
6679{
6680 key->k[0] = 0;
6681 key->k[1] = get_hash_secret_salt(parser);
6682}
6683
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006684static unsigned long FASTCALL
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006685hash(XML_Parser parser, KEY s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006686{
Victor Stinner5ff71322017-06-21 14:39:22 +02006687 struct siphash state;
6688 struct sipkey key;
6689 (void)sip_tobin;
6690 (void)sip24_valid;
6691 copy_salt_to_sipkey(parser, &key);
6692 sip24_init(&state, &key);
6693 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
6694 return (unsigned long)sip24_final(&state);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006695}
6696
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006697static NAMED *
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006698lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006699{
6700 size_t i;
6701 if (table->size == 0) {
6702 size_t tsize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006703 if (!createSize)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006704 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006705 table->power = INIT_POWER;
6706 /* table->size is a power of 2 */
6707 table->size = (size_t)1 << INIT_POWER;
6708 tsize = table->size * sizeof(NAMED *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006709 table->v = (NAMED **)table->mem->malloc_fcn(tsize);
Fred Drake31d485c2004-08-03 07:06:22 +00006710 if (!table->v) {
6711 table->size = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006712 return NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00006713 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006714 memset(table->v, 0, tsize);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006715 i = hash(parser, name) & ((unsigned long)table->size - 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006716 }
6717 else {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006718 unsigned long h = hash(parser, name);
Fred Drake08317ae2003-10-21 15:38:55 +00006719 unsigned long mask = (unsigned long)table->size - 1;
6720 unsigned char step = 0;
6721 i = h & mask;
6722 while (table->v[i]) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006723 if (keyeq(name, table->v[i]->name))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006724 return table->v[i];
Fred Drake08317ae2003-10-21 15:38:55 +00006725 if (!step)
6726 step = PROBE_STEP(h, mask, table->power);
6727 i < step ? (i += table->size - step) : (i -= step);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006728 }
6729 if (!createSize)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006730 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006731
6732 /* check for overflow (table is half full) */
6733 if (table->used >> (table->power - 1)) {
6734 unsigned char newPower = table->power + 1;
6735 size_t newSize = (size_t)1 << newPower;
6736 unsigned long newMask = (unsigned long)newSize - 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006737 size_t tsize = newSize * sizeof(NAMED *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006738 NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006739 if (!newV)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006740 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006741 memset(newV, 0, tsize);
6742 for (i = 0; i < table->size; i++)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006743 if (table->v[i]) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006744 unsigned long newHash = hash(parser, table->v[i]->name);
Fred Drake08317ae2003-10-21 15:38:55 +00006745 size_t j = newHash & newMask;
6746 step = 0;
6747 while (newV[j]) {
6748 if (!step)
6749 step = PROBE_STEP(newHash, newMask, newPower);
6750 j < step ? (j += newSize - step) : (j -= step);
6751 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006752 newV[j] = table->v[i];
6753 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006754 table->mem->free_fcn(table->v);
6755 table->v = newV;
Fred Drake08317ae2003-10-21 15:38:55 +00006756 table->power = newPower;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006757 table->size = newSize;
Fred Drake08317ae2003-10-21 15:38:55 +00006758 i = h & newMask;
6759 step = 0;
6760 while (table->v[i]) {
6761 if (!step)
6762 step = PROBE_STEP(h, newMask, newPower);
6763 i < step ? (i += newSize - step) : (i -= step);
6764 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006765 }
6766 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006767 table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006768 if (!table->v[i])
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006769 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006770 memset(table->v[i], 0, createSize);
6771 table->v[i]->name = name;
6772 (table->used)++;
6773 return table->v[i];
6774}
6775
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006776static void FASTCALL
6777hashTableClear(HASH_TABLE *table)
6778{
6779 size_t i;
6780 for (i = 0; i < table->size; i++) {
Fred Drake08317ae2003-10-21 15:38:55 +00006781 table->mem->free_fcn(table->v[i]);
6782 table->v[i] = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006783 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006784 table->used = 0;
6785}
6786
6787static void FASTCALL
6788hashTableDestroy(HASH_TABLE *table)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006789{
6790 size_t i;
Fred Drake08317ae2003-10-21 15:38:55 +00006791 for (i = 0; i < table->size; i++)
6792 table->mem->free_fcn(table->v[i]);
6793 table->mem->free_fcn(table->v);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006794}
6795
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006796static void FASTCALL
6797hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006798{
Fred Drake08317ae2003-10-21 15:38:55 +00006799 p->power = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006800 p->size = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006801 p->used = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006802 p->v = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006803 p->mem = ms;
6804}
6805
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006806static void FASTCALL
6807hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006808{
6809 iter->p = table->v;
6810 iter->end = iter->p + table->size;
6811}
6812
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006813static NAMED * FASTCALL
6814hashTableIterNext(HASH_TABLE_ITER *iter)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006815{
6816 while (iter->p != iter->end) {
6817 NAMED *tem = *(iter->p)++;
6818 if (tem)
6819 return tem;
6820 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006821 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006822}
6823
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006824static void FASTCALL
6825poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006826{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006827 pool->blocks = NULL;
6828 pool->freeBlocks = NULL;
6829 pool->start = NULL;
6830 pool->ptr = NULL;
6831 pool->end = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006832 pool->mem = ms;
6833}
6834
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006835static void FASTCALL
6836poolClear(STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006837{
6838 if (!pool->freeBlocks)
6839 pool->freeBlocks = pool->blocks;
6840 else {
6841 BLOCK *p = pool->blocks;
6842 while (p) {
6843 BLOCK *tem = p->next;
6844 p->next = pool->freeBlocks;
6845 pool->freeBlocks = p;
6846 p = tem;
6847 }
6848 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006849 pool->blocks = NULL;
6850 pool->start = NULL;
6851 pool->ptr = NULL;
6852 pool->end = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006853}
6854
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006855static void FASTCALL
6856poolDestroy(STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006857{
6858 BLOCK *p = pool->blocks;
6859 while (p) {
6860 BLOCK *tem = p->next;
6861 pool->mem->free_fcn(p);
6862 p = tem;
6863 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006864 p = pool->freeBlocks;
6865 while (p) {
6866 BLOCK *tem = p->next;
6867 pool->mem->free_fcn(p);
6868 p = tem;
6869 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006870}
6871
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006872static XML_Char *
6873poolAppend(STRING_POOL *pool, const ENCODING *enc,
6874 const char *ptr, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006875{
6876 if (!pool->ptr && !poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006877 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006878 for (;;) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02006879 const enum XML_Convert_Result convert_res = XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
6880 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006881 break;
6882 if (!poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006883 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006884 }
6885 return pool->start;
6886}
6887
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006888static const XML_Char * FASTCALL
6889poolCopyString(STRING_POOL *pool, const XML_Char *s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006890{
6891 do {
6892 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006893 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006894 } while (*s++);
6895 s = pool->start;
6896 poolFinish(pool);
6897 return s;
6898}
6899
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006900static const XML_Char *
6901poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006902{
Victor Stinner93d0cb52017-08-18 23:43:54 +02006903 if (!pool->ptr && !poolGrow(pool)) {
6904 /* The following line is unreachable given the current usage of
6905 * poolCopyStringN(). Currently it is called from exactly one
6906 * place to copy the text of a simple general entity. By that
6907 * point, the name of the entity is already stored in the pool, so
6908 * pool->ptr cannot be NULL.
6909 *
6910 * If poolCopyStringN() is used elsewhere as it well might be,
6911 * this line may well become executable again. Regardless, this
6912 * sort of check shouldn't be removed lightly, so we just exclude
6913 * it from the coverage statistics.
6914 */
6915 return NULL; /* LCOV_EXCL_LINE */
6916 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006917 for (; n > 0; --n, s++) {
6918 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006919 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006920 }
6921 s = pool->start;
6922 poolFinish(pool);
6923 return s;
6924}
6925
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006926static const XML_Char * FASTCALL
6927poolAppendString(STRING_POOL *pool, const XML_Char *s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006928{
6929 while (*s) {
6930 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006931 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006932 s++;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006933 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006934 return pool->start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006935}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006936
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006937static XML_Char *
6938poolStoreString(STRING_POOL *pool, const ENCODING *enc,
6939 const char *ptr, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006940{
6941 if (!poolAppend(pool, enc, ptr, end))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006942 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006943 if (pool->ptr == pool->end && !poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006944 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006945 *(pool->ptr)++ = 0;
6946 return pool->start;
6947}
6948
Victor Stinner5ff71322017-06-21 14:39:22 +02006949static size_t
6950poolBytesToAllocateFor(int blockSize)
6951{
6952 /* Unprotected math would be:
6953 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
6954 **
6955 ** Detect overflow, avoiding _signed_ overflow undefined behavior
6956 ** For a + b * c we check b * c in isolation first, so that addition of a
6957 ** on top has no chance of making us accept a small non-negative number
6958 */
6959 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
6960
6961 if (blockSize <= 0)
6962 return 0;
6963
6964 if (blockSize > (int)(INT_MAX / stretch))
6965 return 0;
6966
6967 {
6968 const int stretchedBlockSize = blockSize * (int)stretch;
6969 const int bytesToAllocate = (int)(
6970 offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
6971 if (bytesToAllocate < 0)
6972 return 0;
6973
6974 return (size_t)bytesToAllocate;
6975 }
6976}
6977
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006978static XML_Bool FASTCALL
6979poolGrow(STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006980{
6981 if (pool->freeBlocks) {
6982 if (pool->start == 0) {
6983 pool->blocks = pool->freeBlocks;
6984 pool->freeBlocks = pool->freeBlocks->next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006985 pool->blocks->next = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006986 pool->start = pool->blocks->s;
6987 pool->end = pool->start + pool->blocks->size;
6988 pool->ptr = pool->start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006989 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006990 }
6991 if (pool->end - pool->start < pool->freeBlocks->size) {
6992 BLOCK *tem = pool->freeBlocks->next;
6993 pool->freeBlocks->next = pool->blocks;
6994 pool->blocks = pool->freeBlocks;
6995 pool->freeBlocks = tem;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006996 memcpy(pool->blocks->s, pool->start,
6997 (pool->end - pool->start) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006998 pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
6999 pool->start = pool->blocks->s;
7000 pool->end = pool->start + pool->blocks->size;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007001 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007002 }
7003 }
7004 if (pool->blocks && pool->start == pool->blocks->s) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02007005 BLOCK *temp;
7006 int blockSize = (int)((unsigned)(pool->end - pool->start)*2U);
Victor Stinner5ff71322017-06-21 14:39:22 +02007007 size_t bytesToAllocate;
Victor Stinner23ec4b52017-06-15 00:54:36 +02007008
Victor Stinner93d0cb52017-08-18 23:43:54 +02007009 // NOTE: Needs to be calculated prior to calling `realloc`
7010 // to avoid dangling pointers:
7011 const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
7012
7013 if (blockSize < 0) {
7014 /* This condition traps a situation where either more than
7015 * INT_MAX/2 bytes have already been allocated. This isn't
7016 * readily testable, since it is unlikely that an average
7017 * machine will have that much memory, so we exclude it from the
7018 * coverage statistics.
7019 */
7020 return XML_FALSE; /* LCOV_EXCL_LINE */
7021 }
Victor Stinner23ec4b52017-06-15 00:54:36 +02007022
Victor Stinner5ff71322017-06-21 14:39:22 +02007023 bytesToAllocate = poolBytesToAllocateFor(blockSize);
7024 if (bytesToAllocate == 0)
7025 return XML_FALSE;
7026
Victor Stinner23ec4b52017-06-15 00:54:36 +02007027 temp = (BLOCK *)
Victor Stinner5ff71322017-06-21 14:39:22 +02007028 pool->mem->realloc_fcn(pool->blocks, (unsigned)bytesToAllocate);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07007029 if (temp == NULL)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007030 return XML_FALSE;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07007031 pool->blocks = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007032 pool->blocks->size = blockSize;
Victor Stinner93d0cb52017-08-18 23:43:54 +02007033 pool->ptr = pool->blocks->s + offsetInsideBlock;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007034 pool->start = pool->blocks->s;
7035 pool->end = pool->start + blockSize;
7036 }
7037 else {
7038 BLOCK *tem;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00007039 int blockSize = (int)(pool->end - pool->start);
Victor Stinner5ff71322017-06-21 14:39:22 +02007040 size_t bytesToAllocate;
Victor Stinner23ec4b52017-06-15 00:54:36 +02007041
Victor Stinner93d0cb52017-08-18 23:43:54 +02007042 if (blockSize < 0) {
7043 /* This condition traps a situation where either more than
7044 * INT_MAX bytes have already been allocated (which is prevented
7045 * by various pieces of program logic, not least this one, never
7046 * mind the unlikelihood of actually having that much memory) or
7047 * the pool control fields have been corrupted (which could
7048 * conceivably happen in an extremely buggy user handler
7049 * function). Either way it isn't readily testable, so we
7050 * exclude it from the coverage statistics.
7051 */
7052 return XML_FALSE; /* LCOV_EXCL_LINE */
7053 }
Victor Stinner23ec4b52017-06-15 00:54:36 +02007054
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007055 if (blockSize < INIT_BLOCK_SIZE)
7056 blockSize = INIT_BLOCK_SIZE;
Victor Stinner5ff71322017-06-21 14:39:22 +02007057 else {
7058 /* Detect overflow, avoiding _signed_ overflow undefined behavior */
7059 if ((int)((unsigned)blockSize * 2U) < 0) {
7060 return XML_FALSE;
7061 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007062 blockSize *= 2;
Victor Stinner5ff71322017-06-21 14:39:22 +02007063 }
7064
7065 bytesToAllocate = poolBytesToAllocateFor(blockSize);
7066 if (bytesToAllocate == 0)
7067 return XML_FALSE;
7068
7069 tem = (BLOCK *)pool->mem->malloc_fcn(bytesToAllocate);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007070 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007071 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007072 tem->size = blockSize;
7073 tem->next = pool->blocks;
7074 pool->blocks = tem;
7075 if (pool->ptr != pool->start)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007076 memcpy(tem->s, pool->start,
7077 (pool->ptr - pool->start) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007078 pool->ptr = tem->s + (pool->ptr - pool->start);
7079 pool->start = tem->s;
7080 pool->end = tem->s + blockSize;
7081 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007082 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007083}
7084
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007085static int FASTCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007086nextScaffoldPart(XML_Parser parser)
7087{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007088 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007089 CONTENT_SCAFFOLD * me;
7090 int next;
7091
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007092 if (!dtd->scaffIndex) {
7093 dtd->scaffIndex = (int *)MALLOC(groupSize * sizeof(int));
7094 if (!dtd->scaffIndex)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007095 return -1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007096 dtd->scaffIndex[0] = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007097 }
7098
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007099 if (dtd->scaffCount >= dtd->scaffSize) {
7100 CONTENT_SCAFFOLD *temp;
7101 if (dtd->scaffold) {
7102 temp = (CONTENT_SCAFFOLD *)
7103 REALLOC(dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
7104 if (temp == NULL)
7105 return -1;
7106 dtd->scaffSize *= 2;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007107 }
7108 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007109 temp = (CONTENT_SCAFFOLD *)MALLOC(INIT_SCAFFOLD_ELEMENTS
7110 * sizeof(CONTENT_SCAFFOLD));
7111 if (temp == NULL)
7112 return -1;
7113 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007114 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007115 dtd->scaffold = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007116 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007117 next = dtd->scaffCount++;
7118 me = &dtd->scaffold[next];
7119 if (dtd->scaffLevel) {
7120 CONTENT_SCAFFOLD *parent = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel-1]];
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007121 if (parent->lastchild) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007122 dtd->scaffold[parent->lastchild].nextsib = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007123 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007124 if (!parent->childcnt)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007125 parent->firstchild = next;
7126 parent->lastchild = next;
7127 parent->childcnt++;
7128 }
7129 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
7130 return next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007131}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007132
7133static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007134build_node(XML_Parser parser,
7135 int src_node,
7136 XML_Content *dest,
7137 XML_Content **contpos,
7138 XML_Char **strpos)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007139{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007140 DTD * const dtd = _dtd; /* save one level of indirection */
7141 dest->type = dtd->scaffold[src_node].type;
7142 dest->quant = dtd->scaffold[src_node].quant;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007143 if (dest->type == XML_CTYPE_NAME) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007144 const XML_Char *src;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007145 dest->name = *strpos;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007146 src = dtd->scaffold[src_node].name;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007147 for (;;) {
7148 *(*strpos)++ = *src;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007149 if (!*src)
7150 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007151 src++;
7152 }
7153 dest->numchildren = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007154 dest->children = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007155 }
7156 else {
7157 unsigned int i;
7158 int cn;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007159 dest->numchildren = dtd->scaffold[src_node].childcnt;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007160 dest->children = *contpos;
7161 *contpos += dest->numchildren;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007162 for (i = 0, cn = dtd->scaffold[src_node].firstchild;
7163 i < dest->numchildren;
7164 i++, cn = dtd->scaffold[cn].nextsib) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007165 build_node(parser, cn, &(dest->children[i]), contpos, strpos);
7166 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007167 dest->name = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007168 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007169}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007170
7171static XML_Content *
7172build_model (XML_Parser parser)
7173{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007174 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007175 XML_Content *ret;
7176 XML_Content *cpos;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007177 XML_Char * str;
7178 int allocsize = (dtd->scaffCount * sizeof(XML_Content)
7179 + (dtd->contentStringLen * sizeof(XML_Char)));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007180
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007181 ret = (XML_Content *)MALLOC(allocsize);
7182 if (!ret)
7183 return NULL;
7184
7185 str = (XML_Char *) (&ret[dtd->scaffCount]);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007186 cpos = &ret[1];
7187
7188 build_node(parser, 0, ret, &cpos, &str);
7189 return ret;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007190}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007191
7192static ELEMENT_TYPE *
7193getElementType(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007194 const ENCODING *enc,
7195 const char *ptr,
7196 const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007197{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007198 DTD * const dtd = _dtd; /* save one level of indirection */
7199 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007200 ELEMENT_TYPE *ret;
7201
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007202 if (!name)
7203 return NULL;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07007204 ret = (ELEMENT_TYPE *) lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007205 if (!ret)
7206 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007207 if (ret->name != name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007208 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007209 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007210 poolFinish(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007211 if (!setElementTypePrefix(parser, ret))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007212 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007213 }
7214 return ret;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007215}
Victor Stinner93d0cb52017-08-18 23:43:54 +02007216
7217static XML_Char *
7218copyString(const XML_Char *s,
7219 const XML_Memory_Handling_Suite *memsuite)
7220{
7221 int charsRequired = 0;
7222 XML_Char *result;
7223
7224 /* First determine how long the string is */
7225 while (s[charsRequired] != 0) {
7226 charsRequired++;
7227 }
7228 /* Include the terminator */
7229 charsRequired++;
7230
7231 /* Now allocate space for the copy */
7232 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
7233 if (result == NULL)
7234 return NULL;
7235 /* Copy the original into place */
7236 memcpy(result, s, charsRequired * sizeof(XML_Char));
7237 return result;
7238}