blob: b703e61a040454997366a052cc21c7201cc1ed6d [file] [log] [blame]
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001/* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
2 See the file COPYING for copying permission.
Victor Stinner5ff71322017-06-21 14:39:22 +02003
Victor Stinner93d0cb52017-08-18 23:43:54 +02004 101bfd65d1ff3d1511cf6671e6aae65f82cd97df6f4da137d46d510731830ad9 (2.2.3+)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005*/
6
Victor Stinner93d0cb52017-08-18 23:43:54 +02007#if !defined(_GNU_SOURCE)
8# define _GNU_SOURCE 1 /* syscall prototype */
9#endif
Victor Stinner5ff71322017-06-21 14:39:22 +020010
Victor Stinner23ec4b52017-06-15 00:54:36 +020011#include <stddef.h>
12#include <string.h> /* memset(), memcpy() */
13#include <assert.h>
14#include <limits.h> /* UINT_MAX */
Victor Stinner5ff71322017-06-21 14:39:22 +020015#include <stdio.h> /* fprintf */
16#include <stdlib.h> /* getenv */
Victor Stinner23ec4b52017-06-15 00:54:36 +020017
Victor Stinner5ff71322017-06-21 14:39:22 +020018#ifdef _WIN32
Victor Stinner23ec4b52017-06-15 00:54:36 +020019#define getpid GetCurrentProcessId
20#else
21#include <sys/time.h> /* gettimeofday() */
22#include <sys/types.h> /* getpid() */
23#include <unistd.h> /* getpid() */
Victor Stinner93d0cb52017-08-18 23:43:54 +020024#include <fcntl.h> /* O_RDONLY */
25#include <errno.h>
Victor Stinner23ec4b52017-06-15 00:54:36 +020026#endif
27
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070028#define XML_BUILDING_EXPAT 1
29
Victor Stinner5ff71322017-06-21 14:39:22 +020030#ifdef _WIN32
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070031#include "winconfig.h"
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070032#elif defined(HAVE_EXPAT_CONFIG_H)
33#include <expat_config.h>
Victor Stinner5ff71322017-06-21 14:39:22 +020034#endif /* ndef _WIN32 */
Christian Heimesaa152762013-12-06 23:43:50 +010035
Gregory P. Smith7c6309c2012-07-14 14:12:35 -070036#include "ascii.h"
Fred Drake08317ae2003-10-21 15:38:55 +000037#include "expat.h"
Victor Stinner5ff71322017-06-21 14:39:22 +020038#include "siphash.h"
Fred Drake08317ae2003-10-21 15:38:55 +000039
Victor Stinner93d0cb52017-08-18 23:43:54 +020040#if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
41# if defined(HAVE_GETRANDOM)
42# include <sys/random.h> /* getrandom */
43# else
44# include <unistd.h> /* syscall */
45# include <sys/syscall.h> /* SYS_getrandom */
46# endif
47# if ! defined(GRND_NONBLOCK)
48# define GRND_NONBLOCK 0x0001
49# endif /* ! defined(GRND_NONBLOCK) */
50#endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
51
52#if defined(HAVE_LIBBSD) \
53 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
54# include <bsd/stdlib.h>
55#endif
56
57#if defined(_WIN32) && !defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
58# define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
59#endif
60
61#if !defined(HAVE_GETRANDOM) && !defined(HAVE_SYSCALL_GETRANDOM) \
62 && !defined(HAVE_ARC4RANDOM_BUF) && !defined(HAVE_ARC4RANDOM) \
63 && !defined(XML_DEV_URANDOM) \
64 && !defined(_WIN32) \
65 && !defined(XML_POOR_ENTROPY)
66# error \
67 You do not have support for any sources of high quality entropy \
68 enabled. For end user security, that is probably not what you want. \
69 \
70 Your options include: \
71 * Linux + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
72 * Linux + glibc <2.25 (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
73 * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
74 * BSD / macOS <10.7 (arc4random): HAVE_ARC4RANDOM, \
75 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
76 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
77 * Linux / BSD / macOS (/dev/urandom): XML_DEV_URANDOM, \
78 * Windows (RtlGenRandom): _WIN32. \
79 \
80 If you insist on not using any of these, bypass this error by defining \
81 XML_POOR_ENTROPY; you have been warned. \
82 \
83 For CMake, one way to pass the define is: \
84 cmake -DCMAKE_C_FLAGS="-pipe -O2 -DHAVE_SYSCALL_GETRANDOM" . \
85 \
86 If you have reasons to patch this detection code away or need changes \
87 to the build system, please open a bug. Thank you!
88#endif
89
90
/* Select the encoding-layer entry points and the internal character type
   (ICHAR) that match the XML_Char representation chosen at build time. */
#ifdef XML_UNICODE
#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#define XmlConvert XmlUtf16Convert
#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
#define XmlEncode XmlUtf16Encode
/* Conversion is required unless the encoding is flagged as UTF-16 and the
   address of 's' has its low bit clear.  The address is converted to an
   integer with a cast; subtracting a null pointer from 's' would be
   undefined behavior because the two operands do not point into the same
   array object. */
#define MUST_CONVERT(enc, s) \
  (!(enc)->isUtf16 || (((size_t)(s)) & 1))
typedef unsigned short ICHAR;
#else
#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#define XmlConvert XmlUtf8Convert
#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
#define XmlEncode XmlUtf8Encode
/* Conversion is required unless the encoding is flagged as UTF-8. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
typedef char ICHAR;
#endif
109
110
/* Without XML_NS, every namespace-aware (...NS) encoding entry point is
   an alias for its plain counterpart. */
#if !defined(XML_NS)

#  define XmlInitEncodingNS XmlInitEncoding
#  define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
#  undef XmlGetInternalEncodingNS
#  define XmlGetInternalEncodingNS XmlGetInternalEncoding
#  define XmlParseXmlDeclNS XmlParseXmlDecl

#endif /* !defined(XML_NS) */
120
/* XML_T(x) yields the character constant x in the XML_Char type and
   XML_L(x) yields the string literal x in the matching literal form.
   The cast variants parenthesize both the argument and the result so that
   an expression argument is cast as a whole.  XML_L is deliberately left
   unparenthesized so that adjacent XML_L("...") literals still
   concatenate. */
#ifdef XML_UNICODE

#ifdef XML_UNICODE_WCHAR_T
#define XML_T(x) ((const wchar_t)(x))
#define XML_L(x) L ## x
#else
#define XML_T(x) ((const unsigned short)(x))
#define XML_L(x) x
#endif

#else

#define XML_T(x) x
#define XML_L(x) x

#endif
137
/* Smallest multiple of sz that is >= n; sz must be a power of 2. */
#define ROUND_UP(n, sz) \
  (((n) + ((sz) - 1)) & ~((sz) - 1))
140
Fred Drake08317ae2003-10-21 15:38:55 +0000141/* Handle the case where memmove() doesn't exist. */
142#ifndef HAVE_MEMMOVE
143#ifdef HAVE_BCOPY
144#define memmove(d,s,l) bcopy((s),(d),(l))
145#else
146#error memmove does not exist on this platform, nor is a substitute available
147#endif /* HAVE_BCOPY */
148#endif /* HAVE_MEMMOVE */
149
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000150#include "internal.h"
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000151#include "xmltok.h"
152#include "xmlrole.h"
153
154typedef const XML_Char *KEY;
155
156typedef struct {
157 KEY name;
158} NAMED;
159
160typedef struct {
161 NAMED **v;
Fred Drake08317ae2003-10-21 15:38:55 +0000162 unsigned char power;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000163 size_t size;
164 size_t used;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000165 const XML_Memory_Handling_Suite *mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000166} HASH_TABLE;
167
Victor Stinner5ff71322017-06-21 14:39:22 +0200168static size_t
169keylen(KEY s);
Fred Drake08317ae2003-10-21 15:38:55 +0000170
Victor Stinner5ff71322017-06-21 14:39:22 +0200171static void
172copy_salt_to_sipkey(XML_Parser parser, struct sipkey * key);
Fred Drake08317ae2003-10-21 15:38:55 +0000173
/* Probing after a collision needs a step size that is relatively prime to
   the hash table size, which is a power of 2.  Double hashing is used: a
   second hash value comes cheaply from the bits of the first hash value
   that were discarded (masked out) when the table index was computed as
   index = hash & mask, with mask = table->size - 1.  The step is limited
   to table->size / 4 (mask >> 2) and forced odd, because an odd number is
   always relatively prime to a power of 2.
*/
#define SECOND_HASH(hash, mask, power) \
  ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))

#define PROBE_STEP(hash, mask, power) \
  ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
186
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000187typedef struct {
188 NAMED **p;
189 NAMED **end;
190} HASH_TABLE_ITER;
191
/* Initial sizes for the parser's growable buffers and tables. */
#define INIT_TAG_BUF_SIZE   32  /* must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE  1024
#define INIT_ATTS_SIZE      16
#define INIT_ATTS_VERSION   0xFFFFFFFF
#define INIT_BLOCK_SIZE     1024
#define INIT_BUFFER_SIZE    1024

#define EXPAND_SPARE        24
200
201typedef struct binding {
202 struct prefix *prefix;
203 struct binding *nextTagBinding;
204 struct binding *prevPrefixBinding;
205 const struct attribute_id *attId;
206 XML_Char *uri;
207 int uriLen;
208 int uriAlloc;
209} BINDING;
210
211typedef struct prefix {
212 const XML_Char *name;
213 BINDING *binding;
214} PREFIX;
215
216typedef struct {
217 const XML_Char *str;
218 const XML_Char *localPart;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000219 const XML_Char *prefix;
220 int strLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000221 int uriLen;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000222 int prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000223} TAG_NAME;
224
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000225/* TAG represents an open element.
226 The name of the element is stored in both the document and API
227 encodings. The memory buffer 'buf' is a separately-allocated
228 memory area which stores the name. During the XML_Parse()/
229 XMLParseBuffer() when the element is open, the memory for the 'raw'
230 version of the name (in the document encoding) is shared with the
231 document buffer. If the element is open across calls to
232 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
233 contain the 'raw' name as well.
234
235 A parser re-uses these structures, maintaining a list of allocated
236 TAG objects in a free list.
237*/
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000238typedef struct tag {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000239 struct tag *parent; /* parent of this element */
240 const char *rawName; /* tagName in the original encoding */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000241 int rawNameLength;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000242 TAG_NAME name; /* tagName in the API encoding */
243 char *buf; /* buffer for name components */
244 char *bufEnd; /* end of the buffer */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000245 BINDING *bindings;
246} TAG;
247
248typedef struct {
249 const XML_Char *name;
250 const XML_Char *textPtr;
Fred Drake31d485c2004-08-03 07:06:22 +0000251 int textLen; /* length in XML_Chars */
252 int processed; /* # of processed bytes - when suspended */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000253 const XML_Char *systemId;
254 const XML_Char *base;
255 const XML_Char *publicId;
256 const XML_Char *notation;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000257 XML_Bool open;
258 XML_Bool is_param;
259 XML_Bool is_internal; /* true if declared in internal subset outside PE */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000260} ENTITY;
261
262typedef struct {
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000263 enum XML_Content_Type type;
264 enum XML_Content_Quant quant;
265 const XML_Char * name;
266 int firstchild;
267 int lastchild;
268 int childcnt;
269 int nextsib;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000270} CONTENT_SCAFFOLD;
271
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000272#define INIT_SCAFFOLD_ELEMENTS 32
273
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000274typedef struct block {
275 struct block *next;
276 int size;
277 XML_Char s[1];
278} BLOCK;
279
280typedef struct {
281 BLOCK *blocks;
282 BLOCK *freeBlocks;
283 const XML_Char *end;
284 XML_Char *ptr;
285 XML_Char *start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000286 const XML_Memory_Handling_Suite *mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000287} STRING_POOL;
288
289/* The XML_Char before the name is used to determine whether
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000290 an attribute has been specified. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000291typedef struct attribute_id {
292 XML_Char *name;
293 PREFIX *prefix;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000294 XML_Bool maybeTokenized;
295 XML_Bool xmlns;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000296} ATTRIBUTE_ID;
297
298typedef struct {
299 const ATTRIBUTE_ID *id;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000300 XML_Bool isCdata;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000301 const XML_Char *value;
302} DEFAULT_ATTRIBUTE;
303
304typedef struct {
Fred Drake08317ae2003-10-21 15:38:55 +0000305 unsigned long version;
306 unsigned long hash;
307 const XML_Char *uriName;
308} NS_ATT;
309
310typedef struct {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000311 const XML_Char *name;
312 PREFIX *prefix;
313 const ATTRIBUTE_ID *idAtt;
314 int nDefaultAtts;
315 int allocDefaultAtts;
316 DEFAULT_ATTRIBUTE *defaultAtts;
317} ELEMENT_TYPE;
318
319typedef struct {
320 HASH_TABLE generalEntities;
321 HASH_TABLE elementTypes;
322 HASH_TABLE attributeIds;
323 HASH_TABLE prefixes;
324 STRING_POOL pool;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000325 STRING_POOL entityValuePool;
326 /* false once a parameter entity reference has been skipped */
327 XML_Bool keepProcessing;
328 /* true once an internal or external PE reference has been encountered;
329 this includes the reference to an external subset */
330 XML_Bool hasParamEntityRefs;
331 XML_Bool standalone;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000332#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000333 /* indicates if external PE has been read */
334 XML_Bool paramEntityRead;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000335 HASH_TABLE paramEntities;
336#endif /* XML_DTD */
337 PREFIX defaultPrefix;
338 /* === scaffolding for building content model === */
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000339 XML_Bool in_eldecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000340 CONTENT_SCAFFOLD *scaffold;
341 unsigned contentStringLen;
342 unsigned scaffSize;
343 unsigned scaffCount;
344 int scaffLevel;
345 int *scaffIndex;
346} DTD;
347
348typedef struct open_internal_entity {
349 const char *internalEventPtr;
350 const char *internalEventEndPtr;
351 struct open_internal_entity *next;
352 ENTITY *entity;
Fred Drake31d485c2004-08-03 07:06:22 +0000353 int startTagLevel;
354 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000355} OPEN_INTERNAL_ENTITY;
356
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000357typedef enum XML_Error PTRCALL Processor(XML_Parser parser,
358 const char *start,
359 const char *end,
360 const char **endPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000361
362static Processor prologProcessor;
363static Processor prologInitProcessor;
364static Processor contentProcessor;
365static Processor cdataSectionProcessor;
366#ifdef XML_DTD
367static Processor ignoreSectionProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000368static Processor externalParEntProcessor;
369static Processor externalParEntInitProcessor;
370static Processor entityValueProcessor;
371static Processor entityValueInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000372#endif /* XML_DTD */
373static Processor epilogProcessor;
374static Processor errorProcessor;
375static Processor externalEntityInitProcessor;
376static Processor externalEntityInitProcessor2;
377static Processor externalEntityInitProcessor3;
378static Processor externalEntityContentProcessor;
Fred Drake31d485c2004-08-03 07:06:22 +0000379static Processor internalEntityProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000380
381static enum XML_Error
382handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
383static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000384processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
Fred Drake31d485c2004-08-03 07:06:22 +0000385 const char *s, const char *next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000386static enum XML_Error
387initializeEncoding(XML_Parser parser);
388static enum XML_Error
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700389doProlog(XML_Parser parser, const ENCODING *enc, const char *s,
390 const char *end, int tok, const char *next, const char **nextPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000391 XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000392static enum XML_Error
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700393processInternalEntity(XML_Parser parser, ENTITY *entity,
Fred Drake31d485c2004-08-03 07:06:22 +0000394 XML_Bool betweenDecl);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000395static enum XML_Error
396doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700397 const char *start, const char *end, const char **endPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000398 XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000399static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000400doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000401 const char *end, const char **nextPtr, XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000402#ifdef XML_DTD
403static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000404doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr,
Fred Drake31d485c2004-08-03 07:06:22 +0000405 const char *end, const char **nextPtr, XML_Bool haveMore);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000406#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000407
Victor Stinner5ff71322017-06-21 14:39:22 +0200408static void
409freeBindings(XML_Parser parser, BINDING *bindings);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000410static enum XML_Error
Fred Drake4faea012003-01-28 06:42:40 +0000411storeAtts(XML_Parser parser, const ENCODING *, const char *s,
412 TAG_NAME *tagNamePtr, BINDING **bindingsPtr);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000413static enum XML_Error
414addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
415 const XML_Char *uri, BINDING **bindingsPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000416static int
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700417defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
Fred Drake31d485c2004-08-03 07:06:22 +0000418 XML_Bool isId, const XML_Char *dfltValue, XML_Parser parser);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000419static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000420storeAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
421 const char *, const char *, STRING_POOL *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000422static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000423appendAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
424 const char *, const char *, STRING_POOL *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000425static ATTRIBUTE_ID *
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000426getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
427 const char *end);
428static int
429setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000430static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000431storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start,
432 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000433static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000434reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
435 const char *start, const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000436static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000437reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
438 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000439static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000440reportDefault(XML_Parser parser, const ENCODING *enc, const char *start,
441 const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000442
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000443static const XML_Char * getContext(XML_Parser parser);
444static XML_Bool
445setContext(XML_Parser parser, const XML_Char *context);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000446
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000447static void FASTCALL normalizePublicId(XML_Char *s);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000448
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000449static DTD * dtdCreate(const XML_Memory_Handling_Suite *ms);
450/* do not call if parentParser != NULL */
451static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
452static void
453dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms);
454static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700455dtdCopy(XML_Parser oldParser,
456 DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000457static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700458copyEntityTable(XML_Parser oldParser,
459 HASH_TABLE *, STRING_POOL *, const HASH_TABLE *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000460static NAMED *
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700461lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000462static void FASTCALL
463hashTableInit(HASH_TABLE *, const XML_Memory_Handling_Suite *ms);
464static void FASTCALL hashTableClear(HASH_TABLE *);
465static void FASTCALL hashTableDestroy(HASH_TABLE *);
466static void FASTCALL
467hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
468static NAMED * FASTCALL hashTableIterNext(HASH_TABLE_ITER *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000469
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000470static void FASTCALL
471poolInit(STRING_POOL *, const XML_Memory_Handling_Suite *ms);
472static void FASTCALL poolClear(STRING_POOL *);
473static void FASTCALL poolDestroy(STRING_POOL *);
474static XML_Char *
475poolAppend(STRING_POOL *pool, const ENCODING *enc,
476 const char *ptr, const char *end);
477static XML_Char *
478poolStoreString(STRING_POOL *pool, const ENCODING *enc,
479 const char *ptr, const char *end);
480static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
481static const XML_Char * FASTCALL
482poolCopyString(STRING_POOL *pool, const XML_Char *s);
483static const XML_Char *
484poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
485static const XML_Char * FASTCALL
486poolAppendString(STRING_POOL *pool, const XML_Char *s);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000487
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000488static int FASTCALL nextScaffoldPart(XML_Parser parser);
489static XML_Content * build_model(XML_Parser parser);
490static ELEMENT_TYPE *
491getElementType(XML_Parser parser, const ENCODING *enc,
492 const char *ptr, const char *end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000493
Victor Stinner93d0cb52017-08-18 23:43:54 +0200494static XML_Char *copyString(const XML_Char *s,
495 const XML_Memory_Handling_Suite *memsuite);
496
Victor Stinner23ec4b52017-06-15 00:54:36 +0200497static unsigned long generate_hash_secret_salt(XML_Parser parser);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700498static XML_Bool startParsing(XML_Parser parser);
499
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000500static XML_Parser
501parserCreate(const XML_Char *encodingName,
502 const XML_Memory_Handling_Suite *memsuite,
503 const XML_Char *nameSep,
504 DTD *dtd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700505
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000506static void
507parserInit(XML_Parser parser, const XML_Char *encodingName);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000508
/* Accessors for a STRING_POOL.  'pool' may be any expression of pointer
   type; every use is parenthesized so arguments such as &pools[i] expand
   correctly. */
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Drop the most recently appended character. */
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
/* Reset the write position to the start of the current string. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Move the start of the current string up to the write position. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Append c, calling poolGrow() first when the write position has reached
   'end'; evaluates to 0 if poolGrow() reports failure and to 1 otherwise.
   'pool' is evaluated more than once. */
#define poolAppendChar(pool, c) \
  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
   ? 0 \
   : ((*((pool)->ptr)++ = (c)), 1))
520
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000521struct XML_ParserStruct {
522 /* The first member must be userData so that the XML_GetUserData
523 macro works. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000524 void *m_userData;
525 void *m_handlerArg;
526 char *m_buffer;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000527 const XML_Memory_Handling_Suite m_mem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000528 /* first character to be parsed */
529 const char *m_bufferPtr;
530 /* past last character to be parsed */
531 char *m_bufferEnd;
532 /* allocated end of buffer */
533 const char *m_bufferLim;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000534 XML_Index m_parseEndByteIndex;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000535 const char *m_parseEndPtr;
536 XML_Char *m_dataBuf;
537 XML_Char *m_dataBufEnd;
538 XML_StartElementHandler m_startElementHandler;
539 XML_EndElementHandler m_endElementHandler;
540 XML_CharacterDataHandler m_characterDataHandler;
541 XML_ProcessingInstructionHandler m_processingInstructionHandler;
542 XML_CommentHandler m_commentHandler;
543 XML_StartCdataSectionHandler m_startCdataSectionHandler;
544 XML_EndCdataSectionHandler m_endCdataSectionHandler;
545 XML_DefaultHandler m_defaultHandler;
546 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
547 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
548 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
549 XML_NotationDeclHandler m_notationDeclHandler;
550 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
551 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
552 XML_NotStandaloneHandler m_notStandaloneHandler;
553 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000554 XML_Parser m_externalEntityRefHandlerArg;
555 XML_SkippedEntityHandler m_skippedEntityHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000556 XML_UnknownEncodingHandler m_unknownEncodingHandler;
557 XML_ElementDeclHandler m_elementDeclHandler;
558 XML_AttlistDeclHandler m_attlistDeclHandler;
559 XML_EntityDeclHandler m_entityDeclHandler;
560 XML_XmlDeclHandler m_xmlDeclHandler;
561 const ENCODING *m_encoding;
562 INIT_ENCODING m_initEncoding;
563 const ENCODING *m_internalEncoding;
564 const XML_Char *m_protocolEncodingName;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000565 XML_Bool m_ns;
566 XML_Bool m_ns_triplets;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000567 void *m_unknownEncodingMem;
568 void *m_unknownEncodingData;
569 void *m_unknownEncodingHandlerData;
Fred Drake31d485c2004-08-03 07:06:22 +0000570 void (XMLCALL *m_unknownEncodingRelease)(void *);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000571 PROLOG_STATE m_prologState;
572 Processor *m_processor;
573 enum XML_Error m_errorCode;
574 const char *m_eventPtr;
575 const char *m_eventEndPtr;
576 const char *m_positionPtr;
577 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
Fred Drake31d485c2004-08-03 07:06:22 +0000578 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000579 XML_Bool m_defaultExpandInternalEntities;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000580 int m_tagLevel;
581 ENTITY *m_declEntity;
582 const XML_Char *m_doctypeName;
583 const XML_Char *m_doctypeSysid;
584 const XML_Char *m_doctypePubid;
585 const XML_Char *m_declAttributeType;
586 const XML_Char *m_declNotationName;
587 const XML_Char *m_declNotationPublicId;
588 ELEMENT_TYPE *m_declElementType;
589 ATTRIBUTE_ID *m_declAttributeId;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000590 XML_Bool m_declAttributeIsCdata;
591 XML_Bool m_declAttributeIsId;
592 DTD *m_dtd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000593 const XML_Char *m_curBase;
594 TAG *m_tagStack;
595 TAG *m_freeTagList;
596 BINDING *m_inheritedBindings;
597 BINDING *m_freeBindingList;
598 int m_attsSize;
599 int m_nSpecifiedAtts;
600 int m_idAttIndex;
601 ATTRIBUTE *m_atts;
Fred Drake08317ae2003-10-21 15:38:55 +0000602 NS_ATT *m_nsAtts;
603 unsigned long m_nsAttsVersion;
604 unsigned char m_nsAttsPower;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700605#ifdef XML_ATTR_INFO
606 XML_AttrInfo *m_attInfo;
607#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000608 POSITION m_position;
609 STRING_POOL m_tempPool;
610 STRING_POOL m_temp2Pool;
611 char *m_groupConnector;
Fred Drake08317ae2003-10-21 15:38:55 +0000612 unsigned int m_groupSize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000613 XML_Char m_namespaceSeparator;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000614 XML_Parser m_parentParser;
Fred Drake31d485c2004-08-03 07:06:22 +0000615 XML_ParsingStatus m_parsingStatus;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000616#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000617 XML_Bool m_isParamEntity;
618 XML_Bool m_useForeignDTD;
619 enum XML_ParamEntityParsing m_paramEntityParsing;
620#endif
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700621 unsigned long m_hash_secret_salt;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000622};
623
/* Allocation shorthands that dispatch through the memory-handling suite
   stored in the parser.  A variable named 'parser' must be in scope at
   the point of use. */
#define MALLOC(s)      (parser->m_mem.malloc_fcn(s))
#define REALLOC(p, s)  (parser->m_mem.realloc_fcn((p), (s)))
#define FREE(p)        (parser->m_mem.free_fcn(p))
627
/* Shorthand accessors: each name below expands to the corresponding m_
   member of the XML_ParserStruct pointed to by a variable named 'parser',
   which must be in scope at the point of use.

   No alias is provided for an internal-entity-reference handler:
   XML_ParserStruct has no m_internalEntityRefHandler member, so such an
   alias could never be used in code that compiles. */
#define userData (parser->m_userData)
#define handlerArg (parser->m_handlerArg)
#define startElementHandler (parser->m_startElementHandler)
#define endElementHandler (parser->m_endElementHandler)
#define characterDataHandler (parser->m_characterDataHandler)
#define processingInstructionHandler \
        (parser->m_processingInstructionHandler)
#define commentHandler (parser->m_commentHandler)
#define startCdataSectionHandler \
        (parser->m_startCdataSectionHandler)
#define endCdataSectionHandler (parser->m_endCdataSectionHandler)
#define defaultHandler (parser->m_defaultHandler)
#define startDoctypeDeclHandler (parser->m_startDoctypeDeclHandler)
#define endDoctypeDeclHandler (parser->m_endDoctypeDeclHandler)
#define unparsedEntityDeclHandler \
        (parser->m_unparsedEntityDeclHandler)
#define notationDeclHandler (parser->m_notationDeclHandler)
#define startNamespaceDeclHandler \
        (parser->m_startNamespaceDeclHandler)
#define endNamespaceDeclHandler (parser->m_endNamespaceDeclHandler)
#define notStandaloneHandler (parser->m_notStandaloneHandler)
#define externalEntityRefHandler \
        (parser->m_externalEntityRefHandler)
#define externalEntityRefHandlerArg \
        (parser->m_externalEntityRefHandlerArg)
#define skippedEntityHandler (parser->m_skippedEntityHandler)
#define unknownEncodingHandler (parser->m_unknownEncodingHandler)
#define elementDeclHandler (parser->m_elementDeclHandler)
#define attlistDeclHandler (parser->m_attlistDeclHandler)
#define entityDeclHandler (parser->m_entityDeclHandler)
#define xmlDeclHandler (parser->m_xmlDeclHandler)
#define encoding (parser->m_encoding)
#define initEncoding (parser->m_initEncoding)
#define internalEncoding (parser->m_internalEncoding)
#define unknownEncodingMem (parser->m_unknownEncodingMem)
#define unknownEncodingData (parser->m_unknownEncodingData)
#define unknownEncodingHandlerData \
  (parser->m_unknownEncodingHandlerData)
#define unknownEncodingRelease (parser->m_unknownEncodingRelease)
#define protocolEncodingName (parser->m_protocolEncodingName)
#define ns (parser->m_ns)
#define ns_triplets (parser->m_ns_triplets)
#define prologState (parser->m_prologState)
#define processor (parser->m_processor)
#define errorCode (parser->m_errorCode)
#define eventPtr (parser->m_eventPtr)
#define eventEndPtr (parser->m_eventEndPtr)
#define positionPtr (parser->m_positionPtr)
#define position (parser->m_position)
#define openInternalEntities (parser->m_openInternalEntities)
#define freeInternalEntities (parser->m_freeInternalEntities)
#define defaultExpandInternalEntities \
        (parser->m_defaultExpandInternalEntities)
#define tagLevel (parser->m_tagLevel)
#define buffer (parser->m_buffer)
#define bufferPtr (parser->m_bufferPtr)
#define bufferEnd (parser->m_bufferEnd)
#define parseEndByteIndex (parser->m_parseEndByteIndex)
#define parseEndPtr (parser->m_parseEndPtr)
#define bufferLim (parser->m_bufferLim)
#define dataBuf (parser->m_dataBuf)
#define dataBufEnd (parser->m_dataBufEnd)
#define _dtd (parser->m_dtd)
#define curBase (parser->m_curBase)
#define declEntity (parser->m_declEntity)
#define doctypeName (parser->m_doctypeName)
#define doctypeSysid (parser->m_doctypeSysid)
#define doctypePubid (parser->m_doctypePubid)
#define declAttributeType (parser->m_declAttributeType)
#define declNotationName (parser->m_declNotationName)
#define declNotationPublicId (parser->m_declNotationPublicId)
#define declElementType (parser->m_declElementType)
#define declAttributeId (parser->m_declAttributeId)
#define declAttributeIsCdata (parser->m_declAttributeIsCdata)
#define declAttributeIsId (parser->m_declAttributeIsId)
#define freeTagList (parser->m_freeTagList)
#define freeBindingList (parser->m_freeBindingList)
#define inheritedBindings (parser->m_inheritedBindings)
#define tagStack (parser->m_tagStack)
#define atts (parser->m_atts)
#define attsSize (parser->m_attsSize)
#define nSpecifiedAtts (parser->m_nSpecifiedAtts)
712#define idAttIndex (parser->m_idAttIndex)
Fred Drake08317ae2003-10-21 15:38:55 +0000713#define nsAtts (parser->m_nsAtts)
714#define nsAttsVersion (parser->m_nsAttsVersion)
715#define nsAttsPower (parser->m_nsAttsPower)
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700716#define attInfo (parser->m_attInfo)
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000717#define tempPool (parser->m_tempPool)
718#define temp2Pool (parser->m_temp2Pool)
719#define groupConnector (parser->m_groupConnector)
720#define groupSize (parser->m_groupSize)
721#define namespaceSeparator (parser->m_namespaceSeparator)
722#define parentParser (parser->m_parentParser)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000723#define ps_parsing (parser->m_parsingStatus.parsing)
724#define ps_finalBuffer (parser->m_parsingStatus.finalBuffer)
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000725#ifdef XML_DTD
726#define isParamEntity (parser->m_isParamEntity)
727#define useForeignDTD (parser->m_useForeignDTD)
728#define paramEntityParsing (parser->m_paramEntityParsing)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000729#endif /* XML_DTD */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700730#define hash_secret_salt (parser->m_hash_secret_salt)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000731
Fred Drake08317ae2003-10-21 15:38:55 +0000732XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000733XML_ParserCreate(const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000734{
735 return XML_ParserCreate_MM(encodingName, NULL, NULL);
736}
737
Fred Drake08317ae2003-10-21 15:38:55 +0000738XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000739XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000740{
741 XML_Char tmp[2];
742 *tmp = nsSep;
743 return XML_ParserCreate_MM(encodingName, NULL, tmp);
744}
745
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000746static const XML_Char implicitContext[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700747 ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, ASCII_t, ASCII_t, ASCII_p,
748 ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
749 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g,
750 ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9,
751 ASCII_9, ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
752 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0'
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000753};
754
Victor Stinner5ff71322017-06-21 14:39:22 +0200755
756#if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
Victor Stinner5ff71322017-06-21 14:39:22 +0200757
/* Obtain entropy on Linux 3.17+ via getrandom with GRND_NONBLOCK.

   target  buffer to fill
   count   number of bytes to write to target

   Returns 1 once all count bytes have been written (trivially so for
   count == 0), 0 otherwise.

   errno is only consulted after a call that actually failed: a short
   but successful read simply continues with the remainder, a failure
   with EINTR is retried, and any other failure (or a zero-byte result)
   gives up.
*/
static int
writeRandomBytes_getrandom_nonblock(void * target, size_t count) {
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = GRND_NONBLOCK;

  while (bytesWrittenTotal < count) {
    void * const currentTarget = (void*)((char*)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    /* long rather than int so that the result is not narrowed */
    const long bytesWrittenMore =
#if defined(HAVE_GETRANDOM)
        getrandom(currentTarget, bytesToWrite, getrandomFlags);
#else
        syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
#endif

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
    } else if ((bytesWrittenMore == 0) || (errno != EINTR)) {
      return 0;
    }
  }

  return 1;
}
785
786#endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
787
788
Victor Stinner93d0cb52017-08-18 23:43:54 +0200789#if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
790
/* Extract entropy from /dev/urandom.

   target  buffer to fill
   count   number of bytes to write to target

   Returns 1 once all count bytes have been read into target, 0 if the
   device cannot be opened or a read fails.

   errno is only consulted after read() has reported an error: a short
   read continues with the remainder, EINTR is retried, and any other
   error or an unexpected end-of-file gives up instead of looping.
*/
static int
writeRandomBytes_dev_urandom(void * target, size_t count) {
  int success = 1;  /* cleared as soon as a read fails for good */
  size_t bytesWrittenTotal = 0;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  while (bytesWrittenTotal < count) {
    void * const currentTarget = (void*)((char*)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
    } else if ((bytesWrittenMore == 0) || (errno != EINTR)) {
      success = 0;
      break;
    }
  }

  close(fd);
  return success;
}
818
819#endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
820
821
822#if defined(HAVE_ARC4RANDOM)
823
/* Fill target with count bytes taken from successive arc4random()
   results.  Each 32-bit result is consumed least significant byte
   first; a trailing partial word uses only as many bytes as are still
   needed.
*/
static void
writeRandomBytes_arc4random(void * target, size_t count) {
  uint8_t * const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    const uint32_t word = arc4random();
    const size_t remaining = count - filled;
    const size_t chunk = (remaining < sizeof(word)) ? remaining : sizeof(word);
    size_t k;

    for (k = 0; k < chunk; k++) {
      out[filled + k] = (uint8_t)(word >> (k * 8));
    }
    filled += chunk;
  }
}
839
840#endif /* defined(HAVE_ARC4RANDOM) */
841
842
Victor Stinner5ff71322017-06-21 14:39:22 +0200843#ifdef _WIN32
844
845typedef BOOLEAN (APIENTRY *RTLGENRANDOM_FUNC)(PVOID, ULONG);
Victor Stinner93d0cb52017-08-18 23:43:54 +0200846HMODULE _Expat_LoadLibrary(LPCTSTR filename); /* see loadlibrary.c */
Victor Stinner5ff71322017-06-21 14:39:22 +0200847
848/* Obtain entropy on Windows XP / Windows Server 2003 and later.
Victor Stinner93d0cb52017-08-18 23:43:54 +0200849 * Hint on RtlGenRandom and the following article from libsodium.
Victor Stinner5ff71322017-06-21 14:39:22 +0200850 *
851 * Michael Howard: Cryptographically Secure Random number on Windows without using CryptoAPI
852 * https://blogs.msdn.microsoft.com/michael_howard/2005/01/14/cryptographically-secure-random-number-on-windows-without-using-cryptoapi/
853 */
854static int
855writeRandomBytes_RtlGenRandom(void * target, size_t count) {
856 int success = 0; /* full count bytes written? */
Victor Stinner93d0cb52017-08-18 23:43:54 +0200857 const HMODULE advapi32 = _Expat_LoadLibrary(TEXT("ADVAPI32.DLL"));
Victor Stinner5ff71322017-06-21 14:39:22 +0200858
859 if (advapi32) {
860 const RTLGENRANDOM_FUNC RtlGenRandom
861 = (RTLGENRANDOM_FUNC)GetProcAddress(advapi32, "SystemFunction036");
862 if (RtlGenRandom) {
863 if (RtlGenRandom((PVOID)target, (ULONG)count) == TRUE) {
864 success = 1;
865 }
866 }
867 FreeLibrary(advapi32);
868 }
869
870 return success;
871}
872
873#endif /* _WIN32 */
874
875
Victor Stinner93d0cb52017-08-18 23:43:54 +0200876#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
877
/* Derive a small amount of entropy from the current time.
   On Windows the two halves of the system FILETIME are XORed together;
   elsewhere the microsecond field of gettimeofday() is returned.
*/
static unsigned long
gather_time_entropy(void)
{
#ifdef _WIN32
  FILETIME fileTime;

  GetSystemTimeAsFileTime(&fileTime); /* never fails */
  return fileTime.dwHighDateTime ^ fileTime.dwLowDateTime;
#else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  /* checked in debug builds only; the cast keeps NDEBUG builds free of
     an unused-variable warning */
  assert(rc == 0);
  (void)rc;

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#endif
}
901
Victor Stinner93d0cb52017-08-18 23:43:54 +0200902#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
903
Victor Stinner5ff71322017-06-21 14:39:22 +0200904
/* Pass-through helper for tracing where the hash salt came from.
   When the environment variable EXPAT_ENTROPY_DEBUG is exactly "1", the
   label, the value (zero-padded hex) and its size are written to
   stderr.  The entropy argument is always returned unchanged.
*/
static unsigned long
ENTROPY_DEBUG(const char * label, unsigned long entropy) {
  const char * const setting = getenv("EXPAT_ENTROPY_DEBUG");
  const int enabled = (setting != NULL) && (strcmp(setting, "1") == 0);

  if (enabled) {
    fprintf(stderr, "Entropy: %s --> 0x%0*lx (%lu bytes)\n",
        label,
        (int)sizeof(entropy) * 2, entropy,
        (unsigned long)sizeof(entropy));
  }
  return entropy;
}
916
Victor Stinner23ec4b52017-06-15 00:54:36 +0200917static unsigned long
918generate_hash_secret_salt(XML_Parser parser)
919{
Victor Stinner5ff71322017-06-21 14:39:22 +0200920 unsigned long entropy;
921 (void)parser;
Victor Stinner93d0cb52017-08-18 23:43:54 +0200922#if defined(HAVE_ARC4RANDOM_BUF)
Victor Stinner5ff71322017-06-21 14:39:22 +0200923 arc4random_buf(&entropy, sizeof(entropy));
924 return ENTROPY_DEBUG("arc4random_buf", entropy);
Victor Stinner93d0cb52017-08-18 23:43:54 +0200925#elif defined(HAVE_ARC4RANDOM)
926 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
927 return ENTROPY_DEBUG("arc4random", entropy);
Victor Stinner5ff71322017-06-21 14:39:22 +0200928#else
929 /* Try high quality providers first .. */
930#ifdef _WIN32
931 if (writeRandomBytes_RtlGenRandom((void *)&entropy, sizeof(entropy))) {
932 return ENTROPY_DEBUG("RtlGenRandom", entropy);
933 }
934#elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
Victor Stinner93d0cb52017-08-18 23:43:54 +0200935 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
Victor Stinner5ff71322017-06-21 14:39:22 +0200936 return ENTROPY_DEBUG("getrandom", entropy);
937 }
938#endif
Victor Stinner93d0cb52017-08-18 23:43:54 +0200939#if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
940 if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
941 return ENTROPY_DEBUG("/dev/urandom", entropy);
942 }
943#endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
Victor Stinner5ff71322017-06-21 14:39:22 +0200944 /* .. and self-made low quality for backup: */
945
946 /* Process ID is 0 bits entropy if attacker has local access */
947 entropy = gather_time_entropy() ^ getpid();
Victor Stinner23ec4b52017-06-15 00:54:36 +0200948
949 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
950 if (sizeof(unsigned long) == 4) {
Victor Stinner5ff71322017-06-21 14:39:22 +0200951 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200952 } else {
Victor Stinner5ff71322017-06-21 14:39:22 +0200953 return ENTROPY_DEBUG("fallback(8)",
Victor Stinner93d0cb52017-08-18 23:43:54 +0200954 entropy * (unsigned long)2305843009213693951ULL);
Victor Stinner23ec4b52017-06-15 00:54:36 +0200955 }
Victor Stinner5ff71322017-06-21 14:39:22 +0200956#endif
957}
958
959static unsigned long
960get_hash_secret_salt(XML_Parser parser) {
961 if (parser->m_parentParser != NULL)
962 return get_hash_secret_salt(parser->m_parentParser);
963 return parser->m_hash_secret_salt;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700964}
965
966static XML_Bool /* only valid for root parser */
967startParsing(XML_Parser parser)
968{
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700969 /* hash functions must be initialized before setContext() is called */
970 if (hash_secret_salt == 0)
Victor Stinner23ec4b52017-06-15 00:54:36 +0200971 hash_secret_salt = generate_hash_secret_salt(parser);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700972 if (ns) {
973 /* implicit context only set for root parser, since child
974 parsers (i.e. external entity parsers) will inherit it
975 */
976 return setContext(parser, implicitContext);
977 }
978 return XML_TRUE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700979}
980
981XML_Parser XMLCALL
982XML_ParserCreate_MM(const XML_Char *encodingName,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -0700983 const XML_Memory_Handling_Suite *memsuite,
984 const XML_Char *nameSep)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -0700985{
986 return parserCreate(encodingName, memsuite, nameSep, NULL);
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000987}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000988
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000989static XML_Parser
990parserCreate(const XML_Char *encodingName,
991 const XML_Memory_Handling_Suite *memsuite,
992 const XML_Char *nameSep,
993 DTD *dtd)
994{
995 XML_Parser parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +0000996
997 if (memsuite) {
998 XML_Memory_Handling_Suite *mtemp;
Martin v. Löwisfc03a942003-01-25 22:41:29 +0000999 parser = (XML_Parser)
1000 memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
1001 if (parser != NULL) {
1002 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1003 mtemp->malloc_fcn = memsuite->malloc_fcn;
1004 mtemp->realloc_fcn = memsuite->realloc_fcn;
1005 mtemp->free_fcn = memsuite->free_fcn;
1006 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001007 }
1008 else {
1009 XML_Memory_Handling_Suite *mtemp;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001010 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
1011 if (parser != NULL) {
1012 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1013 mtemp->malloc_fcn = malloc;
1014 mtemp->realloc_fcn = realloc;
1015 mtemp->free_fcn = free;
1016 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001017 }
1018
1019 if (!parser)
1020 return parser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001021
1022 buffer = NULL;
1023 bufferLim = NULL;
1024
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001025 attsSize = INIT_ATTS_SIZE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001026 atts = (ATTRIBUTE *)MALLOC(attsSize * sizeof(ATTRIBUTE));
1027 if (atts == NULL) {
1028 FREE(parser);
1029 return NULL;
1030 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001031#ifdef XML_ATTR_INFO
1032 attInfo = (XML_AttrInfo*)MALLOC(attsSize * sizeof(XML_AttrInfo));
1033 if (attInfo == NULL) {
1034 FREE(atts);
1035 FREE(parser);
1036 return NULL;
1037 }
1038#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001039 dataBuf = (XML_Char *)MALLOC(INIT_DATA_BUF_SIZE * sizeof(XML_Char));
1040 if (dataBuf == NULL) {
1041 FREE(atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001042#ifdef XML_ATTR_INFO
1043 FREE(attInfo);
1044#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001045 FREE(parser);
1046 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001047 }
1048 dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE;
1049
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001050 if (dtd)
1051 _dtd = dtd;
1052 else {
1053 _dtd = dtdCreate(&parser->m_mem);
1054 if (_dtd == NULL) {
1055 FREE(dataBuf);
1056 FREE(atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001057#ifdef XML_ATTR_INFO
1058 FREE(attInfo);
1059#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001060 FREE(parser);
1061 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001062 }
1063 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001064
1065 freeBindingList = NULL;
1066 freeTagList = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00001067 freeInternalEntities = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001068
1069 groupSize = 0;
1070 groupConnector = NULL;
1071
1072 unknownEncodingHandler = NULL;
1073 unknownEncodingHandlerData = NULL;
1074
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001075 namespaceSeparator = ASCII_EXCL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001076 ns = XML_FALSE;
1077 ns_triplets = XML_FALSE;
1078
Fred Drake08317ae2003-10-21 15:38:55 +00001079 nsAtts = NULL;
1080 nsAttsVersion = 0;
1081 nsAttsPower = 0;
1082
Victor Stinner93d0cb52017-08-18 23:43:54 +02001083 protocolEncodingName = NULL;
1084
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001085 poolInit(&tempPool, &(parser->m_mem));
1086 poolInit(&temp2Pool, &(parser->m_mem));
1087 parserInit(parser, encodingName);
1088
1089 if (encodingName && !protocolEncodingName) {
1090 XML_ParserFree(parser);
1091 return NULL;
1092 }
1093
1094 if (nameSep) {
1095 ns = XML_TRUE;
1096 internalEncoding = XmlGetInternalEncodingNS();
1097 namespaceSeparator = *nameSep;
1098 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001099 else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001100 internalEncoding = XmlGetInternalEncoding();
1101 }
1102
1103 return parser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001104}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001105
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001106static void
1107parserInit(XML_Parser parser, const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001108{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001109 processor = prologInitProcessor;
1110 XmlPrologStateInit(&prologState);
Victor Stinner93d0cb52017-08-18 23:43:54 +02001111 if (encodingName != NULL) {
1112 protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1113 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001114 curBase = NULL;
1115 XmlInitEncoding(&initEncoding, &encoding, 0);
1116 userData = NULL;
1117 handlerArg = NULL;
1118 startElementHandler = NULL;
1119 endElementHandler = NULL;
1120 characterDataHandler = NULL;
1121 processingInstructionHandler = NULL;
1122 commentHandler = NULL;
1123 startCdataSectionHandler = NULL;
1124 endCdataSectionHandler = NULL;
1125 defaultHandler = NULL;
1126 startDoctypeDeclHandler = NULL;
1127 endDoctypeDeclHandler = NULL;
1128 unparsedEntityDeclHandler = NULL;
1129 notationDeclHandler = NULL;
1130 startNamespaceDeclHandler = NULL;
1131 endNamespaceDeclHandler = NULL;
1132 notStandaloneHandler = NULL;
1133 externalEntityRefHandler = NULL;
1134 externalEntityRefHandlerArg = parser;
1135 skippedEntityHandler = NULL;
1136 elementDeclHandler = NULL;
1137 attlistDeclHandler = NULL;
1138 entityDeclHandler = NULL;
1139 xmlDeclHandler = NULL;
1140 bufferPtr = buffer;
1141 bufferEnd = buffer;
1142 parseEndByteIndex = 0;
1143 parseEndPtr = NULL;
1144 declElementType = NULL;
1145 declAttributeId = NULL;
1146 declEntity = NULL;
1147 doctypeName = NULL;
1148 doctypeSysid = NULL;
1149 doctypePubid = NULL;
1150 declAttributeType = NULL;
1151 declNotationName = NULL;
1152 declNotationPublicId = NULL;
1153 declAttributeIsCdata = XML_FALSE;
1154 declAttributeIsId = XML_FALSE;
1155 memset(&position, 0, sizeof(POSITION));
1156 errorCode = XML_ERROR_NONE;
1157 eventPtr = NULL;
1158 eventEndPtr = NULL;
1159 positionPtr = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00001160 openInternalEntities = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001161 defaultExpandInternalEntities = XML_TRUE;
1162 tagLevel = 0;
1163 tagStack = NULL;
1164 inheritedBindings = NULL;
1165 nSpecifiedAtts = 0;
1166 unknownEncodingMem = NULL;
1167 unknownEncodingRelease = NULL;
1168 unknownEncodingData = NULL;
1169 parentParser = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001170 ps_parsing = XML_INITIALIZED;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001171#ifdef XML_DTD
1172 isParamEntity = XML_FALSE;
1173 useForeignDTD = XML_FALSE;
1174 paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1175#endif
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001176 hash_secret_salt = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001177}
1178
1179/* moves list of bindings to freeBindingList */
1180static void FASTCALL
1181moveToFreeBindingList(XML_Parser parser, BINDING *bindings)
1182{
1183 while (bindings) {
1184 BINDING *b = bindings;
1185 bindings = bindings->nextTagBinding;
1186 b->nextTagBinding = freeBindingList;
1187 freeBindingList = b;
1188 }
1189}
1190
Fred Drake08317ae2003-10-21 15:38:55 +00001191XML_Bool XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001192XML_ParserReset(XML_Parser parser, const XML_Char *encodingName)
1193{
1194 TAG *tStk;
Fred Drake31d485c2004-08-03 07:06:22 +00001195 OPEN_INTERNAL_ENTITY *openEntityList;
Victor Stinner5ff71322017-06-21 14:39:22 +02001196
1197 if (parser == NULL)
1198 return XML_FALSE;
1199
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001200 if (parentParser)
1201 return XML_FALSE;
1202 /* move tagStack to freeTagList */
1203 tStk = tagStack;
1204 while (tStk) {
1205 TAG *tag = tStk;
1206 tStk = tStk->parent;
1207 tag->parent = freeTagList;
1208 moveToFreeBindingList(parser, tag->bindings);
1209 tag->bindings = NULL;
1210 freeTagList = tag;
1211 }
Fred Drake31d485c2004-08-03 07:06:22 +00001212 /* move openInternalEntities to freeInternalEntities */
1213 openEntityList = openInternalEntities;
1214 while (openEntityList) {
1215 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1216 openEntityList = openEntity->next;
1217 openEntity->next = freeInternalEntities;
1218 freeInternalEntities = openEntity;
1219 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001220 moveToFreeBindingList(parser, inheritedBindings);
Fred Drake08317ae2003-10-21 15:38:55 +00001221 FREE(unknownEncodingMem);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001222 if (unknownEncodingRelease)
1223 unknownEncodingRelease(unknownEncodingData);
1224 poolClear(&tempPool);
1225 poolClear(&temp2Pool);
Victor Stinner93d0cb52017-08-18 23:43:54 +02001226 FREE((void *)protocolEncodingName);
1227 protocolEncodingName = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001228 parserInit(parser, encodingName);
1229 dtdReset(_dtd, &parser->m_mem);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001230 return XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001231}
1232
Fred Drake08317ae2003-10-21 15:38:55 +00001233enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001234XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
1235{
Victor Stinner5ff71322017-06-21 14:39:22 +02001236 if (parser == NULL)
1237 return XML_STATUS_ERROR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001238 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1239 XXX There's no way for the caller to determine which of the
1240 XXX possible error cases caused the XML_STATUS_ERROR return.
1241 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001242 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001243 return XML_STATUS_ERROR;
Victor Stinner93d0cb52017-08-18 23:43:54 +02001244
1245 /* Get rid of any previous encoding name */
1246 FREE((void *)protocolEncodingName);
1247
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001248 if (encodingName == NULL)
Victor Stinner93d0cb52017-08-18 23:43:54 +02001249 /* No new encoding name */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001250 protocolEncodingName = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001251 else {
Victor Stinner93d0cb52017-08-18 23:43:54 +02001252 /* Copy the new encoding name into allocated memory */
1253 protocolEncodingName = copyString(encodingName, &(parser->m_mem));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001254 if (!protocolEncodingName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001255 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001256 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001257 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001258}
1259
Fred Drake08317ae2003-10-21 15:38:55 +00001260XML_Parser XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001261XML_ExternalEntityParserCreate(XML_Parser oldParser,
1262 const XML_Char *context,
1263 const XML_Char *encodingName)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001264{
1265 XML_Parser parser = oldParser;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001266 DTD *newDtd = NULL;
Victor Stinner5ff71322017-06-21 14:39:22 +02001267 DTD *oldDtd;
1268 XML_StartElementHandler oldStartElementHandler;
1269 XML_EndElementHandler oldEndElementHandler;
1270 XML_CharacterDataHandler oldCharacterDataHandler;
1271 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1272 XML_CommentHandler oldCommentHandler;
1273 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1274 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1275 XML_DefaultHandler oldDefaultHandler;
1276 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1277 XML_NotationDeclHandler oldNotationDeclHandler;
1278 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1279 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1280 XML_NotStandaloneHandler oldNotStandaloneHandler;
1281 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1282 XML_SkippedEntityHandler oldSkippedEntityHandler;
1283 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1284 XML_ElementDeclHandler oldElementDeclHandler;
1285 XML_AttlistDeclHandler oldAttlistDeclHandler;
1286 XML_EntityDeclHandler oldEntityDeclHandler;
1287 XML_XmlDeclHandler oldXmlDeclHandler;
1288 ELEMENT_TYPE * oldDeclElementType;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001289
Victor Stinner5ff71322017-06-21 14:39:22 +02001290 void *oldUserData;
1291 void *oldHandlerArg;
1292 XML_Bool oldDefaultExpandInternalEntities;
1293 XML_Parser oldExternalEntityRefHandlerArg;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001294#ifdef XML_DTD
Victor Stinner5ff71322017-06-21 14:39:22 +02001295 enum XML_ParamEntityParsing oldParamEntityParsing;
1296 int oldInEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001297#endif
Victor Stinner5ff71322017-06-21 14:39:22 +02001298 XML_Bool oldns_triplets;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001299 /* Note that the new parser shares the same hash secret as the old
1300 parser, so that dtdCopy and copyEntityTable can lookup values
1301 from hash tables associated with either parser without us having
1302 to worry which hash secrets each table has.
1303 */
Victor Stinner5ff71322017-06-21 14:39:22 +02001304 unsigned long oldhash_secret_salt;
1305
1306 /* Validate the oldParser parameter before we pull everything out of it */
1307 if (oldParser == NULL)
1308 return NULL;
1309
1310 /* Stash the original parser contents on the stack */
1311 oldDtd = _dtd;
1312 oldStartElementHandler = startElementHandler;
1313 oldEndElementHandler = endElementHandler;
1314 oldCharacterDataHandler = characterDataHandler;
1315 oldProcessingInstructionHandler = processingInstructionHandler;
1316 oldCommentHandler = commentHandler;
1317 oldStartCdataSectionHandler = startCdataSectionHandler;
1318 oldEndCdataSectionHandler = endCdataSectionHandler;
1319 oldDefaultHandler = defaultHandler;
1320 oldUnparsedEntityDeclHandler = unparsedEntityDeclHandler;
1321 oldNotationDeclHandler = notationDeclHandler;
1322 oldStartNamespaceDeclHandler = startNamespaceDeclHandler;
1323 oldEndNamespaceDeclHandler = endNamespaceDeclHandler;
1324 oldNotStandaloneHandler = notStandaloneHandler;
1325 oldExternalEntityRefHandler = externalEntityRefHandler;
1326 oldSkippedEntityHandler = skippedEntityHandler;
1327 oldUnknownEncodingHandler = unknownEncodingHandler;
1328 oldElementDeclHandler = elementDeclHandler;
1329 oldAttlistDeclHandler = attlistDeclHandler;
1330 oldEntityDeclHandler = entityDeclHandler;
1331 oldXmlDeclHandler = xmlDeclHandler;
1332 oldDeclElementType = declElementType;
1333
1334 oldUserData = userData;
1335 oldHandlerArg = handlerArg;
1336 oldDefaultExpandInternalEntities = defaultExpandInternalEntities;
1337 oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg;
1338#ifdef XML_DTD
1339 oldParamEntityParsing = paramEntityParsing;
1340 oldInEntityValue = prologState.inEntityValue;
1341#endif
1342 oldns_triplets = ns_triplets;
1343 /* Note that the new parser shares the same hash secret as the old
1344 parser, so that dtdCopy and copyEntityTable can lookup values
1345 from hash tables associated with either parser without us having
1346 to worry which hash secrets each table has.
1347 */
1348 oldhash_secret_salt = hash_secret_salt;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001349
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001350#ifdef XML_DTD
1351 if (!context)
1352 newDtd = oldDtd;
1353#endif /* XML_DTD */
1354
1355 /* Note that the magical uses of the pre-processor to make field
1356 access look more like C++ require that `parser' be overwritten
1357 here. This makes this function more painful to follow than it
1358 would be otherwise.
1359 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001360 if (ns) {
1361 XML_Char tmp[2];
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001362 *tmp = namespaceSeparator;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001363 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001364 }
1365 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001366 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001367 }
1368
1369 if (!parser)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001370 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001371
1372 startElementHandler = oldStartElementHandler;
1373 endElementHandler = oldEndElementHandler;
1374 characterDataHandler = oldCharacterDataHandler;
1375 processingInstructionHandler = oldProcessingInstructionHandler;
1376 commentHandler = oldCommentHandler;
1377 startCdataSectionHandler = oldStartCdataSectionHandler;
1378 endCdataSectionHandler = oldEndCdataSectionHandler;
1379 defaultHandler = oldDefaultHandler;
1380 unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1381 notationDeclHandler = oldNotationDeclHandler;
1382 startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1383 endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1384 notStandaloneHandler = oldNotStandaloneHandler;
1385 externalEntityRefHandler = oldExternalEntityRefHandler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001386 skippedEntityHandler = oldSkippedEntityHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001387 unknownEncodingHandler = oldUnknownEncodingHandler;
1388 elementDeclHandler = oldElementDeclHandler;
1389 attlistDeclHandler = oldAttlistDeclHandler;
1390 entityDeclHandler = oldEntityDeclHandler;
1391 xmlDeclHandler = oldXmlDeclHandler;
1392 declElementType = oldDeclElementType;
1393 userData = oldUserData;
1394 if (oldUserData == oldHandlerArg)
1395 handlerArg = userData;
1396 else
1397 handlerArg = parser;
1398 if (oldExternalEntityRefHandlerArg != oldParser)
1399 externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1400 defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1401 ns_triplets = oldns_triplets;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001402 hash_secret_salt = oldhash_secret_salt;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001403 parentParser = oldParser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001404#ifdef XML_DTD
1405 paramEntityParsing = oldParamEntityParsing;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001406 prologState.inEntityValue = oldInEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001407 if (context) {
1408#endif /* XML_DTD */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001409 if (!dtdCopy(oldParser, _dtd, oldDtd, &parser->m_mem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001410 || !setContext(parser, context)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001411 XML_ParserFree(parser);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001412 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001413 }
1414 processor = externalEntityInitProcessor;
1415#ifdef XML_DTD
1416 }
1417 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001418 /* The DTD instance referenced by _dtd is shared between the document's
1419 root parser and external PE parsers, therefore one does not need to
1420 call setContext. In addition, one also *must* not call setContext,
1421 because this would overwrite existing prefix->binding pointers in
1422 _dtd with ones that get destroyed with the external PE parser.
1423 This would leave those prefixes with dangling pointers.
1424 */
1425 isParamEntity = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001426 XmlPrologStateInitExternalEntity(&prologState);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001427 processor = externalParEntInitProcessor;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001428 }
1429#endif /* XML_DTD */
1430 return parser;
1431}
1432
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001433static void FASTCALL
1434destroyBindings(BINDING *bindings, XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001435{
1436 for (;;) {
1437 BINDING *b = bindings;
1438 if (!b)
1439 break;
1440 bindings = b->nextTagBinding;
1441 FREE(b->uri);
1442 FREE(b);
1443 }
1444}
1445
Fred Drake08317ae2003-10-21 15:38:55 +00001446void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001447XML_ParserFree(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001448{
Fred Drake31d485c2004-08-03 07:06:22 +00001449 TAG *tagList;
1450 OPEN_INTERNAL_ENTITY *entityList;
1451 if (parser == NULL)
1452 return;
1453 /* free tagStack and freeTagList */
1454 tagList = tagStack;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001455 for (;;) {
1456 TAG *p;
Fred Drake31d485c2004-08-03 07:06:22 +00001457 if (tagList == NULL) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001458 if (freeTagList == NULL)
1459 break;
Fred Drake31d485c2004-08-03 07:06:22 +00001460 tagList = freeTagList;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001461 freeTagList = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001462 }
Fred Drake31d485c2004-08-03 07:06:22 +00001463 p = tagList;
1464 tagList = tagList->parent;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001465 FREE(p->buf);
1466 destroyBindings(p->bindings, parser);
1467 FREE(p);
1468 }
Fred Drake31d485c2004-08-03 07:06:22 +00001469 /* free openInternalEntities and freeInternalEntities */
1470 entityList = openInternalEntities;
1471 for (;;) {
1472 OPEN_INTERNAL_ENTITY *openEntity;
1473 if (entityList == NULL) {
1474 if (freeInternalEntities == NULL)
1475 break;
1476 entityList = freeInternalEntities;
1477 freeInternalEntities = NULL;
1478 }
1479 openEntity = entityList;
1480 entityList = entityList->next;
1481 FREE(openEntity);
1482 }
1483
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001484 destroyBindings(freeBindingList, parser);
1485 destroyBindings(inheritedBindings, parser);
1486 poolDestroy(&tempPool);
1487 poolDestroy(&temp2Pool);
Victor Stinner93d0cb52017-08-18 23:43:54 +02001488 FREE((void *)protocolEncodingName);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001489#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001490 /* external parameter entity parsers share the DTD structure
1491 parser->m_dtd with the root parser, so we must not destroy it
1492 */
1493 if (!isParamEntity && _dtd)
1494#else
1495 if (_dtd)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001496#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001497 dtdDestroy(_dtd, (XML_Bool)!parentParser, &parser->m_mem);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001498 FREE((void *)atts);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001499#ifdef XML_ATTR_INFO
1500 FREE((void *)attInfo);
1501#endif
Fred Drake08317ae2003-10-21 15:38:55 +00001502 FREE(groupConnector);
1503 FREE(buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001504 FREE(dataBuf);
Fred Drake08317ae2003-10-21 15:38:55 +00001505 FREE(nsAtts);
1506 FREE(unknownEncodingMem);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001507 if (unknownEncodingRelease)
1508 unknownEncodingRelease(unknownEncodingData);
1509 FREE(parser);
1510}
1511
Fred Drake08317ae2003-10-21 15:38:55 +00001512void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001513XML_UseParserAsHandlerArg(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001514{
Victor Stinner5ff71322017-06-21 14:39:22 +02001515 if (parser != NULL)
1516 handlerArg = parser;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001517}
1518
Fred Drake08317ae2003-10-21 15:38:55 +00001519enum XML_Error XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001520XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD)
1521{
Victor Stinner5ff71322017-06-21 14:39:22 +02001522 if (parser == NULL)
1523 return XML_ERROR_INVALID_ARGUMENT;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001524#ifdef XML_DTD
1525 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001526 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001527 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1528 useForeignDTD = useDTD;
1529 return XML_ERROR_NONE;
1530#else
1531 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1532#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001533}
1534
Fred Drake08317ae2003-10-21 15:38:55 +00001535void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001536XML_SetReturnNSTriplet(XML_Parser parser, int do_nst)
1537{
Victor Stinner5ff71322017-06-21 14:39:22 +02001538 if (parser == NULL)
1539 return;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001540 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001541 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001542 return;
1543 ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1544}
1545
Fred Drake08317ae2003-10-21 15:38:55 +00001546void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001547XML_SetUserData(XML_Parser parser, void *p)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001548{
Victor Stinner5ff71322017-06-21 14:39:22 +02001549 if (parser == NULL)
1550 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001551 if (handlerArg == userData)
1552 handlerArg = userData = p;
1553 else
1554 userData = p;
1555}
1556
Fred Drake08317ae2003-10-21 15:38:55 +00001557enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001558XML_SetBase(XML_Parser parser, const XML_Char *p)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001559{
Victor Stinner5ff71322017-06-21 14:39:22 +02001560 if (parser == NULL)
1561 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001562 if (p) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001563 p = poolCopyString(&_dtd->pool, p);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001564 if (!p)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001565 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001566 curBase = p;
1567 }
1568 else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001569 curBase = NULL;
1570 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001571}
1572
Fred Drake08317ae2003-10-21 15:38:55 +00001573const XML_Char * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001574XML_GetBase(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001575{
Victor Stinner5ff71322017-06-21 14:39:22 +02001576 if (parser == NULL)
1577 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001578 return curBase;
1579}
1580
Fred Drake08317ae2003-10-21 15:38:55 +00001581int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001582XML_GetSpecifiedAttributeCount(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001583{
Victor Stinner5ff71322017-06-21 14:39:22 +02001584 if (parser == NULL)
1585 return -1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001586 return nSpecifiedAtts;
1587}
1588
Fred Drake08317ae2003-10-21 15:38:55 +00001589int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001590XML_GetIdAttributeIndex(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001591{
Victor Stinner5ff71322017-06-21 14:39:22 +02001592 if (parser == NULL)
1593 return -1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001594 return idAttIndex;
1595}
1596
#ifdef XML_ATTR_INFO
/* Return the parser's attInfo array, or NULL for a NULL parser.
   Only compiled when XML_ATTR_INFO is defined.
*/
const XML_AttrInfo * XMLCALL
XML_GetAttributeInfo(XML_Parser parser)
{
  if (parser != NULL)
    return attInfo;
  return NULL;
}
#endif
1606
Fred Drake08317ae2003-10-21 15:38:55 +00001607void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001608XML_SetElementHandler(XML_Parser parser,
1609 XML_StartElementHandler start,
1610 XML_EndElementHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001611{
Victor Stinner5ff71322017-06-21 14:39:22 +02001612 if (parser == NULL)
1613 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001614 startElementHandler = start;
1615 endElementHandler = end;
1616}
1617
Fred Drake08317ae2003-10-21 15:38:55 +00001618void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001619XML_SetStartElementHandler(XML_Parser parser,
1620 XML_StartElementHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001621 if (parser != NULL)
1622 startElementHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001623}
1624
Fred Drake08317ae2003-10-21 15:38:55 +00001625void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001626XML_SetEndElementHandler(XML_Parser parser,
1627 XML_EndElementHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001628 if (parser != NULL)
1629 endElementHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001630}
1631
Fred Drake08317ae2003-10-21 15:38:55 +00001632void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001633XML_SetCharacterDataHandler(XML_Parser parser,
1634 XML_CharacterDataHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001635{
Victor Stinner5ff71322017-06-21 14:39:22 +02001636 if (parser != NULL)
1637 characterDataHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001638}
1639
Fred Drake08317ae2003-10-21 15:38:55 +00001640void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001641XML_SetProcessingInstructionHandler(XML_Parser parser,
1642 XML_ProcessingInstructionHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001643{
Victor Stinner5ff71322017-06-21 14:39:22 +02001644 if (parser != NULL)
1645 processingInstructionHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001646}
1647
Fred Drake08317ae2003-10-21 15:38:55 +00001648void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001649XML_SetCommentHandler(XML_Parser parser,
1650 XML_CommentHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001651{
Victor Stinner5ff71322017-06-21 14:39:22 +02001652 if (parser != NULL)
1653 commentHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001654}
1655
Fred Drake08317ae2003-10-21 15:38:55 +00001656void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001657XML_SetCdataSectionHandler(XML_Parser parser,
1658 XML_StartCdataSectionHandler start,
1659 XML_EndCdataSectionHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001660{
Victor Stinner5ff71322017-06-21 14:39:22 +02001661 if (parser == NULL)
1662 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001663 startCdataSectionHandler = start;
1664 endCdataSectionHandler = end;
1665}
1666
Fred Drake08317ae2003-10-21 15:38:55 +00001667void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001668XML_SetStartCdataSectionHandler(XML_Parser parser,
1669 XML_StartCdataSectionHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001670 if (parser != NULL)
1671 startCdataSectionHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001672}
1673
Fred Drake08317ae2003-10-21 15:38:55 +00001674void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001675XML_SetEndCdataSectionHandler(XML_Parser parser,
1676 XML_EndCdataSectionHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001677 if (parser != NULL)
1678 endCdataSectionHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001679}
1680
Fred Drake08317ae2003-10-21 15:38:55 +00001681void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001682XML_SetDefaultHandler(XML_Parser parser,
1683 XML_DefaultHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001684{
Victor Stinner5ff71322017-06-21 14:39:22 +02001685 if (parser == NULL)
1686 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001687 defaultHandler = handler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001688 defaultExpandInternalEntities = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001689}
1690
Fred Drake08317ae2003-10-21 15:38:55 +00001691void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001692XML_SetDefaultHandlerExpand(XML_Parser parser,
1693 XML_DefaultHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001694{
Victor Stinner5ff71322017-06-21 14:39:22 +02001695 if (parser == NULL)
1696 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001697 defaultHandler = handler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001698 defaultExpandInternalEntities = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001699}
1700
Fred Drake08317ae2003-10-21 15:38:55 +00001701void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001702XML_SetDoctypeDeclHandler(XML_Parser parser,
1703 XML_StartDoctypeDeclHandler start,
1704 XML_EndDoctypeDeclHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001705{
Victor Stinner5ff71322017-06-21 14:39:22 +02001706 if (parser == NULL)
1707 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001708 startDoctypeDeclHandler = start;
1709 endDoctypeDeclHandler = end;
1710}
1711
Fred Drake08317ae2003-10-21 15:38:55 +00001712void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001713XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1714 XML_StartDoctypeDeclHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001715 if (parser != NULL)
1716 startDoctypeDeclHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001717}
1718
Fred Drake08317ae2003-10-21 15:38:55 +00001719void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001720XML_SetEndDoctypeDeclHandler(XML_Parser parser,
1721 XML_EndDoctypeDeclHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001722 if (parser != NULL)
1723 endDoctypeDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001724}
1725
Fred Drake08317ae2003-10-21 15:38:55 +00001726void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001727XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1728 XML_UnparsedEntityDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001729{
Victor Stinner5ff71322017-06-21 14:39:22 +02001730 if (parser != NULL)
1731 unparsedEntityDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001732}
1733
Fred Drake08317ae2003-10-21 15:38:55 +00001734void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001735XML_SetNotationDeclHandler(XML_Parser parser,
1736 XML_NotationDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001737{
Victor Stinner5ff71322017-06-21 14:39:22 +02001738 if (parser != NULL)
1739 notationDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001740}
1741
Fred Drake08317ae2003-10-21 15:38:55 +00001742void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001743XML_SetNamespaceDeclHandler(XML_Parser parser,
1744 XML_StartNamespaceDeclHandler start,
1745 XML_EndNamespaceDeclHandler end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001746{
Victor Stinner5ff71322017-06-21 14:39:22 +02001747 if (parser == NULL)
1748 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001749 startNamespaceDeclHandler = start;
1750 endNamespaceDeclHandler = end;
1751}
1752
Fred Drake08317ae2003-10-21 15:38:55 +00001753void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001754XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1755 XML_StartNamespaceDeclHandler start) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001756 if (parser != NULL)
1757 startNamespaceDeclHandler = start;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001758}
1759
Fred Drake08317ae2003-10-21 15:38:55 +00001760void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001761XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1762 XML_EndNamespaceDeclHandler end) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001763 if (parser != NULL)
1764 endNamespaceDeclHandler = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001765}
1766
Fred Drake08317ae2003-10-21 15:38:55 +00001767void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001768XML_SetNotStandaloneHandler(XML_Parser parser,
1769 XML_NotStandaloneHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001770{
Victor Stinner5ff71322017-06-21 14:39:22 +02001771 if (parser != NULL)
1772 notStandaloneHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001773}
1774
Fred Drake08317ae2003-10-21 15:38:55 +00001775void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001776XML_SetExternalEntityRefHandler(XML_Parser parser,
1777 XML_ExternalEntityRefHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001778{
Victor Stinner5ff71322017-06-21 14:39:22 +02001779 if (parser != NULL)
1780 externalEntityRefHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001781}
1782
Fred Drake08317ae2003-10-21 15:38:55 +00001783void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001784XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001785{
Victor Stinner5ff71322017-06-21 14:39:22 +02001786 if (parser == NULL)
1787 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001788 if (arg)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001789 externalEntityRefHandlerArg = (XML_Parser)arg;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001790 else
1791 externalEntityRefHandlerArg = parser;
1792}
1793
Fred Drake08317ae2003-10-21 15:38:55 +00001794void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001795XML_SetSkippedEntityHandler(XML_Parser parser,
1796 XML_SkippedEntityHandler handler)
1797{
Victor Stinner5ff71322017-06-21 14:39:22 +02001798 if (parser != NULL)
1799 skippedEntityHandler = handler;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001800}
1801
Fred Drake08317ae2003-10-21 15:38:55 +00001802void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001803XML_SetUnknownEncodingHandler(XML_Parser parser,
1804 XML_UnknownEncodingHandler handler,
1805 void *data)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001806{
Victor Stinner5ff71322017-06-21 14:39:22 +02001807 if (parser == NULL)
1808 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001809 unknownEncodingHandler = handler;
1810 unknownEncodingHandlerData = data;
1811}
1812
Fred Drake08317ae2003-10-21 15:38:55 +00001813void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001814XML_SetElementDeclHandler(XML_Parser parser,
1815 XML_ElementDeclHandler eldecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001816{
Victor Stinner5ff71322017-06-21 14:39:22 +02001817 if (parser != NULL)
1818 elementDeclHandler = eldecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001819}
1820
Fred Drake08317ae2003-10-21 15:38:55 +00001821void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001822XML_SetAttlistDeclHandler(XML_Parser parser,
1823 XML_AttlistDeclHandler attdecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001824{
Victor Stinner5ff71322017-06-21 14:39:22 +02001825 if (parser != NULL)
1826 attlistDeclHandler = attdecl;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001827}
1828
Fred Drake08317ae2003-10-21 15:38:55 +00001829void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001830XML_SetEntityDeclHandler(XML_Parser parser,
1831 XML_EntityDeclHandler handler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001832{
Victor Stinner5ff71322017-06-21 14:39:22 +02001833 if (parser != NULL)
1834 entityDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001835}
1836
Fred Drake08317ae2003-10-21 15:38:55 +00001837void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001838XML_SetXmlDeclHandler(XML_Parser parser,
1839 XML_XmlDeclHandler handler) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001840 if (parser != NULL)
1841 xmlDeclHandler = handler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001842}
1843
Fred Drake08317ae2003-10-21 15:38:55 +00001844int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001845XML_SetParamEntityParsing(XML_Parser parser,
1846 enum XML_ParamEntityParsing peParsing)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001847{
Victor Stinner5ff71322017-06-21 14:39:22 +02001848 if (parser == NULL)
1849 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001850 /* block after XML_Parse()/XML_ParseBuffer() has been called */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001851 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001852 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001853#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001854 paramEntityParsing = peParsing;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001855 return 1;
1856#else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001857 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001858#endif
1859}
1860
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001861int XMLCALL
1862XML_SetHashSalt(XML_Parser parser,
1863 unsigned long hash_salt)
1864{
Victor Stinner5ff71322017-06-21 14:39:22 +02001865 if (parser == NULL)
1866 return 0;
1867 if (parser->m_parentParser)
1868 return XML_SetHashSalt(parser->m_parentParser, hash_salt);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001869 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1870 if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED)
1871 return 0;
1872 hash_secret_salt = hash_salt;
1873 return 1;
1874}
1875
Fred Drake08317ae2003-10-21 15:38:55 +00001876enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001877XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001878{
Victor Stinner5ff71322017-06-21 14:39:22 +02001879 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02001880 if (parser != NULL)
1881 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
Victor Stinner5ff71322017-06-21 14:39:22 +02001882 return XML_STATUS_ERROR;
1883 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001884 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001885 case XML_SUSPENDED:
1886 errorCode = XML_ERROR_SUSPENDED;
1887 return XML_STATUS_ERROR;
1888 case XML_FINISHED:
1889 errorCode = XML_ERROR_FINISHED;
1890 return XML_STATUS_ERROR;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001891 case XML_INITIALIZED:
1892 if (parentParser == NULL && !startParsing(parser)) {
1893 errorCode = XML_ERROR_NO_MEMORY;
1894 return XML_STATUS_ERROR;
1895 }
Fred Drake31d485c2004-08-03 07:06:22 +00001896 default:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001897 ps_parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00001898 }
1899
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001900 if (len == 0) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001901 ps_finalBuffer = (XML_Bool)isFinal;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001902 if (!isFinal)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001903 return XML_STATUS_OK;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001904 positionPtr = bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00001905 parseEndPtr = bufferEnd;
1906
1907 /* If data are left over from last buffer, and we now know that these
1908 data are the final chunk of input, then we have to check them again
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001909 to detect errors based on that fact.
Fred Drake31d485c2004-08-03 07:06:22 +00001910 */
1911 errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr);
1912
1913 if (errorCode == XML_ERROR_NONE) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001914 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001915 case XML_SUSPENDED:
Victor Stinner93d0cb52017-08-18 23:43:54 +02001916 /* It is hard to be certain, but it seems that this case
1917 * cannot occur. This code is cleaning up a previous parse
1918 * with no new data (since len == 0). Changing the parsing
1919 * state requires getting to execute a handler function, and
1920 * there doesn't seem to be an opportunity for that while in
1921 * this circumstance.
1922 *
1923 * Given the uncertainty, we retain the code but exclude it
1924 * from coverage tests.
1925 *
1926 * LCOV_EXCL_START
1927 */
Fred Drake31d485c2004-08-03 07:06:22 +00001928 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
1929 positionPtr = bufferPtr;
1930 return XML_STATUS_SUSPENDED;
Victor Stinner93d0cb52017-08-18 23:43:54 +02001931 /* LCOV_EXCL_STOP */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07001932 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00001933 case XML_PARSING:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001934 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00001935 /* fall through */
1936 default:
1937 return XML_STATUS_OK;
1938 }
1939 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001940 eventEndPtr = eventPtr;
1941 processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001942 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001943 }
1944#ifndef XML_CONTEXT_BYTES
1945 else if (bufferPtr == bufferEnd) {
1946 const char *end;
1947 int nLeftOver;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07001948 enum XML_Status result;
Victor Stinner5ff71322017-06-21 14:39:22 +02001949 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
1950 if (len > ((XML_Size)-1) / 2 - parseEndByteIndex) {
1951 errorCode = XML_ERROR_NO_MEMORY;
1952 eventPtr = eventEndPtr = NULL;
1953 processor = errorProcessor;
1954 return XML_STATUS_ERROR;
1955 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001956 parseEndByteIndex += len;
1957 positionPtr = s;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001958 ps_finalBuffer = (XML_Bool)isFinal;
Fred Drake31d485c2004-08-03 07:06:22 +00001959
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001960 errorCode = processor(parser, s, parseEndPtr = s + len, &end);
Fred Drake31d485c2004-08-03 07:06:22 +00001961
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001962 if (errorCode != XML_ERROR_NONE) {
1963 eventEndPtr = eventPtr;
1964 processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001965 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001966 }
Fred Drake31d485c2004-08-03 07:06:22 +00001967 else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001968 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00001969 case XML_SUSPENDED:
1970 result = XML_STATUS_SUSPENDED;
1971 break;
1972 case XML_INITIALIZED:
1973 case XML_PARSING:
Fred Drake31d485c2004-08-03 07:06:22 +00001974 if (isFinal) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001975 ps_parsing = XML_FINISHED;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001976 return XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001977 }
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001978 /* fall through */
1979 default:
1980 result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00001981 }
1982 }
1983
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00001984 XmlUpdatePosition(encoding, positionPtr, end, &position);
1985 nLeftOver = s + len - end;
1986 if (nLeftOver) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001987 if (buffer == NULL || nLeftOver > bufferLim - buffer) {
Victor Stinner5ff71322017-06-21 14:39:22 +02001988 /* avoid _signed_ integer overflow */
1989 char *temp = NULL;
1990 const int bytesToAllocate = (int)((unsigned)len * 2U);
1991 if (bytesToAllocate > 0) {
1992 temp = (buffer == NULL
1993 ? (char *)MALLOC(bytesToAllocate)
1994 : (char *)REALLOC(buffer, bytesToAllocate));
1995 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001996 if (temp == NULL) {
1997 errorCode = XML_ERROR_NO_MEMORY;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00001998 eventPtr = eventEndPtr = NULL;
1999 processor = errorProcessor;
2000 return XML_STATUS_ERROR;
2001 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002002 buffer = temp;
Victor Stinner5ff71322017-06-21 14:39:22 +02002003 bufferLim = buffer + bytesToAllocate;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002004 }
2005 memcpy(buffer, end, nLeftOver);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002006 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002007 bufferPtr = buffer;
2008 bufferEnd = buffer + nLeftOver;
2009 positionPtr = bufferPtr;
2010 parseEndPtr = bufferEnd;
2011 eventPtr = bufferPtr;
2012 eventEndPtr = bufferPtr;
Fred Drake31d485c2004-08-03 07:06:22 +00002013 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002014 }
2015#endif /* not defined XML_CONTEXT_BYTES */
2016 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002017 void *buff = XML_GetBuffer(parser, len);
2018 if (buff == NULL)
2019 return XML_STATUS_ERROR;
2020 else {
2021 memcpy(buff, s, len);
2022 return XML_ParseBuffer(parser, len, isFinal);
2023 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002024 }
2025}
2026
Fred Drake08317ae2003-10-21 15:38:55 +00002027enum XML_Status XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002028XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002029{
Fred Drake31d485c2004-08-03 07:06:22 +00002030 const char *start;
Neal Norwitz52ca0dd2006-01-07 21:21:16 +00002031 enum XML_Status result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00002032
Victor Stinner5ff71322017-06-21 14:39:22 +02002033 if (parser == NULL)
2034 return XML_STATUS_ERROR;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002035 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002036 case XML_SUSPENDED:
2037 errorCode = XML_ERROR_SUSPENDED;
2038 return XML_STATUS_ERROR;
2039 case XML_FINISHED:
2040 errorCode = XML_ERROR_FINISHED;
2041 return XML_STATUS_ERROR;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07002042 case XML_INITIALIZED:
2043 if (parentParser == NULL && !startParsing(parser)) {
2044 errorCode = XML_ERROR_NO_MEMORY;
2045 return XML_STATUS_ERROR;
2046 }
Fred Drake31d485c2004-08-03 07:06:22 +00002047 default:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002048 ps_parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00002049 }
2050
2051 start = bufferPtr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002052 positionPtr = start;
2053 bufferEnd += len;
Fred Drake31d485c2004-08-03 07:06:22 +00002054 parseEndPtr = bufferEnd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002055 parseEndByteIndex += len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002056 ps_finalBuffer = (XML_Bool)isFinal;
Fred Drake31d485c2004-08-03 07:06:22 +00002057
2058 errorCode = processor(parser, start, parseEndPtr, &bufferPtr);
2059
2060 if (errorCode != XML_ERROR_NONE) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002061 eventEndPtr = eventPtr;
2062 processor = errorProcessor;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002063 return XML_STATUS_ERROR;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002064 }
Fred Drake31d485c2004-08-03 07:06:22 +00002065 else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002066 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002067 case XML_SUSPENDED:
2068 result = XML_STATUS_SUSPENDED;
2069 break;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002070 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00002071 case XML_PARSING:
2072 if (isFinal) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002073 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002074 return result;
2075 }
2076 default: ; /* should not happen */
2077 }
2078 }
2079
2080 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
2081 positionPtr = bufferPtr;
2082 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002083}
2084
Fred Drake08317ae2003-10-21 15:38:55 +00002085void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002086XML_GetBuffer(XML_Parser parser, int len)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002087{
Victor Stinner5ff71322017-06-21 14:39:22 +02002088 if (parser == NULL)
2089 return NULL;
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002090 if (len < 0) {
2091 errorCode = XML_ERROR_NO_MEMORY;
2092 return NULL;
2093 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002094 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002095 case XML_SUSPENDED:
2096 errorCode = XML_ERROR_SUSPENDED;
2097 return NULL;
2098 case XML_FINISHED:
2099 errorCode = XML_ERROR_FINISHED;
2100 return NULL;
2101 default: ;
2102 }
2103
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002104 if (len > bufferLim - bufferEnd) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002105#ifdef XML_CONTEXT_BYTES
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002106 int keep;
Victor Stinner23ec4b52017-06-15 00:54:36 +02002107#endif /* defined XML_CONTEXT_BYTES */
2108 /* Do not invoke signed arithmetic overflow: */
2109 int neededSize = (int) ((unsigned)len + (unsigned)(bufferEnd - bufferPtr));
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002110 if (neededSize < 0) {
2111 errorCode = XML_ERROR_NO_MEMORY;
2112 return NULL;
2113 }
2114#ifdef XML_CONTEXT_BYTES
2115 keep = (int)(bufferPtr - buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002116 if (keep > XML_CONTEXT_BYTES)
2117 keep = XML_CONTEXT_BYTES;
2118 neededSize += keep;
2119#endif /* defined XML_CONTEXT_BYTES */
2120 if (neededSize <= bufferLim - buffer) {
2121#ifdef XML_CONTEXT_BYTES
2122 if (keep < bufferPtr - buffer) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002123 int offset = (int)(bufferPtr - buffer) - keep;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002124 memmove(buffer, &buffer[offset], bufferEnd - bufferPtr + keep);
2125 bufferEnd -= offset;
2126 bufferPtr -= offset;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002127 }
2128#else
2129 memmove(buffer, bufferPtr, bufferEnd - bufferPtr);
2130 bufferEnd = buffer + (bufferEnd - bufferPtr);
2131 bufferPtr = buffer;
2132#endif /* not defined XML_CONTEXT_BYTES */
2133 }
2134 else {
2135 char *newBuf;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002136 int bufferSize = (int)(bufferLim - bufferPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002137 if (bufferSize == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002138 bufferSize = INIT_BUFFER_SIZE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002139 do {
Victor Stinner23ec4b52017-06-15 00:54:36 +02002140 /* Do not invoke signed arithmetic overflow: */
2141 bufferSize = (int) (2U * (unsigned) bufferSize);
Benjamin Peterson196d7db2016-06-11 13:28:56 -07002142 } while (bufferSize < neededSize && bufferSize > 0);
2143 if (bufferSize <= 0) {
2144 errorCode = XML_ERROR_NO_MEMORY;
2145 return NULL;
2146 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002147 newBuf = (char *)MALLOC(bufferSize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002148 if (newBuf == 0) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002149 errorCode = XML_ERROR_NO_MEMORY;
2150 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002151 }
2152 bufferLim = newBuf + bufferSize;
2153#ifdef XML_CONTEXT_BYTES
2154 if (bufferPtr) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002155 int keep = (int)(bufferPtr - buffer);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002156 if (keep > XML_CONTEXT_BYTES)
2157 keep = XML_CONTEXT_BYTES;
2158 memcpy(newBuf, &bufferPtr[-keep], bufferEnd - bufferPtr + keep);
2159 FREE(buffer);
2160 buffer = newBuf;
2161 bufferEnd = buffer + (bufferEnd - bufferPtr) + keep;
2162 bufferPtr = buffer + keep;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002163 }
2164 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002165 bufferEnd = newBuf + (bufferEnd - bufferPtr);
2166 bufferPtr = buffer = newBuf;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002167 }
2168#else
2169 if (bufferPtr) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002170 memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr);
2171 FREE(buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002172 }
2173 bufferEnd = newBuf + (bufferEnd - bufferPtr);
2174 bufferPtr = buffer = newBuf;
2175#endif /* not defined XML_CONTEXT_BYTES */
2176 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002177 eventPtr = eventEndPtr = NULL;
2178 positionPtr = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002179 }
2180 return bufferEnd;
2181}
2182
Fred Drake31d485c2004-08-03 07:06:22 +00002183enum XML_Status XMLCALL
2184XML_StopParser(XML_Parser parser, XML_Bool resumable)
2185{
Victor Stinner5ff71322017-06-21 14:39:22 +02002186 if (parser == NULL)
2187 return XML_STATUS_ERROR;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002188 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002189 case XML_SUSPENDED:
2190 if (resumable) {
2191 errorCode = XML_ERROR_SUSPENDED;
2192 return XML_STATUS_ERROR;
2193 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002194 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002195 break;
2196 case XML_FINISHED:
2197 errorCode = XML_ERROR_FINISHED;
2198 return XML_STATUS_ERROR;
2199 default:
2200 if (resumable) {
2201#ifdef XML_DTD
2202 if (isParamEntity) {
2203 errorCode = XML_ERROR_SUSPEND_PE;
2204 return XML_STATUS_ERROR;
2205 }
2206#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002207 ps_parsing = XML_SUSPENDED;
Fred Drake31d485c2004-08-03 07:06:22 +00002208 }
2209 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002210 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002211 }
2212 return XML_STATUS_OK;
2213}
2214
2215enum XML_Status XMLCALL
2216XML_ResumeParser(XML_Parser parser)
2217{
Neal Norwitz52ca0dd2006-01-07 21:21:16 +00002218 enum XML_Status result = XML_STATUS_OK;
Fred Drake31d485c2004-08-03 07:06:22 +00002219
Victor Stinner5ff71322017-06-21 14:39:22 +02002220 if (parser == NULL)
2221 return XML_STATUS_ERROR;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002222 if (ps_parsing != XML_SUSPENDED) {
Fred Drake31d485c2004-08-03 07:06:22 +00002223 errorCode = XML_ERROR_NOT_SUSPENDED;
2224 return XML_STATUS_ERROR;
2225 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002226 ps_parsing = XML_PARSING;
Fred Drake31d485c2004-08-03 07:06:22 +00002227
2228 errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr);
2229
2230 if (errorCode != XML_ERROR_NONE) {
2231 eventEndPtr = eventPtr;
2232 processor = errorProcessor;
2233 return XML_STATUS_ERROR;
2234 }
2235 else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002236 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00002237 case XML_SUSPENDED:
2238 result = XML_STATUS_SUSPENDED;
2239 break;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002240 case XML_INITIALIZED:
Fred Drake31d485c2004-08-03 07:06:22 +00002241 case XML_PARSING:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002242 if (ps_finalBuffer) {
2243 ps_parsing = XML_FINISHED;
Fred Drake31d485c2004-08-03 07:06:22 +00002244 return result;
2245 }
2246 default: ;
2247 }
2248 }
2249
2250 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
2251 positionPtr = bufferPtr;
2252 return result;
2253}
2254
2255void XMLCALL
2256XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status)
2257{
Victor Stinner5ff71322017-06-21 14:39:22 +02002258 if (parser == NULL)
2259 return;
Fred Drake31d485c2004-08-03 07:06:22 +00002260 assert(status != NULL);
2261 *status = parser->m_parsingStatus;
2262}
2263
Fred Drake08317ae2003-10-21 15:38:55 +00002264enum XML_Error XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002265XML_GetErrorCode(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002266{
Victor Stinner5ff71322017-06-21 14:39:22 +02002267 if (parser == NULL)
2268 return XML_ERROR_INVALID_ARGUMENT;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002269 return errorCode;
2270}
2271
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002272XML_Index XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002273XML_GetCurrentByteIndex(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002274{
Victor Stinner5ff71322017-06-21 14:39:22 +02002275 if (parser == NULL)
2276 return -1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002277 if (eventPtr)
Victor Stinner23ec4b52017-06-15 00:54:36 +02002278 return (XML_Index)(parseEndByteIndex - (parseEndPtr - eventPtr));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002279 return -1;
2280}
2281
Fred Drake08317ae2003-10-21 15:38:55 +00002282int XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002283XML_GetCurrentByteCount(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002284{
Victor Stinner5ff71322017-06-21 14:39:22 +02002285 if (parser == NULL)
2286 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002287 if (eventEndPtr && eventPtr)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002288 return (int)(eventEndPtr - eventPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002289 return 0;
2290}
2291
Fred Drake08317ae2003-10-21 15:38:55 +00002292const char * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002293XML_GetInputContext(XML_Parser parser, int *offset, int *size)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002294{
2295#ifdef XML_CONTEXT_BYTES
Victor Stinner5ff71322017-06-21 14:39:22 +02002296 if (parser == NULL)
2297 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002298 if (eventPtr && buffer) {
Victor Stinner5ff71322017-06-21 14:39:22 +02002299 if (offset != NULL)
2300 *offset = (int)(eventPtr - buffer);
2301 if (size != NULL)
2302 *size = (int)(bufferEnd - buffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002303 return buffer;
2304 }
Victor Stinner5ff71322017-06-21 14:39:22 +02002305#else
2306 (void)parser;
2307 (void)offset;
2308 (void)size;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002309#endif /* defined XML_CONTEXT_BYTES */
2310 return (char *) 0;
2311}
2312
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002313XML_Size XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002314XML_GetCurrentLineNumber(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002315{
Victor Stinner5ff71322017-06-21 14:39:22 +02002316 if (parser == NULL)
2317 return 0;
Fred Drake31d485c2004-08-03 07:06:22 +00002318 if (eventPtr && eventPtr >= positionPtr) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002319 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
2320 positionPtr = eventPtr;
2321 }
2322 return position.lineNumber + 1;
2323}
2324
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002325XML_Size XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002326XML_GetCurrentColumnNumber(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002327{
Victor Stinner5ff71322017-06-21 14:39:22 +02002328 if (parser == NULL)
2329 return 0;
Fred Drake31d485c2004-08-03 07:06:22 +00002330 if (eventPtr && eventPtr >= positionPtr) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002331 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
2332 positionPtr = eventPtr;
2333 }
2334 return position.columnNumber;
2335}
2336
Fred Drake08317ae2003-10-21 15:38:55 +00002337void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002338XML_FreeContentModel(XML_Parser parser, XML_Content *model)
2339{
Victor Stinner5ff71322017-06-21 14:39:22 +02002340 if (parser != NULL)
2341 FREE(model);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002342}
2343
Fred Drake08317ae2003-10-21 15:38:55 +00002344void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002345XML_MemMalloc(XML_Parser parser, size_t size)
2346{
Victor Stinner5ff71322017-06-21 14:39:22 +02002347 if (parser == NULL)
2348 return NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002349 return MALLOC(size);
2350}
2351
Fred Drake08317ae2003-10-21 15:38:55 +00002352void * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002353XML_MemRealloc(XML_Parser parser, void *ptr, size_t size)
2354{
Victor Stinner5ff71322017-06-21 14:39:22 +02002355 if (parser == NULL)
2356 return NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002357 return REALLOC(ptr, size);
2358}
2359
Fred Drake08317ae2003-10-21 15:38:55 +00002360void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002361XML_MemFree(XML_Parser parser, void *ptr)
2362{
Victor Stinner5ff71322017-06-21 14:39:22 +02002363 if (parser != NULL)
2364 FREE(ptr);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002365}
2366
Fred Drake08317ae2003-10-21 15:38:55 +00002367void XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002368XML_DefaultCurrent(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002369{
Victor Stinner5ff71322017-06-21 14:39:22 +02002370 if (parser == NULL)
2371 return;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002372 if (defaultHandler) {
2373 if (openInternalEntities)
2374 reportDefault(parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002375 internalEncoding,
2376 openInternalEntities->internalEventPtr,
2377 openInternalEntities->internalEventEndPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002378 else
2379 reportDefault(parser, encoding, eventPtr, eventEndPtr);
2380 }
2381}
2382
Fred Drake08317ae2003-10-21 15:38:55 +00002383const XML_LChar * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002384XML_ErrorString(enum XML_Error code)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002385{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002386 static const XML_LChar* const message[] = {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002387 0,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002388 XML_L("out of memory"),
2389 XML_L("syntax error"),
2390 XML_L("no element found"),
2391 XML_L("not well-formed (invalid token)"),
2392 XML_L("unclosed token"),
2393 XML_L("partial character"),
2394 XML_L("mismatched tag"),
2395 XML_L("duplicate attribute"),
2396 XML_L("junk after document element"),
2397 XML_L("illegal parameter entity reference"),
2398 XML_L("undefined entity"),
2399 XML_L("recursive entity reference"),
2400 XML_L("asynchronous entity"),
2401 XML_L("reference to invalid character number"),
2402 XML_L("reference to binary entity"),
2403 XML_L("reference to external entity in attribute"),
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002404 XML_L("XML or text declaration not at start of entity"),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002405 XML_L("unknown encoding"),
2406 XML_L("encoding specified in XML declaration is incorrect"),
2407 XML_L("unclosed CDATA section"),
2408 XML_L("error in processing external entity reference"),
2409 XML_L("document is not standalone"),
2410 XML_L("unexpected parser state - please send a bug report"),
2411 XML_L("entity declared in parameter entity"),
2412 XML_L("requested feature requires XML_DTD support in Expat"),
Fred Drake08317ae2003-10-21 15:38:55 +00002413 XML_L("cannot change setting once parsing has begun"),
Fred Drake31d485c2004-08-03 07:06:22 +00002414 XML_L("unbound prefix"),
2415 XML_L("must not undeclare prefix"),
2416 XML_L("incomplete markup in parameter entity"),
2417 XML_L("XML declaration not well-formed"),
2418 XML_L("text declaration not well-formed"),
2419 XML_L("illegal character(s) in public id"),
2420 XML_L("parser suspended"),
2421 XML_L("parser not suspended"),
2422 XML_L("parsing aborted"),
2423 XML_L("parsing finished"),
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002424 XML_L("cannot suspend in external parameter entity"),
2425 XML_L("reserved prefix (xml) must not be undeclared or bound to another namespace name"),
2426 XML_L("reserved prefix (xmlns) must not be declared or undeclared"),
2427 XML_L("prefix must not be bound to one of the reserved namespace names")
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002428 };
2429 if (code > 0 && code < sizeof(message)/sizeof(message[0]))
2430 return message[code];
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002431 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002432}
2433
Fred Drake08317ae2003-10-21 15:38:55 +00002434const XML_LChar * XMLCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002435XML_ExpatVersion(void) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002436
2437 /* V1 is used to string-ize the version number. However, it would
2438 string-ize the actual version macro *names* unless we get them
2439 substituted before being passed to V1. CPP is defined to expand
2440 a macro, then rescan for more expansions. Thus, we use V2 to expand
2441 the version macros, then CPP will expand the resulting V1() macro
2442 with the correct numerals. */
2443 /* ### I'm assuming cpp is portable in this respect... */
2444
2445#define V1(a,b,c) XML_L(#a)XML_L(".")XML_L(#b)XML_L(".")XML_L(#c)
2446#define V2(a,b,c) XML_L("expat_")V1(a,b,c)
2447
2448 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2449
2450#undef V1
2451#undef V2
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002452}
2453
Fred Drake08317ae2003-10-21 15:38:55 +00002454XML_Expat_Version XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002455XML_ExpatVersionInfo(void)
2456{
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002457 XML_Expat_Version version;
2458
2459 version.major = XML_MAJOR_VERSION;
2460 version.minor = XML_MINOR_VERSION;
2461 version.micro = XML_MICRO_VERSION;
2462
2463 return version;
2464}
2465
Fred Drake08317ae2003-10-21 15:38:55 +00002466const XML_Feature * XMLCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002467XML_GetFeatureList(void)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002468{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002469 static const XML_Feature features[] = {
2470 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2471 sizeof(XML_Char)},
2472 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2473 sizeof(XML_LChar)},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002474#ifdef XML_UNICODE
Fred Drake08317ae2003-10-21 15:38:55 +00002475 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002476#endif
2477#ifdef XML_UNICODE_WCHAR_T
Fred Drake08317ae2003-10-21 15:38:55 +00002478 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002479#endif
2480#ifdef XML_DTD
Fred Drake08317ae2003-10-21 15:38:55 +00002481 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002482#endif
2483#ifdef XML_CONTEXT_BYTES
2484 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2485 XML_CONTEXT_BYTES},
2486#endif
2487#ifdef XML_MIN_SIZE
Fred Drake08317ae2003-10-21 15:38:55 +00002488 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002489#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002490#ifdef XML_NS
2491 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2492#endif
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002493#ifdef XML_LARGE_SIZE
2494 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2495#endif
2496#ifdef XML_ATTR_INFO
2497 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2498#endif
Fred Drake08317ae2003-10-21 15:38:55 +00002499 {XML_FEATURE_END, NULL, 0}
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002500 };
2501
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002502 return features;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002503}
2504
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002505/* Initially tag->rawName always points into the parse buffer;
2506 for those TAG instances opened while the current parse buffer was
2507 processed, and not yet closed, we need to store tag->rawName in a more
2508 permanent location, since the parse buffer is about to be discarded.
2509*/
2510static XML_Bool
2511storeRawNames(XML_Parser parser)
2512{
2513 TAG *tag = tagStack;
2514 while (tag) {
2515 int bufSize;
2516 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2517 char *rawNameBuf = tag->buf + nameLen;
2518 /* Stop if already stored. Since tagStack is a stack, we can stop
2519 at the first entry that has already been copied; everything
2520 below it in the stack is already been accounted for in a
2521 previous call to this function.
2522 */
2523 if (tag->rawName == rawNameBuf)
2524 break;
2525 /* For re-use purposes we need to ensure that the
2526 size of tag->buf is a multiple of sizeof(XML_Char).
2527 */
2528 bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2529 if (bufSize > tag->bufEnd - tag->buf) {
2530 char *temp = (char *)REALLOC(tag->buf, bufSize);
2531 if (temp == NULL)
2532 return XML_FALSE;
2533 /* if tag->name.str points to tag->buf (only when namespace
2534 processing is off) then we have to update it
2535 */
2536 if (tag->name.str == (XML_Char *)tag->buf)
2537 tag->name.str = (XML_Char *)temp;
2538 /* if tag->name.localPart is set (when namespace processing is on)
2539 then update it as well, since it will always point into tag->buf
2540 */
2541 if (tag->name.localPart)
2542 tag->name.localPart = (XML_Char *)temp + (tag->name.localPart -
2543 (XML_Char *)tag->buf);
2544 tag->buf = temp;
2545 tag->bufEnd = temp + bufSize;
2546 rawNameBuf = temp + nameLen;
2547 }
2548 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2549 tag->rawName = rawNameBuf;
2550 tag = tag->parent;
2551 }
2552 return XML_TRUE;
2553}
2554
2555static enum XML_Error PTRCALL
2556contentProcessor(XML_Parser parser,
2557 const char *start,
2558 const char *end,
2559 const char **endPtr)
2560{
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002561 enum XML_Error result = doContent(parser, 0, encoding, start, end,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002562 endPtr, (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00002563 if (result == XML_ERROR_NONE) {
2564 if (!storeRawNames(parser))
2565 return XML_ERROR_NO_MEMORY;
2566 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002567 return result;
2568}
2569
2570static enum XML_Error PTRCALL
2571externalEntityInitProcessor(XML_Parser parser,
2572 const char *start,
2573 const char *end,
2574 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002575{
2576 enum XML_Error result = initializeEncoding(parser);
2577 if (result != XML_ERROR_NONE)
2578 return result;
2579 processor = externalEntityInitProcessor2;
2580 return externalEntityInitProcessor2(parser, start, end, endPtr);
2581}
2582
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002583static enum XML_Error PTRCALL
2584externalEntityInitProcessor2(XML_Parser parser,
2585 const char *start,
2586 const char *end,
2587 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002588{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002589 const char *next = start; /* XmlContentTok doesn't always set the last arg */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002590 int tok = XmlContentTok(encoding, start, end, &next);
2591 switch (tok) {
2592 case XML_TOK_BOM:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002593 /* If we are at the end of the buffer, this would cause the next stage,
2594 i.e. externalEntityInitProcessor3, to pass control directly to
2595 doContent (by detecting XML_TOK_NONE) without processing any xml text
2596 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2597 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002598 if (next == end && !ps_finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002599 *endPtr = next;
2600 return XML_ERROR_NONE;
2601 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002602 start = next;
2603 break;
2604 case XML_TOK_PARTIAL:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002605 if (!ps_finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002606 *endPtr = start;
2607 return XML_ERROR_NONE;
2608 }
2609 eventPtr = start;
2610 return XML_ERROR_UNCLOSED_TOKEN;
2611 case XML_TOK_PARTIAL_CHAR:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002612 if (!ps_finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002613 *endPtr = start;
2614 return XML_ERROR_NONE;
2615 }
2616 eventPtr = start;
2617 return XML_ERROR_PARTIAL_CHAR;
2618 }
2619 processor = externalEntityInitProcessor3;
2620 return externalEntityInitProcessor3(parser, start, end, endPtr);
2621}
2622
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002623static enum XML_Error PTRCALL
2624externalEntityInitProcessor3(XML_Parser parser,
2625 const char *start,
2626 const char *end,
2627 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002628{
Fred Drake31d485c2004-08-03 07:06:22 +00002629 int tok;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002630 const char *next = start; /* XmlContentTok doesn't always set the last arg */
Fred Drake31d485c2004-08-03 07:06:22 +00002631 eventPtr = start;
2632 tok = XmlContentTok(encoding, start, end, &next);
2633 eventEndPtr = next;
2634
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002635 switch (tok) {
2636 case XML_TOK_XML_DECL:
2637 {
Fred Drake31d485c2004-08-03 07:06:22 +00002638 enum XML_Error result;
2639 result = processXmlDecl(parser, 1, start, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002640 if (result != XML_ERROR_NONE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002641 return result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002642 switch (ps_parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002643 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00002644 *endPtr = next;
2645 return XML_ERROR_NONE;
2646 case XML_FINISHED:
2647 return XML_ERROR_ABORTED;
2648 default:
2649 start = next;
2650 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002651 }
2652 break;
2653 case XML_TOK_PARTIAL:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002654 if (!ps_finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002655 *endPtr = start;
2656 return XML_ERROR_NONE;
2657 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002658 return XML_ERROR_UNCLOSED_TOKEN;
2659 case XML_TOK_PARTIAL_CHAR:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002660 if (!ps_finalBuffer) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002661 *endPtr = start;
2662 return XML_ERROR_NONE;
2663 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002664 return XML_ERROR_PARTIAL_CHAR;
2665 }
2666 processor = externalEntityContentProcessor;
2667 tagLevel = 1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002668 return externalEntityContentProcessor(parser, start, end, endPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002669}
2670
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002671static enum XML_Error PTRCALL
2672externalEntityContentProcessor(XML_Parser parser,
2673 const char *start,
2674 const char *end,
2675 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002676{
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002677 enum XML_Error result = doContent(parser, 1, encoding, start, end,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002678 endPtr, (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00002679 if (result == XML_ERROR_NONE) {
2680 if (!storeRawNames(parser))
2681 return XML_ERROR_NO_MEMORY;
2682 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002683 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002684}
2685
2686static enum XML_Error
2687doContent(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002688 int startTagLevel,
2689 const ENCODING *enc,
2690 const char *s,
2691 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00002692 const char **nextPtr,
2693 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002694{
Fred Drake31d485c2004-08-03 07:06:22 +00002695 /* save one level of indirection */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002696 DTD * const dtd = _dtd;
Fred Drake31d485c2004-08-03 07:06:22 +00002697
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002698 const char **eventPP;
2699 const char **eventEndPP;
2700 if (enc == encoding) {
2701 eventPP = &eventPtr;
2702 eventEndPP = &eventEndPtr;
2703 }
2704 else {
2705 eventPP = &(openInternalEntities->internalEventPtr);
2706 eventEndPP = &(openInternalEntities->internalEventEndPtr);
2707 }
2708 *eventPP = s;
Fred Drake31d485c2004-08-03 07:06:22 +00002709
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002710 for (;;) {
2711 const char *next = s; /* XmlContentTok doesn't always set the last arg */
2712 int tok = XmlContentTok(enc, s, end, &next);
2713 *eventEndPP = next;
2714 switch (tok) {
2715 case XML_TOK_TRAILING_CR:
Fred Drake31d485c2004-08-03 07:06:22 +00002716 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002717 *nextPtr = s;
2718 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002719 }
2720 *eventEndPP = end;
2721 if (characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002722 XML_Char c = 0xA;
2723 characterDataHandler(handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002724 }
2725 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002726 reportDefault(parser, enc, s, end);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07002727 /* We are at the end of the final buffer, should we check for
2728 XML_SUSPENDED, XML_FINISHED?
Fred Drake31d485c2004-08-03 07:06:22 +00002729 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002730 if (startTagLevel == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002731 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002732 if (tagLevel != startTagLevel)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002733 return XML_ERROR_ASYNC_ENTITY;
Fred Drake31d485c2004-08-03 07:06:22 +00002734 *nextPtr = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002735 return XML_ERROR_NONE;
2736 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00002737 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002738 *nextPtr = s;
2739 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002740 }
2741 if (startTagLevel > 0) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002742 if (tagLevel != startTagLevel)
2743 return XML_ERROR_ASYNC_ENTITY;
Fred Drake31d485c2004-08-03 07:06:22 +00002744 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002745 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002746 }
2747 return XML_ERROR_NO_ELEMENTS;
2748 case XML_TOK_INVALID:
2749 *eventPP = next;
2750 return XML_ERROR_INVALID_TOKEN;
2751 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00002752 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002753 *nextPtr = s;
2754 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002755 }
2756 return XML_ERROR_UNCLOSED_TOKEN;
2757 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00002758 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002759 *nextPtr = s;
2760 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002761 }
2762 return XML_ERROR_PARTIAL_CHAR;
2763 case XML_TOK_ENTITY_REF:
2764 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002765 const XML_Char *name;
2766 ENTITY *entity;
2767 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
2768 s + enc->minBytesPerChar,
2769 next - enc->minBytesPerChar);
2770 if (ch) {
2771 if (characterDataHandler)
2772 characterDataHandler(handlerArg, &ch, 1);
2773 else if (defaultHandler)
2774 reportDefault(parser, enc, s, next);
2775 break;
2776 }
2777 name = poolStoreString(&dtd->pool, enc,
2778 s + enc->minBytesPerChar,
2779 next - enc->minBytesPerChar);
2780 if (!name)
2781 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07002782 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002783 poolDiscard(&dtd->pool);
2784 /* First, determine if a check for an existing declaration is needed;
2785 if yes, check that the entity exists, and that it is internal,
2786 otherwise call the skipped entity or default handler.
2787 */
2788 if (!dtd->hasParamEntityRefs || dtd->standalone) {
2789 if (!entity)
2790 return XML_ERROR_UNDEFINED_ENTITY;
2791 else if (!entity->is_internal)
2792 return XML_ERROR_ENTITY_DECLARED_IN_PE;
2793 }
2794 else if (!entity) {
2795 if (skippedEntityHandler)
2796 skippedEntityHandler(handlerArg, name, 0);
2797 else if (defaultHandler)
2798 reportDefault(parser, enc, s, next);
2799 break;
2800 }
2801 if (entity->open)
2802 return XML_ERROR_RECURSIVE_ENTITY_REF;
2803 if (entity->notation)
2804 return XML_ERROR_BINARY_ENTITY_REF;
2805 if (entity->textPtr) {
2806 enum XML_Error result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002807 if (!defaultExpandInternalEntities) {
2808 if (skippedEntityHandler)
2809 skippedEntityHandler(handlerArg, entity->name, 0);
2810 else if (defaultHandler)
2811 reportDefault(parser, enc, s, next);
2812 break;
2813 }
Fred Drake31d485c2004-08-03 07:06:22 +00002814 result = processInternalEntity(parser, entity, XML_FALSE);
2815 if (result != XML_ERROR_NONE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002816 return result;
2817 }
2818 else if (externalEntityRefHandler) {
2819 const XML_Char *context;
2820 entity->open = XML_TRUE;
2821 context = getContext(parser);
2822 entity->open = XML_FALSE;
2823 if (!context)
2824 return XML_ERROR_NO_MEMORY;
Fred Drake31d485c2004-08-03 07:06:22 +00002825 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002826 context,
2827 entity->base,
2828 entity->systemId,
2829 entity->publicId))
2830 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2831 poolDiscard(&tempPool);
2832 }
2833 else if (defaultHandler)
2834 reportDefault(parser, enc, s, next);
2835 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002836 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002837 case XML_TOK_START_TAG_NO_ATTS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002838 /* fall through */
2839 case XML_TOK_START_TAG_WITH_ATTS:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002840 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002841 TAG *tag;
2842 enum XML_Error result;
2843 XML_Char *toPtr;
2844 if (freeTagList) {
2845 tag = freeTagList;
2846 freeTagList = freeTagList->parent;
2847 }
2848 else {
2849 tag = (TAG *)MALLOC(sizeof(TAG));
2850 if (!tag)
2851 return XML_ERROR_NO_MEMORY;
2852 tag->buf = (char *)MALLOC(INIT_TAG_BUF_SIZE);
2853 if (!tag->buf) {
2854 FREE(tag);
2855 return XML_ERROR_NO_MEMORY;
2856 }
2857 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
2858 }
2859 tag->bindings = NULL;
2860 tag->parent = tagStack;
2861 tagStack = tag;
2862 tag->name.localPart = NULL;
2863 tag->name.prefix = NULL;
2864 tag->rawName = s + enc->minBytesPerChar;
2865 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
2866 ++tagLevel;
2867 {
2868 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
2869 const char *fromPtr = tag->rawName;
2870 toPtr = (XML_Char *)tag->buf;
2871 for (;;) {
2872 int bufSize;
2873 int convLen;
Victor Stinner23ec4b52017-06-15 00:54:36 +02002874 const enum XML_Convert_Result convert_res = XmlConvert(enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002875 &fromPtr, rawNameEnd,
2876 (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002877 convLen = (int)(toPtr - (XML_Char *)tag->buf);
Victor Stinner5ff71322017-06-21 14:39:22 +02002878 if ((fromPtr >= rawNameEnd) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002879 tag->name.strLen = convLen;
2880 break;
2881 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002882 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002883 {
2884 char *temp = (char *)REALLOC(tag->buf, bufSize);
2885 if (temp == NULL)
2886 return XML_ERROR_NO_MEMORY;
2887 tag->buf = temp;
2888 tag->bufEnd = temp + bufSize;
2889 toPtr = (XML_Char *)temp + convLen;
2890 }
2891 }
2892 }
2893 tag->name.str = (XML_Char *)tag->buf;
2894 *toPtr = XML_T('\0');
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002895 result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
2896 if (result)
2897 return result;
2898 if (startElementHandler)
2899 startElementHandler(handlerArg, tag->name.str,
2900 (const XML_Char **)atts);
2901 else if (defaultHandler)
2902 reportDefault(parser, enc, s, next);
2903 poolClear(&tempPool);
2904 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002905 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002906 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002907 /* fall through */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002908 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
2909 {
2910 const char *rawName = s + enc->minBytesPerChar;
2911 enum XML_Error result;
2912 BINDING *bindings = NULL;
2913 XML_Bool noElmHandlers = XML_TRUE;
2914 TAG_NAME name;
2915 name.str = poolStoreString(&tempPool, enc, rawName,
2916 rawName + XmlNameLength(enc, rawName));
2917 if (!name.str)
2918 return XML_ERROR_NO_MEMORY;
2919 poolFinish(&tempPool);
Fred Drake4faea012003-01-28 06:42:40 +00002920 result = storeAtts(parser, enc, s, &name, &bindings);
Victor Stinner5ff71322017-06-21 14:39:22 +02002921 if (result != XML_ERROR_NONE) {
2922 freeBindings(parser, bindings);
Fred Drake4faea012003-01-28 06:42:40 +00002923 return result;
Victor Stinner5ff71322017-06-21 14:39:22 +02002924 }
Fred Drake4faea012003-01-28 06:42:40 +00002925 poolFinish(&tempPool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002926 if (startElementHandler) {
2927 startElementHandler(handlerArg, name.str, (const XML_Char **)atts);
2928 noElmHandlers = XML_FALSE;
2929 }
2930 if (endElementHandler) {
2931 if (startElementHandler)
2932 *eventPP = *eventEndPP;
2933 endElementHandler(handlerArg, name.str);
2934 noElmHandlers = XML_FALSE;
2935 }
2936 if (noElmHandlers && defaultHandler)
2937 reportDefault(parser, enc, s, next);
2938 poolClear(&tempPool);
Victor Stinner5ff71322017-06-21 14:39:22 +02002939 freeBindings(parser, bindings);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002940 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002941 if (tagLevel == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002942 return epilogProcessor(parser, next, end, nextPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002943 break;
2944 case XML_TOK_END_TAG:
2945 if (tagLevel == startTagLevel)
2946 return XML_ERROR_ASYNC_ENTITY;
2947 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00002948 int len;
2949 const char *rawName;
2950 TAG *tag = tagStack;
2951 tagStack = tag->parent;
2952 tag->parent = freeTagList;
2953 freeTagList = tag;
2954 rawName = s + enc->minBytesPerChar*2;
2955 len = XmlNameLength(enc, rawName);
2956 if (len != tag->rawNameLength
2957 || memcmp(tag->rawName, rawName, len) != 0) {
2958 *eventPP = rawName;
2959 return XML_ERROR_TAG_MISMATCH;
2960 }
2961 --tagLevel;
2962 if (endElementHandler) {
2963 const XML_Char *localPart;
2964 const XML_Char *prefix;
2965 XML_Char *uri;
2966 localPart = tag->name.localPart;
2967 if (ns && localPart) {
2968 /* localPart and prefix may have been overwritten in
2969 tag->name.str, since this points to the binding->uri
2970 buffer which gets re-used; so we have to add them again
2971 */
2972 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
2973 /* don't need to check for space - already done in storeAtts() */
2974 while (*localPart) *uri++ = *localPart++;
2975 prefix = (XML_Char *)tag->name.prefix;
2976 if (ns_triplets && prefix) {
2977 *uri++ = namespaceSeparator;
2978 while (*prefix) *uri++ = *prefix++;
2979 }
2980 *uri = XML_T('\0');
2981 }
2982 endElementHandler(handlerArg, tag->name.str);
2983 }
2984 else if (defaultHandler)
2985 reportDefault(parser, enc, s, next);
2986 while (tag->bindings) {
2987 BINDING *b = tag->bindings;
2988 if (endNamespaceDeclHandler)
2989 endNamespaceDeclHandler(handlerArg, b->prefix->name);
2990 tag->bindings = tag->bindings->nextTagBinding;
2991 b->nextTagBinding = freeBindingList;
2992 freeBindingList = b;
2993 b->prefix->binding = b->prevPrefixBinding;
2994 }
2995 if (tagLevel == 0)
2996 return epilogProcessor(parser, next, end, nextPtr);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00002997 }
2998 break;
2999 case XML_TOK_CHAR_REF:
3000 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003001 int n = XmlCharRefNumber(enc, s);
3002 if (n < 0)
3003 return XML_ERROR_BAD_CHAR_REF;
3004 if (characterDataHandler) {
3005 XML_Char buf[XML_ENCODE_MAX];
3006 characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
3007 }
3008 else if (defaultHandler)
3009 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003010 }
3011 break;
3012 case XML_TOK_XML_DECL:
3013 return XML_ERROR_MISPLACED_XML_PI;
3014 case XML_TOK_DATA_NEWLINE:
3015 if (characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003016 XML_Char c = 0xA;
3017 characterDataHandler(handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003018 }
3019 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003020 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003021 break;
3022 case XML_TOK_CDATA_SECT_OPEN:
3023 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003024 enum XML_Error result;
3025 if (startCdataSectionHandler)
3026 startCdataSectionHandler(handlerArg);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003027#if 0
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003028 /* Suppose you are doing a transformation on a document that involves
3029 changing only the character data. You set up a defaultHandler
3030 and a characterDataHandler. The defaultHandler simply copies
3031 characters through. The characterDataHandler does the
3032 transformation and writes the characters out escaping them as
3033 necessary. This case will fail to work if we leave out the
3034 following two lines (because & and < inside CDATA sections will
3035 be incorrectly escaped).
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003036
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003037 However, now we have a start/endCdataSectionHandler, so it seems
3038 easier to let the user deal with this.
3039 */
3040 else if (characterDataHandler)
3041 characterDataHandler(handlerArg, dataBuf, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003042#endif
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003043 else if (defaultHandler)
3044 reportDefault(parser, enc, s, next);
Fred Drake31d485c2004-08-03 07:06:22 +00003045 result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore);
3046 if (result != XML_ERROR_NONE)
3047 return result;
3048 else if (!next) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003049 processor = cdataSectionProcessor;
3050 return result;
3051 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003052 }
3053 break;
3054 case XML_TOK_TRAILING_RSQB:
Fred Drake31d485c2004-08-03 07:06:22 +00003055 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003056 *nextPtr = s;
3057 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003058 }
3059 if (characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003060 if (MUST_CONVERT(enc, s)) {
3061 ICHAR *dataPtr = (ICHAR *)dataBuf;
3062 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
3063 characterDataHandler(handlerArg, dataBuf,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003064 (int)(dataPtr - (ICHAR *)dataBuf));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003065 }
3066 else
3067 characterDataHandler(handlerArg,
3068 (XML_Char *)s,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003069 (int)((XML_Char *)end - (XML_Char *)s));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003070 }
3071 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003072 reportDefault(parser, enc, s, end);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003073 /* We are at the end of the final buffer, should we check for
3074 XML_SUSPENDED, XML_FINISHED?
Fred Drake31d485c2004-08-03 07:06:22 +00003075 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003076 if (startTagLevel == 0) {
3077 *eventPP = end;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003078 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003079 }
3080 if (tagLevel != startTagLevel) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003081 *eventPP = end;
3082 return XML_ERROR_ASYNC_ENTITY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003083 }
Fred Drake31d485c2004-08-03 07:06:22 +00003084 *nextPtr = end;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003085 return XML_ERROR_NONE;
3086 case XML_TOK_DATA_CHARS:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003087 {
3088 XML_CharacterDataHandler charDataHandler = characterDataHandler;
3089 if (charDataHandler) {
3090 if (MUST_CONVERT(enc, s)) {
3091 for (;;) {
3092 ICHAR *dataPtr = (ICHAR *)dataBuf;
Victor Stinner23ec4b52017-06-15 00:54:36 +02003093 const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003094 *eventEndPP = s;
3095 charDataHandler(handlerArg, dataBuf,
3096 (int)(dataPtr - (ICHAR *)dataBuf));
Victor Stinner23ec4b52017-06-15 00:54:36 +02003097 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003098 break;
3099 *eventPP = s;
3100 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003101 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003102 else
3103 charDataHandler(handlerArg,
3104 (XML_Char *)s,
3105 (int)((XML_Char *)next - (XML_Char *)s));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003106 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003107 else if (defaultHandler)
3108 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003109 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003110 break;
3111 case XML_TOK_PI:
3112 if (!reportProcessingInstruction(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003113 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003114 break;
3115 case XML_TOK_COMMENT:
3116 if (!reportComment(parser, enc, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003117 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003118 break;
3119 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02003120 /* All of the tokens produced by XmlContentTok() have their own
3121 * explicit cases, so this default is not strictly necessary.
3122 * However it is a useful safety net, so we retain the code and
3123 * simply exclude it from the coverage tests.
3124 *
3125 * LCOV_EXCL_START
3126 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003127 if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003128 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003129 break;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003130 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003131 }
3132 *eventPP = s = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003133 switch (ps_parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003134 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00003135 *nextPtr = next;
3136 return XML_ERROR_NONE;
3137 case XML_FINISHED:
3138 return XML_ERROR_ABORTED;
3139 default: ;
3140 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003141 }
3142 /* not reached */
3143}
3144
Victor Stinner5ff71322017-06-21 14:39:22 +02003145/* This function does not call free() on the allocated memory, merely
3146 * moving it to the parser's freeBindingList where it can be freed or
3147 * reused as appropriate.
3148 */
3149static void
3150freeBindings(XML_Parser parser, BINDING *bindings)
3151{
3152 while (bindings) {
3153 BINDING *b = bindings;
3154
3155 /* startNamespaceDeclHandler will have been called for this
3156 * binding in addBindings(), so call the end handler now.
3157 */
3158 if (endNamespaceDeclHandler)
3159 endNamespaceDeclHandler(handlerArg, b->prefix->name);
3160
3161 bindings = bindings->nextTagBinding;
3162 b->nextTagBinding = freeBindingList;
3163 freeBindingList = b;
3164 b->prefix->binding = b->prevPrefixBinding;
3165 }
3166}
3167
Fred Drake4faea012003-01-28 06:42:40 +00003168/* Precondition: all arguments must be non-NULL;
3169 Purpose:
3170 - normalize attributes
3171 - check attributes for well-formedness
3172 - generate namespace aware attribute names (URI, prefix)
3173 - build list of attributes for startElementHandler
3174 - default attributes
3175 - process namespace declarations (check and report them)
3176 - generate namespace aware element name (URI, prefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003177*/
3178static enum XML_Error
3179storeAtts(XML_Parser parser, const ENCODING *enc,
3180 const char *attStr, TAG_NAME *tagNamePtr,
3181 BINDING **bindingsPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003182{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003183 DTD * const dtd = _dtd; /* save one level of indirection */
Fred Drake08317ae2003-10-21 15:38:55 +00003184 ELEMENT_TYPE *elementType;
3185 int nDefaultAtts;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003186 const XML_Char **appAtts; /* the attribute list for the application */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003187 int attIndex = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003188 int prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003189 int i;
3190 int n;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003191 XML_Char *uri;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003192 int nPrefixes = 0;
3193 BINDING *binding;
3194 const XML_Char *localPart;
3195
3196 /* lookup the element type name */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003197 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str,0);
Fred Drake4faea012003-01-28 06:42:40 +00003198 if (!elementType) {
3199 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3200 if (!name)
3201 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003202 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
Fred Drake4faea012003-01-28 06:42:40 +00003203 sizeof(ELEMENT_TYPE));
3204 if (!elementType)
3205 return XML_ERROR_NO_MEMORY;
3206 if (ns && !setElementTypePrefix(parser, elementType))
3207 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003208 }
Fred Drake4faea012003-01-28 06:42:40 +00003209 nDefaultAtts = elementType->nDefaultAtts;
3210
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003211 /* get the attributes from the tokenizer */
3212 n = XmlGetAttributes(enc, attStr, attsSize, atts);
3213 if (n + nDefaultAtts > attsSize) {
3214 int oldAttsSize = attsSize;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003215 ATTRIBUTE *temp;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003216#ifdef XML_ATTR_INFO
3217 XML_AttrInfo *temp2;
3218#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003219 attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003220 temp = (ATTRIBUTE *)REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE));
Victor Stinner93d0cb52017-08-18 23:43:54 +02003221 if (temp == NULL) {
3222 attsSize = oldAttsSize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003223 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003224 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003225 atts = temp;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003226#ifdef XML_ATTR_INFO
3227 temp2 = (XML_AttrInfo *)REALLOC((void *)attInfo, attsSize * sizeof(XML_AttrInfo));
Victor Stinner93d0cb52017-08-18 23:43:54 +02003228 if (temp2 == NULL) {
3229 attsSize = oldAttsSize;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003230 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003231 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003232 attInfo = temp2;
3233#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003234 if (n > oldAttsSize)
3235 XmlGetAttributes(enc, attStr, n, atts);
3236 }
Fred Drake4faea012003-01-28 06:42:40 +00003237
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003238 appAtts = (const XML_Char **)atts;
3239 for (i = 0; i < n; i++) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003240 ATTRIBUTE *currAtt = &atts[i];
3241#ifdef XML_ATTR_INFO
3242 XML_AttrInfo *currAttInfo = &attInfo[i];
3243#endif
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003244 /* add the name and value to the attribute list */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003245 ATTRIBUTE_ID *attId = getAttributeId(parser, enc, currAtt->name,
3246 currAtt->name
3247 + XmlNameLength(enc, currAtt->name));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003248 if (!attId)
3249 return XML_ERROR_NO_MEMORY;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003250#ifdef XML_ATTR_INFO
3251 currAttInfo->nameStart = parseEndByteIndex - (parseEndPtr - currAtt->name);
3252 currAttInfo->nameEnd = currAttInfo->nameStart +
3253 XmlNameLength(enc, currAtt->name);
3254 currAttInfo->valueStart = parseEndByteIndex -
3255 (parseEndPtr - currAtt->valuePtr);
3256 currAttInfo->valueEnd = parseEndByteIndex - (parseEndPtr - currAtt->valueEnd);
3257#endif
Fred Drake08317ae2003-10-21 15:38:55 +00003258 /* Detect duplicate attributes by their QNames. This does not work when
3259 namespace processing is turned on and different prefixes for the same
3260 namespace are used. For this case we have a check further down.
3261 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003262 if ((attId->name)[-1]) {
3263 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003264 eventPtr = atts[i].name;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003265 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3266 }
3267 (attId->name)[-1] = 1;
3268 appAtts[attIndex++] = attId->name;
3269 if (!atts[i].normalized) {
3270 enum XML_Error result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003271 XML_Bool isCdata = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003272
3273 /* figure out whether declared as other than CDATA */
3274 if (attId->maybeTokenized) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003275 int j;
3276 for (j = 0; j < nDefaultAtts; j++) {
3277 if (attId == elementType->defaultAtts[j].id) {
3278 isCdata = elementType->defaultAtts[j].isCdata;
3279 break;
3280 }
3281 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003282 }
3283
3284 /* normalize the attribute value */
3285 result = storeAttributeValue(parser, enc, isCdata,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003286 atts[i].valuePtr, atts[i].valueEnd,
3287 &tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003288 if (result)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003289 return result;
Fred Drake4faea012003-01-28 06:42:40 +00003290 appAtts[attIndex] = poolStart(&tempPool);
3291 poolFinish(&tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003292 }
Fred Drake4faea012003-01-28 06:42:40 +00003293 else {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003294 /* the value did not need normalizing */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003295 appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr,
3296 atts[i].valueEnd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003297 if (appAtts[attIndex] == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003298 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003299 poolFinish(&tempPool);
3300 }
3301 /* handle prefixed attribute names */
Fred Drake4faea012003-01-28 06:42:40 +00003302 if (attId->prefix) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003303 if (attId->xmlns) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003304 /* deal with namespace declarations here */
3305 enum XML_Error result = addBinding(parser, attId->prefix, attId,
3306 appAtts[attIndex], bindingsPtr);
3307 if (result)
3308 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003309 --attIndex;
3310 }
3311 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003312 /* deal with other prefixed names later */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003313 attIndex++;
3314 nPrefixes++;
3315 (attId->name)[-1] = 2;
3316 }
3317 }
3318 else
3319 attIndex++;
3320 }
Fred Drake4faea012003-01-28 06:42:40 +00003321
3322 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3323 nSpecifiedAtts = attIndex;
3324 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3325 for (i = 0; i < attIndex; i += 2)
3326 if (appAtts[i] == elementType->idAtt->name) {
3327 idAttIndex = i;
3328 break;
3329 }
3330 }
3331 else
3332 idAttIndex = -1;
3333
3334 /* do attribute defaulting */
3335 for (i = 0; i < nDefaultAtts; i++) {
3336 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3337 if (!(da->id->name)[-1] && da->value) {
3338 if (da->id->prefix) {
3339 if (da->id->xmlns) {
3340 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3341 da->value, bindingsPtr);
3342 if (result)
3343 return result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003344 }
3345 else {
Fred Drake4faea012003-01-28 06:42:40 +00003346 (da->id->name)[-1] = 2;
3347 nPrefixes++;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003348 appAtts[attIndex++] = da->id->name;
3349 appAtts[attIndex++] = da->value;
3350 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003351 }
Fred Drake4faea012003-01-28 06:42:40 +00003352 else {
3353 (da->id->name)[-1] = 1;
3354 appAtts[attIndex++] = da->id->name;
3355 appAtts[attIndex++] = da->value;
3356 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003357 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003358 }
Fred Drake4faea012003-01-28 06:42:40 +00003359 appAtts[attIndex] = 0;
3360
Fred Drake08317ae2003-10-21 15:38:55 +00003361 /* expand prefixed attribute names, check for duplicates,
3362 and clear flags that say whether attributes were specified */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003363 i = 0;
3364 if (nPrefixes) {
Fred Drake08317ae2003-10-21 15:38:55 +00003365 int j; /* hash table index */
3366 unsigned long version = nsAttsVersion;
3367 int nsAttsSize = (int)1 << nsAttsPower;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003368 unsigned char oldNsAttsPower = nsAttsPower;
Fred Drake08317ae2003-10-21 15:38:55 +00003369 /* size of hash table must be at least 2 * (# of prefixed attributes) */
3370 if ((nPrefixes << 1) >> nsAttsPower) { /* true for nsAttsPower = 0 */
3371 NS_ATT *temp;
3372 /* hash table size must also be a power of 2 and >= 8 */
3373 while (nPrefixes >> nsAttsPower++);
3374 if (nsAttsPower < 3)
3375 nsAttsPower = 3;
3376 nsAttsSize = (int)1 << nsAttsPower;
3377 temp = (NS_ATT *)REALLOC(nsAtts, nsAttsSize * sizeof(NS_ATT));
Victor Stinner93d0cb52017-08-18 23:43:54 +02003378 if (!temp) {
3379 /* Restore actual size of memory in nsAtts */
3380 nsAttsPower = oldNsAttsPower;
Fred Drake08317ae2003-10-21 15:38:55 +00003381 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003382 }
Fred Drake08317ae2003-10-21 15:38:55 +00003383 nsAtts = temp;
3384 version = 0; /* force re-initialization of nsAtts hash table */
3385 }
3386 /* using a version flag saves us from initializing nsAtts every time */
3387 if (!version) { /* initialize version flags when version wraps around */
3388 version = INIT_ATTS_VERSION;
3389 for (j = nsAttsSize; j != 0; )
3390 nsAtts[--j].version = version;
3391 }
3392 nsAttsVersion = --version;
3393
3394 /* expand prefixed names and check for duplicates */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003395 for (; i < attIndex; i += 2) {
Fred Drake08317ae2003-10-21 15:38:55 +00003396 const XML_Char *s = appAtts[i];
3397 if (s[-1] == 2) { /* prefixed */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003398 ATTRIBUTE_ID *id;
Fred Drake08317ae2003-10-21 15:38:55 +00003399 const BINDING *b;
Victor Stinner5ff71322017-06-21 14:39:22 +02003400 unsigned long uriHash;
3401 struct siphash sip_state;
3402 struct sipkey sip_key;
3403
3404 copy_salt_to_sipkey(parser, &sip_key);
3405 sip24_init(&sip_state, &sip_key);
3406
Fred Drake08317ae2003-10-21 15:38:55 +00003407 ((XML_Char *)s)[-1] = 0; /* clear flag */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003408 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
Victor Stinner93d0cb52017-08-18 23:43:54 +02003409 if (!id || !id->prefix) {
3410 /* This code is walking through the appAtts array, dealing
3411 * with (in this case) a prefixed attribute name. To be in
3412 * the array, the attribute must have already been bound, so
3413 * has to have passed through the hash table lookup once
3414 * already. That implies that an entry for it already
3415 * exists, so the lookup above will return a pointer to
3416 * already allocated memory. There is no opportunity for
3417 * the allocator to fail, so the condition above cannot be
3418 * fulfilled.
3419 *
3420 * Since it is difficult to be certain that the above
3421 * analysis is complete, we retain the test and merely
3422 * remove the code from coverage tests.
3423 */
3424 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3425 }
Fred Drake08317ae2003-10-21 15:38:55 +00003426 b = id->prefix->binding;
3427 if (!b)
3428 return XML_ERROR_UNBOUND_PREFIX;
3429
Fred Drake08317ae2003-10-21 15:38:55 +00003430 for (j = 0; j < b->uriLen; j++) {
3431 const XML_Char c = b->uri[j];
3432 if (!poolAppendChar(&tempPool, c))
3433 return XML_ERROR_NO_MEMORY;
Fred Drake08317ae2003-10-21 15:38:55 +00003434 }
Victor Stinner5ff71322017-06-21 14:39:22 +02003435
3436 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3437
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003438 while (*s++ != XML_T(ASCII_COLON))
Fred Drake08317ae2003-10-21 15:38:55 +00003439 ;
Victor Stinner5ff71322017-06-21 14:39:22 +02003440
3441 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3442
Fred Drake08317ae2003-10-21 15:38:55 +00003443 do { /* copies null terminator */
Fred Drake08317ae2003-10-21 15:38:55 +00003444 if (!poolAppendChar(&tempPool, *s))
3445 return XML_ERROR_NO_MEMORY;
Fred Drake08317ae2003-10-21 15:38:55 +00003446 } while (*s++);
3447
Victor Stinner5ff71322017-06-21 14:39:22 +02003448 uriHash = (unsigned long)sip24_final(&sip_state);
3449
Fred Drake08317ae2003-10-21 15:38:55 +00003450 { /* Check hash table for duplicate of expanded name (uriName).
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07003451 Derived from code in lookup(parser, HASH_TABLE *table, ...).
Fred Drake08317ae2003-10-21 15:38:55 +00003452 */
3453 unsigned char step = 0;
3454 unsigned long mask = nsAttsSize - 1;
3455 j = uriHash & mask; /* index into hash table */
3456 while (nsAtts[j].version == version) {
3457 /* for speed we compare stored hash values first */
3458 if (uriHash == nsAtts[j].hash) {
3459 const XML_Char *s1 = poolStart(&tempPool);
3460 const XML_Char *s2 = nsAtts[j].uriName;
3461 /* s1 is null terminated, but not s2 */
3462 for (; *s1 == *s2 && *s1 != 0; s1++, s2++);
3463 if (*s1 == 0)
3464 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3465 }
3466 if (!step)
3467 step = PROBE_STEP(uriHash, mask, nsAttsPower);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003468 j < step ? (j += nsAttsSize - step) : (j -= step);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003469 }
Fred Drake08317ae2003-10-21 15:38:55 +00003470 }
3471
3472 if (ns_triplets) { /* append namespace separator and prefix */
3473 tempPool.ptr[-1] = namespaceSeparator;
3474 s = b->prefix->name;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003475 do {
3476 if (!poolAppendChar(&tempPool, *s))
3477 return XML_ERROR_NO_MEMORY;
3478 } while (*s++);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003479 }
Fred Drake08317ae2003-10-21 15:38:55 +00003480
3481 /* store expanded name in attribute list */
3482 s = poolStart(&tempPool);
3483 poolFinish(&tempPool);
3484 appAtts[i] = s;
3485
3486 /* fill empty slot with new version, uriName and hash value */
3487 nsAtts[j].version = version;
3488 nsAtts[j].hash = uriHash;
3489 nsAtts[j].uriName = s;
3490
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003491 if (!--nPrefixes) {
3492 i += 2;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003493 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003494 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003495 }
Fred Drake08317ae2003-10-21 15:38:55 +00003496 else /* not prefixed */
3497 ((XML_Char *)s)[-1] = 0; /* clear flag */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003498 }
3499 }
Fred Drake08317ae2003-10-21 15:38:55 +00003500 /* clear flags for the remaining attributes */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003501 for (; i < attIndex; i += 2)
3502 ((XML_Char *)(appAtts[i]))[-1] = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003503 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3504 binding->attId->name[-1] = 0;
Fred Drake4faea012003-01-28 06:42:40 +00003505
Fred Drake08317ae2003-10-21 15:38:55 +00003506 if (!ns)
3507 return XML_ERROR_NONE;
3508
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003509 /* expand the element type name */
3510 if (elementType->prefix) {
3511 binding = elementType->prefix->binding;
3512 if (!binding)
Fred Drake08317ae2003-10-21 15:38:55 +00003513 return XML_ERROR_UNBOUND_PREFIX;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003514 localPart = tagNamePtr->str;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003515 while (*localPart++ != XML_T(ASCII_COLON))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003516 ;
3517 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003518 else if (dtd->defaultPrefix.binding) {
3519 binding = dtd->defaultPrefix.binding;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003520 localPart = tagNamePtr->str;
3521 }
3522 else
3523 return XML_ERROR_NONE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003524 prefixLen = 0;
Fred Drake08317ae2003-10-21 15:38:55 +00003525 if (ns_triplets && binding->prefix->name) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003526 for (; binding->prefix->name[prefixLen++];)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003527 ; /* prefixLen includes null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003528 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003529 tagNamePtr->localPart = localPart;
3530 tagNamePtr->uriLen = binding->uriLen;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003531 tagNamePtr->prefix = binding->prefix->name;
3532 tagNamePtr->prefixLen = prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003533 for (i = 0; localPart[i++];)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003534 ; /* i includes null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003535 n = i + binding->uriLen + prefixLen;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003536 if (n > binding->uriAlloc) {
3537 TAG *p;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003538 uri = (XML_Char *)MALLOC((n + EXPAND_SPARE) * sizeof(XML_Char));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003539 if (!uri)
3540 return XML_ERROR_NO_MEMORY;
3541 binding->uriAlloc = n + EXPAND_SPARE;
3542 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3543 for (p = tagStack; p; p = p->parent)
3544 if (p->name.str == binding->uri)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003545 p->name.str = uri;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003546 FREE(binding->uri);
3547 binding->uri = uri;
3548 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003549 /* if namespaceSeparator != '\0' then uri includes it already */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003550 uri = binding->uri + binding->uriLen;
3551 memcpy(uri, localPart, i * sizeof(XML_Char));
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003552 /* we always have a namespace separator between localPart and prefix */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003553 if (prefixLen) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003554 uri += i - 1;
3555 *uri = namespaceSeparator; /* replace null terminator */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003556 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3557 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003558 tagNamePtr->str = binding->uri;
3559 return XML_ERROR_NONE;
3560}
3561
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003562/* addBinding() overwrites the value of prefix->binding without checking.
3563 Therefore one must keep track of the old value outside of addBinding().
3564*/
3565static enum XML_Error
3566addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3567 const XML_Char *uri, BINDING **bindingsPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003568{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003569 static const XML_Char xmlNamespace[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003570 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3571 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3572 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
3573 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, ASCII_SLASH,
3574 ASCII_n, ASCII_a, ASCII_m, ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
3575 ASCII_e, '\0'
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003576 };
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003577 static const int xmlLen =
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003578 (int)sizeof(xmlNamespace)/sizeof(XML_Char) - 1;
3579 static const XML_Char xmlnsNamespace[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003580 ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
3581 ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
3582 ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_2, ASCII_0, ASCII_0,
3583 ASCII_0, ASCII_SLASH, ASCII_x, ASCII_m, ASCII_l, ASCII_n, ASCII_s,
3584 ASCII_SLASH, '\0'
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003585 };
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003586 static const int xmlnsLen =
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003587 (int)sizeof(xmlnsNamespace)/sizeof(XML_Char) - 1;
3588
3589 XML_Bool mustBeXML = XML_FALSE;
3590 XML_Bool isXML = XML_TRUE;
3591 XML_Bool isXMLNS = XML_TRUE;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003592
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003593 BINDING *b;
3594 int len;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003595
Fred Drake31d485c2004-08-03 07:06:22 +00003596 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003597 if (*uri == XML_T('\0') && prefix->name)
Fred Drake31d485c2004-08-03 07:06:22 +00003598 return XML_ERROR_UNDECLARING_PREFIX;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003599
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003600 if (prefix->name
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003601 && prefix->name[0] == XML_T(ASCII_x)
3602 && prefix->name[1] == XML_T(ASCII_m)
3603 && prefix->name[2] == XML_T(ASCII_l)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003604
3605 /* Not allowed to bind xmlns */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003606 if (prefix->name[3] == XML_T(ASCII_n)
3607 && prefix->name[4] == XML_T(ASCII_s)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003608 && prefix->name[5] == XML_T('\0'))
3609 return XML_ERROR_RESERVED_PREFIX_XMLNS;
3610
3611 if (prefix->name[3] == XML_T('\0'))
3612 mustBeXML = XML_TRUE;
3613 }
3614
3615 for (len = 0; uri[len]; len++) {
3616 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
3617 isXML = XML_FALSE;
3618
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003619 if (!mustBeXML && isXMLNS
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003620 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
3621 isXMLNS = XML_FALSE;
3622 }
3623 isXML = isXML && len == xmlLen;
3624 isXMLNS = isXMLNS && len == xmlnsLen;
3625
3626 if (mustBeXML != isXML)
3627 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
3628 : XML_ERROR_RESERVED_NAMESPACE_URI;
3629
3630 if (isXMLNS)
3631 return XML_ERROR_RESERVED_NAMESPACE_URI;
3632
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003633 if (namespaceSeparator)
3634 len++;
3635 if (freeBindingList) {
3636 b = freeBindingList;
3637 if (len > b->uriAlloc) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003638 XML_Char *temp = (XML_Char *)REALLOC(b->uri,
3639 sizeof(XML_Char) * (len + EXPAND_SPARE));
3640 if (temp == NULL)
3641 return XML_ERROR_NO_MEMORY;
3642 b->uri = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003643 b->uriAlloc = len + EXPAND_SPARE;
3644 }
3645 freeBindingList = b->nextTagBinding;
3646 }
3647 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003648 b = (BINDING *)MALLOC(sizeof(BINDING));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003649 if (!b)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003650 return XML_ERROR_NO_MEMORY;
3651 b->uri = (XML_Char *)MALLOC(sizeof(XML_Char) * (len + EXPAND_SPARE));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003652 if (!b->uri) {
3653 FREE(b);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003654 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003655 }
3656 b->uriAlloc = len + EXPAND_SPARE;
3657 }
3658 b->uriLen = len;
3659 memcpy(b->uri, uri, len * sizeof(XML_Char));
3660 if (namespaceSeparator)
3661 b->uri[len - 1] = namespaceSeparator;
3662 b->prefix = prefix;
3663 b->attId = attId;
3664 b->prevPrefixBinding = prefix->binding;
Fred Drake08317ae2003-10-21 15:38:55 +00003665 /* NULL binding when default namespace undeclared */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003666 if (*uri == XML_T('\0') && prefix == &_dtd->defaultPrefix)
3667 prefix->binding = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003668 else
3669 prefix->binding = b;
3670 b->nextTagBinding = *bindingsPtr;
3671 *bindingsPtr = b;
Fred Drake31d485c2004-08-03 07:06:22 +00003672 /* if attId == NULL then we are not starting a namespace scope */
3673 if (attId && startNamespaceDeclHandler)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003674 startNamespaceDeclHandler(handlerArg, prefix->name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003675 prefix->binding ? uri : 0);
3676 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003677}
3678
3679/* The idea here is to avoid using stack for each CDATA section when
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003680 the whole file is parsed with one call.
3681*/
3682static enum XML_Error PTRCALL
3683cdataSectionProcessor(XML_Parser parser,
3684 const char *start,
3685 const char *end,
3686 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003687{
Fred Drake31d485c2004-08-03 07:06:22 +00003688 enum XML_Error result = doCdataSection(parser, encoding, &start, end,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003689 endPtr, (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00003690 if (result != XML_ERROR_NONE)
3691 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003692 if (start) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003693 if (parentParser) { /* we are parsing an external entity */
3694 processor = externalEntityContentProcessor;
3695 return externalEntityContentProcessor(parser, start, end, endPtr);
3696 }
3697 else {
3698 processor = contentProcessor;
3699 return contentProcessor(parser, start, end, endPtr);
3700 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003701 }
3702 return result;
3703}
3704
Fred Drake31d485c2004-08-03 07:06:22 +00003705/* startPtr gets set to non-null if the section is closed, and to null if
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003706 the section is not yet closed.
3707*/
3708static enum XML_Error
3709doCdataSection(XML_Parser parser,
3710 const ENCODING *enc,
3711 const char **startPtr,
3712 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00003713 const char **nextPtr,
3714 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003715{
3716 const char *s = *startPtr;
3717 const char **eventPP;
3718 const char **eventEndPP;
3719 if (enc == encoding) {
3720 eventPP = &eventPtr;
3721 *eventPP = s;
3722 eventEndPP = &eventEndPtr;
3723 }
3724 else {
3725 eventPP = &(openInternalEntities->internalEventPtr);
3726 eventEndPP = &(openInternalEntities->internalEventEndPtr);
3727 }
3728 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003729 *startPtr = NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00003730
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003731 for (;;) {
3732 const char *next;
3733 int tok = XmlCdataSectionTok(enc, s, end, &next);
3734 *eventEndPP = next;
3735 switch (tok) {
3736 case XML_TOK_CDATA_SECT_CLOSE:
3737 if (endCdataSectionHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003738 endCdataSectionHandler(handlerArg);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003739#if 0
3740 /* see comment under XML_TOK_CDATA_SECT_OPEN */
3741 else if (characterDataHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003742 characterDataHandler(handlerArg, dataBuf, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003743#endif
3744 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003745 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003746 *startPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00003747 *nextPtr = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003748 if (ps_parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00003749 return XML_ERROR_ABORTED;
3750 else
3751 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003752 case XML_TOK_DATA_NEWLINE:
3753 if (characterDataHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003754 XML_Char c = 0xA;
3755 characterDataHandler(handlerArg, &c, 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003756 }
3757 else if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003758 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003759 break;
3760 case XML_TOK_DATA_CHARS:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003761 {
3762 XML_CharacterDataHandler charDataHandler = characterDataHandler;
3763 if (charDataHandler) {
3764 if (MUST_CONVERT(enc, s)) {
3765 for (;;) {
3766 ICHAR *dataPtr = (ICHAR *)dataBuf;
Victor Stinner23ec4b52017-06-15 00:54:36 +02003767 const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003768 *eventEndPP = next;
3769 charDataHandler(handlerArg, dataBuf,
3770 (int)(dataPtr - (ICHAR *)dataBuf));
Victor Stinner23ec4b52017-06-15 00:54:36 +02003771 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003772 break;
3773 *eventPP = s;
3774 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003775 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003776 else
3777 charDataHandler(handlerArg,
3778 (XML_Char *)s,
3779 (int)((XML_Char *)next - (XML_Char *)s));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003780 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003781 else if (defaultHandler)
3782 reportDefault(parser, enc, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003783 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003784 break;
3785 case XML_TOK_INVALID:
3786 *eventPP = next;
3787 return XML_ERROR_INVALID_TOKEN;
3788 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003789 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003790 *nextPtr = s;
3791 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003792 }
3793 return XML_ERROR_PARTIAL_CHAR;
3794 case XML_TOK_PARTIAL:
3795 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00003796 if (haveMore) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003797 *nextPtr = s;
3798 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003799 }
3800 return XML_ERROR_UNCLOSED_CDATA_SECTION;
3801 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02003802 /* Every token returned by XmlCdataSectionTok() has its own
3803 * explicit case, so this default case will never be executed.
3804 * We retain it as a safety net and exclude it from the coverage
3805 * statistics.
3806 *
3807 * LCOV_EXCL_START
3808 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003809 *eventPP = next;
3810 return XML_ERROR_UNEXPECTED_STATE;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003811 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003812 }
Fred Drake31d485c2004-08-03 07:06:22 +00003813
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003814 *eventPP = s = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003815 switch (ps_parsing) {
Fred Drake31d485c2004-08-03 07:06:22 +00003816 case XML_SUSPENDED:
3817 *nextPtr = next;
3818 return XML_ERROR_NONE;
3819 case XML_FINISHED:
3820 return XML_ERROR_ABORTED;
3821 default: ;
3822 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003823 }
3824 /* not reached */
3825}
3826
3827#ifdef XML_DTD
3828
3829/* The idea here is to avoid using stack for each IGNORE section when
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003830 the whole file is parsed with one call.
3831*/
3832static enum XML_Error PTRCALL
3833ignoreSectionProcessor(XML_Parser parser,
3834 const char *start,
3835 const char *end,
3836 const char **endPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003837{
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07003838 enum XML_Error result = doIgnoreSection(parser, encoding, &start, end,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003839 endPtr, (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00003840 if (result != XML_ERROR_NONE)
3841 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003842 if (start) {
3843 processor = prologProcessor;
3844 return prologProcessor(parser, start, end, endPtr);
3845 }
3846 return result;
3847}
3848
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003849/* startPtr gets set to non-null is the section is closed, and to null
3850 if the section is not yet closed.
3851*/
3852static enum XML_Error
3853doIgnoreSection(XML_Parser parser,
3854 const ENCODING *enc,
3855 const char **startPtr,
3856 const char *end,
Fred Drake31d485c2004-08-03 07:06:22 +00003857 const char **nextPtr,
3858 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003859{
3860 const char *next;
3861 int tok;
3862 const char *s = *startPtr;
3863 const char **eventPP;
3864 const char **eventEndPP;
3865 if (enc == encoding) {
3866 eventPP = &eventPtr;
3867 *eventPP = s;
3868 eventEndPP = &eventEndPtr;
3869 }
3870 else {
Victor Stinner93d0cb52017-08-18 23:43:54 +02003871 /* It's not entirely clear, but it seems the following two lines
3872 * of code cannot be executed. The only occasions on which 'enc'
3873 * is not 'parser->m_encoding' are when this function is called
3874 * from the internal entity processing, and IGNORE sections are an
3875 * error in internal entities.
3876 *
3877 * Since it really isn't clear that this is true, we keep the code
3878 * and just remove it from our coverage tests.
3879 *
3880 * LCOV_EXCL_START
3881 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003882 eventPP = &(openInternalEntities->internalEventPtr);
3883 eventEndPP = &(openInternalEntities->internalEventEndPtr);
Victor Stinner93d0cb52017-08-18 23:43:54 +02003884 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003885 }
3886 *eventPP = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003887 *startPtr = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003888 tok = XmlIgnoreSectionTok(enc, s, end, &next);
3889 *eventEndPP = next;
3890 switch (tok) {
3891 case XML_TOK_IGNORE_SECT:
3892 if (defaultHandler)
3893 reportDefault(parser, enc, s, next);
3894 *startPtr = next;
Fred Drake31d485c2004-08-03 07:06:22 +00003895 *nextPtr = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003896 if (ps_parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00003897 return XML_ERROR_ABORTED;
3898 else
3899 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003900 case XML_TOK_INVALID:
3901 *eventPP = next;
3902 return XML_ERROR_INVALID_TOKEN;
3903 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00003904 if (haveMore) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003905 *nextPtr = s;
3906 return XML_ERROR_NONE;
3907 }
3908 return XML_ERROR_PARTIAL_CHAR;
3909 case XML_TOK_PARTIAL:
3910 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00003911 if (haveMore) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003912 *nextPtr = s;
3913 return XML_ERROR_NONE;
3914 }
3915 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
3916 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02003917 /* All of the tokens that XmlIgnoreSectionTok() returns have
3918 * explicit cases to handle them, so this default case is never
3919 * executed. We keep it as a safety net anyway, and remove it
3920 * from our test coverage statistics.
3921 *
3922 * LCOV_EXCL_START
3923 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003924 *eventPP = next;
3925 return XML_ERROR_UNEXPECTED_STATE;
Victor Stinner93d0cb52017-08-18 23:43:54 +02003926 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003927 }
3928 /* not reached */
3929}
3930
3931#endif /* XML_DTD */
3932
3933static enum XML_Error
3934initializeEncoding(XML_Parser parser)
3935{
3936 const char *s;
3937#ifdef XML_UNICODE
3938 char encodingBuf[128];
Victor Stinner93d0cb52017-08-18 23:43:54 +02003939 /* See comments abount `protoclEncodingName` in parserInit() */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003940 if (!protocolEncodingName)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003941 s = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003942 else {
3943 int i;
3944 for (i = 0; protocolEncodingName[i]; i++) {
3945 if (i == sizeof(encodingBuf) - 1
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003946 || (protocolEncodingName[i] & ~0x7f) != 0) {
3947 encodingBuf[0] = '\0';
3948 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003949 }
3950 encodingBuf[i] = (char)protocolEncodingName[i];
3951 }
3952 encodingBuf[i] = '\0';
3953 s = encodingBuf;
3954 }
3955#else
3956 s = protocolEncodingName;
3957#endif
3958 if ((ns ? XmlInitEncodingNS : XmlInitEncoding)(&initEncoding, &encoding, s))
3959 return XML_ERROR_NONE;
3960 return handleUnknownEncoding(parser, protocolEncodingName);
3961}
3962
3963static enum XML_Error
3964processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003965 const char *s, const char *next)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003966{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003967 const char *encodingName = NULL;
3968 const XML_Char *storedEncName = NULL;
3969 const ENCODING *newEncoding = NULL;
3970 const char *version = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003971 const char *versionend;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003972 const XML_Char *storedversion = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003973 int standalone = -1;
3974 if (!(ns
3975 ? XmlParseXmlDeclNS
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003976 : XmlParseXmlDecl)(isGeneralTextEntity,
3977 encoding,
3978 s,
3979 next,
3980 &eventPtr,
3981 &version,
3982 &versionend,
3983 &encodingName,
3984 &newEncoding,
Fred Drake31d485c2004-08-03 07:06:22 +00003985 &standalone)) {
3986 if (isGeneralTextEntity)
3987 return XML_ERROR_TEXT_DECL;
3988 else
3989 return XML_ERROR_XML_DECL;
3990 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003991 if (!isGeneralTextEntity && standalone == 1) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003992 _dtd->standalone = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00003993#ifdef XML_DTD
3994 if (paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
3995 paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
3996#endif /* XML_DTD */
3997 }
3998 if (xmlDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00003999 if (encodingName != NULL) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004000 storedEncName = poolStoreString(&temp2Pool,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004001 encoding,
4002 encodingName,
4003 encodingName
4004 + XmlNameLength(encoding, encodingName));
4005 if (!storedEncName)
4006 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004007 poolFinish(&temp2Pool);
4008 }
4009 if (version) {
4010 storedversion = poolStoreString(&temp2Pool,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004011 encoding,
4012 version,
4013 versionend - encoding->minBytesPerChar);
4014 if (!storedversion)
4015 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004016 }
4017 xmlDeclHandler(handlerArg, storedversion, storedEncName, standalone);
4018 }
4019 else if (defaultHandler)
4020 reportDefault(parser, encoding, s, next);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004021 if (protocolEncodingName == NULL) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004022 if (newEncoding) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02004023 /* Check that the specified encoding does not conflict with what
4024 * the parser has already deduced. Do we have the same number
4025 * of bytes in the smallest representation of a character? If
4026 * this is UTF-16, is it the same endianness?
4027 */
4028 if (newEncoding->minBytesPerChar != encoding->minBytesPerChar
4029 || (newEncoding->minBytesPerChar == 2 &&
4030 newEncoding != encoding)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004031 eventPtr = encodingName;
4032 return XML_ERROR_INCORRECT_ENCODING;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004033 }
4034 encoding = newEncoding;
4035 }
4036 else if (encodingName) {
4037 enum XML_Error result;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004038 if (!storedEncName) {
4039 storedEncName = poolStoreString(
4040 &temp2Pool, encoding, encodingName,
4041 encodingName + XmlNameLength(encoding, encodingName));
4042 if (!storedEncName)
4043 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004044 }
4045 result = handleUnknownEncoding(parser, storedEncName);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004046 poolClear(&temp2Pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004047 if (result == XML_ERROR_UNKNOWN_ENCODING)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004048 eventPtr = encodingName;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004049 return result;
4050 }
4051 }
4052
4053 if (storedEncName || storedversion)
4054 poolClear(&temp2Pool);
4055
4056 return XML_ERROR_NONE;
4057}
4058
4059static enum XML_Error
4060handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
4061{
4062 if (unknownEncodingHandler) {
4063 XML_Encoding info;
4064 int i;
4065 for (i = 0; i < 256; i++)
4066 info.map[i] = -1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004067 info.convert = NULL;
4068 info.data = NULL;
4069 info.release = NULL;
4070 if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName,
4071 &info)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004072 ENCODING *enc;
4073 unknownEncodingMem = MALLOC(XmlSizeOfUnknownEncoding());
4074 if (!unknownEncodingMem) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004075 if (info.release)
4076 info.release(info.data);
4077 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004078 }
4079 enc = (ns
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004080 ? XmlInitUnknownEncodingNS
4081 : XmlInitUnknownEncoding)(unknownEncodingMem,
4082 info.map,
4083 info.convert,
4084 info.data);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004085 if (enc) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004086 unknownEncodingData = info.data;
4087 unknownEncodingRelease = info.release;
4088 encoding = enc;
4089 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004090 }
4091 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004092 if (info.release != NULL)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004093 info.release(info.data);
4094 }
4095 return XML_ERROR_UNKNOWN_ENCODING;
4096}
4097
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004098static enum XML_Error PTRCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004099prologInitProcessor(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004100 const char *s,
4101 const char *end,
4102 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004103{
4104 enum XML_Error result = initializeEncoding(parser);
4105 if (result != XML_ERROR_NONE)
4106 return result;
4107 processor = prologProcessor;
4108 return prologProcessor(parser, s, end, nextPtr);
4109}
4110
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004111#ifdef XML_DTD
4112
4113static enum XML_Error PTRCALL
4114externalParEntInitProcessor(XML_Parser parser,
4115 const char *s,
4116 const char *end,
4117 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004118{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004119 enum XML_Error result = initializeEncoding(parser);
4120 if (result != XML_ERROR_NONE)
4121 return result;
4122
4123 /* we know now that XML_Parse(Buffer) has been called,
4124 so we consider the external parameter entity read */
4125 _dtd->paramEntityRead = XML_TRUE;
4126
4127 if (prologState.inEntityValue) {
4128 processor = entityValueInitProcessor;
4129 return entityValueInitProcessor(parser, s, end, nextPtr);
4130 }
4131 else {
4132 processor = externalParEntProcessor;
4133 return externalParEntProcessor(parser, s, end, nextPtr);
4134 }
4135}
4136
4137static enum XML_Error PTRCALL
4138entityValueInitProcessor(XML_Parser parser,
4139 const char *s,
4140 const char *end,
4141 const char **nextPtr)
4142{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004143 int tok;
Fred Drake31d485c2004-08-03 07:06:22 +00004144 const char *start = s;
4145 const char *next = start;
4146 eventPtr = start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004147
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004148 for (;;) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004149 tok = XmlPrologTok(encoding, start, end, &next);
Fred Drake31d485c2004-08-03 07:06:22 +00004150 eventEndPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004151 if (tok <= 0) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004152 if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
Fred Drake31d485c2004-08-03 07:06:22 +00004153 *nextPtr = s;
4154 return XML_ERROR_NONE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004155 }
4156 switch (tok) {
4157 case XML_TOK_INVALID:
Fred Drake31d485c2004-08-03 07:06:22 +00004158 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004159 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00004160 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004161 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00004162 return XML_ERROR_PARTIAL_CHAR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004163 case XML_TOK_NONE: /* start == end */
4164 default:
4165 break;
4166 }
Fred Drake31d485c2004-08-03 07:06:22 +00004167 /* found end of entity value - can store it now */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004168 return storeEntityValue(parser, encoding, s, end);
4169 }
4170 else if (tok == XML_TOK_XML_DECL) {
Fred Drake31d485c2004-08-03 07:06:22 +00004171 enum XML_Error result;
4172 result = processXmlDecl(parser, 0, start, next);
4173 if (result != XML_ERROR_NONE)
4174 return result;
Victor Stinner93d0cb52017-08-18 23:43:54 +02004175 /* At this point, ps_parsing cannot be XML_SUSPENDED. For that
4176 * to happen, a parameter entity parsing handler must have
4177 * attempted to suspend the parser, which fails and raises an
4178 * error. The parser can be aborted, but can't be suspended.
4179 */
4180 if (ps_parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00004181 return XML_ERROR_ABORTED;
Victor Stinner93d0cb52017-08-18 23:43:54 +02004182 *nextPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004183 /* stop scanning for text declaration - we found one */
4184 processor = entityValueProcessor;
4185 return entityValueProcessor(parser, next, end, nextPtr);
4186 }
4187 /* If we are at the end of the buffer, this would cause XmlPrologTok to
4188 return XML_TOK_NONE on the next call, which would then cause the
4189 function to exit with *nextPtr set to s - that is what we want for other
4190 tokens, but not for the BOM - we would rather like to skip it;
4191 then, when this routine is entered the next time, XmlPrologTok will
4192 return XML_TOK_INVALID, since the BOM is still in the buffer
4193 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004194 else if (tok == XML_TOK_BOM && next == end && !ps_finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004195 *nextPtr = next;
4196 return XML_ERROR_NONE;
4197 }
Victor Stinner5ff71322017-06-21 14:39:22 +02004198 /* If we get this token, we have the start of what might be a
4199 normal tag, but not a declaration (i.e. it doesn't begin with
4200 "<!"). In a DTD context, that isn't legal.
4201 */
4202 else if (tok == XML_TOK_INSTANCE_START) {
4203 *nextPtr = next;
4204 return XML_ERROR_SYNTAX;
4205 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004206 start = next;
Fred Drake31d485c2004-08-03 07:06:22 +00004207 eventPtr = start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004208 }
4209}
4210
4211static enum XML_Error PTRCALL
4212externalParEntProcessor(XML_Parser parser,
4213 const char *s,
4214 const char *end,
4215 const char **nextPtr)
4216{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004217 const char *next = s;
4218 int tok;
4219
Fred Drake31d485c2004-08-03 07:06:22 +00004220 tok = XmlPrologTok(encoding, s, end, &next);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004221 if (tok <= 0) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004222 if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004223 *nextPtr = s;
4224 return XML_ERROR_NONE;
4225 }
4226 switch (tok) {
4227 case XML_TOK_INVALID:
4228 return XML_ERROR_INVALID_TOKEN;
4229 case XML_TOK_PARTIAL:
4230 return XML_ERROR_UNCLOSED_TOKEN;
4231 case XML_TOK_PARTIAL_CHAR:
4232 return XML_ERROR_PARTIAL_CHAR;
4233 case XML_TOK_NONE: /* start == end */
4234 default:
4235 break;
4236 }
4237 }
4238 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4239 However, when parsing an external subset, doProlog will not accept a BOM
4240 as valid, and report a syntax error, so we have to skip the BOM
4241 */
4242 else if (tok == XML_TOK_BOM) {
4243 s = next;
4244 tok = XmlPrologTok(encoding, s, end, &next);
4245 }
4246
4247 processor = prologProcessor;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004248 return doProlog(parser, encoding, s, end, tok, next,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004249 nextPtr, (XML_Bool)!ps_finalBuffer);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004250}
4251
4252static enum XML_Error PTRCALL
4253entityValueProcessor(XML_Parser parser,
4254 const char *s,
4255 const char *end,
4256 const char **nextPtr)
4257{
4258 const char *start = s;
4259 const char *next = s;
4260 const ENCODING *enc = encoding;
4261 int tok;
4262
4263 for (;;) {
4264 tok = XmlPrologTok(enc, start, end, &next);
4265 if (tok <= 0) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004266 if (!ps_finalBuffer && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004267 *nextPtr = s;
4268 return XML_ERROR_NONE;
4269 }
4270 switch (tok) {
4271 case XML_TOK_INVALID:
Fred Drake31d485c2004-08-03 07:06:22 +00004272 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004273 case XML_TOK_PARTIAL:
Fred Drake31d485c2004-08-03 07:06:22 +00004274 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004275 case XML_TOK_PARTIAL_CHAR:
Fred Drake31d485c2004-08-03 07:06:22 +00004276 return XML_ERROR_PARTIAL_CHAR;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004277 case XML_TOK_NONE: /* start == end */
4278 default:
4279 break;
4280 }
Fred Drake31d485c2004-08-03 07:06:22 +00004281 /* found end of entity value - can store it now */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004282 return storeEntityValue(parser, enc, s, end);
4283 }
4284 start = next;
4285 }
4286}
4287
4288#endif /* XML_DTD */
4289
4290static enum XML_Error PTRCALL
4291prologProcessor(XML_Parser parser,
4292 const char *s,
4293 const char *end,
4294 const char **nextPtr)
4295{
4296 const char *next = s;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004297 int tok = XmlPrologTok(encoding, s, end, &next);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004298 return doProlog(parser, encoding, s, end, tok, next,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004299 nextPtr, (XML_Bool)!ps_finalBuffer);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004300}
4301
4302static enum XML_Error
4303doProlog(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004304 const ENCODING *enc,
4305 const char *s,
4306 const char *end,
4307 int tok,
4308 const char *next,
Fred Drake31d485c2004-08-03 07:06:22 +00004309 const char **nextPtr,
4310 XML_Bool haveMore)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004311{
4312#ifdef XML_DTD
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004313 static const XML_Char externalSubsetName[] = { ASCII_HASH , '\0' };
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004314#endif /* XML_DTD */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004315 static const XML_Char atypeCDATA[] =
4316 { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
4317 static const XML_Char atypeID[] = { ASCII_I, ASCII_D, '\0' };
4318 static const XML_Char atypeIDREF[] =
4319 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
4320 static const XML_Char atypeIDREFS[] =
4321 { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
4322 static const XML_Char atypeENTITY[] =
4323 { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
4324 static const XML_Char atypeENTITIES[] = { ASCII_E, ASCII_N,
4325 ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' };
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004326 static const XML_Char atypeNMTOKEN[] = {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004327 ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
4328 static const XML_Char atypeNMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T,
4329 ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' };
4330 static const XML_Char notationPrefix[] = { ASCII_N, ASCII_O, ASCII_T,
4331 ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0' };
4332 static const XML_Char enumValueSep[] = { ASCII_PIPE, '\0' };
4333 static const XML_Char enumValueStart[] = { ASCII_LPAREN, '\0' };
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004334
Fred Drake31d485c2004-08-03 07:06:22 +00004335 /* save one level of indirection */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004336 DTD * const dtd = _dtd;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004337
4338 const char **eventPP;
4339 const char **eventEndPP;
4340 enum XML_Content_Quant quant;
4341
4342 if (enc == encoding) {
4343 eventPP = &eventPtr;
4344 eventEndPP = &eventEndPtr;
4345 }
4346 else {
4347 eventPP = &(openInternalEntities->internalEventPtr);
4348 eventEndPP = &(openInternalEntities->internalEventEndPtr);
4349 }
Fred Drake31d485c2004-08-03 07:06:22 +00004350
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004351 for (;;) {
4352 int role;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004353 XML_Bool handleDefault = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004354 *eventPP = s;
4355 *eventEndPP = next;
4356 if (tok <= 0) {
Fred Drake31d485c2004-08-03 07:06:22 +00004357 if (haveMore && tok != XML_TOK_INVALID) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004358 *nextPtr = s;
4359 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004360 }
4361 switch (tok) {
4362 case XML_TOK_INVALID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004363 *eventPP = next;
4364 return XML_ERROR_INVALID_TOKEN;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004365 case XML_TOK_PARTIAL:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004366 return XML_ERROR_UNCLOSED_TOKEN;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004367 case XML_TOK_PARTIAL_CHAR:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004368 return XML_ERROR_PARTIAL_CHAR;
Matthias Klose865e33b2010-01-22 01:13:15 +00004369 case -XML_TOK_PROLOG_S:
4370 tok = -tok;
4371 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004372 case XML_TOK_NONE:
4373#ifdef XML_DTD
Fred Drake31d485c2004-08-03 07:06:22 +00004374 /* for internal PE NOT referenced between declarations */
4375 if (enc != encoding && !openInternalEntities->betweenDecl) {
4376 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004377 return XML_ERROR_NONE;
Fred Drake31d485c2004-08-03 07:06:22 +00004378 }
4379 /* WFC: PE Between Declarations - must check that PE contains
4380 complete markup, not only for external PEs, but also for
4381 internal PEs if the reference occurs between declarations.
4382 */
4383 if (isParamEntity || enc != encoding) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004384 if (XmlTokenRole(&prologState, XML_TOK_NONE, end, end, enc)
4385 == XML_ROLE_ERROR)
Fred Drake31d485c2004-08-03 07:06:22 +00004386 return XML_ERROR_INCOMPLETE_PE;
4387 *nextPtr = s;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004388 return XML_ERROR_NONE;
4389 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004390#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004391 return XML_ERROR_NO_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004392 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004393 tok = -tok;
4394 next = end;
4395 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004396 }
4397 }
4398 role = XmlTokenRole(&prologState, tok, s, next, enc);
4399 switch (role) {
4400 case XML_ROLE_XML_DECL:
4401 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004402 enum XML_Error result = processXmlDecl(parser, 0, s, next);
4403 if (result != XML_ERROR_NONE)
4404 return result;
4405 enc = encoding;
4406 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004407 }
4408 break;
4409 case XML_ROLE_DOCTYPE_NAME:
4410 if (startDoctypeDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004411 doctypeName = poolStoreString(&tempPool, enc, s, next);
4412 if (!doctypeName)
4413 return XML_ERROR_NO_MEMORY;
4414 poolFinish(&tempPool);
4415 doctypePubid = NULL;
4416 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004417 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004418 doctypeSysid = NULL; /* always initialize to NULL */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004419 break;
4420 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4421 if (startDoctypeDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004422 startDoctypeDeclHandler(handlerArg, doctypeName, doctypeSysid,
4423 doctypePubid, 1);
4424 doctypeName = NULL;
4425 poolClear(&tempPool);
4426 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004427 }
4428 break;
4429#ifdef XML_DTD
4430 case XML_ROLE_TEXT_DECL:
4431 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004432 enum XML_Error result = processXmlDecl(parser, 1, s, next);
4433 if (result != XML_ERROR_NONE)
4434 return result;
4435 enc = encoding;
4436 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004437 }
4438 break;
4439#endif /* XML_DTD */
4440 case XML_ROLE_DOCTYPE_PUBLIC_ID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004441#ifdef XML_DTD
4442 useForeignDTD = XML_FALSE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004443 declEntity = (ENTITY *)lookup(parser,
4444 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004445 externalSubsetName,
4446 sizeof(ENTITY));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004447 if (!declEntity)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004448 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004449#endif /* XML_DTD */
Fred Drake31d485c2004-08-03 07:06:22 +00004450 dtd->hasParamEntityRefs = XML_TRUE;
4451 if (startDoctypeDeclHandler) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004452 XML_Char *pubId;
Fred Drake31d485c2004-08-03 07:06:22 +00004453 if (!XmlIsPublicId(enc, s, next, eventPP))
4454 return XML_ERROR_PUBLICID;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004455 pubId = poolStoreString(&tempPool, enc,
4456 s + enc->minBytesPerChar,
4457 next - enc->minBytesPerChar);
4458 if (!pubId)
Fred Drake31d485c2004-08-03 07:06:22 +00004459 return XML_ERROR_NO_MEMORY;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004460 normalizePublicId(pubId);
Fred Drake31d485c2004-08-03 07:06:22 +00004461 poolFinish(&tempPool);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004462 doctypePubid = pubId;
Fred Drake31d485c2004-08-03 07:06:22 +00004463 handleDefault = XML_FALSE;
4464 goto alreadyChecked;
4465 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004466 /* fall through */
4467 case XML_ROLE_ENTITY_PUBLIC_ID:
4468 if (!XmlIsPublicId(enc, s, next, eventPP))
Fred Drake31d485c2004-08-03 07:06:22 +00004469 return XML_ERROR_PUBLICID;
4470 alreadyChecked:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004471 if (dtd->keepProcessing && declEntity) {
4472 XML_Char *tem = poolStoreString(&dtd->pool,
4473 enc,
4474 s + enc->minBytesPerChar,
4475 next - enc->minBytesPerChar);
4476 if (!tem)
4477 return XML_ERROR_NO_MEMORY;
4478 normalizePublicId(tem);
4479 declEntity->publicId = tem;
4480 poolFinish(&dtd->pool);
4481 if (entityDeclHandler)
4482 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004483 }
4484 break;
4485 case XML_ROLE_DOCTYPE_CLOSE:
4486 if (doctypeName) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004487 startDoctypeDeclHandler(handlerArg, doctypeName,
4488 doctypeSysid, doctypePubid, 0);
4489 poolClear(&tempPool);
4490 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004491 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004492 /* doctypeSysid will be non-NULL in the case of a previous
4493 XML_ROLE_DOCTYPE_SYSTEM_ID, even if startDoctypeDeclHandler
4494 was not set, indicating an external subset
4495 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004496#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004497 if (doctypeSysid || useForeignDTD) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004498 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
4499 dtd->hasParamEntityRefs = XML_TRUE;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004500 if (paramEntityParsing && externalEntityRefHandler) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004501 ENTITY *entity = (ENTITY *)lookup(parser,
4502 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004503 externalSubsetName,
4504 sizeof(ENTITY));
Victor Stinner93d0cb52017-08-18 23:43:54 +02004505 if (!entity) {
4506 /* The external subset name "#" will have already been
4507 * inserted into the hash table at the start of the
4508 * external entity parsing, so no allocation will happen
4509 * and lookup() cannot fail.
4510 */
4511 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
4512 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004513 if (useForeignDTD)
4514 entity->base = curBase;
4515 dtd->paramEntityRead = XML_FALSE;
4516 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
4517 0,
4518 entity->base,
4519 entity->systemId,
4520 entity->publicId))
4521 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004522 if (dtd->paramEntityRead) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004523 if (!dtd->standalone &&
4524 notStandaloneHandler &&
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004525 !notStandaloneHandler(handlerArg))
4526 return XML_ERROR_NOT_STANDALONE;
4527 }
4528 /* if we didn't read the foreign DTD then this means that there
4529 is no external subset and we must reset dtd->hasParamEntityRefs
4530 */
4531 else if (!doctypeSysid)
4532 dtd->hasParamEntityRefs = hadParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004533 /* end of DTD - no need to update dtd->keepProcessing */
4534 }
4535 useForeignDTD = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004536 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004537#endif /* XML_DTD */
4538 if (endDoctypeDeclHandler) {
4539 endDoctypeDeclHandler(handlerArg);
4540 handleDefault = XML_FALSE;
4541 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004542 break;
4543 case XML_ROLE_INSTANCE_START:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004544#ifdef XML_DTD
4545 /* if there is no DOCTYPE declaration then now is the
4546 last chance to read the foreign DTD
4547 */
4548 if (useForeignDTD) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004549 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004550 dtd->hasParamEntityRefs = XML_TRUE;
4551 if (paramEntityParsing && externalEntityRefHandler) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004552 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004553 externalSubsetName,
4554 sizeof(ENTITY));
4555 if (!entity)
4556 return XML_ERROR_NO_MEMORY;
4557 entity->base = curBase;
4558 dtd->paramEntityRead = XML_FALSE;
4559 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
4560 0,
4561 entity->base,
4562 entity->systemId,
4563 entity->publicId))
4564 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004565 if (dtd->paramEntityRead) {
4566 if (!dtd->standalone &&
4567 notStandaloneHandler &&
4568 !notStandaloneHandler(handlerArg))
4569 return XML_ERROR_NOT_STANDALONE;
4570 }
4571 /* if we didn't read the foreign DTD then this means that there
4572 is no external subset and we must reset dtd->hasParamEntityRefs
4573 */
4574 else
4575 dtd->hasParamEntityRefs = hadParamEntityRefs;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004576 /* end of DTD - no need to update dtd->keepProcessing */
4577 }
4578 }
4579#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004580 processor = contentProcessor;
4581 return contentProcessor(parser, s, end, nextPtr);
4582 case XML_ROLE_ATTLIST_ELEMENT_NAME:
4583 declElementType = getElementType(parser, enc, s, next);
4584 if (!declElementType)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004585 return XML_ERROR_NO_MEMORY;
4586 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004587 case XML_ROLE_ATTRIBUTE_NAME:
4588 declAttributeId = getAttributeId(parser, enc, s, next);
4589 if (!declAttributeId)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004590 return XML_ERROR_NO_MEMORY;
4591 declAttributeIsCdata = XML_FALSE;
4592 declAttributeType = NULL;
4593 declAttributeIsId = XML_FALSE;
4594 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004595 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004596 declAttributeIsCdata = XML_TRUE;
4597 declAttributeType = atypeCDATA;
4598 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004599 case XML_ROLE_ATTRIBUTE_TYPE_ID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004600 declAttributeIsId = XML_TRUE;
4601 declAttributeType = atypeID;
4602 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004603 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004604 declAttributeType = atypeIDREF;
4605 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004606 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004607 declAttributeType = atypeIDREFS;
4608 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004609 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004610 declAttributeType = atypeENTITY;
4611 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004612 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004613 declAttributeType = atypeENTITIES;
4614 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004615 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004616 declAttributeType = atypeNMTOKEN;
4617 goto checkAttListDeclHandler;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004618 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004619 declAttributeType = atypeNMTOKENS;
4620 checkAttListDeclHandler:
4621 if (dtd->keepProcessing && attlistDeclHandler)
4622 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004623 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004624 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
4625 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004626 if (dtd->keepProcessing && attlistDeclHandler) {
4627 const XML_Char *prefix;
4628 if (declAttributeType) {
4629 prefix = enumValueSep;
4630 }
4631 else {
4632 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE
4633 ? notationPrefix
4634 : enumValueStart);
4635 }
4636 if (!poolAppendString(&tempPool, prefix))
4637 return XML_ERROR_NO_MEMORY;
4638 if (!poolAppend(&tempPool, enc, s, next))
4639 return XML_ERROR_NO_MEMORY;
4640 declAttributeType = tempPool.start;
4641 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004642 }
4643 break;
4644 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
4645 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004646 if (dtd->keepProcessing) {
4647 if (!defineAttribute(declElementType, declAttributeId,
Fred Drake08317ae2003-10-21 15:38:55 +00004648 declAttributeIsCdata, declAttributeIsId,
4649 0, parser))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004650 return XML_ERROR_NO_MEMORY;
4651 if (attlistDeclHandler && declAttributeType) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004652 if (*declAttributeType == XML_T(ASCII_LPAREN)
4653 || (*declAttributeType == XML_T(ASCII_N)
4654 && declAttributeType[1] == XML_T(ASCII_O))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004655 /* Enumerated or Notation type */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004656 if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004657 || !poolAppendChar(&tempPool, XML_T('\0')))
4658 return XML_ERROR_NO_MEMORY;
4659 declAttributeType = tempPool.start;
4660 poolFinish(&tempPool);
4661 }
4662 *eventEndPP = s;
4663 attlistDeclHandler(handlerArg, declElementType->name,
4664 declAttributeId->name, declAttributeType,
4665 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
4666 poolClear(&tempPool);
4667 handleDefault = XML_FALSE;
4668 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004669 }
4670 break;
4671 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
4672 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004673 if (dtd->keepProcessing) {
4674 const XML_Char *attVal;
Fred Drake08317ae2003-10-21 15:38:55 +00004675 enum XML_Error result =
4676 storeAttributeValue(parser, enc, declAttributeIsCdata,
4677 s + enc->minBytesPerChar,
4678 next - enc->minBytesPerChar,
4679 &dtd->pool);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004680 if (result)
4681 return result;
4682 attVal = poolStart(&dtd->pool);
4683 poolFinish(&dtd->pool);
4684 /* ID attributes aren't allowed to have a default */
4685 if (!defineAttribute(declElementType, declAttributeId,
4686 declAttributeIsCdata, XML_FALSE, attVal, parser))
4687 return XML_ERROR_NO_MEMORY;
4688 if (attlistDeclHandler && declAttributeType) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004689 if (*declAttributeType == XML_T(ASCII_LPAREN)
4690 || (*declAttributeType == XML_T(ASCII_N)
4691 && declAttributeType[1] == XML_T(ASCII_O))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004692 /* Enumerated or Notation type */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004693 if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004694 || !poolAppendChar(&tempPool, XML_T('\0')))
4695 return XML_ERROR_NO_MEMORY;
4696 declAttributeType = tempPool.start;
4697 poolFinish(&tempPool);
4698 }
4699 *eventEndPP = s;
4700 attlistDeclHandler(handlerArg, declElementType->name,
4701 declAttributeId->name, declAttributeType,
4702 attVal,
4703 role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
4704 poolClear(&tempPool);
4705 handleDefault = XML_FALSE;
4706 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004707 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004708 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004709 case XML_ROLE_ENTITY_VALUE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004710 if (dtd->keepProcessing) {
4711 enum XML_Error result = storeEntityValue(parser, enc,
4712 s + enc->minBytesPerChar,
4713 next - enc->minBytesPerChar);
4714 if (declEntity) {
4715 declEntity->textPtr = poolStart(&dtd->entityValuePool);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00004716 declEntity->textLen = (int)(poolLength(&dtd->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004717 poolFinish(&dtd->entityValuePool);
4718 if (entityDeclHandler) {
4719 *eventEndPP = s;
4720 entityDeclHandler(handlerArg,
4721 declEntity->name,
4722 declEntity->is_param,
4723 declEntity->textPtr,
4724 declEntity->textLen,
4725 curBase, 0, 0, 0);
4726 handleDefault = XML_FALSE;
4727 }
4728 }
4729 else
4730 poolDiscard(&dtd->entityValuePool);
4731 if (result != XML_ERROR_NONE)
4732 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004733 }
4734 break;
4735 case XML_ROLE_DOCTYPE_SYSTEM_ID:
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004736#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004737 useForeignDTD = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004738#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004739 dtd->hasParamEntityRefs = XML_TRUE;
4740 if (startDoctypeDeclHandler) {
4741 doctypeSysid = poolStoreString(&tempPool, enc,
4742 s + enc->minBytesPerChar,
4743 next - enc->minBytesPerChar);
4744 if (doctypeSysid == NULL)
4745 return XML_ERROR_NO_MEMORY;
4746 poolFinish(&tempPool);
4747 handleDefault = XML_FALSE;
4748 }
4749#ifdef XML_DTD
4750 else
4751 /* use externalSubsetName to make doctypeSysid non-NULL
4752 for the case where no startDoctypeDeclHandler is set */
4753 doctypeSysid = externalSubsetName;
4754#endif /* XML_DTD */
4755 if (!dtd->standalone
4756#ifdef XML_DTD
4757 && !paramEntityParsing
4758#endif /* XML_DTD */
4759 && notStandaloneHandler
4760 && !notStandaloneHandler(handlerArg))
4761 return XML_ERROR_NOT_STANDALONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004762#ifndef XML_DTD
4763 break;
4764#else /* XML_DTD */
4765 if (!declEntity) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004766 declEntity = (ENTITY *)lookup(parser,
4767 &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004768 externalSubsetName,
4769 sizeof(ENTITY));
4770 if (!declEntity)
4771 return XML_ERROR_NO_MEMORY;
4772 declEntity->publicId = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004773 }
4774 /* fall through */
4775#endif /* XML_DTD */
4776 case XML_ROLE_ENTITY_SYSTEM_ID:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004777 if (dtd->keepProcessing && declEntity) {
4778 declEntity->systemId = poolStoreString(&dtd->pool, enc,
4779 s + enc->minBytesPerChar,
4780 next - enc->minBytesPerChar);
4781 if (!declEntity->systemId)
4782 return XML_ERROR_NO_MEMORY;
4783 declEntity->base = curBase;
4784 poolFinish(&dtd->pool);
4785 if (entityDeclHandler)
4786 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004787 }
4788 break;
4789 case XML_ROLE_ENTITY_COMPLETE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004790 if (dtd->keepProcessing && declEntity && entityDeclHandler) {
4791 *eventEndPP = s;
4792 entityDeclHandler(handlerArg,
4793 declEntity->name,
4794 declEntity->is_param,
4795 0,0,
4796 declEntity->base,
4797 declEntity->systemId,
4798 declEntity->publicId,
4799 0);
4800 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004801 }
4802 break;
4803 case XML_ROLE_ENTITY_NOTATION_NAME:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004804 if (dtd->keepProcessing && declEntity) {
4805 declEntity->notation = poolStoreString(&dtd->pool, enc, s, next);
4806 if (!declEntity->notation)
4807 return XML_ERROR_NO_MEMORY;
4808 poolFinish(&dtd->pool);
4809 if (unparsedEntityDeclHandler) {
4810 *eventEndPP = s;
4811 unparsedEntityDeclHandler(handlerArg,
4812 declEntity->name,
4813 declEntity->base,
4814 declEntity->systemId,
4815 declEntity->publicId,
4816 declEntity->notation);
4817 handleDefault = XML_FALSE;
4818 }
4819 else if (entityDeclHandler) {
4820 *eventEndPP = s;
4821 entityDeclHandler(handlerArg,
4822 declEntity->name,
4823 0,0,0,
4824 declEntity->base,
4825 declEntity->systemId,
4826 declEntity->publicId,
4827 declEntity->notation);
4828 handleDefault = XML_FALSE;
4829 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004830 }
4831 break;
4832 case XML_ROLE_GENERAL_ENTITY_NAME:
4833 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004834 if (XmlPredefinedEntityName(enc, s, next)) {
4835 declEntity = NULL;
4836 break;
4837 }
4838 if (dtd->keepProcessing) {
4839 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4840 if (!name)
4841 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004842 declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004843 sizeof(ENTITY));
4844 if (!declEntity)
4845 return XML_ERROR_NO_MEMORY;
4846 if (declEntity->name != name) {
4847 poolDiscard(&dtd->pool);
4848 declEntity = NULL;
4849 }
4850 else {
4851 poolFinish(&dtd->pool);
4852 declEntity->publicId = NULL;
4853 declEntity->is_param = XML_FALSE;
4854 /* if we have a parent parser or are reading an internal parameter
4855 entity, then the entity declaration is not considered "internal"
4856 */
4857 declEntity->is_internal = !(parentParser || openInternalEntities);
4858 if (entityDeclHandler)
4859 handleDefault = XML_FALSE;
4860 }
4861 }
4862 else {
4863 poolDiscard(&dtd->pool);
4864 declEntity = NULL;
4865 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004866 }
4867 break;
4868 case XML_ROLE_PARAM_ENTITY_NAME:
4869#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004870 if (dtd->keepProcessing) {
4871 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
4872 if (!name)
4873 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07004874 declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004875 name, sizeof(ENTITY));
4876 if (!declEntity)
4877 return XML_ERROR_NO_MEMORY;
4878 if (declEntity->name != name) {
4879 poolDiscard(&dtd->pool);
4880 declEntity = NULL;
4881 }
4882 else {
4883 poolFinish(&dtd->pool);
4884 declEntity->publicId = NULL;
4885 declEntity->is_param = XML_TRUE;
4886 /* if we have a parent parser or are reading an internal parameter
4887 entity, then the entity declaration is not considered "internal"
4888 */
4889 declEntity->is_internal = !(parentParser || openInternalEntities);
4890 if (entityDeclHandler)
4891 handleDefault = XML_FALSE;
4892 }
4893 }
4894 else {
4895 poolDiscard(&dtd->pool);
4896 declEntity = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004897 }
4898#else /* not XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004899 declEntity = NULL;
4900#endif /* XML_DTD */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004901 break;
4902 case XML_ROLE_NOTATION_NAME:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004903 declNotationPublicId = NULL;
4904 declNotationName = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004905 if (notationDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004906 declNotationName = poolStoreString(&tempPool, enc, s, next);
4907 if (!declNotationName)
4908 return XML_ERROR_NO_MEMORY;
4909 poolFinish(&tempPool);
4910 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004911 }
4912 break;
4913 case XML_ROLE_NOTATION_PUBLIC_ID:
4914 if (!XmlIsPublicId(enc, s, next, eventPP))
Fred Drake31d485c2004-08-03 07:06:22 +00004915 return XML_ERROR_PUBLICID;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004916 if (declNotationName) { /* means notationDeclHandler != NULL */
4917 XML_Char *tem = poolStoreString(&tempPool,
4918 enc,
4919 s + enc->minBytesPerChar,
4920 next - enc->minBytesPerChar);
4921 if (!tem)
4922 return XML_ERROR_NO_MEMORY;
4923 normalizePublicId(tem);
4924 declNotationPublicId = tem;
4925 poolFinish(&tempPool);
4926 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004927 }
4928 break;
4929 case XML_ROLE_NOTATION_SYSTEM_ID:
4930 if (declNotationName && notationDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004931 const XML_Char *systemId
4932 = poolStoreString(&tempPool, enc,
4933 s + enc->minBytesPerChar,
4934 next - enc->minBytesPerChar);
4935 if (!systemId)
4936 return XML_ERROR_NO_MEMORY;
4937 *eventEndPP = s;
4938 notationDeclHandler(handlerArg,
4939 declNotationName,
4940 curBase,
4941 systemId,
4942 declNotationPublicId);
4943 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004944 }
4945 poolClear(&tempPool);
4946 break;
4947 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
4948 if (declNotationPublicId && notationDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004949 *eventEndPP = s;
4950 notationDeclHandler(handlerArg,
4951 declNotationName,
4952 curBase,
4953 0,
4954 declNotationPublicId);
4955 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004956 }
4957 poolClear(&tempPool);
4958 break;
4959 case XML_ROLE_ERROR:
4960 switch (tok) {
4961 case XML_TOK_PARAM_ENTITY_REF:
Fred Drake31d485c2004-08-03 07:06:22 +00004962 /* PE references in internal subset are
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07004963 not allowed within declarations. */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004964 return XML_ERROR_PARAM_ENTITY_REF;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004965 case XML_TOK_XML_DECL:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004966 return XML_ERROR_MISPLACED_XML_PI;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004967 default:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004968 return XML_ERROR_SYNTAX;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004969 }
4970#ifdef XML_DTD
4971 case XML_ROLE_IGNORE_SECT:
4972 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004973 enum XML_Error result;
4974 if (defaultHandler)
4975 reportDefault(parser, enc, s, next);
4976 handleDefault = XML_FALSE;
Fred Drake31d485c2004-08-03 07:06:22 +00004977 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
4978 if (result != XML_ERROR_NONE)
4979 return result;
4980 else if (!next) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004981 processor = ignoreSectionProcessor;
4982 return result;
4983 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00004984 }
4985 break;
4986#endif /* XML_DTD */
4987 case XML_ROLE_GROUP_OPEN:
4988 if (prologState.level >= groupSize) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004989 if (groupSize) {
4990 char *temp = (char *)REALLOC(groupConnector, groupSize *= 2);
Victor Stinner93d0cb52017-08-18 23:43:54 +02004991 if (temp == NULL) {
4992 groupSize /= 2;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004993 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02004994 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00004995 groupConnector = temp;
4996 if (dtd->scaffIndex) {
4997 int *temp = (int *)REALLOC(dtd->scaffIndex,
4998 groupSize * sizeof(int));
4999 if (temp == NULL)
5000 return XML_ERROR_NO_MEMORY;
5001 dtd->scaffIndex = temp;
5002 }
5003 }
5004 else {
5005 groupConnector = (char *)MALLOC(groupSize = 32);
Victor Stinner93d0cb52017-08-18 23:43:54 +02005006 if (!groupConnector) {
5007 groupSize = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005008 return XML_ERROR_NO_MEMORY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005009 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005010 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005011 }
5012 groupConnector[prologState.level] = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005013 if (dtd->in_eldecl) {
5014 int myindex = nextScaffoldPart(parser);
5015 if (myindex < 0)
5016 return XML_ERROR_NO_MEMORY;
5017 dtd->scaffIndex[dtd->scaffLevel] = myindex;
5018 dtd->scaffLevel++;
5019 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5020 if (elementDeclHandler)
5021 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005022 }
5023 break;
5024 case XML_ROLE_GROUP_SEQUENCE:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005025 if (groupConnector[prologState.level] == ASCII_PIPE)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005026 return XML_ERROR_SYNTAX;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005027 groupConnector[prologState.level] = ASCII_COMMA;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005028 if (dtd->in_eldecl && elementDeclHandler)
5029 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005030 break;
5031 case XML_ROLE_GROUP_CHOICE:
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005032 if (groupConnector[prologState.level] == ASCII_COMMA)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005033 return XML_ERROR_SYNTAX;
5034 if (dtd->in_eldecl
5035 && !groupConnector[prologState.level]
5036 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5037 != XML_CTYPE_MIXED)
5038 ) {
5039 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5040 = XML_CTYPE_CHOICE;
5041 if (elementDeclHandler)
5042 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005043 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005044 groupConnector[prologState.level] = ASCII_PIPE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005045 break;
5046 case XML_ROLE_PARAM_ENTITY_REF:
5047#ifdef XML_DTD
5048 case XML_ROLE_INNER_PARAM_ENTITY_REF:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005049 dtd->hasParamEntityRefs = XML_TRUE;
5050 if (!paramEntityParsing)
5051 dtd->keepProcessing = dtd->standalone;
5052 else {
5053 const XML_Char *name;
5054 ENTITY *entity;
5055 name = poolStoreString(&dtd->pool, enc,
5056 s + enc->minBytesPerChar,
5057 next - enc->minBytesPerChar);
5058 if (!name)
5059 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005060 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005061 poolDiscard(&dtd->pool);
5062 /* first, determine if a check for an existing declaration is needed;
5063 if yes, check that the entity exists, and that it is internal,
5064 otherwise call the skipped entity handler
5065 */
5066 if (prologState.documentEntity &&
5067 (dtd->standalone
5068 ? !openInternalEntities
5069 : !dtd->hasParamEntityRefs)) {
5070 if (!entity)
5071 return XML_ERROR_UNDEFINED_ENTITY;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005072 else if (!entity->is_internal) {
5073 /* It's hard to exhaustively search the code to be sure,
5074 * but there doesn't seem to be a way of executing the
5075 * following line. There are two cases:
5076 *
5077 * If 'standalone' is false, the DTD must have no
5078 * parameter entities or we wouldn't have passed the outer
5079 * 'if' statement. That means the only entity in the hash
5080 * table is the external subset name "#" which cannot be
5081 * given as a parameter entity name in XML syntax, so the
5082 * lookup must have returned NULL and we don't even reach
5083 * the test for an internal entity.
5084 *
5085 * If 'standalone' is true, it does not seem to be
5086 * possible to create entities taking this code path that
5087 * are not internal entities, so fail the test above.
5088 *
5089 * Because this analysis is very uncertain, the code is
5090 * being left in place and merely removed from the
5091 * coverage test statistics.
5092 */
5093 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5094 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005095 }
5096 else if (!entity) {
5097 dtd->keepProcessing = dtd->standalone;
5098 /* cannot report skipped entities in declarations */
5099 if ((role == XML_ROLE_PARAM_ENTITY_REF) && skippedEntityHandler) {
5100 skippedEntityHandler(handlerArg, name, 1);
5101 handleDefault = XML_FALSE;
5102 }
5103 break;
5104 }
5105 if (entity->open)
5106 return XML_ERROR_RECURSIVE_ENTITY_REF;
5107 if (entity->textPtr) {
5108 enum XML_Error result;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005109 XML_Bool betweenDecl =
Fred Drake31d485c2004-08-03 07:06:22 +00005110 (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5111 result = processInternalEntity(parser, entity, betweenDecl);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005112 if (result != XML_ERROR_NONE)
5113 return result;
5114 handleDefault = XML_FALSE;
5115 break;
5116 }
5117 if (externalEntityRefHandler) {
5118 dtd->paramEntityRead = XML_FALSE;
5119 entity->open = XML_TRUE;
5120 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
5121 0,
5122 entity->base,
5123 entity->systemId,
5124 entity->publicId)) {
5125 entity->open = XML_FALSE;
5126 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5127 }
5128 entity->open = XML_FALSE;
5129 handleDefault = XML_FALSE;
5130 if (!dtd->paramEntityRead) {
5131 dtd->keepProcessing = dtd->standalone;
5132 break;
5133 }
5134 }
5135 else {
5136 dtd->keepProcessing = dtd->standalone;
5137 break;
5138 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005139 }
5140#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005141 if (!dtd->standalone &&
5142 notStandaloneHandler &&
5143 !notStandaloneHandler(handlerArg))
5144 return XML_ERROR_NOT_STANDALONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005145 break;
5146
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005147 /* Element declaration stuff */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005148
5149 case XML_ROLE_ELEMENT_NAME:
5150 if (elementDeclHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005151 declElementType = getElementType(parser, enc, s, next);
5152 if (!declElementType)
5153 return XML_ERROR_NO_MEMORY;
5154 dtd->scaffLevel = 0;
5155 dtd->scaffCount = 0;
5156 dtd->in_eldecl = XML_TRUE;
5157 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005158 }
5159 break;
5160
5161 case XML_ROLE_CONTENT_ANY:
5162 case XML_ROLE_CONTENT_EMPTY:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005163 if (dtd->in_eldecl) {
5164 if (elementDeclHandler) {
5165 XML_Content * content = (XML_Content *) MALLOC(sizeof(XML_Content));
5166 if (!content)
5167 return XML_ERROR_NO_MEMORY;
5168 content->quant = XML_CQUANT_NONE;
5169 content->name = NULL;
5170 content->numchildren = 0;
5171 content->children = NULL;
5172 content->type = ((role == XML_ROLE_CONTENT_ANY) ?
5173 XML_CTYPE_ANY :
5174 XML_CTYPE_EMPTY);
5175 *eventEndPP = s;
5176 elementDeclHandler(handlerArg, declElementType->name, content);
5177 handleDefault = XML_FALSE;
5178 }
5179 dtd->in_eldecl = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005180 }
5181 break;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005182
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005183 case XML_ROLE_CONTENT_PCDATA:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005184 if (dtd->in_eldecl) {
5185 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5186 = XML_CTYPE_MIXED;
5187 if (elementDeclHandler)
5188 handleDefault = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005189 }
5190 break;
5191
5192 case XML_ROLE_CONTENT_ELEMENT:
5193 quant = XML_CQUANT_NONE;
5194 goto elementContent;
5195 case XML_ROLE_CONTENT_ELEMENT_OPT:
5196 quant = XML_CQUANT_OPT;
5197 goto elementContent;
5198 case XML_ROLE_CONTENT_ELEMENT_REP:
5199 quant = XML_CQUANT_REP;
5200 goto elementContent;
5201 case XML_ROLE_CONTENT_ELEMENT_PLUS:
5202 quant = XML_CQUANT_PLUS;
5203 elementContent:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005204 if (dtd->in_eldecl) {
5205 ELEMENT_TYPE *el;
5206 const XML_Char *name;
5207 int nameLen;
5208 const char *nxt = (quant == XML_CQUANT_NONE
5209 ? next
5210 : next - enc->minBytesPerChar);
5211 int myindex = nextScaffoldPart(parser);
5212 if (myindex < 0)
5213 return XML_ERROR_NO_MEMORY;
5214 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5215 dtd->scaffold[myindex].quant = quant;
5216 el = getElementType(parser, enc, s, nxt);
5217 if (!el)
5218 return XML_ERROR_NO_MEMORY;
5219 name = el->name;
5220 dtd->scaffold[myindex].name = name;
5221 nameLen = 0;
5222 for (; name[nameLen++]; );
5223 dtd->contentStringLen += nameLen;
5224 if (elementDeclHandler)
5225 handleDefault = XML_FALSE;
5226 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005227 break;
5228
5229 case XML_ROLE_GROUP_CLOSE:
5230 quant = XML_CQUANT_NONE;
5231 goto closeGroup;
5232 case XML_ROLE_GROUP_CLOSE_OPT:
5233 quant = XML_CQUANT_OPT;
5234 goto closeGroup;
5235 case XML_ROLE_GROUP_CLOSE_REP:
5236 quant = XML_CQUANT_REP;
5237 goto closeGroup;
5238 case XML_ROLE_GROUP_CLOSE_PLUS:
5239 quant = XML_CQUANT_PLUS;
5240 closeGroup:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005241 if (dtd->in_eldecl) {
5242 if (elementDeclHandler)
5243 handleDefault = XML_FALSE;
5244 dtd->scaffLevel--;
5245 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5246 if (dtd->scaffLevel == 0) {
5247 if (!handleDefault) {
5248 XML_Content *model = build_model(parser);
5249 if (!model)
5250 return XML_ERROR_NO_MEMORY;
5251 *eventEndPP = s;
5252 elementDeclHandler(handlerArg, declElementType->name, model);
5253 }
5254 dtd->in_eldecl = XML_FALSE;
5255 dtd->contentStringLen = 0;
5256 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005257 }
5258 break;
5259 /* End element declaration stuff */
5260
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005261 case XML_ROLE_PI:
5262 if (!reportProcessingInstruction(parser, enc, s, next))
5263 return XML_ERROR_NO_MEMORY;
5264 handleDefault = XML_FALSE;
5265 break;
5266 case XML_ROLE_COMMENT:
5267 if (!reportComment(parser, enc, s, next))
5268 return XML_ERROR_NO_MEMORY;
5269 handleDefault = XML_FALSE;
5270 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005271 case XML_ROLE_NONE:
5272 switch (tok) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005273 case XML_TOK_BOM:
5274 handleDefault = XML_FALSE;
5275 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005276 }
5277 break;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005278 case XML_ROLE_DOCTYPE_NONE:
5279 if (startDoctypeDeclHandler)
5280 handleDefault = XML_FALSE;
5281 break;
5282 case XML_ROLE_ENTITY_NONE:
5283 if (dtd->keepProcessing && entityDeclHandler)
5284 handleDefault = XML_FALSE;
5285 break;
5286 case XML_ROLE_NOTATION_NONE:
5287 if (notationDeclHandler)
5288 handleDefault = XML_FALSE;
5289 break;
5290 case XML_ROLE_ATTLIST_NONE:
5291 if (dtd->keepProcessing && attlistDeclHandler)
5292 handleDefault = XML_FALSE;
5293 break;
5294 case XML_ROLE_ELEMENT_NONE:
5295 if (elementDeclHandler)
5296 handleDefault = XML_FALSE;
5297 break;
5298 } /* end of big switch */
5299
5300 if (handleDefault && defaultHandler)
5301 reportDefault(parser, enc, s, next);
5302
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005303 switch (ps_parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005304 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00005305 *nextPtr = next;
5306 return XML_ERROR_NONE;
5307 case XML_FINISHED:
5308 return XML_ERROR_ABORTED;
5309 default:
5310 s = next;
5311 tok = XmlPrologTok(enc, s, end, &next);
5312 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005313 }
5314 /* not reached */
5315}
5316
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005317static enum XML_Error PTRCALL
5318epilogProcessor(XML_Parser parser,
5319 const char *s,
5320 const char *end,
5321 const char **nextPtr)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005322{
5323 processor = epilogProcessor;
5324 eventPtr = s;
5325 for (;;) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005326 const char *next = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005327 int tok = XmlPrologTok(encoding, s, end, &next);
5328 eventEndPtr = next;
5329 switch (tok) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005330 /* report partial linebreak - it might be the last token */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005331 case -XML_TOK_PROLOG_S:
5332 if (defaultHandler) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005333 reportDefault(parser, encoding, s, next);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005334 if (ps_parsing == XML_FINISHED)
Fred Drake31d485c2004-08-03 07:06:22 +00005335 return XML_ERROR_ABORTED;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005336 }
Fred Drake31d485c2004-08-03 07:06:22 +00005337 *nextPtr = next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005338 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005339 case XML_TOK_NONE:
Fred Drake31d485c2004-08-03 07:06:22 +00005340 *nextPtr = s;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005341 return XML_ERROR_NONE;
5342 case XML_TOK_PROLOG_S:
5343 if (defaultHandler)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005344 reportDefault(parser, encoding, s, next);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005345 break;
5346 case XML_TOK_PI:
5347 if (!reportProcessingInstruction(parser, encoding, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005348 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005349 break;
5350 case XML_TOK_COMMENT:
5351 if (!reportComment(parser, encoding, s, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005352 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005353 break;
5354 case XML_TOK_INVALID:
5355 eventPtr = next;
5356 return XML_ERROR_INVALID_TOKEN;
5357 case XML_TOK_PARTIAL:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005358 if (!ps_finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005359 *nextPtr = s;
5360 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005361 }
5362 return XML_ERROR_UNCLOSED_TOKEN;
5363 case XML_TOK_PARTIAL_CHAR:
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005364 if (!ps_finalBuffer) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005365 *nextPtr = s;
5366 return XML_ERROR_NONE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005367 }
5368 return XML_ERROR_PARTIAL_CHAR;
5369 default:
5370 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5371 }
5372 eventPtr = s = next;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005373 switch (ps_parsing) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005374 case XML_SUSPENDED:
Fred Drake31d485c2004-08-03 07:06:22 +00005375 *nextPtr = next;
5376 return XML_ERROR_NONE;
5377 case XML_FINISHED:
5378 return XML_ERROR_ABORTED;
5379 default: ;
5380 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005381 }
5382}
5383
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005384static enum XML_Error
Fred Drake31d485c2004-08-03 07:06:22 +00005385processInternalEntity(XML_Parser parser, ENTITY *entity,
5386 XML_Bool betweenDecl)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005387{
Fred Drake31d485c2004-08-03 07:06:22 +00005388 const char *textStart, *textEnd;
5389 const char *next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005390 enum XML_Error result;
Fred Drake31d485c2004-08-03 07:06:22 +00005391 OPEN_INTERNAL_ENTITY *openEntity;
5392
5393 if (freeInternalEntities) {
5394 openEntity = freeInternalEntities;
5395 freeInternalEntities = openEntity->next;
5396 }
5397 else {
5398 openEntity = (OPEN_INTERNAL_ENTITY *)MALLOC(sizeof(OPEN_INTERNAL_ENTITY));
5399 if (!openEntity)
5400 return XML_ERROR_NO_MEMORY;
5401 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005402 entity->open = XML_TRUE;
Fred Drake31d485c2004-08-03 07:06:22 +00005403 entity->processed = 0;
5404 openEntity->next = openInternalEntities;
5405 openInternalEntities = openEntity;
5406 openEntity->entity = entity;
5407 openEntity->startTagLevel = tagLevel;
5408 openEntity->betweenDecl = betweenDecl;
5409 openEntity->internalEventPtr = NULL;
5410 openEntity->internalEventEndPtr = NULL;
5411 textStart = (char *)entity->textPtr;
5412 textEnd = (char *)(entity->textPtr + entity->textLen);
Victor Stinner5ff71322017-06-21 14:39:22 +02005413 /* Set a safe default value in case 'next' does not get set */
5414 next = textStart;
Fred Drake31d485c2004-08-03 07:06:22 +00005415
5416#ifdef XML_DTD
5417 if (entity->is_param) {
5418 int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005419 result = doProlog(parser, internalEncoding, textStart, textEnd, tok,
Fred Drake31d485c2004-08-03 07:06:22 +00005420 next, &next, XML_FALSE);
5421 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005422 else
Fred Drake31d485c2004-08-03 07:06:22 +00005423#endif /* XML_DTD */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005424 result = doContent(parser, tagLevel, internalEncoding, textStart,
Fred Drake31d485c2004-08-03 07:06:22 +00005425 textEnd, &next, XML_FALSE);
5426
5427 if (result == XML_ERROR_NONE) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005428 if (textEnd != next && ps_parsing == XML_SUSPENDED) {
5429 entity->processed = (int)(next - textStart);
Fred Drake31d485c2004-08-03 07:06:22 +00005430 processor = internalEntityProcessor;
5431 }
5432 else {
5433 entity->open = XML_FALSE;
5434 openInternalEntities = openEntity->next;
5435 /* put openEntity back in list of free instances */
5436 openEntity->next = freeInternalEntities;
5437 freeInternalEntities = openEntity;
5438 }
5439 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005440 return result;
5441}
5442
Fred Drake31d485c2004-08-03 07:06:22 +00005443static enum XML_Error PTRCALL
5444internalEntityProcessor(XML_Parser parser,
5445 const char *s,
5446 const char *end,
5447 const char **nextPtr)
5448{
5449 ENTITY *entity;
5450 const char *textStart, *textEnd;
5451 const char *next;
5452 enum XML_Error result;
5453 OPEN_INTERNAL_ENTITY *openEntity = openInternalEntities;
5454 if (!openEntity)
5455 return XML_ERROR_UNEXPECTED_STATE;
5456
5457 entity = openEntity->entity;
5458 textStart = ((char *)entity->textPtr) + entity->processed;
5459 textEnd = (char *)(entity->textPtr + entity->textLen);
Victor Stinner5ff71322017-06-21 14:39:22 +02005460 /* Set a safe default value in case 'next' does not get set */
5461 next = textStart;
Fred Drake31d485c2004-08-03 07:06:22 +00005462
5463#ifdef XML_DTD
5464 if (entity->is_param) {
5465 int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005466 result = doProlog(parser, internalEncoding, textStart, textEnd, tok,
Fred Drake31d485c2004-08-03 07:06:22 +00005467 next, &next, XML_FALSE);
5468 }
5469 else
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005470#endif /* XML_DTD */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005471 result = doContent(parser, openEntity->startTagLevel, internalEncoding,
5472 textStart, textEnd, &next, XML_FALSE);
Fred Drake31d485c2004-08-03 07:06:22 +00005473
5474 if (result != XML_ERROR_NONE)
5475 return result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005476 else if (textEnd != next && ps_parsing == XML_SUSPENDED) {
5477 entity->processed = (int)(next - (char *)entity->textPtr);
Fred Drake31d485c2004-08-03 07:06:22 +00005478 return result;
5479 }
5480 else {
5481 entity->open = XML_FALSE;
5482 openInternalEntities = openEntity->next;
5483 /* put openEntity back in list of free instances */
5484 openEntity->next = freeInternalEntities;
5485 freeInternalEntities = openEntity;
5486 }
5487
5488#ifdef XML_DTD
5489 if (entity->is_param) {
5490 int tok;
5491 processor = prologProcessor;
5492 tok = XmlPrologTok(encoding, s, end, &next);
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005493 return doProlog(parser, encoding, s, end, tok, next, nextPtr,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005494 (XML_Bool)!ps_finalBuffer);
Fred Drake31d485c2004-08-03 07:06:22 +00005495 }
5496 else
5497#endif /* XML_DTD */
5498 {
5499 processor = contentProcessor;
5500 /* see externalEntityContentProcessor vs contentProcessor */
5501 return doContent(parser, parentParser ? 1 : 0, encoding, s, end,
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005502 nextPtr, (XML_Bool)!ps_finalBuffer);
5503 }
Fred Drake31d485c2004-08-03 07:06:22 +00005504}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005505
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005506static enum XML_Error PTRCALL
5507errorProcessor(XML_Parser parser,
Victor Stinner23ec4b52017-06-15 00:54:36 +02005508 const char *UNUSED_P(s),
5509 const char *UNUSED_P(end),
5510 const char **UNUSED_P(nextPtr))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005511{
5512 return errorCode;
5513}
5514
5515static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005516storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5517 const char *ptr, const char *end,
5518 STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005519{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005520 enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr,
5521 end, pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005522 if (result)
5523 return result;
5524 if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
5525 poolChop(pool);
5526 if (!poolAppendChar(pool, XML_T('\0')))
5527 return XML_ERROR_NO_MEMORY;
5528 return XML_ERROR_NONE;
5529}
5530
5531static enum XML_Error
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005532appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
5533 const char *ptr, const char *end,
5534 STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005535{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005536 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005537 for (;;) {
5538 const char *next;
5539 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
5540 switch (tok) {
5541 case XML_TOK_NONE:
5542 return XML_ERROR_NONE;
5543 case XML_TOK_INVALID:
5544 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005545 eventPtr = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005546 return XML_ERROR_INVALID_TOKEN;
5547 case XML_TOK_PARTIAL:
5548 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005549 eventPtr = ptr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005550 return XML_ERROR_INVALID_TOKEN;
5551 case XML_TOK_CHAR_REF:
5552 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005553 XML_Char buf[XML_ENCODE_MAX];
5554 int i;
5555 int n = XmlCharRefNumber(enc, ptr);
5556 if (n < 0) {
5557 if (enc == encoding)
5558 eventPtr = ptr;
5559 return XML_ERROR_BAD_CHAR_REF;
5560 }
5561 if (!isCdata
5562 && n == 0x20 /* space */
5563 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
5564 break;
5565 n = XmlEncode(n, (ICHAR *)buf);
Victor Stinner93d0cb52017-08-18 23:43:54 +02005566 /* The XmlEncode() functions can never return 0 here. That
5567 * error return happens if the code point passed in is either
5568 * negative or greater than or equal to 0x110000. The
5569 * XmlCharRefNumber() functions will all return a number
5570 * strictly less than 0x110000 or a negative value if an error
5571 * occurred. The negative value is intercepted above, so
5572 * XmlEncode() is never passed a value it might return an
5573 * error for.
5574 */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005575 for (i = 0; i < n; i++) {
5576 if (!poolAppendChar(pool, buf[i]))
5577 return XML_ERROR_NO_MEMORY;
5578 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005579 }
5580 break;
5581 case XML_TOK_DATA_CHARS:
5582 if (!poolAppend(pool, enc, ptr, next))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005583 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005584 break;
5585 case XML_TOK_TRAILING_CR:
5586 next = ptr + enc->minBytesPerChar;
5587 /* fall through */
5588 case XML_TOK_ATTRIBUTE_VALUE_S:
5589 case XML_TOK_DATA_NEWLINE:
5590 if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005591 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005592 if (!poolAppendChar(pool, 0x20))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005593 return XML_ERROR_NO_MEMORY;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005594 break;
5595 case XML_TOK_ENTITY_REF:
5596 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005597 const XML_Char *name;
5598 ENTITY *entity;
5599 char checkEntityDecl;
5600 XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc,
5601 ptr + enc->minBytesPerChar,
5602 next - enc->minBytesPerChar);
5603 if (ch) {
5604 if (!poolAppendChar(pool, ch))
5605 return XML_ERROR_NO_MEMORY;
5606 break;
5607 }
5608 name = poolStoreString(&temp2Pool, enc,
5609 ptr + enc->minBytesPerChar,
5610 next - enc->minBytesPerChar);
5611 if (!name)
5612 return XML_ERROR_NO_MEMORY;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005613 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005614 poolDiscard(&temp2Pool);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005615 /* First, determine if a check for an existing declaration is needed;
5616 if yes, check that the entity exists, and that it is internal.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005617 */
5618 if (pool == &dtd->pool) /* are we called from prolog? */
5619 checkEntityDecl =
5620#ifdef XML_DTD
5621 prologState.documentEntity &&
5622#endif /* XML_DTD */
5623 (dtd->standalone
5624 ? !openInternalEntities
5625 : !dtd->hasParamEntityRefs);
5626 else /* if (pool == &tempPool): we are called from content */
5627 checkEntityDecl = !dtd->hasParamEntityRefs || dtd->standalone;
5628 if (checkEntityDecl) {
5629 if (!entity)
5630 return XML_ERROR_UNDEFINED_ENTITY;
5631 else if (!entity->is_internal)
5632 return XML_ERROR_ENTITY_DECLARED_IN_PE;
5633 }
5634 else if (!entity) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005635 /* Cannot report skipped entity here - see comments on
5636 skippedEntityHandler.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005637 if (skippedEntityHandler)
5638 skippedEntityHandler(handlerArg, name, 0);
5639 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005640 /* Cannot call the default handler because this would be
5641 out of sync with the call to the startElementHandler.
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005642 if ((pool == &tempPool) && defaultHandler)
5643 reportDefault(parser, enc, ptr, next);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005644 */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005645 break;
5646 }
5647 if (entity->open) {
Victor Stinner93d0cb52017-08-18 23:43:54 +02005648 if (enc == encoding) {
5649 /* It does not appear that this line can be executed.
5650 *
5651 * The "if (entity->open)" check catches recursive entity
5652 * definitions. In order to be called with an open
5653 * entity, it must have gone through this code before and
5654 * been through the recursive call to
5655 * appendAttributeValue() some lines below. That call
5656 * sets the local encoding ("enc") to the parser's
5657 * internal encoding (internal_utf8 or internal_utf16),
5658 * which can never be the same as the principle encoding.
5659 * It doesn't appear there is another code path that gets
5660 * here with entity->open being TRUE.
5661 *
5662 * Since it is not certain that this logic is watertight,
5663 * we keep the line and merely exclude it from coverage
5664 * tests.
5665 */
5666 eventPtr = ptr; /* LCOV_EXCL_LINE */
5667 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005668 return XML_ERROR_RECURSIVE_ENTITY_REF;
5669 }
5670 if (entity->notation) {
5671 if (enc == encoding)
5672 eventPtr = ptr;
5673 return XML_ERROR_BINARY_ENTITY_REF;
5674 }
5675 if (!entity->textPtr) {
5676 if (enc == encoding)
5677 eventPtr = ptr;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07005678 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005679 }
5680 else {
5681 enum XML_Error result;
5682 const XML_Char *textEnd = entity->textPtr + entity->textLen;
5683 entity->open = XML_TRUE;
5684 result = appendAttributeValue(parser, internalEncoding, isCdata,
5685 (char *)entity->textPtr,
5686 (char *)textEnd, pool);
5687 entity->open = XML_FALSE;
5688 if (result)
5689 return result;
5690 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005691 }
5692 break;
5693 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02005694 /* The only token returned by XmlAttributeValueTok() that does
5695 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
5696 * Getting that would require an entity name to contain an
5697 * incomplete XML character (e.g. \xE2\x82); however previous
5698 * tokenisers will have already recognised and rejected such
5699 * names before XmlAttributeValueTok() gets a look-in. This
5700 * default case should be retained as a safety net, but the code
5701 * excluded from coverage tests.
5702 *
5703 * LCOV_EXCL_START
5704 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005705 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005706 eventPtr = ptr;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005707 return XML_ERROR_UNEXPECTED_STATE;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005708 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005709 }
5710 ptr = next;
5711 }
5712 /* not reached */
5713}
5714
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005715static enum XML_Error
5716storeEntityValue(XML_Parser parser,
5717 const ENCODING *enc,
5718 const char *entityTextPtr,
5719 const char *entityTextEnd)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005720{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005721 DTD * const dtd = _dtd; /* save one level of indirection */
5722 STRING_POOL *pool = &(dtd->entityValuePool);
5723 enum XML_Error result = XML_ERROR_NONE;
5724#ifdef XML_DTD
5725 int oldInEntityValue = prologState.inEntityValue;
5726 prologState.inEntityValue = 1;
5727#endif /* XML_DTD */
5728 /* never return Null for the value argument in EntityDeclHandler,
5729 since this would indicate an external entity; therefore we
5730 have to make sure that entityValuePool.start is not null */
5731 if (!pool->blocks) {
5732 if (!poolGrow(pool))
5733 return XML_ERROR_NO_MEMORY;
5734 }
5735
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005736 for (;;) {
5737 const char *next;
5738 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
5739 switch (tok) {
5740 case XML_TOK_PARAM_ENTITY_REF:
5741#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005742 if (isParamEntity || enc != encoding) {
5743 const XML_Char *name;
5744 ENTITY *entity;
5745 name = poolStoreString(&tempPool, enc,
5746 entityTextPtr + enc->minBytesPerChar,
5747 next - enc->minBytesPerChar);
5748 if (!name) {
5749 result = XML_ERROR_NO_MEMORY;
5750 goto endEntityValue;
5751 }
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07005752 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005753 poolDiscard(&tempPool);
5754 if (!entity) {
5755 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
5756 /* cannot report skipped entity here - see comments on
5757 skippedEntityHandler
5758 if (skippedEntityHandler)
5759 skippedEntityHandler(handlerArg, name, 0);
5760 */
5761 dtd->keepProcessing = dtd->standalone;
5762 goto endEntityValue;
5763 }
5764 if (entity->open) {
5765 if (enc == encoding)
5766 eventPtr = entityTextPtr;
5767 result = XML_ERROR_RECURSIVE_ENTITY_REF;
5768 goto endEntityValue;
5769 }
5770 if (entity->systemId) {
5771 if (externalEntityRefHandler) {
5772 dtd->paramEntityRead = XML_FALSE;
5773 entity->open = XML_TRUE;
5774 if (!externalEntityRefHandler(externalEntityRefHandlerArg,
5775 0,
5776 entity->base,
5777 entity->systemId,
5778 entity->publicId)) {
5779 entity->open = XML_FALSE;
5780 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5781 goto endEntityValue;
5782 }
5783 entity->open = XML_FALSE;
5784 if (!dtd->paramEntityRead)
5785 dtd->keepProcessing = dtd->standalone;
5786 }
5787 else
5788 dtd->keepProcessing = dtd->standalone;
5789 }
5790 else {
5791 entity->open = XML_TRUE;
5792 result = storeEntityValue(parser,
5793 internalEncoding,
5794 (char *)entity->textPtr,
5795 (char *)(entity->textPtr
5796 + entity->textLen));
5797 entity->open = XML_FALSE;
5798 if (result)
5799 goto endEntityValue;
5800 }
5801 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005802 }
5803#endif /* XML_DTD */
Fred Drake31d485c2004-08-03 07:06:22 +00005804 /* In the internal subset, PE references are not legal
5805 within markup declarations, e.g entity values in this case. */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005806 eventPtr = entityTextPtr;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005807 result = XML_ERROR_PARAM_ENTITY_REF;
5808 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005809 case XML_TOK_NONE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005810 result = XML_ERROR_NONE;
5811 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005812 case XML_TOK_ENTITY_REF:
5813 case XML_TOK_DATA_CHARS:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005814 if (!poolAppend(pool, enc, entityTextPtr, next)) {
5815 result = XML_ERROR_NO_MEMORY;
5816 goto endEntityValue;
5817 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005818 break;
5819 case XML_TOK_TRAILING_CR:
5820 next = entityTextPtr + enc->minBytesPerChar;
5821 /* fall through */
5822 case XML_TOK_DATA_NEWLINE:
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005823 if (pool->end == pool->ptr && !poolGrow(pool)) {
5824 result = XML_ERROR_NO_MEMORY;
5825 goto endEntityValue;
5826 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005827 *(pool->ptr)++ = 0xA;
5828 break;
5829 case XML_TOK_CHAR_REF:
5830 {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005831 XML_Char buf[XML_ENCODE_MAX];
5832 int i;
5833 int n = XmlCharRefNumber(enc, entityTextPtr);
5834 if (n < 0) {
5835 if (enc == encoding)
5836 eventPtr = entityTextPtr;
5837 result = XML_ERROR_BAD_CHAR_REF;
5838 goto endEntityValue;
5839 }
5840 n = XmlEncode(n, (ICHAR *)buf);
Victor Stinner93d0cb52017-08-18 23:43:54 +02005841 /* The XmlEncode() functions can never return 0 here. That
5842 * error return happens if the code point passed in is either
5843 * negative or greater than or equal to 0x110000. The
5844 * XmlCharRefNumber() functions will all return a number
5845 * strictly less than 0x110000 or a negative value if an error
5846 * occurred. The negative value is intercepted above, so
5847 * XmlEncode() is never passed a value it might return an
5848 * error for.
5849 */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005850 for (i = 0; i < n; i++) {
5851 if (pool->end == pool->ptr && !poolGrow(pool)) {
5852 result = XML_ERROR_NO_MEMORY;
5853 goto endEntityValue;
5854 }
5855 *(pool->ptr)++ = buf[i];
5856 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005857 }
5858 break;
5859 case XML_TOK_PARTIAL:
5860 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005861 eventPtr = entityTextPtr;
5862 result = XML_ERROR_INVALID_TOKEN;
5863 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005864 case XML_TOK_INVALID:
5865 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005866 eventPtr = next;
5867 result = XML_ERROR_INVALID_TOKEN;
5868 goto endEntityValue;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005869 default:
Victor Stinner93d0cb52017-08-18 23:43:54 +02005870 /* This default case should be unnecessary -- all the tokens
5871 * that XmlEntityValueTok() can return have their own explicit
5872 * cases -- but should be retained for safety. We do however
5873 * exclude it from the coverage statistics.
5874 *
5875 * LCOV_EXCL_START
5876 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005877 if (enc == encoding)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005878 eventPtr = entityTextPtr;
5879 result = XML_ERROR_UNEXPECTED_STATE;
5880 goto endEntityValue;
Victor Stinner93d0cb52017-08-18 23:43:54 +02005881 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005882 }
5883 entityTextPtr = next;
5884 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005885endEntityValue:
5886#ifdef XML_DTD
5887 prologState.inEntityValue = oldInEntityValue;
5888#endif /* XML_DTD */
5889 return result;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005890}
5891
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005892static void FASTCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005893normalizeLines(XML_Char *s)
5894{
5895 XML_Char *p;
5896 for (;; s++) {
5897 if (*s == XML_T('\0'))
5898 return;
5899 if (*s == 0xD)
5900 break;
5901 }
5902 p = s;
5903 do {
5904 if (*s == 0xD) {
5905 *p++ = 0xA;
5906 if (*++s == 0xA)
5907 s++;
5908 }
5909 else
5910 *p++ = *s++;
5911 } while (*s);
5912 *p = XML_T('\0');
5913}
5914
5915static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005916reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
5917 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005918{
5919 const XML_Char *target;
5920 XML_Char *data;
5921 const char *tem;
5922 if (!processingInstructionHandler) {
5923 if (defaultHandler)
5924 reportDefault(parser, enc, start, end);
5925 return 1;
5926 }
5927 start += enc->minBytesPerChar * 2;
5928 tem = start + XmlNameLength(enc, start);
5929 target = poolStoreString(&tempPool, enc, start, tem);
5930 if (!target)
5931 return 0;
5932 poolFinish(&tempPool);
5933 data = poolStoreString(&tempPool, enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005934 XmlSkipS(enc, tem),
5935 end - enc->minBytesPerChar*2);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005936 if (!data)
5937 return 0;
5938 normalizeLines(data);
5939 processingInstructionHandler(handlerArg, target, data);
5940 poolClear(&tempPool);
5941 return 1;
5942}
5943
5944static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005945reportComment(XML_Parser parser, const ENCODING *enc,
5946 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005947{
5948 XML_Char *data;
5949 if (!commentHandler) {
5950 if (defaultHandler)
5951 reportDefault(parser, enc, start, end);
5952 return 1;
5953 }
5954 data = poolStoreString(&tempPool,
5955 enc,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005956 start + enc->minBytesPerChar * 4,
5957 end - enc->minBytesPerChar * 3);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005958 if (!data)
5959 return 0;
5960 normalizeLines(data);
5961 commentHandler(handlerArg, data);
5962 poolClear(&tempPool);
5963 return 1;
5964}
5965
5966static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +00005967reportDefault(XML_Parser parser, const ENCODING *enc,
5968 const char *s, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005969{
5970 if (MUST_CONVERT(enc, s)) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02005971 enum XML_Convert_Result convert_res;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005972 const char **eventPP;
5973 const char **eventEndPP;
5974 if (enc == encoding) {
5975 eventPP = &eventPtr;
5976 eventEndPP = &eventEndPtr;
5977 }
5978 else {
Victor Stinner93d0cb52017-08-18 23:43:54 +02005979 /* To get here, two things must be true; the parser must be
5980 * using a character encoding that is not the same as the
5981 * encoding passed in, and the encoding passed in must need
5982 * conversion to the internal format (UTF-8 unless XML_UNICODE
5983 * is defined). The only occasions on which the encoding passed
5984 * in is not the same as the parser's encoding are when it is
5985 * the internal encoding (e.g. a previously defined parameter
5986 * entity, already converted to internal format). This by
5987 * definition doesn't need conversion, so the whole branch never
5988 * gets executed.
5989 *
5990 * For safety's sake we don't delete these lines and merely
5991 * exclude them from coverage statistics.
5992 *
5993 * LCOV_EXCL_START
5994 */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005995 eventPP = &(openInternalEntities->internalEventPtr);
5996 eventEndPP = &(openInternalEntities->internalEventEndPtr);
Victor Stinner93d0cb52017-08-18 23:43:54 +02005997 /* LCOV_EXCL_STOP */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00005998 }
5999 do {
6000 ICHAR *dataPtr = (ICHAR *)dataBuf;
Victor Stinner23ec4b52017-06-15 00:54:36 +02006001 convert_res = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006002 *eventEndPP = s;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00006003 defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006004 *eventPP = s;
Victor Stinner23ec4b52017-06-15 00:54:36 +02006005 } while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006006 }
6007 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00006008 defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006009}
6010
6011
6012static int
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006013defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
6014 XML_Bool isId, const XML_Char *value, XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006015{
6016 DEFAULT_ATTRIBUTE *att;
6017 if (value || isId) {
6018 /* The handling of default attributes gets messed up if we have
6019 a default which duplicates a non-default. */
6020 int i;
6021 for (i = 0; i < type->nDefaultAtts; i++)
6022 if (attId == type->defaultAtts[i].id)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006023 return 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006024 if (isId && !type->idAtt && !attId->xmlns)
6025 type->idAtt = attId;
6026 }
6027 if (type->nDefaultAtts == type->allocDefaultAtts) {
6028 if (type->allocDefaultAtts == 0) {
6029 type->allocDefaultAtts = 8;
Fred Drake08317ae2003-10-21 15:38:55 +00006030 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(type->allocDefaultAtts
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006031 * sizeof(DEFAULT_ATTRIBUTE));
6032 if (!type->defaultAtts)
6033 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006034 }
6035 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006036 DEFAULT_ATTRIBUTE *temp;
6037 int count = type->allocDefaultAtts * 2;
6038 temp = (DEFAULT_ATTRIBUTE *)
6039 REALLOC(type->defaultAtts, (count * sizeof(DEFAULT_ATTRIBUTE)));
6040 if (temp == NULL)
6041 return 0;
6042 type->allocDefaultAtts = count;
6043 type->defaultAtts = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006044 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006045 }
6046 att = type->defaultAtts + type->nDefaultAtts;
6047 att->id = attId;
6048 att->value = value;
6049 att->isCdata = isCdata;
6050 if (!isCdata)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006051 attId->maybeTokenized = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006052 type->nDefaultAtts += 1;
6053 return 1;
6054}
6055
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006056static int
6057setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006058{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006059 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006060 const XML_Char *name;
6061 for (name = elementType->name; *name; name++) {
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006062 if (*name == XML_T(ASCII_COLON)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006063 PREFIX *prefix;
6064 const XML_Char *s;
6065 for (s = elementType->name; s != name; s++) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006066 if (!poolAppendChar(&dtd->pool, *s))
6067 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006068 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006069 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6070 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006071 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006072 sizeof(PREFIX));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006073 if (!prefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006074 return 0;
6075 if (prefix->name == poolStart(&dtd->pool))
6076 poolFinish(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006077 else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006078 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006079 elementType->prefix = prefix;
6080
6081 }
6082 }
6083 return 1;
6084}
6085
6086static ATTRIBUTE_ID *
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006087getAttributeId(XML_Parser parser, const ENCODING *enc,
6088 const char *start, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006089{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006090 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006091 ATTRIBUTE_ID *id;
6092 const XML_Char *name;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006093 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6094 return NULL;
6095 name = poolStoreString(&dtd->pool, enc, start, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006096 if (!name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006097 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006098 /* skip quotation mark - its storage will be re-used (like in name[-1]) */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006099 ++name;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006100 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006101 if (!id)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006102 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006103 if (id->name != name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006104 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006105 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006106 poolFinish(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006107 if (!ns)
6108 ;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006109 else if (name[0] == XML_T(ASCII_x)
6110 && name[1] == XML_T(ASCII_m)
6111 && name[2] == XML_T(ASCII_l)
6112 && name[3] == XML_T(ASCII_n)
6113 && name[4] == XML_T(ASCII_s)
6114 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006115 if (name[5] == XML_T('\0'))
6116 id->prefix = &dtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006117 else
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006118 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, sizeof(PREFIX));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006119 id->xmlns = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006120 }
6121 else {
6122 int i;
6123 for (i = 0; name[i]; i++) {
Fred Drake08317ae2003-10-21 15:38:55 +00006124 /* attributes without prefix are *not* in the default namespace */
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006125 if (name[i] == XML_T(ASCII_COLON)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006126 int j;
6127 for (j = 0; j < i; j++) {
6128 if (!poolAppendChar(&dtd->pool, name[j]))
6129 return NULL;
6130 }
6131 if (!poolAppendChar(&dtd->pool, XML_T('\0')))
6132 return NULL;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006133 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006134 sizeof(PREFIX));
Benjamin Peterson196d7db2016-06-11 13:28:56 -07006135 if (!id->prefix)
6136 return NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006137 if (id->prefix->name == poolStart(&dtd->pool))
6138 poolFinish(&dtd->pool);
6139 else
6140 poolDiscard(&dtd->pool);
6141 break;
6142 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006143 }
6144 }
6145 }
6146 return id;
6147}
6148
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006149#define CONTEXT_SEP XML_T(ASCII_FF)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006150
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006151static const XML_Char *
6152getContext(XML_Parser parser)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006153{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006154 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006155 HASH_TABLE_ITER iter;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006156 XML_Bool needSep = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006157
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006158 if (dtd->defaultPrefix.binding) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006159 int i;
6160 int len;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006161 if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS)))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006162 return NULL;
6163 len = dtd->defaultPrefix.binding->uriLen;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00006164 if (namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006165 len--;
Victor Stinner93d0cb52017-08-18 23:43:54 +02006166 for (i = 0; i < len; i++) {
6167 if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i])) {
6168 /* Because of memory caching, I don't believe this line can be
6169 * executed.
6170 *
6171 * This is part of a loop copying the default prefix binding
6172 * URI into the parser's temporary string pool. Previously,
6173 * that URI was copied into the same string pool, with a
6174 * terminating NUL character, as part of setContext(). When
6175 * the pool was cleared, that leaves a block definitely big
6176 * enough to hold the URI on the free block list of the pool.
6177 * The URI copy in getContext() therefore cannot run out of
6178 * memory.
6179 *
6180 * If the pool is used between the setContext() and
6181 * getContext() calls, the worst it can do is leave a bigger
6182 * block on the front of the free list. Given that this is
6183 * all somewhat inobvious and program logic can be changed, we
6184 * don't delete the line but we do exclude it from the test
6185 * coverage statistics.
6186 */
6187 return NULL; /* LCOV_EXCL_LINE */
6188 }
6189 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006190 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006191 }
6192
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006193 hashTableIterInit(&iter, &(dtd->prefixes));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006194 for (;;) {
6195 int i;
6196 int len;
6197 const XML_Char *s;
6198 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6199 if (!prefix)
6200 break;
Victor Stinner93d0cb52017-08-18 23:43:54 +02006201 if (!prefix->binding) {
6202 /* This test appears to be (justifiable) paranoia. There does
6203 * not seem to be a way of injecting a prefix without a binding
6204 * that doesn't get errored long before this function is called.
6205 * The test should remain for safety's sake, so we instead
6206 * exclude the following line from the coverage statistics.
6207 */
6208 continue; /* LCOV_EXCL_LINE */
6209 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006210 if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006211 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006212 for (s = prefix->name; *s; s++)
6213 if (!poolAppendChar(&tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006214 return NULL;
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006215 if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS)))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006216 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006217 len = prefix->binding->uriLen;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00006218 if (namespaceSeparator)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006219 len--;
6220 for (i = 0; i < len; i++)
6221 if (!poolAppendChar(&tempPool, prefix->binding->uri[i]))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006222 return NULL;
6223 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006224 }
6225
6226
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006227 hashTableIterInit(&iter, &(dtd->generalEntities));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006228 for (;;) {
6229 const XML_Char *s;
6230 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
6231 if (!e)
6232 break;
6233 if (!e->open)
6234 continue;
6235 if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006236 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006237 for (s = e->name; *s; s++)
6238 if (!poolAppendChar(&tempPool, *s))
6239 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006240 needSep = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006241 }
6242
6243 if (!poolAppendChar(&tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006244 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006245 return tempPool.start;
6246}
6247
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006248static XML_Bool
6249setContext(XML_Parser parser, const XML_Char *context)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006250{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006251 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006252 const XML_Char *s = context;
6253
6254 while (*context != XML_T('\0')) {
6255 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
6256 ENTITY *e;
6257 if (!poolAppendChar(&tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006258 return XML_FALSE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006259 e = (ENTITY *)lookup(parser, &dtd->generalEntities, poolStart(&tempPool), 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006260 if (e)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006261 e->open = XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006262 if (*s != XML_T('\0'))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006263 s++;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006264 context = s;
6265 poolDiscard(&tempPool);
6266 }
Gregory P. Smith7c6309c2012-07-14 14:12:35 -07006267 else if (*s == XML_T(ASCII_EQUALS)) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006268 PREFIX *prefix;
6269 if (poolLength(&tempPool) == 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006270 prefix = &dtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006271 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006272 if (!poolAppendChar(&tempPool, XML_T('\0')))
6273 return XML_FALSE;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006274 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&tempPool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006275 sizeof(PREFIX));
6276 if (!prefix)
6277 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006278 if (prefix->name == poolStart(&tempPool)) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006279 prefix->name = poolCopyString(&dtd->pool, prefix->name);
6280 if (!prefix->name)
6281 return XML_FALSE;
6282 }
6283 poolDiscard(&tempPool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006284 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006285 for (context = s + 1;
6286 *context != CONTEXT_SEP && *context != XML_T('\0');
6287 context++)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006288 if (!poolAppendChar(&tempPool, *context))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006289 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006290 if (!poolAppendChar(&tempPool, XML_T('\0')))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006291 return XML_FALSE;
Fred Drake31d485c2004-08-03 07:06:22 +00006292 if (addBinding(parser, prefix, NULL, poolStart(&tempPool),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006293 &inheritedBindings) != XML_ERROR_NONE)
6294 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006295 poolDiscard(&tempPool);
6296 if (*context != XML_T('\0'))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006297 ++context;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006298 s = context;
6299 }
6300 else {
6301 if (!poolAppendChar(&tempPool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006302 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006303 s++;
6304 }
6305 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006306 return XML_TRUE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006307}
6308
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006309static void FASTCALL
6310normalizePublicId(XML_Char *publicId)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006311{
6312 XML_Char *p = publicId;
6313 XML_Char *s;
6314 for (s = publicId; *s; s++) {
6315 switch (*s) {
6316 case 0x20:
6317 case 0xD:
6318 case 0xA:
6319 if (p != publicId && p[-1] != 0x20)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006320 *p++ = 0x20;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006321 break;
6322 default:
6323 *p++ = *s;
6324 }
6325 }
6326 if (p != publicId && p[-1] == 0x20)
6327 --p;
6328 *p = XML_T('\0');
6329}
6330
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006331static DTD *
6332dtdCreate(const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006333{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006334 DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD));
6335 if (p == NULL)
6336 return p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006337 poolInit(&(p->pool), ms);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006338 poolInit(&(p->entityValuePool), ms);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006339 hashTableInit(&(p->generalEntities), ms);
6340 hashTableInit(&(p->elementTypes), ms);
6341 hashTableInit(&(p->attributeIds), ms);
6342 hashTableInit(&(p->prefixes), ms);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006343#ifdef XML_DTD
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006344 p->paramEntityRead = XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006345 hashTableInit(&(p->paramEntities), ms);
6346#endif /* XML_DTD */
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006347 p->defaultPrefix.name = NULL;
6348 p->defaultPrefix.binding = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006349
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006350 p->in_eldecl = XML_FALSE;
6351 p->scaffIndex = NULL;
6352 p->scaffold = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006353 p->scaffLevel = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006354 p->scaffSize = 0;
6355 p->scaffCount = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006356 p->contentStringLen = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006357
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006358 p->keepProcessing = XML_TRUE;
6359 p->hasParamEntityRefs = XML_FALSE;
6360 p->standalone = XML_FALSE;
6361 return p;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006362}
6363
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006364static void
6365dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006366{
6367 HASH_TABLE_ITER iter;
6368 hashTableIterInit(&iter, &(p->elementTypes));
6369 for (;;) {
6370 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6371 if (!e)
6372 break;
6373 if (e->allocDefaultAtts != 0)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006374 ms->free_fcn(e->defaultAtts);
6375 }
6376 hashTableClear(&(p->generalEntities));
6377#ifdef XML_DTD
6378 p->paramEntityRead = XML_FALSE;
6379 hashTableClear(&(p->paramEntities));
6380#endif /* XML_DTD */
6381 hashTableClear(&(p->elementTypes));
6382 hashTableClear(&(p->attributeIds));
6383 hashTableClear(&(p->prefixes));
6384 poolClear(&(p->pool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006385 poolClear(&(p->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006386 p->defaultPrefix.name = NULL;
6387 p->defaultPrefix.binding = NULL;
6388
6389 p->in_eldecl = XML_FALSE;
Fred Drake08317ae2003-10-21 15:38:55 +00006390
6391 ms->free_fcn(p->scaffIndex);
6392 p->scaffIndex = NULL;
6393 ms->free_fcn(p->scaffold);
6394 p->scaffold = NULL;
6395
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006396 p->scaffLevel = 0;
6397 p->scaffSize = 0;
6398 p->scaffCount = 0;
6399 p->contentStringLen = 0;
6400
6401 p->keepProcessing = XML_TRUE;
6402 p->hasParamEntityRefs = XML_FALSE;
6403 p->standalone = XML_FALSE;
6404}
6405
6406static void
6407dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms)
6408{
6409 HASH_TABLE_ITER iter;
6410 hashTableIterInit(&iter, &(p->elementTypes));
6411 for (;;) {
6412 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6413 if (!e)
6414 break;
6415 if (e->allocDefaultAtts != 0)
6416 ms->free_fcn(e->defaultAtts);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006417 }
6418 hashTableDestroy(&(p->generalEntities));
6419#ifdef XML_DTD
6420 hashTableDestroy(&(p->paramEntities));
6421#endif /* XML_DTD */
6422 hashTableDestroy(&(p->elementTypes));
6423 hashTableDestroy(&(p->attributeIds));
6424 hashTableDestroy(&(p->prefixes));
6425 poolDestroy(&(p->pool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006426 poolDestroy(&(p->entityValuePool));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006427 if (isDocEntity) {
Fred Drake08317ae2003-10-21 15:38:55 +00006428 ms->free_fcn(p->scaffIndex);
6429 ms->free_fcn(p->scaffold);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006430 }
6431 ms->free_fcn(p);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006432}
6433
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006434/* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
6435 The new DTD has already been initialized.
6436*/
6437static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006438dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006439{
6440 HASH_TABLE_ITER iter;
6441
6442 /* Copy the prefix table. */
6443
6444 hashTableIterInit(&iter, &(oldDtd->prefixes));
6445 for (;;) {
6446 const XML_Char *name;
6447 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
6448 if (!oldP)
6449 break;
6450 name = poolCopyString(&(newDtd->pool), oldP->name);
6451 if (!name)
6452 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006453 if (!lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006454 return 0;
6455 }
6456
6457 hashTableIterInit(&iter, &(oldDtd->attributeIds));
6458
6459 /* Copy the attribute id table. */
6460
6461 for (;;) {
6462 ATTRIBUTE_ID *newA;
6463 const XML_Char *name;
6464 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
6465
6466 if (!oldA)
6467 break;
6468 /* Remember to allocate the scratch byte before the name. */
6469 if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
6470 return 0;
6471 name = poolCopyString(&(newDtd->pool), oldA->name);
6472 if (!name)
6473 return 0;
6474 ++name;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006475 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006476 sizeof(ATTRIBUTE_ID));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006477 if (!newA)
6478 return 0;
6479 newA->maybeTokenized = oldA->maybeTokenized;
6480 if (oldA->prefix) {
6481 newA->xmlns = oldA->xmlns;
6482 if (oldA->prefix == &oldDtd->defaultPrefix)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006483 newA->prefix = &newDtd->defaultPrefix;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006484 else
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006485 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006486 oldA->prefix->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006487 }
6488 }
6489
6490 /* Copy the element type table. */
6491
6492 hashTableIterInit(&iter, &(oldDtd->elementTypes));
6493
6494 for (;;) {
6495 int i;
6496 ELEMENT_TYPE *newE;
6497 const XML_Char *name;
6498 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
6499 if (!oldE)
6500 break;
6501 name = poolCopyString(&(newDtd->pool), oldE->name);
6502 if (!name)
6503 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006504 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006505 sizeof(ELEMENT_TYPE));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006506 if (!newE)
6507 return 0;
6508 if (oldE->nDefaultAtts) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006509 newE->defaultAtts = (DEFAULT_ATTRIBUTE *)
6510 ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6511 if (!newE->defaultAtts) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006512 return 0;
6513 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006514 }
6515 if (oldE->idAtt)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006516 newE->idAtt = (ATTRIBUTE_ID *)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006517 lookup(oldParser, &(newDtd->attributeIds), oldE->idAtt->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006518 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
6519 if (oldE->prefix)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006520 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006521 oldE->prefix->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006522 for (i = 0; i < newE->nDefaultAtts; i++) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006523 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006524 lookup(oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006525 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
6526 if (oldE->defaultAtts[i].value) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006527 newE->defaultAtts[i].value
6528 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
6529 if (!newE->defaultAtts[i].value)
6530 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006531 }
6532 else
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006533 newE->defaultAtts[i].value = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006534 }
6535 }
6536
6537 /* Copy the entity tables. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006538 if (!copyEntityTable(oldParser,
6539 &(newDtd->generalEntities),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006540 &(newDtd->pool),
6541 &(oldDtd->generalEntities)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006542 return 0;
6543
6544#ifdef XML_DTD
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006545 if (!copyEntityTable(oldParser,
6546 &(newDtd->paramEntities),
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006547 &(newDtd->pool),
6548 &(oldDtd->paramEntities)))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006549 return 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006550 newDtd->paramEntityRead = oldDtd->paramEntityRead;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006551#endif /* XML_DTD */
6552
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006553 newDtd->keepProcessing = oldDtd->keepProcessing;
6554 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006555 newDtd->standalone = oldDtd->standalone;
6556
6557 /* Don't want deep copying for scaffolding */
6558 newDtd->in_eldecl = oldDtd->in_eldecl;
6559 newDtd->scaffold = oldDtd->scaffold;
6560 newDtd->contentStringLen = oldDtd->contentStringLen;
6561 newDtd->scaffSize = oldDtd->scaffSize;
6562 newDtd->scaffLevel = oldDtd->scaffLevel;
6563 newDtd->scaffIndex = oldDtd->scaffIndex;
6564
6565 return 1;
6566} /* End dtdCopy */
6567
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006568static int
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006569copyEntityTable(XML_Parser oldParser,
6570 HASH_TABLE *newTable,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006571 STRING_POOL *newPool,
6572 const HASH_TABLE *oldTable)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006573{
6574 HASH_TABLE_ITER iter;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006575 const XML_Char *cachedOldBase = NULL;
6576 const XML_Char *cachedNewBase = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006577
6578 hashTableIterInit(&iter, oldTable);
6579
6580 for (;;) {
6581 ENTITY *newE;
6582 const XML_Char *name;
6583 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
6584 if (!oldE)
6585 break;
6586 name = poolCopyString(newPool, oldE->name);
6587 if (!name)
6588 return 0;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006589 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006590 if (!newE)
6591 return 0;
6592 if (oldE->systemId) {
6593 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
6594 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006595 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006596 newE->systemId = tem;
6597 if (oldE->base) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006598 if (oldE->base == cachedOldBase)
6599 newE->base = cachedNewBase;
6600 else {
6601 cachedOldBase = oldE->base;
6602 tem = poolCopyString(newPool, cachedOldBase);
6603 if (!tem)
6604 return 0;
6605 cachedNewBase = newE->base = tem;
6606 }
6607 }
6608 if (oldE->publicId) {
6609 tem = poolCopyString(newPool, oldE->publicId);
6610 if (!tem)
6611 return 0;
6612 newE->publicId = tem;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006613 }
6614 }
6615 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006616 const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr,
6617 oldE->textLen);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006618 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006619 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006620 newE->textPtr = tem;
6621 newE->textLen = oldE->textLen;
6622 }
6623 if (oldE->notation) {
6624 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
6625 if (!tem)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006626 return 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006627 newE->notation = tem;
6628 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006629 newE->is_param = oldE->is_param;
6630 newE->is_internal = oldE->is_internal;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006631 }
6632 return 1;
6633}
6634
/* log2 of the number of slots a hash table gets when lookup() allocates
   it for the first time (i.e. the initial size is 1 << INIT_POWER). */
#define INIT_POWER 6
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006636
Fred Drake08317ae2003-10-21 15:38:55 +00006637static XML_Bool FASTCALL
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006638keyeq(KEY s1, KEY s2)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006639{
6640 for (; *s1 == *s2; s1++, s2++)
6641 if (*s1 == 0)
Fred Drake08317ae2003-10-21 15:38:55 +00006642 return XML_TRUE;
6643 return XML_FALSE;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006644}
6645
Victor Stinner5ff71322017-06-21 14:39:22 +02006646static size_t
6647keylen(KEY s)
6648{
6649 size_t len = 0;
6650 for (; *s; s++, len++);
6651 return len;
6652}
6653
6654static void
6655copy_salt_to_sipkey(XML_Parser parser, struct sipkey * key)
6656{
6657 key->k[0] = 0;
6658 key->k[1] = get_hash_secret_salt(parser);
6659}
6660
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006661static unsigned long FASTCALL
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006662hash(XML_Parser parser, KEY s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006663{
Victor Stinner5ff71322017-06-21 14:39:22 +02006664 struct siphash state;
6665 struct sipkey key;
6666 (void)sip_tobin;
6667 (void)sip24_valid;
6668 copy_salt_to_sipkey(parser, &key);
6669 sip24_init(&state, &key);
6670 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
6671 return (unsigned long)sip24_final(&state);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006672}
6673
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006674static NAMED *
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006675lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006676{
6677 size_t i;
6678 if (table->size == 0) {
6679 size_t tsize;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006680 if (!createSize)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006681 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006682 table->power = INIT_POWER;
6683 /* table->size is a power of 2 */
6684 table->size = (size_t)1 << INIT_POWER;
6685 tsize = table->size * sizeof(NAMED *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006686 table->v = (NAMED **)table->mem->malloc_fcn(tsize);
Fred Drake31d485c2004-08-03 07:06:22 +00006687 if (!table->v) {
6688 table->size = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006689 return NULL;
Fred Drake31d485c2004-08-03 07:06:22 +00006690 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006691 memset(table->v, 0, tsize);
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006692 i = hash(parser, name) & ((unsigned long)table->size - 1);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006693 }
6694 else {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006695 unsigned long h = hash(parser, name);
Fred Drake08317ae2003-10-21 15:38:55 +00006696 unsigned long mask = (unsigned long)table->size - 1;
6697 unsigned char step = 0;
6698 i = h & mask;
6699 while (table->v[i]) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006700 if (keyeq(name, table->v[i]->name))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006701 return table->v[i];
Fred Drake08317ae2003-10-21 15:38:55 +00006702 if (!step)
6703 step = PROBE_STEP(h, mask, table->power);
6704 i < step ? (i += table->size - step) : (i -= step);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006705 }
6706 if (!createSize)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006707 return NULL;
Fred Drake08317ae2003-10-21 15:38:55 +00006708
6709 /* check for overflow (table is half full) */
6710 if (table->used >> (table->power - 1)) {
6711 unsigned char newPower = table->power + 1;
6712 size_t newSize = (size_t)1 << newPower;
6713 unsigned long newMask = (unsigned long)newSize - 1;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006714 size_t tsize = newSize * sizeof(NAMED *);
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006715 NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006716 if (!newV)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006717 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006718 memset(newV, 0, tsize);
6719 for (i = 0; i < table->size; i++)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006720 if (table->v[i]) {
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07006721 unsigned long newHash = hash(parser, table->v[i]->name);
Fred Drake08317ae2003-10-21 15:38:55 +00006722 size_t j = newHash & newMask;
6723 step = 0;
6724 while (newV[j]) {
6725 if (!step)
6726 step = PROBE_STEP(newHash, newMask, newPower);
6727 j < step ? (j += newSize - step) : (j -= step);
6728 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006729 newV[j] = table->v[i];
6730 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006731 table->mem->free_fcn(table->v);
6732 table->v = newV;
Fred Drake08317ae2003-10-21 15:38:55 +00006733 table->power = newPower;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006734 table->size = newSize;
Fred Drake08317ae2003-10-21 15:38:55 +00006735 i = h & newMask;
6736 step = 0;
6737 while (table->v[i]) {
6738 if (!step)
6739 step = PROBE_STEP(h, newMask, newPower);
6740 i < step ? (i += newSize - step) : (i -= step);
6741 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006742 }
6743 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006744 table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006745 if (!table->v[i])
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006746 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006747 memset(table->v[i], 0, createSize);
6748 table->v[i]->name = name;
6749 (table->used)++;
6750 return table->v[i];
6751}
6752
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006753static void FASTCALL
6754hashTableClear(HASH_TABLE *table)
6755{
6756 size_t i;
6757 for (i = 0; i < table->size; i++) {
Fred Drake08317ae2003-10-21 15:38:55 +00006758 table->mem->free_fcn(table->v[i]);
6759 table->v[i] = NULL;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006760 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006761 table->used = 0;
6762}
6763
6764static void FASTCALL
6765hashTableDestroy(HASH_TABLE *table)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006766{
6767 size_t i;
Fred Drake08317ae2003-10-21 15:38:55 +00006768 for (i = 0; i < table->size; i++)
6769 table->mem->free_fcn(table->v[i]);
6770 table->mem->free_fcn(table->v);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006771}
6772
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006773static void FASTCALL
6774hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006775{
Fred Drake08317ae2003-10-21 15:38:55 +00006776 p->power = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006777 p->size = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006778 p->used = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006779 p->v = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006780 p->mem = ms;
6781}
6782
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006783static void FASTCALL
6784hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006785{
6786 iter->p = table->v;
6787 iter->end = iter->p + table->size;
6788}
6789
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006790static NAMED * FASTCALL
6791hashTableIterNext(HASH_TABLE_ITER *iter)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006792{
6793 while (iter->p != iter->end) {
6794 NAMED *tem = *(iter->p)++;
6795 if (tem)
6796 return tem;
6797 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006798 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006799}
6800
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006801static void FASTCALL
6802poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006803{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006804 pool->blocks = NULL;
6805 pool->freeBlocks = NULL;
6806 pool->start = NULL;
6807 pool->ptr = NULL;
6808 pool->end = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006809 pool->mem = ms;
6810}
6811
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006812static void FASTCALL
6813poolClear(STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006814{
6815 if (!pool->freeBlocks)
6816 pool->freeBlocks = pool->blocks;
6817 else {
6818 BLOCK *p = pool->blocks;
6819 while (p) {
6820 BLOCK *tem = p->next;
6821 p->next = pool->freeBlocks;
6822 pool->freeBlocks = p;
6823 p = tem;
6824 }
6825 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006826 pool->blocks = NULL;
6827 pool->start = NULL;
6828 pool->ptr = NULL;
6829 pool->end = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006830}
6831
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006832static void FASTCALL
6833poolDestroy(STRING_POOL *pool)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006834{
6835 BLOCK *p = pool->blocks;
6836 while (p) {
6837 BLOCK *tem = p->next;
6838 pool->mem->free_fcn(p);
6839 p = tem;
6840 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006841 p = pool->freeBlocks;
6842 while (p) {
6843 BLOCK *tem = p->next;
6844 pool->mem->free_fcn(p);
6845 p = tem;
6846 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006847}
6848
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006849static XML_Char *
6850poolAppend(STRING_POOL *pool, const ENCODING *enc,
6851 const char *ptr, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006852{
6853 if (!pool->ptr && !poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006854 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006855 for (;;) {
Victor Stinner23ec4b52017-06-15 00:54:36 +02006856 const enum XML_Convert_Result convert_res = XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
6857 if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006858 break;
6859 if (!poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006860 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006861 }
6862 return pool->start;
6863}
6864
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006865static const XML_Char * FASTCALL
6866poolCopyString(STRING_POOL *pool, const XML_Char *s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006867{
6868 do {
6869 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006870 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006871 } while (*s++);
6872 s = pool->start;
6873 poolFinish(pool);
6874 return s;
6875}
6876
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006877static const XML_Char *
6878poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006879{
Victor Stinner93d0cb52017-08-18 23:43:54 +02006880 if (!pool->ptr && !poolGrow(pool)) {
6881 /* The following line is unreachable given the current usage of
6882 * poolCopyStringN(). Currently it is called from exactly one
6883 * place to copy the text of a simple general entity. By that
6884 * point, the name of the entity is already stored in the pool, so
6885 * pool->ptr cannot be NULL.
6886 *
6887 * If poolCopyStringN() is used elsewhere as it well might be,
6888 * this line may well become executable again. Regardless, this
6889 * sort of check shouldn't be removed lightly, so we just exclude
6890 * it from the coverage statistics.
6891 */
6892 return NULL; /* LCOV_EXCL_LINE */
6893 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006894 for (; n > 0; --n, s++) {
6895 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006896 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006897 }
6898 s = pool->start;
6899 poolFinish(pool);
6900 return s;
6901}
6902
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006903static const XML_Char * FASTCALL
6904poolAppendString(STRING_POOL *pool, const XML_Char *s)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006905{
6906 while (*s) {
6907 if (!poolAppendChar(pool, *s))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006908 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006909 s++;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006910 }
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006911 return pool->start;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006912}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006913
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006914static XML_Char *
6915poolStoreString(STRING_POOL *pool, const ENCODING *enc,
6916 const char *ptr, const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006917{
6918 if (!poolAppend(pool, enc, ptr, end))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006919 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006920 if (pool->ptr == pool->end && !poolGrow(pool))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00006921 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00006922 *(pool->ptr)++ = 0;
6923 return pool->start;
6924}
6925
Victor Stinner5ff71322017-06-21 14:39:22 +02006926static size_t
6927poolBytesToAllocateFor(int blockSize)
6928{
6929 /* Unprotected math would be:
6930 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
6931 **
6932 ** Detect overflow, avoiding _signed_ overflow undefined behavior
6933 ** For a + b * c we check b * c in isolation first, so that addition of a
6934 ** on top has no chance of making us accept a small non-negative number
6935 */
6936 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
6937
6938 if (blockSize <= 0)
6939 return 0;
6940
6941 if (blockSize > (int)(INT_MAX / stretch))
6942 return 0;
6943
6944 {
6945 const int stretchedBlockSize = blockSize * (int)stretch;
6946 const int bytesToAllocate = (int)(
6947 offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
6948 if (bytesToAllocate < 0)
6949 return 0;
6950
6951 return (size_t)bytesToAllocate;
6952 }
6953}
6954
/* Enlarge the region [pool->start, pool->end) that holds the string
   currently under construction, preserving the characters already
   written to it.

   Strategies, tried in order:
     1. no current region and a free block exists: take that block;
     2. the first free block is larger than the current region: move
        the partial string into it;
     3. the partial string starts at the beginning of the newest block:
        realloc that block to twice the region size;
     4. otherwise: allocate a new block (at least INIT_BLOCK_SIZE,
        else twice the region size) and copy the partial string over.

   Returns XML_TRUE on success.  Returns XML_FALSE when a size
   computation would overflow or an allocation fails; the pool is left
   unchanged in that case.
*/
static XML_Bool FASTCALL
poolGrow(STRING_POOL *pool)
{
  if (pool->freeBlocks) {
    if (pool->start == 0) {
      /* Nothing in progress: make the first free block the current one. */
      pool->blocks = pool->freeBlocks;
      pool->freeBlocks = pool->freeBlocks->next;
      pool->blocks->next = NULL;
      pool->start = pool->blocks->s;
      pool->end = pool->start + pool->blocks->size;
      pool->ptr = pool->start;
      return XML_TRUE;
    }
    if (pool->end - pool->start < pool->freeBlocks->size) {
      /* The first free block is bigger than the current region: put it
         at the head of the used list and move the partial string there. */
      BLOCK *tem = pool->freeBlocks->next;
      pool->freeBlocks->next = pool->blocks;
      pool->blocks = pool->freeBlocks;
      pool->freeBlocks = tem;
      memcpy(pool->blocks->s, pool->start,
             (pool->end - pool->start) * sizeof(XML_Char));
      /* ptr must be rebased before start is overwritten. */
      pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
      pool->start = pool->blocks->s;
      pool->end = pool->start + pool->blocks->size;
      return XML_TRUE;
    }
  }
  if (pool->blocks && pool->start == pool->blocks->s) {
    /* The partial string begins at the start of the newest block, so
       that block can be enlarged in place with realloc. */
    BLOCK *temp;
    int blockSize = (int)((unsigned)(pool->end - pool->start)*2U);
    size_t bytesToAllocate;

    /* NOTE: Needs to be calculated prior to calling `realloc`
             to avoid dangling pointers: */
    const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;

    if (blockSize < 0) {
      /* This condition traps a situation where more than INT_MAX/2
       * characters have already been allocated for this block. This
       * isn't readily testable, since it is unlikely that an average
       * machine will have that much memory, so we exclude it from the
       * coverage statistics.
       */
      return XML_FALSE; /* LCOV_EXCL_LINE */
    }

    bytesToAllocate = poolBytesToAllocateFor(blockSize);
    if (bytesToAllocate == 0)
      return XML_FALSE;

    temp = (BLOCK *)
      pool->mem->realloc_fcn(pool->blocks, (unsigned)bytesToAllocate);
    if (temp == NULL)
      return XML_FALSE;
    /* The block may have moved: rebuild all pointers from the new base. */
    pool->blocks = temp;
    pool->blocks->size = blockSize;
    pool->ptr = pool->blocks->s + offsetInsideBlock;
    pool->start = pool->blocks->s;
    pool->end = pool->start + blockSize;
  }
  else {
    /* The partial string sits behind finished strings (or there is no
       block at all): allocate a fresh block and copy it over. */
    BLOCK *tem;
    int blockSize = (int)(pool->end - pool->start);
    size_t bytesToAllocate;

    if (blockSize < 0) {
      /* This condition traps a situation where either more than
       * INT_MAX bytes have already been allocated (which is prevented
       * by various pieces of program logic, not least this one, never
       * mind the unlikelihood of actually having that much memory) or
       * the pool control fields have been corrupted (which could
       * conceivably happen in an extremely buggy user handler
       * function).  Either way it isn't readily testable, so we
       * exclude it from the coverage statistics.
       */
      return XML_FALSE; /* LCOV_EXCL_LINE */
    }

    if (blockSize < INIT_BLOCK_SIZE)
      blockSize = INIT_BLOCK_SIZE;
    else {
      /* Detect overflow, avoiding _signed_ overflow undefined behavior */
      if ((int)((unsigned)blockSize * 2U) < 0) {
        return XML_FALSE;
      }
      blockSize *= 2;
    }

    bytesToAllocate = poolBytesToAllocateFor(blockSize);
    if (bytesToAllocate == 0)
      return XML_FALSE;

    tem = (BLOCK *)pool->mem->malloc_fcn(bytesToAllocate);
    if (!tem)
      return XML_FALSE;
    tem->size = blockSize;
    tem->next = pool->blocks;
    pool->blocks = tem;
    if (pool->ptr != pool->start)
      memcpy(tem->s, pool->start,
             (pool->ptr - pool->start) * sizeof(XML_Char));
    /* ptr must be rebased before start is overwritten. */
    pool->ptr = tem->s + (pool->ptr - pool->start);
    pool->start = tem->s;
    pool->end = tem->s + blockSize;
  }
  return XML_TRUE;
}
7061
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007062static int FASTCALL
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007063nextScaffoldPart(XML_Parser parser)
7064{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007065 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007066 CONTENT_SCAFFOLD * me;
7067 int next;
7068
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007069 if (!dtd->scaffIndex) {
7070 dtd->scaffIndex = (int *)MALLOC(groupSize * sizeof(int));
7071 if (!dtd->scaffIndex)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007072 return -1;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007073 dtd->scaffIndex[0] = 0;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007074 }
7075
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007076 if (dtd->scaffCount >= dtd->scaffSize) {
7077 CONTENT_SCAFFOLD *temp;
7078 if (dtd->scaffold) {
7079 temp = (CONTENT_SCAFFOLD *)
7080 REALLOC(dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
7081 if (temp == NULL)
7082 return -1;
7083 dtd->scaffSize *= 2;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007084 }
7085 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007086 temp = (CONTENT_SCAFFOLD *)MALLOC(INIT_SCAFFOLD_ELEMENTS
7087 * sizeof(CONTENT_SCAFFOLD));
7088 if (temp == NULL)
7089 return -1;
7090 dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007091 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007092 dtd->scaffold = temp;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007093 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007094 next = dtd->scaffCount++;
7095 me = &dtd->scaffold[next];
7096 if (dtd->scaffLevel) {
7097 CONTENT_SCAFFOLD *parent = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel-1]];
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007098 if (parent->lastchild) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007099 dtd->scaffold[parent->lastchild].nextsib = next;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007100 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007101 if (!parent->childcnt)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007102 parent->firstchild = next;
7103 parent->lastchild = next;
7104 parent->childcnt++;
7105 }
7106 me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
7107 return next;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007108}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007109
7110static void
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007111build_node(XML_Parser parser,
7112 int src_node,
7113 XML_Content *dest,
7114 XML_Content **contpos,
7115 XML_Char **strpos)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007116{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007117 DTD * const dtd = _dtd; /* save one level of indirection */
7118 dest->type = dtd->scaffold[src_node].type;
7119 dest->quant = dtd->scaffold[src_node].quant;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007120 if (dest->type == XML_CTYPE_NAME) {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007121 const XML_Char *src;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007122 dest->name = *strpos;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007123 src = dtd->scaffold[src_node].name;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007124 for (;;) {
7125 *(*strpos)++ = *src;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007126 if (!*src)
7127 break;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007128 src++;
7129 }
7130 dest->numchildren = 0;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007131 dest->children = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007132 }
7133 else {
7134 unsigned int i;
7135 int cn;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007136 dest->numchildren = dtd->scaffold[src_node].childcnt;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007137 dest->children = *contpos;
7138 *contpos += dest->numchildren;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007139 for (i = 0, cn = dtd->scaffold[src_node].firstchild;
7140 i < dest->numchildren;
7141 i++, cn = dtd->scaffold[cn].nextsib) {
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007142 build_node(parser, cn, &(dest->children[i]), contpos, strpos);
7143 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007144 dest->name = NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007145 }
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007146}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007147
7148static XML_Content *
7149build_model (XML_Parser parser)
7150{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007151 DTD * const dtd = _dtd; /* save one level of indirection */
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007152 XML_Content *ret;
7153 XML_Content *cpos;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007154 XML_Char * str;
7155 int allocsize = (dtd->scaffCount * sizeof(XML_Content)
7156 + (dtd->contentStringLen * sizeof(XML_Char)));
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007157
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007158 ret = (XML_Content *)MALLOC(allocsize);
7159 if (!ret)
7160 return NULL;
7161
7162 str = (XML_Char *) (&ret[dtd->scaffCount]);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007163 cpos = &ret[1];
7164
7165 build_node(parser, 0, ret, &cpos, &str);
7166 return ret;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007167}
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007168
7169static ELEMENT_TYPE *
7170getElementType(XML_Parser parser,
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007171 const ENCODING *enc,
7172 const char *ptr,
7173 const char *end)
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007174{
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007175 DTD * const dtd = _dtd; /* save one level of indirection */
7176 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007177 ELEMENT_TYPE *ret;
7178
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007179 if (!name)
7180 return NULL;
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07007181 ret = (ELEMENT_TYPE *) lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE));
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007182 if (!ret)
7183 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007184 if (ret->name != name)
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007185 poolDiscard(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007186 else {
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007187 poolFinish(&dtd->pool);
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007188 if (!setElementTypePrefix(parser, ret))
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007189 return NULL;
Martin v. Löwis1dbb1ca2002-02-11 23:13:04 +00007190 }
7191 return ret;
Martin v. Löwisfc03a942003-01-25 22:41:29 +00007192}
Victor Stinner93d0cb52017-08-18 23:43:54 +02007193
7194static XML_Char *
7195copyString(const XML_Char *s,
7196 const XML_Memory_Handling_Suite *memsuite)
7197{
7198 int charsRequired = 0;
7199 XML_Char *result;
7200
7201 /* First determine how long the string is */
7202 while (s[charsRequired] != 0) {
7203 charsRequired++;
7204 }
7205 /* Include the terminator */
7206 charsRequired++;
7207
7208 /* Now allocate space for the copy */
7209 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
7210 if (result == NULL)
7211 return NULL;
7212 /* Copy the original into place */
7213 memcpy(result, s, charsRequired * sizeof(XML_Char));
7214 return result;
7215}