blob: 2e8be3706db9141cc740f708a0e3e915a1a97a99 [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Serhiy Storchaka55f82492018-10-19 18:00:51 +03004#include "structmember.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00005#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00006#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00007
Fredrik Lundhc3345042005-12-13 19:49:55 +00008#include "pyexpat.h"
9
Brett Cannond0aeda82014-08-22 14:23:20 -040010/* Do not emit Clinic output to a file as that wreaks havoc with conditionally
11 included methods. */
12/*[clinic input]
13module pyexpat
14[clinic start generated code]*/
15/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/
16
/* Expat version folded into a single integer
   (major*10000 + minor*100 + micro) so that it can be compared in
   preprocessor conditionals such as "#if XML_COMBINED_VERSION >= 19504". */
#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)

/* Expat memory handling suite made of the PyObject_* allocator functions
   (malloc, realloc, free, in that order). */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
21
/* Indices into xmlparseobject.handlers: one slot per kind of callback.
   The enumerator names double as the NAME argument of the RC_HANDLER
   family of macros, so they must not be renamed. */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
    /* Only present when building against Expat 1.95.4 or newer. */
#if XML_COMBINED_VERSION >= 19504
    SkippedEntity,
#endif
    /* Trailing enumerator; always the last value of the enum. */
    _DummyDecl
};
49
/* Exception class instantiated and raised by set_error(). */
static PyObject *ErrorObject;

/* ----------------------------------------------------- */

/* Declarations for objects of type xmlparser */

typedef struct {
    PyObject_HEAD

    XML_Parser itself;          /* The Expat parser driven by this object */
    int ordered_attributes;     /* Return attributes as a list. */
    int specified_attributes;   /* Report only specified attributes. */
    int in_callback;            /* Is a callback active? */
    int ns_prefixes;            /* Namespace-triplets mode? */
    XML_Char *buffer;           /* Buffer used when accumulating characters */
                                /* NULL if not enabled */
    int buffer_size;            /* Size of buffer, in XML_Char units */
    int buffer_used;            /* Buffer units in use */
    PyObject *intern;           /* Dictionary to intern strings */
    PyObject **handlers;        /* Indexed by enum HandlerTypes; a NULL
                                   entry means no handler is installed */
} xmlparseobject;
71
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030072#include "clinic/pyexpat.c.h"
73
/* NOTE(review): presumably the default size (in XML_Char units) of
   xmlparseobject.buffer; its use is outside this part of the file. */
#define CHARACTER_DATA_BUFFER_SIZE 8192

/* Forward declaration of the parser type object. */
static PyTypeObject Xmlparsetype;

/* Signature of a function that installs the C callback `meth` on an
   Expat parser. */
typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
/* Untyped pointer to one of the my_*Handler C callbacks. */
typedef void* xmlhandler;

/* One record per handler kind. */
struct HandlerInfo {
    const char *name;           /* Handler name */
    xmlhandlersetter setter;    /* Installs `handler` on an XML_Parser */
    xmlhandler handler;         /* C callback to install */
    PyGetSetDef getset;         /* Getter/setter descriptor for the handler */
};

/* Table of handler records; it is filled in elsewhere in this file. */
static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000089
Fred Drakebd6101c2001-02-14 18:29:45 +000090/* Set an integer attribute on the error object; return true on success,
91 * false on an exception.
92 */
93static int
Serhiy Storchakaef1585e2015-12-25 20:01:53 +020094set_error_attr(PyObject *err, const char *name, int value)
Fred Drakebd6101c2001-02-14 18:29:45 +000095{
Christian Heimes217cfd12007-12-02 14:31:20 +000096 PyObject *v = PyLong_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +000097
Neal Norwitz2f5e9902006-03-08 06:36:45 +000098 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
99 Py_XDECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +0000100 return 0;
101 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000102 Py_DECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +0000103 return 1;
104}
105
106/* Build and set an Expat exception, including positioning
107 * information. Always returns NULL.
108 */
Fred Drake85d835f2001-02-08 15:39:08 +0000109static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000110set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000111{
112 PyObject *err;
Victor Stinner499dfcf2011-03-21 13:26:24 +0100113 PyObject *buffer;
Fred Drake85d835f2001-02-08 15:39:08 +0000114 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000115 int lineno = XML_GetErrorLineNumber(parser);
116 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000117
Victor Stinner499dfcf2011-03-21 13:26:24 +0100118 buffer = PyUnicode_FromFormat("%s: line %i, column %i",
119 XML_ErrorString(code), lineno, column);
120 if (buffer == NULL)
121 return NULL;
Victor Stinner7bfb42d2016-12-05 17:04:32 +0100122 err = PyObject_CallFunctionObjArgs(ErrorObject, buffer, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +0100123 Py_DECREF(buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000124 if ( err != NULL
125 && set_error_attr(err, "code", code)
126 && set_error_attr(err, "offset", column)
127 && set_error_attr(err, "lineno", lineno)) {
128 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000129 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000130 Py_XDECREF(err);
Fred Drake85d835f2001-02-08 15:39:08 +0000131 return NULL;
132}
133
Fred Drake71b63ff2002-06-28 22:29:01 +0000134static int
135have_handler(xmlparseobject *self, int type)
136{
137 PyObject *handler = self->handlers[type];
138 return handler != NULL;
139}
140
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000141/* Convert a string of XML_Chars into a Unicode string.
142 Returns None if str is a null pointer. */
143
Fred Drake0582df92000-07-12 04:49:00 +0000144static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000145conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000146{
Fred Drake71b63ff2002-06-28 22:29:01 +0000147 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000148 and hence in UTF-8. */
149 /* UTF-8 from Expat, Unicode desired */
150 if (str == NULL) {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200151 Py_RETURN_NONE;
Fred Drake0582df92000-07-12 04:49:00 +0000152 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000153 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000154}
155
Fred Drake0582df92000-07-12 04:49:00 +0000156static PyObject *
157conv_string_len_to_unicode(const XML_Char *str, int len)
158{
Fred Drake71b63ff2002-06-28 22:29:01 +0000159 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000160 and hence in UTF-8. */
161 /* UTF-8 from Expat, Unicode desired */
162 if (str == NULL) {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200163 Py_RETURN_NONE;
Fred Drake0582df92000-07-12 04:49:00 +0000164 }
Fred Drake6f987622000-08-25 18:03:30 +0000165 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000166}
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000167
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000168/* Callback routines */
169
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000170static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000171
Martin v. Löwis069dde22003-01-21 10:58:18 +0000172/* This handler is used when an error has been detected, in the hope
173 that actual parsing can be terminated early. This will only help
174 if an external entity reference is encountered. */
175static int
176error_external_entity_ref_handler(XML_Parser parser,
177 const XML_Char *context,
178 const XML_Char *base,
179 const XML_Char *systemId,
180 const XML_Char *publicId)
181{
182 return 0;
183}
184
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000185/* Dummy character data handler used when an error (exception) has
186 been detected, and the actual parsing can be terminated early.
187 This is needed since character data handler can't be safely removed
188 from within the character data handler, but can be replaced. It is
189 used only from the character data handler trampoline, and must be
190 used right after `flag_error()` is called. */
191static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000192noop_character_data_handler(void *userData, const XML_Char *data, int len)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000193{
194 /* Do nothing. */
195}
196
Fred Drake6f987622000-08-25 18:03:30 +0000197static void
198flag_error(xmlparseobject *self)
199{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000200 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000201 XML_SetExternalEntityRefHandler(self->itself,
202 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000203}
204
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000205static PyObject*
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200206call_with_frame(const char *funcname, int lineno, PyObject* func, PyObject* args,
Fred Drake39689c52004-08-13 03:12:57 +0000207 xmlparseobject *self)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000208{
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200209 PyObject *res;
Fred Drakebd6101c2001-02-14 18:29:45 +0000210
Fred Drakebd6101c2001-02-14 18:29:45 +0000211 res = PyEval_CallObject(func, args);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000212 if (res == NULL) {
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200213 _PyTraceback_Add(funcname, __FILE__, lineno);
Fred Drake39689c52004-08-13 03:12:57 +0000214 XML_StopParser(self->itself, XML_FALSE);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000215 }
Fred Drakebd6101c2001-02-14 18:29:45 +0000216 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000217}
218
Fred Drakeb91a36b2002-06-27 19:40:48 +0000219static PyObject*
220string_intern(xmlparseobject *self, const char* str)
221{
Guido van Rossum4ca94712007-07-23 17:42:32 +0000222 PyObject *result = conv_string_to_unicode(str);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000223 PyObject *value;
Neal Norwitz484d9a42005-09-30 04:46:49 +0000224 /* result can be NULL if the unicode conversion failed. */
225 if (!result)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000226 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000227 if (!self->intern)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 return result;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200229 value = PyDict_GetItemWithError(self->intern, result);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000230 if (!value) {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200231 if (!PyErr_Occurred() &&
232 PyDict_SetItem(self->intern, result, result) == 0)
233 {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000234 return result;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200235 }
Zackery Spytz68def052018-10-19 00:57:38 -0600236 else {
237 Py_DECREF(result);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000238 return NULL;
Zackery Spytz68def052018-10-19 00:57:38 -0600239 }
Fred Drakeb91a36b2002-06-27 19:40:48 +0000240 }
241 Py_INCREF(value);
242 Py_DECREF(result);
243 return value;
244}
245
Fred Drake2a3d7db2002-06-28 22:56:48 +0000246/* Return 0 on success, -1 on exception.
247 * flag_error() will be called before return if needed.
248 */
249static int
250call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
251{
252 PyObject *args;
253 PyObject *temp;
254
Georg Brandlc01537f2010-10-15 16:26:08 +0000255 if (!have_handler(self, CharacterData))
256 return -1;
257
Fred Drake2a3d7db2002-06-28 22:56:48 +0000258 args = PyTuple_New(1);
259 if (args == NULL)
260 return -1;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000261 temp = (conv_string_len_to_unicode(buffer, len));
Fred Drake2a3d7db2002-06-28 22:56:48 +0000262 if (temp == NULL) {
263 Py_DECREF(args);
264 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000265 XML_SetCharacterDataHandler(self->itself,
266 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000267 return -1;
268 }
269 PyTuple_SET_ITEM(args, 0, temp);
270 /* temp is now a borrowed reference; consider it unused. */
271 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200272 temp = call_with_frame("CharacterData", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000273 self->handlers[CharacterData], args, self);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000274 /* temp is an owned reference again, or NULL */
275 self->in_callback = 0;
276 Py_DECREF(args);
277 if (temp == NULL) {
278 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000279 XML_SetCharacterDataHandler(self->itself,
280 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000281 return -1;
282 }
283 Py_DECREF(temp);
284 return 0;
285}
286
287static int
288flush_character_buffer(xmlparseobject *self)
289{
290 int rc;
291 if (self->buffer == NULL || self->buffer_used == 0)
292 return 0;
293 rc = call_character_handler(self, self->buffer, self->buffer_used);
294 self->buffer_used = 0;
295 return rc;
296}
297
298static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000299my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000300{
301 xmlparseobject *self = (xmlparseobject *) userData;
Victor Stinner9e09c262013-07-18 23:17:01 +0200302
303 if (PyErr_Occurred())
304 return;
305
Fred Drake2a3d7db2002-06-28 22:56:48 +0000306 if (self->buffer == NULL)
307 call_character_handler(self, data, len);
308 else {
309 if ((self->buffer_used + len) > self->buffer_size) {
310 if (flush_character_buffer(self) < 0)
311 return;
312 /* handler might have changed; drop the rest on the floor
313 * if there isn't a handler anymore
314 */
315 if (!have_handler(self, CharacterData))
316 return;
317 }
318 if (len > self->buffer_size) {
319 call_character_handler(self, data, len);
320 self->buffer_used = 0;
321 }
322 else {
323 memcpy(self->buffer + self->buffer_used,
324 data, len * sizeof(XML_Char));
325 self->buffer_used += len;
326 }
327 }
328}
329
Fred Drake85d835f2001-02-08 15:39:08 +0000330static void
331my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000332 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000333{
334 xmlparseobject *self = (xmlparseobject *)userData;
335
Fred Drake71b63ff2002-06-28 22:29:01 +0000336 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000337 PyObject *container, *rv, *args;
338 int i, max;
339
Victor Stinner9e09c262013-07-18 23:17:01 +0200340 if (PyErr_Occurred())
341 return;
342
Fred Drake2a3d7db2002-06-28 22:56:48 +0000343 if (flush_character_buffer(self) < 0)
344 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000345 /* Set max to the number of slots filled in atts[]; max/2 is
346 * the number of attributes we need to process.
347 */
348 if (self->specified_attributes) {
349 max = XML_GetSpecifiedAttributeCount(self->itself);
350 }
351 else {
352 max = 0;
353 while (atts[max] != NULL)
354 max += 2;
355 }
356 /* Build the container. */
357 if (self->ordered_attributes)
358 container = PyList_New(max);
359 else
360 container = PyDict_New();
361 if (container == NULL) {
362 flag_error(self);
363 return;
364 }
365 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000366 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000367 PyObject *v;
368 if (n == NULL) {
369 flag_error(self);
370 Py_DECREF(container);
371 return;
372 }
Guido van Rossum4ca94712007-07-23 17:42:32 +0000373 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
Fred Drake85d835f2001-02-08 15:39:08 +0000374 if (v == NULL) {
375 flag_error(self);
376 Py_DECREF(container);
377 Py_DECREF(n);
378 return;
379 }
380 if (self->ordered_attributes) {
381 PyList_SET_ITEM(container, i, n);
382 PyList_SET_ITEM(container, i+1, v);
383 }
384 else if (PyDict_SetItem(container, n, v)) {
385 flag_error(self);
386 Py_DECREF(n);
387 Py_DECREF(v);
Zackery Spytz68def052018-10-19 00:57:38 -0600388 Py_DECREF(container);
Fred Drake85d835f2001-02-08 15:39:08 +0000389 return;
390 }
391 else {
392 Py_DECREF(n);
393 Py_DECREF(v);
394 }
395 }
Neal Norwitz484d9a42005-09-30 04:46:49 +0000396 args = string_intern(self, name);
Fred Drake85d835f2001-02-08 15:39:08 +0000397 if (args == NULL) {
398 Py_DECREF(container);
399 return;
400 }
Zackery Spytz68def052018-10-19 00:57:38 -0600401 args = Py_BuildValue("(NN)", args, container);
402 if (args == NULL) {
403 return;
404 }
Fred Drake85d835f2001-02-08 15:39:08 +0000405 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000406 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200407 rv = call_with_frame("StartElement", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000408 self->handlers[StartElement], args, self);
Fred Drakebd6101c2001-02-14 18:29:45 +0000409 self->in_callback = 0;
410 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000411 if (rv == NULL) {
412 flag_error(self);
413 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000414 }
Fred Drake85d835f2001-02-08 15:39:08 +0000415 Py_DECREF(rv);
416 }
417}
418
/* Generate the C callback my_<NAME>Handler that forwards an Expat event
 * to the Python handler stored in self->handlers[NAME].
 *
 *   RC            C return type of the generated function
 *   NAME          enum HandlerTypes value; also used in the function name
 *                 and as the name passed to call_with_frame()
 *   PARAMS        parenthesized C parameter list
 *   INIT          extra local declarations (e.g. "int rc=0;")
 *   PARAM_FORMAT  parenthesized Py_BuildValue() arguments that build the
 *                 Python argument tuple
 *   CONVERSION    statement(s) that turn the Python result `rv` into the
 *                 C return value
 *   RETURN        expression returned on every path (may be empty)
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing when no handler is installed or an
 * exception is already pending; it flushes buffered character data
 * first, and calls flag_error() when building the arguments or calling
 * the handler fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (PyErr_Occurred()) \
            return RETURN; \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { flag_error(self); return RETURN;} \
        self->in_callback = 1; \
        rv = call_with_frame(#NAME,__LINE__, \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}
449
/* RC_HANDLER for callbacks that return void and receive the parser
   object as `userData`; the Python result is discarded. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
        (xmlparseobject *)userData)

/* RC_HANDLER for callbacks that return int and receive the parser
   object as `userData`; the Python result is converted with
   PyLong_AsLong() and returned (0 when the handler was not called).
   NOTE(review): the PyLong_AsLong() result is not checked for errors. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
        rc = PyLong_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000458
/* EndElement: handler is called with the interned element name. */
VOID_HANDLER(EndElement,
             (void *userData, const XML_Char *name),
             ("(N)", string_intern(self, name)))

/* ProcessingInstruction: handler is called with (target, data); only
   the target is interned. */
VOID_HANDLER(ProcessingInstruction,
             (void *userData,
              const XML_Char *target,
              const XML_Char *data),
             ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))

/* UnparsedEntityDecl: handler is called with
   (entityName, base, systemId, publicId, notationName), all interned. */
VOID_HANDLER(UnparsedEntityDecl,
             (void *userData,
              const XML_Char *entityName,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("(NNNNN)",
              string_intern(self, entityName), string_intern(self, base),
              string_intern(self, systemId), string_intern(self, publicId),
              string_intern(self, notationName)))

/* EntityDecl: handler is called with
   (entityName, is_parameter_entity, value, base, systemId, publicId,
   notationName).  `value` comes with an explicit length and is not
   interned. */
VOID_HANDLER(EntityDecl,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity,
              const XML_Char *value,
              int value_length,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("NiNNNNN",
              string_intern(self, entityName), is_parameter_entity,
              (conv_string_len_to_unicode(value, value_length)),
              string_intern(self, base), string_intern(self, systemId),
              string_intern(self, publicId),
              string_intern(self, notationName)))

/* XmlDecl: handler is called with (version, encoding, standalone). */
VOID_HANDLER(XmlDecl,
             (void *userData,
              const XML_Char *version,
              const XML_Char *encoding,
              int standalone),
             ("(O&O&i)",
              conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
              standalone))
506
507static PyObject *
508conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000509 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000510{
511 PyObject *result = NULL;
512 PyObject *children = PyTuple_New(model->numchildren);
513 int i;
514
515 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000516 assert(model->numchildren < INT_MAX);
517 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000518 PyObject *child = conv_content_model(&model->children[i],
519 conv_string);
520 if (child == NULL) {
521 Py_XDECREF(children);
522 return NULL;
523 }
524 PyTuple_SET_ITEM(children, i, child);
525 }
526 result = Py_BuildValue("(iiO&N)",
527 model->type, model->quant,
528 conv_string,model->name, children);
529 }
530 return result;
531}
532
Fred Drake06dd8cf2003-02-02 03:54:17 +0000533static void
534my_ElementDeclHandler(void *userData,
535 const XML_Char *name,
536 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000537{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000538 xmlparseobject *self = (xmlparseobject *)userData;
539 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000540
Fred Drake06dd8cf2003-02-02 03:54:17 +0000541 if (have_handler(self, ElementDecl)) {
542 PyObject *rv = NULL;
543 PyObject *modelobj, *nameobj;
544
Victor Stinner9e09c262013-07-18 23:17:01 +0200545 if (PyErr_Occurred())
546 return;
547
Fred Drake06dd8cf2003-02-02 03:54:17 +0000548 if (flush_character_buffer(self) < 0)
549 goto finally;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000550 modelobj = conv_content_model(model, (conv_string_to_unicode));
Fred Drake06dd8cf2003-02-02 03:54:17 +0000551 if (modelobj == NULL) {
552 flag_error(self);
553 goto finally;
554 }
555 nameobj = string_intern(self, name);
556 if (nameobj == NULL) {
557 Py_DECREF(modelobj);
558 flag_error(self);
559 goto finally;
560 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000561 args = Py_BuildValue("NN", nameobj, modelobj);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000562 if (args == NULL) {
Fred Drake06dd8cf2003-02-02 03:54:17 +0000563 flag_error(self);
564 goto finally;
565 }
566 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200567 rv = call_with_frame("ElementDecl", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000568 self->handlers[ElementDecl], args, self);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000569 self->in_callback = 0;
570 if (rv == NULL) {
571 flag_error(self);
572 goto finally;
573 }
574 Py_DECREF(rv);
575 }
576 finally:
577 Py_XDECREF(args);
578 XML_FreeContentModel(self->itself, model);
579 return;
580}
Fred Drake85d835f2001-02-08 15:39:08 +0000581
/* AttlistDecl: handler is called with
   (elname, attname, att_type, dflt, isrequired); the element and
   attribute names are interned. */
VOID_HANDLER(AttlistDecl,
             (void *userData,
              const XML_Char *elname,
              const XML_Char *attname,
              const XML_Char *att_type,
              const XML_Char *dflt,
              int isrequired),
             ("(NNO&O&i)",
              string_intern(self, elname), string_intern(self, attname),
              conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
              isrequired))

/* SkippedEntity (Expat >= 1.95.4 only): handler is called with
   (entityName, is_parameter_entity). */
#if XML_COMBINED_VERSION >= 19504
VOID_HANDLER(SkippedEntity,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity),
             ("Ni",
              string_intern(self, entityName), is_parameter_entity))
#endif

/* NotationDecl: handler is called with
   (notationName, base, systemId, publicId), all interned. */
VOID_HANDLER(NotationDecl,
                (void *userData,
                        const XML_Char *notationName,
                        const XML_Char *base,
                        const XML_Char *systemId,
                        const XML_Char *publicId),
                ("(NNNN)",
                 string_intern(self, notationName), string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)))

/* StartNamespaceDecl: handler is called with (prefix, uri). */
VOID_HANDLER(StartNamespaceDecl,
                (void *userData,
                      const XML_Char *prefix,
                      const XML_Char *uri),
                ("(NN)",
                 string_intern(self, prefix), string_intern(self, uri)))

/* EndNamespaceDecl: handler is called with (prefix,). */
VOID_HANDLER(EndNamespaceDecl,
                (void *userData,
                    const XML_Char *prefix),
                ("(N)", string_intern(self, prefix)))

/* Comment: handler is called with the comment text. */
VOID_HANDLER(Comment,
               (void *userData, const XML_Char *data),
                ("(O&)", conv_string_to_unicode ,data))

/* StartCdataSection: handler is called without arguments. */
VOID_HANDLER(StartCdataSection,
               (void *userData),
                ("()"))

/* EndCdataSection: handler is called without arguments. */
VOID_HANDLER(EndCdataSection,
               (void *userData),
                ("()"))

/* Default: handler is called with the `len` characters starting at `s`. */
VOID_HANDLER(Default,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))

/* DefaultHandlerExpand: same arguments as Default. */
VOID_HANDLER(DefaultHandlerExpand,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))
/* Alias without the "Handler" suffix that the macro appends. */
#define my_DefaultHandlerExpand my_DefaultHandlerExpandHandler

/* NotStandalone: handler is called without arguments; its result is
   converted to int and returned to Expat. */
INT_HANDLER(NotStandalone,
                (void *userData),
                ("()"))

/* ExternalEntityRef: handler is called with
   (context, base, systemId, publicId); its result is converted to int
   and returned to Expat.  This callback receives the XML_Parser rather
   than the user data, so the parser object is fetched with
   XML_GetUserData(). */
RC_HANDLER(int, ExternalEntityRef,
                (XML_Parser parser,
                    const XML_Char *context,
                    const XML_Char *base,
                    const XML_Char *systemId,
                    const XML_Char *publicId),
                int rc=0;,
                ("(O&NNN)",
                 conv_string_to_unicode ,context, string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)),
                rc = PyLong_AsLong(rv);, rc,
                XML_GetUserData(parser))

/* XXX UnknownEncodingHandler */

/* StartDoctypeDecl: handler is called with
   (doctypeName, sysid, pubid, has_internal_subset). */
VOID_HANDLER(StartDoctypeDecl,
             (void *userData, const XML_Char *doctypeName,
              const XML_Char *sysid, const XML_Char *pubid,
              int has_internal_subset),
             ("(NNNi)", string_intern(self, doctypeName),
              string_intern(self, sysid), string_intern(self, pubid),
              has_internal_subset))

/* EndDoctypeDecl: handler is called without arguments. */
VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000674
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000675/* ---------------------------------------------------------------- */
Brett Cannond0aeda82014-08-22 14:23:20 -0400676/*[clinic input]
677class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype"
678[clinic start generated code]*/
679/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2393162385232e1c]*/
680
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000681
Fred Drake71b63ff2002-06-28 22:29:01 +0000682static PyObject *
683get_parse_result(xmlparseobject *self, int rv)
684{
685 if (PyErr_Occurred()) {
686 return NULL;
687 }
688 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000689 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000690 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000691 if (flush_character_buffer(self) < 0) {
692 return NULL;
693 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000694 return PyLong_FromLong(rv);
Fred Drake71b63ff2002-06-28 22:29:01 +0000695}
696
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200697#define MAX_CHUNK_SIZE (1 << 20)
698
Brett Cannond0aeda82014-08-22 14:23:20 -0400699/*[clinic input]
700pyexpat.xmlparser.Parse
701
702 data: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200703 isfinal: bool(accept={int}) = False
Brett Cannond0aeda82014-08-22 14:23:20 -0400704 /
705
706Parse XML data.
707
708`isfinal' should be true at end of input.
709[clinic start generated code]*/
710
Brett Cannond0aeda82014-08-22 14:23:20 -0400711static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400712pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyObject *data,
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +0300713 int isfinal)
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200714/*[clinic end generated code: output=f4db843dd1f4ed4b input=eb616027bfa9847f]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400715{
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200716 const char *s;
717 Py_ssize_t slen;
718 Py_buffer view;
719 int rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000720
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200721 if (PyUnicode_Check(data)) {
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200722 view.buf = NULL;
Serhiy Storchaka36b365c2013-02-04 18:28:01 +0200723 s = PyUnicode_AsUTF8AndSize(data, &slen);
724 if (s == NULL)
725 return NULL;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200726 /* Explicitly set UTF-8 encoding. Return code ignored. */
727 (void)XML_SetEncoding(self->itself, "utf-8");
728 }
729 else {
730 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
731 return NULL;
732 s = view.buf;
733 slen = view.len;
734 }
735
736 while (slen > MAX_CHUNK_SIZE) {
737 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
738 if (!rc)
739 goto done;
740 s += MAX_CHUNK_SIZE;
741 slen -= MAX_CHUNK_SIZE;
742 }
Serhiy Storchakafad85aa2015-11-07 15:42:38 +0200743 Py_BUILD_ASSERT(MAX_CHUNK_SIZE <= INT_MAX);
744 assert(slen <= INT_MAX);
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +0300745 rc = XML_Parse(self->itself, s, (int)slen, isfinal);
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200746
747done:
748 if (view.buf != NULL)
749 PyBuffer_Release(&view);
750 return get_parse_result(self, rc);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000751}
752
Fred Drakeca1f4262000-09-21 20:10:23 +0000753/* File reading copied from cPickle */
754
/* Size in bytes of the Expat buffer requested, and of each read() request
   issued, per iteration of pyexpat_xmlparser_ParseFile(). */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000755#define BUF_SIZE 2048
756
Fred Drake0582df92000-07-12 04:49:00 +0000757static int
758readinst(char *buf, int buf_size, PyObject *meth)
759{
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000760 PyObject *str;
761 Py_ssize_t len;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200762 const char *ptr;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000763
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000764 str = PyObject_CallFunction(meth, "n", buf_size);
Martin v. Löwis9171f022004-10-13 19:50:11 +0000765 if (str == NULL)
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000766 goto error;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000767
Christian Heimes72b710a2008-05-26 13:28:38 +0000768 if (PyBytes_Check(str))
769 ptr = PyBytes_AS_STRING(str);
Christian Heimes9c4756e2008-05-26 13:22:05 +0000770 else if (PyByteArray_Check(str))
771 ptr = PyByteArray_AS_STRING(str);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000772 else {
Fred Drake71b63ff2002-06-28 22:29:01 +0000773 PyErr_Format(PyExc_TypeError,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000774 "read() did not return a bytes object (type=%.400s)",
Christian Heimes90aa7642007-12-19 02:45:37 +0000775 Py_TYPE(str)->tp_name);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000776 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000777 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000778 len = Py_SIZE(str);
Fred Drake0582df92000-07-12 04:49:00 +0000779 if (len > buf_size) {
780 PyErr_Format(PyExc_ValueError,
781 "read() returned too much data: "
Victor Stinner9d6f9362011-01-04 22:00:04 +0000782 "%i bytes requested, %zd returned",
Fred Drake0582df92000-07-12 04:49:00 +0000783 buf_size, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000784 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000785 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000786 memcpy(buf, ptr, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000787 Py_DECREF(str);
788 /* len <= buf_size <= INT_MAX */
Victor Stinner0fcab4a2011-01-04 12:59:15 +0000789 return (int)len;
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000790
791error:
792 Py_XDECREF(str);
793 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000794}
795
Brett Cannond0aeda82014-08-22 14:23:20 -0400796/*[clinic input]
797pyexpat.xmlparser.ParseFile
798
799 file: object
800 /
801
802Parse XML data from file-like object.
803[clinic start generated code]*/
804
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000805static PyObject *
Brett Cannond0aeda82014-08-22 14:23:20 -0400806pyexpat_xmlparser_ParseFile(xmlparseobject *self, PyObject *file)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300807/*[clinic end generated code: output=2adc6a13100cc42b input=fbb5a12b6038d735]*/
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000808{
Fred Drake0582df92000-07-12 04:49:00 +0000809 int rv = 1;
Fred Drake0582df92000-07-12 04:49:00 +0000810 PyObject *readmethod = NULL;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200811 _Py_IDENTIFIER(read);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000812
Brett Cannond0aeda82014-08-22 14:23:20 -0400813 readmethod = _PyObject_GetAttrId(file, &PyId_read);
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000814 if (readmethod == NULL) {
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000815 PyErr_SetString(PyExc_TypeError,
816 "argument must have 'read' attribute");
817 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000818 }
819 for (;;) {
820 int bytes_read;
821 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
Fred Drake7b6caff2003-07-21 17:05:56 +0000822 if (buf == NULL) {
Fred Drakef239c6d2003-07-21 17:22:43 +0000823 Py_XDECREF(readmethod);
Ned Deilye7d532f2014-03-27 16:39:58 -0700824 return get_parse_result(self, 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000825 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000826
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000827 bytes_read = readinst(buf, BUF_SIZE, readmethod);
828 if (bytes_read < 0) {
829 Py_DECREF(readmethod);
830 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000831 }
832 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000833 if (PyErr_Occurred()) {
834 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000835 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000836 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000837
Fred Drake0582df92000-07-12 04:49:00 +0000838 if (!rv || bytes_read == 0)
839 break;
840 }
Fred Drake7b6caff2003-07-21 17:05:56 +0000841 Py_XDECREF(readmethod);
Fred Drake71b63ff2002-06-28 22:29:01 +0000842 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000843}
844
Brett Cannond0aeda82014-08-22 14:23:20 -0400845/*[clinic input]
846pyexpat.xmlparser.SetBase
847
848 base: str
849 /
850
851Set the base URL for the parser.
852[clinic start generated code]*/
853
Brett Cannond0aeda82014-08-22 14:23:20 -0400854static PyObject *
855pyexpat_xmlparser_SetBase_impl(xmlparseobject *self, const char *base)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300856/*[clinic end generated code: output=c212ddceb607b539 input=c684e5de895ee1a8]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400857{
Fred Drake0582df92000-07-12 04:49:00 +0000858 if (!XML_SetBase(self->itself, base)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000859 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000860 }
Brett Cannond0aeda82014-08-22 14:23:20 -0400861 Py_RETURN_NONE;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000862}
863
Brett Cannond0aeda82014-08-22 14:23:20 -0400864/*[clinic input]
865pyexpat.xmlparser.GetBase
866
867Return base URL string for the parser.
868[clinic start generated code]*/
869
Brett Cannond0aeda82014-08-22 14:23:20 -0400870static PyObject *
871pyexpat_xmlparser_GetBase_impl(xmlparseobject *self)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300872/*[clinic end generated code: output=2886cb21f9a8739a input=918d71c38009620e]*/
Fred Drake0582df92000-07-12 04:49:00 +0000873{
Fred Drake0582df92000-07-12 04:49:00 +0000874 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000875}
876
Brett Cannond0aeda82014-08-22 14:23:20 -0400877/*[clinic input]
878pyexpat.xmlparser.GetInputContext
879
880Return the untranslated text of the input that caused the current event.
881
882If the event was generated by a large amount of text (such as a start tag
883for an element with many attributes), not all of the text may be available.
884[clinic start generated code]*/
885
Brett Cannond0aeda82014-08-22 14:23:20 -0400886static PyObject *
887pyexpat_xmlparser_GetInputContext_impl(xmlparseobject *self)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300888/*[clinic end generated code: output=a88026d683fc22cc input=034df8712db68379]*/
Fred Drakebd6101c2001-02-14 18:29:45 +0000889{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000890 if (self->in_callback) {
891 int offset, size;
892 const char *buffer
893 = XML_GetInputContext(self->itself, &offset, &size);
Fred Drakebd6101c2001-02-14 18:29:45 +0000894
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000895 if (buffer != NULL)
Christian Heimes72b710a2008-05-26 13:28:38 +0000896 return PyBytes_FromStringAndSize(buffer + offset,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000897 size - offset);
898 else
899 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000900 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000901 else
902 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000903}
Fred Drakebd6101c2001-02-14 18:29:45 +0000904
Brett Cannond0aeda82014-08-22 14:23:20 -0400905/*[clinic input]
906pyexpat.xmlparser.ExternalEntityParserCreate
907
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700908 context: str(accept={str, NoneType})
Brett Cannond0aeda82014-08-22 14:23:20 -0400909 encoding: str = NULL
910 /
911
912Create a parser for parsing an external entity based on the information passed to the ExternalEntityRefHandler.
913[clinic start generated code]*/
914
Brett Cannond0aeda82014-08-22 14:23:20 -0400915static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400916pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
917 const char *context,
918 const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700919/*[clinic end generated code: output=535cda9d7a0fbcd6 input=b906714cc122c322]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400920{
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000921 xmlparseobject *new_parser;
922 int i;
923
Martin v. Löwis894258c2001-09-23 10:20:10 +0000924 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake85d835f2001-02-08 15:39:08 +0000925 if (new_parser == NULL)
926 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +0000927 new_parser->buffer_size = self->buffer_size;
928 new_parser->buffer_used = 0;
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000929 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000930 new_parser->ordered_attributes = self->ordered_attributes;
931 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +0000932 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +0000933 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000934 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000935 encoding);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000936 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000937 new_parser->intern = self->intern;
938 Py_XINCREF(new_parser->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +0000939 PyObject_GC_Track(new_parser);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000940
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000941 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +0200942 new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000943 if (new_parser->buffer == NULL) {
944 Py_DECREF(new_parser);
945 return PyErr_NoMemory();
946 }
947 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000948 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +0000949 Py_DECREF(new_parser);
950 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000951 }
952
953 XML_SetUserData(new_parser->itself, (void *)new_parser);
954
955 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +0000956 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +0000957 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000958
Serhiy Storchaka1a1ff292015-02-16 13:28:22 +0200959 new_parser->handlers = PyMem_New(PyObject *, i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000960 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +0000961 Py_DECREF(new_parser);
962 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000963 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000964 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000965
966 /* then copy handlers from self */
967 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +0000968 PyObject *handler = self->handlers[i];
969 if (handler != NULL) {
970 Py_INCREF(handler);
971 new_parser->handlers[i] = handler;
972 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +0000973 handler_info[i].handler);
974 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000975 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000976 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000977}
978
Brett Cannond0aeda82014-08-22 14:23:20 -0400979/*[clinic input]
980pyexpat.xmlparser.SetParamEntityParsing
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000981
Brett Cannond0aeda82014-08-22 14:23:20 -0400982 flag: int
983 /
984
985Controls parsing of parameter entities (including the external DTD subset).
986
987Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,
988XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and
989XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag
990was successful.
991[clinic start generated code]*/
992
Brett Cannond0aeda82014-08-22 14:23:20 -0400993static PyObject *
994pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject *self, int flag)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300995/*[clinic end generated code: output=18668ee8e760d64c input=8aea19b4b15e9af1]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400996{
997 flag = XML_SetParamEntityParsing(self->itself, flag);
Christian Heimes217cfd12007-12-02 14:31:20 +0000998 return PyLong_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000999}
1000
Martin v. Löwisc847f402003-01-21 11:09:21 +00001001
1002#if XML_COMBINED_VERSION >= 19505
Brett Cannond0aeda82014-08-22 14:23:20 -04001003/*[clinic input]
1004pyexpat.xmlparser.UseForeignDTD
1005
1006 flag: bool = True
1007 /
1008
1009Allows the application to provide an artificial external subset if one is not specified as part of the document instance.
1010
1011This readily allows the use of a 'default' document type controlled by the
1012application, while still getting the advantage of providing document type
1013information to the parser. 'flag' defaults to True if not provided.
1014[clinic start generated code]*/
1015
Brett Cannond0aeda82014-08-22 14:23:20 -04001016static PyObject *
1017pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, int flag)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001018/*[clinic end generated code: output=cfaa9aa50bb0f65c input=78144c519d116a6e]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001019{
Martin v. Löwis069dde22003-01-21 10:58:18 +00001020 enum XML_Error rc;
Brett Cannond0aeda82014-08-22 14:23:20 -04001021
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001022 rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
Martin v. Löwis069dde22003-01-21 10:58:18 +00001023 if (rc != XML_ERROR_NONE) {
1024 return set_error(self, rc);
1025 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001026 Py_RETURN_NONE;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001027}
Martin v. Löwisc847f402003-01-21 11:09:21 +00001028#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001029
/* Method table for xmlparser objects.  Each entry is a *_METHODDEF macro
   (presumably emitted by Argument Clinic for the *_impl functions above --
   confirm in the generated header).  UseForeignDTD is listed only when the
   Expat version is at least 1.95.5, mirroring the guard around its
   definition. */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001030static struct PyMethodDef xmlparse_methods[] = {
Brett Cannond0aeda82014-08-22 14:23:20 -04001031 PYEXPAT_XMLPARSER_PARSE_METHODDEF
1032 PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
1033 PYEXPAT_XMLPARSER_SETBASE_METHODDEF
1034 PYEXPAT_XMLPARSER_GETBASE_METHODDEF
1035 PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
1036 PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
1037 PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
Martin v. Löwisc847f402003-01-21 11:09:21 +00001038#if XML_COMBINED_VERSION >= 19505
Brett Cannond0aeda82014-08-22 14:23:20 -04001039 PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
Martin v. Löwisc847f402003-01-21 11:09:21 +00001040#endif
Brett Cannond0aeda82014-08-22 14:23:20 -04001041 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001042};
1043
1044/* ---------- */
1045
1046
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001047
Fred Drake71b63ff2002-06-28 22:29:01 +00001048/* pyexpat international encoding support.
1049 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001050*/
1051
Fred Drake71b63ff2002-06-28 22:29:01 +00001052static int
1053PyUnknownEncodingHandler(void *encodingHandlerData,
1054 const XML_Char *name,
1055 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001056{
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001057 static unsigned char template_buffer[256] = {0};
1058 PyObject* u;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001059 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001060 void *data;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001061 unsigned int kind;
Fred Drake71b63ff2002-06-28 22:29:01 +00001062
Victor Stinner9e09c262013-07-18 23:17:01 +02001063 if (PyErr_Occurred())
1064 return XML_STATUS_ERROR;
1065
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001066 if (template_buffer[1] == 0) {
1067 for (i = 0; i < 256; i++)
1068 template_buffer[i] = i;
Tim Peters63cb99e2001-02-17 18:12:50 +00001069 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001070
1071 u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
Christian Heimesb5821552013-06-29 20:43:13 +02001072 if (u == NULL || PyUnicode_READY(u)) {
Christian Heimes72172422013-06-29 21:49:27 +02001073 Py_XDECREF(u);
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001074 return XML_STATUS_ERROR;
Christian Heimesb5821552013-06-29 20:43:13 +02001075 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001076
1077 if (PyUnicode_GET_LENGTH(u) != 256) {
1078 Py_DECREF(u);
1079 PyErr_SetString(PyExc_ValueError,
1080 "multi-byte encodings are not supported");
1081 return XML_STATUS_ERROR;
1082 }
1083
1084 kind = PyUnicode_KIND(u);
1085 data = PyUnicode_DATA(u);
1086 for (i = 0; i < 256; i++) {
1087 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1088 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1089 info->map[i] = ch;
1090 else
1091 info->map[i] = -1;
1092 }
1093
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001094 info->data = NULL;
1095 info->convert = NULL;
1096 info->release = NULL;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001097 Py_DECREF(u);
1098
1099 return XML_STATUS_OK;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001100}
1101
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001102
1103static PyObject *
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001104newxmlparseobject(const char *encoding, const char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001105{
1106 int i;
1107 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001108
Martin v. Löwis894258c2001-09-23 10:20:10 +00001109 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake0582df92000-07-12 04:49:00 +00001110 if (self == NULL)
1111 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001112
Fred Drake2a3d7db2002-06-28 22:56:48 +00001113 self->buffer = NULL;
1114 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1115 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001116 self->ordered_attributes = 0;
1117 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001118 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001119 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001120 self->handlers = NULL;
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001121 self->intern = intern;
1122 Py_XINCREF(self->intern);
1123 PyObject_GC_Track(self);
1124
Christian Heimesfa535f52013-07-07 17:35:11 +02001125 /* namespace_separator is either NULL or contains one char + \0 */
1126 self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
1127 namespace_separator);
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001128 if (self->itself == NULL) {
1129 PyErr_SetString(PyExc_RuntimeError,
1130 "XML_ParserCreate failed");
1131 Py_DECREF(self);
1132 return NULL;
1133 }
Victor Stinner23ec4b52017-06-15 00:54:36 +02001134#if XML_COMBINED_VERSION >= 20100
1135 /* This feature was added upstream in libexpat 2.1.0. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001136 XML_SetHashSalt(self->itself,
Christian Heimes985ecdc2013-11-20 11:46:18 +01001137 (unsigned long)_Py_HashSecret.expat.hashsalt);
Gregory P. Smith25227712012-03-14 18:10:37 -07001138#endif
Fred Drake0582df92000-07-12 04:49:00 +00001139 XML_SetUserData(self->itself, (void *)self);
Fred Drake7c75bf22002-07-01 14:02:31 +00001140 XML_SetUnknownEncodingHandler(self->itself,
1141 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001142
Fred Drake2a3d7db2002-06-28 22:56:48 +00001143 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001144 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001145
Serhiy Storchaka1a1ff292015-02-16 13:28:22 +02001146 self->handlers = PyMem_New(PyObject *, i);
Fred Drake7c75bf22002-07-01 14:02:31 +00001147 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001148 Py_DECREF(self);
1149 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001150 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001151 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001152
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001153 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001154}
1155
1156
1157static void
Fred Drake0582df92000-07-12 04:49:00 +00001158xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001159{
Fred Drake0582df92000-07-12 04:49:00 +00001160 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001161 PyObject_GC_UnTrack(self);
Fred Drake85d835f2001-02-08 15:39:08 +00001162 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001163 XML_ParserFree(self->itself);
1164 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001165
Fred Drake85d835f2001-02-08 15:39:08 +00001166 if (self->handlers != NULL) {
Serhiy Storchaka1ed017a2015-12-27 15:51:32 +02001167 for (i = 0; handler_info[i].name != NULL; i++)
1168 Py_CLEAR(self->handlers[i]);
Victor Stinnerb6404912013-07-07 16:21:41 +02001169 PyMem_Free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001170 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001171 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001172 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001173 PyMem_Free(self->buffer);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001174 self->buffer = NULL;
1175 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001176 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001177 PyObject_GC_Del(self);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001178}
1179
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001180
1181static PyObject *
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001182xmlparse_handler_getter(xmlparseobject *self, struct HandlerInfo *hi)
Fred Drake71b63ff2002-06-28 22:29:01 +00001183{
Victor Stinner28f468c2018-11-22 13:21:43 +01001184 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1185 int handlernum = (int)(hi - handler_info);
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001186 PyObject *result = self->handlers[handlernum];
1187 if (result == NULL)
1188 result = Py_None;
Fred Drake71b63ff2002-06-28 22:29:01 +00001189 Py_INCREF(result);
1190 return result;
1191}
1192
Fred Drake6f987622000-08-25 18:03:30 +00001193static int
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001194xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *hi)
Fred Drake0582df92000-07-12 04:49:00 +00001195{
Victor Stinner28f468c2018-11-22 13:21:43 +01001196 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1197 int handlernum = (int)(hi - handler_info);
Fred Drake85d835f2001-02-08 15:39:08 +00001198 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001199 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1200 return -1;
1201 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001202 if (handlernum == CharacterData) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001203 /* If we're changing the character data handler, flush all
1204 * cached data with the old handler. Not sure there's a
1205 * "right" thing to do, though, but this probably won't
1206 * happen.
1207 */
1208 if (flush_character_buffer(self) < 0)
1209 return -1;
1210 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001211
1212 xmlhandler c_handler = NULL;
1213 if (v == Py_None) {
1214 /* If this is the character data handler, and a character
1215 data handler is already active, we need to be more
1216 careful. What we can safely do is replace the existing
1217 character data handler callback function with a no-op
1218 function that will refuse to call Python. The downside
1219 is that this doesn't completely remove the character
1220 data handler from the C layer if there's any callback
1221 active, so Expat does a little more work than it
1222 otherwise would, but that's really an odd case. A more
1223 elaborate system of handlers and state could remove the
1224 C handler more effectively. */
1225 if (handlernum == CharacterData && self->in_callback)
1226 c_handler = noop_character_data_handler;
1227 v = NULL;
1228 }
1229 else if (v != NULL) {
1230 Py_INCREF(v);
1231 c_handler = handler_info[handlernum].handler;
1232 }
1233 Py_XSETREF(self->handlers[handlernum], v);
1234 handler_info[handlernum].setter(self->itself, c_handler);
1235 return 0;
1236}
1237
/* INT_GETTER(name) defines xmlparse_<name>_getter(), a getter that returns
   the result of Expat's XML_Get<name>() for this parser, cast to long, as a
   Python int.  It is instantiated below for the error position and current
   position queries, then undefined again. */
1238#define INT_GETTER(name) \
1239 static PyObject * \
1240 xmlparse_##name##_getter(xmlparseobject *self, void *closure) \
1241 { \
1242 return PyLong_FromLong((long) XML_Get##name(self->itself)); \
1243 }
1244INT_GETTER(ErrorCode)
1245INT_GETTER(ErrorLineNumber)
1246INT_GETTER(ErrorColumnNumber)
1247INT_GETTER(ErrorByteIndex)
1248INT_GETTER(CurrentLineNumber)
1249INT_GETTER(CurrentColumnNumber)
1250INT_GETTER(CurrentByteIndex)
1251
1252#undef INT_GETTER
1253
1254static PyObject *
1255xmlparse_buffer_text_getter(xmlparseobject *self, void *closure)
1256{
1257 return PyBool_FromLong(self->buffer != NULL);
1258}
1259
1260static int
1261xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
1262{
1263 if (v == NULL) {
1264 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1265 return -1;
1266 }
1267 int b = PyObject_IsTrue(v);
1268 if (b < 0)
1269 return -1;
1270 if (b) {
1271 if (self->buffer == NULL) {
1272 self->buffer = PyMem_Malloc(self->buffer_size);
1273 if (self->buffer == NULL) {
1274 PyErr_NoMemory();
1275 return -1;
1276 }
1277 self->buffer_used = 0;
1278 }
1279 }
1280 else if (self->buffer != NULL) {
1281 if (flush_character_buffer(self) < 0)
1282 return -1;
1283 PyMem_Free(self->buffer);
1284 self->buffer = NULL;
1285 }
1286 return 0;
1287}
1288
1289static PyObject *
1290xmlparse_buffer_size_getter(xmlparseobject *self, void *closure)
1291{
1292 return PyLong_FromLong((long) self->buffer_size);
1293}
1294
1295static int
1296xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
1297{
1298 if (v == NULL) {
1299 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1300 return -1;
1301 }
1302 long new_buffer_size;
1303 if (!PyLong_Check(v)) {
1304 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1305 return -1;
1306 }
1307
1308 new_buffer_size = PyLong_AsLong(v);
1309 if (new_buffer_size <= 0) {
1310 if (!PyErr_Occurred())
1311 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1312 return -1;
1313 }
1314
1315 /* trivial case -- no change */
1316 if (new_buffer_size == self->buffer_size) {
Fred Drake6f987622000-08-25 18:03:30 +00001317 return 0;
1318 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001319
1320 /* check maximum */
1321 if (new_buffer_size > INT_MAX) {
1322 char errmsg[100];
1323 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1324 PyErr_SetString(PyExc_ValueError, errmsg);
1325 return -1;
1326 }
1327
1328 if (self->buffer != NULL) {
1329 /* there is already a buffer */
1330 if (self->buffer_used != 0) {
1331 if (flush_character_buffer(self) < 0) {
1332 return -1;
1333 }
1334 }
1335 /* free existing buffer */
1336 PyMem_Free(self->buffer);
1337 }
1338 self->buffer = PyMem_Malloc(new_buffer_size);
1339 if (self->buffer == NULL) {
1340 PyErr_NoMemory();
1341 return -1;
1342 }
1343 self->buffer_size = new_buffer_size;
1344 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001345}
1346
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001347static PyObject *
1348xmlparse_buffer_used_getter(xmlparseobject *self, void *closure)
1349{
1350 return PyLong_FromLong((long) self->buffer_used);
1351}
1352
1353static PyObject *
1354xmlparse_namespace_prefixes_getter(xmlparseobject *self, void *closure)
1355{
1356 return PyBool_FromLong(self->ns_prefixes);
1357}
1358
1359static int
1360xmlparse_namespace_prefixes_setter(xmlparseobject *self, PyObject *v, void *closure)
1361{
1362 if (v == NULL) {
1363 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1364 return -1;
1365 }
1366 int b = PyObject_IsTrue(v);
1367 if (b < 0)
1368 return -1;
1369 self->ns_prefixes = b;
1370 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1371 return 0;
1372}
1373
1374static PyObject *
1375xmlparse_ordered_attributes_getter(xmlparseobject *self, void *closure)
1376{
1377 return PyBool_FromLong(self->ordered_attributes);
1378}
1379
1380static int
1381xmlparse_ordered_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1382{
1383 if (v == NULL) {
1384 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1385 return -1;
1386 }
1387 int b = PyObject_IsTrue(v);
1388 if (b < 0)
1389 return -1;
1390 self->ordered_attributes = b;
1391 return 0;
1392}
1393
1394static PyObject *
1395xmlparse_specified_attributes_getter(xmlparseobject *self, void *closure)
1396{
1397 return PyBool_FromLong((long) self->specified_attributes);
1398}
1399
1400static int
1401xmlparse_specified_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1402{
1403 if (v == NULL) {
1404 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1405 return -1;
1406 }
1407 int b = PyObject_IsTrue(v);
1408 if (b < 0)
1409 return -1;
1410 self->specified_attributes = b;
1411 return 0;
1412}
1413
/* Direct struct members exposed as attributes: only `intern`, read-only. */
1414static PyMemberDef xmlparse_members[] = {
1415 {"intern", T_OBJECT, offsetof(xmlparseobject, intern), READONLY, NULL},
1416 {NULL}
1417};
1418
/* Computed attributes of xmlparser objects.  XMLPARSE_GETTER_DEF(name)
   expands to a read-only entry bound to xmlparse_<name>_getter;
   XMLPARSE_GETTER_SETTER_DEF(name) additionally binds
   xmlparse_<name>_setter.  Both helper macros are undefined again once the
   table has been built. */
1419#define XMLPARSE_GETTER_DEF(name) \
1420 {#name, (getter)xmlparse_##name##_getter, NULL, NULL},
1421#define XMLPARSE_GETTER_SETTER_DEF(name) \
1422 {#name, (getter)xmlparse_##name##_getter, \
1423 (setter)xmlparse_##name##_setter, NULL},
1424
1425static PyGetSetDef xmlparse_getsetlist[] = {
1426 XMLPARSE_GETTER_DEF(ErrorCode)
1427 XMLPARSE_GETTER_DEF(ErrorLineNumber)
1428 XMLPARSE_GETTER_DEF(ErrorColumnNumber)
1429 XMLPARSE_GETTER_DEF(ErrorByteIndex)
1430 XMLPARSE_GETTER_DEF(CurrentLineNumber)
1431 XMLPARSE_GETTER_DEF(CurrentColumnNumber)
1432 XMLPARSE_GETTER_DEF(CurrentByteIndex)
1433 XMLPARSE_GETTER_SETTER_DEF(buffer_size)
1434 XMLPARSE_GETTER_SETTER_DEF(buffer_text)
1435 XMLPARSE_GETTER_DEF(buffer_used)
1436 XMLPARSE_GETTER_SETTER_DEF(namespace_prefixes)
1437 XMLPARSE_GETTER_SETTER_DEF(ordered_attributes)
1438 XMLPARSE_GETTER_SETTER_DEF(specified_attributes)
1439 {NULL},
1440};
1441
1442#undef XMLPARSE_GETTER_DEF
1443#undef XMLPARSE_GETTER_SETTER_DEF
1444
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001445static int
1446xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1447{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001448 int i;
1449 for (i = 0; handler_info[i].name != NULL; i++)
1450 Py_VISIT(op->handlers[i]);
Fred Drakecde79132001-04-25 16:01:30 +00001451 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001452}
1453
1454static int
1455xmlparse_clear(xmlparseobject *op)
1456{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001457 clear_handlers(op, 0);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001458 Py_CLEAR(op->intern);
Fred Drakecde79132001-04-25 16:01:30 +00001459 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001460}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001461
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001462PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001463
1464static PyTypeObject Xmlparsetype = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001465 PyVarObject_HEAD_INIT(NULL, 0)
1466 "pyexpat.xmlparser", /*tp_name*/
Antoine Pitrou23683ef2011-01-04 00:00:31 +00001467 sizeof(xmlparseobject), /*tp_basicsize*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001468 0, /*tp_itemsize*/
1469 /* methods */
1470 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1471 (printfunc)0, /*tp_print*/
1472 0, /*tp_getattr*/
Alexander Belopolskye239d232010-12-08 23:31:48 +00001473 0, /*tp_setattr*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001474 0, /*tp_reserved*/
1475 (reprfunc)0, /*tp_repr*/
1476 0, /*tp_as_number*/
1477 0, /*tp_as_sequence*/
1478 0, /*tp_as_mapping*/
1479 (hashfunc)0, /*tp_hash*/
1480 (ternaryfunc)0, /*tp_call*/
1481 (reprfunc)0, /*tp_str*/
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001482 (getattrofunc)0, /* tp_getattro */
1483 (setattrofunc)0, /* tp_setattro */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001484 0, /* tp_as_buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001485 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001486 Xmlparsetype__doc__, /* tp_doc - Documentation string */
1487 (traverseproc)xmlparse_traverse, /* tp_traverse */
1488 (inquiry)xmlparse_clear, /* tp_clear */
1489 0, /* tp_richcompare */
1490 0, /* tp_weaklistoffset */
1491 0, /* tp_iter */
1492 0, /* tp_iternext */
1493 xmlparse_methods, /* tp_methods */
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001494 xmlparse_members, /* tp_members */
1495 xmlparse_getsetlist, /* tp_getset */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001496};
1497
1498/* End of code for xmlparser objects */
1499/* -------------------------------------------------------- */
1500
Brett Cannond0aeda82014-08-22 14:23:20 -04001501/*[clinic input]
1502pyexpat.ParserCreate
1503
Larry Hastingsdbfdc382015-05-04 06:59:46 -07001504 encoding: str(accept={str, NoneType}) = NULL
1505 namespace_separator: str(accept={str, NoneType}) = NULL
Brett Cannond0aeda82014-08-22 14:23:20 -04001506 intern: object = NULL
1507
1508Return a new XML parser object.
1509[clinic start generated code]*/
1510
Brett Cannond0aeda82014-08-22 14:23:20 -04001511static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001512pyexpat_ParserCreate_impl(PyObject *module, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04001513 const char *namespace_separator, PyObject *intern)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001514/*[clinic end generated code: output=295c0cf01ab1146c input=23d29704acad385d]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001515{
Fred Drakeb91a36b2002-06-27 19:40:48 +00001516 PyObject *result;
1517 int intern_decref = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001518
Fred Drakecde79132001-04-25 16:01:30 +00001519 if (namespace_separator != NULL
1520 && strlen(namespace_separator) > 1) {
1521 PyErr_SetString(PyExc_ValueError,
1522 "namespace_separator must be at most one"
1523 " character, omitted, or None");
1524 return NULL;
1525 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001526 /* Explicitly passing None means no interning is desired.
1527 Not passing anything means that a new dictionary is used. */
1528 if (intern == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001529 intern = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001530 else if (intern == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001531 intern = PyDict_New();
1532 if (!intern)
1533 return NULL;
1534 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001535 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001536 else if (!PyDict_Check(intern)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001537 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1538 return NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001539 }
1540
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001541 result = newxmlparseobject(encoding, namespace_separator, intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001542 if (intern_decref) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001543 Py_DECREF(intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001544 }
1545 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001546}
1547
Brett Cannond0aeda82014-08-22 14:23:20 -04001548/*[clinic input]
1549pyexpat.ErrorString
1550
1551 code: long
1552 /
1553
1554Returns string error for given number.
1555[clinic start generated code]*/
1556
Brett Cannond0aeda82014-08-22 14:23:20 -04001557static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001558pyexpat_ErrorString_impl(PyObject *module, long code)
1559/*[clinic end generated code: output=2feae50d166f2174 input=cc67de010d9e62b3]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001560{
Fred Drake0582df92000-07-12 04:49:00 +00001561 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001562}
1563
1564/* List of methods defined in the module */
1565
1566static struct PyMethodDef pyexpat_methods[] = {
Brett Cannond0aeda82014-08-22 14:23:20 -04001567 PYEXPAT_PARSERCREATE_METHODDEF
1568 PYEXPAT_ERRORSTRING_METHODDEF
1569 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001570};
1571
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001572/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001573
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001574PyDoc_STRVAR(pyexpat_module_documentation,
1575"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001576
Fred Drakecde79132001-04-25 16:01:30 +00001577/* Initialization function for the module */
1578
1579#ifndef MODULE_NAME
1580#define MODULE_NAME "pyexpat"
1581#endif
1582
1583#ifndef MODULE_INITFUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001584#define MODULE_INITFUNC PyInit_pyexpat
Fred Drakecde79132001-04-25 16:01:30 +00001585#endif
1586
Martin v. Löwis1a214512008-06-11 05:26:20 +00001587static struct PyModuleDef pyexpatmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001588 PyModuleDef_HEAD_INIT,
1589 MODULE_NAME,
1590 pyexpat_module_documentation,
1591 -1,
1592 pyexpat_methods,
1593 NULL,
1594 NULL,
1595 NULL,
1596 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001597};
1598
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001599static int init_handler_descrs(void)
1600{
1601 int i;
1602 assert(!PyType_HasFeature(&Xmlparsetype, Py_TPFLAGS_VALID_VERSION_TAG));
1603 for (i = 0; handler_info[i].name != NULL; i++) {
1604 struct HandlerInfo *hi = &handler_info[i];
1605 hi->getset.name = hi->name;
1606 hi->getset.get = (getter)xmlparse_handler_getter;
1607 hi->getset.set = (setter)xmlparse_handler_setter;
1608 hi->getset.closure = &handler_info[i];
1609
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001610 PyObject *descr = PyDescr_NewGetSet(&Xmlparsetype, &hi->getset);
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001611 if (descr == NULL)
1612 return -1;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001613
1614 if (PyDict_GetItemWithError(Xmlparsetype.tp_dict, PyDescr_NAME(descr))) {
1615 Py_DECREF(descr);
1616 continue;
1617 }
1618 else if (PyErr_Occurred()) {
1619 Py_DECREF(descr);
1620 return -1;
1621 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001622 if (PyDict_SetItem(Xmlparsetype.tp_dict, PyDescr_NAME(descr), descr) < 0) {
1623 Py_DECREF(descr);
1624 return -1;
1625 }
1626 Py_DECREF(descr);
1627 }
1628 return 0;
1629}
1630
Martin v. Löwis069dde22003-01-21 10:58:18 +00001631PyMODINIT_FUNC
1632MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001633{
1634 PyObject *m, *d;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001635 PyObject *errmod_name = PyUnicode_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001636 PyObject *errors_module;
1637 PyObject *modelmod_name;
1638 PyObject *model_module;
Georg Brandlb4dac712010-10-15 14:46:48 +00001639 PyObject *tmpnum, *tmpstr;
1640 PyObject *codes_dict;
1641 PyObject *rev_codes_dict;
1642 int res;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001643 static struct PyExpat_CAPI capi;
Georg Brandlb4dac712010-10-15 14:46:48 +00001644 PyObject *capi_object;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001645
Fred Drake6f987622000-08-25 18:03:30 +00001646 if (errmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001647 return NULL;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001648 modelmod_name = PyUnicode_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001649 if (modelmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001650 return NULL;
Fred Drake6f987622000-08-25 18:03:30 +00001651
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001652 if (PyType_Ready(&Xmlparsetype) < 0 || init_handler_descrs() < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001653 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001654
Fred Drake0582df92000-07-12 04:49:00 +00001655 /* Create the module and add the functions */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001656 m = PyModule_Create(&pyexpatmodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001657 if (m == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001658 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001659
Fred Drake0582df92000-07-12 04:49:00 +00001660 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001661 if (ErrorObject == NULL) {
1662 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001663 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001664 if (ErrorObject == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001665 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001666 }
1667 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001668 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001669 Py_INCREF(ErrorObject);
1670 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001671 Py_INCREF(&Xmlparsetype);
1672 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001673
Fred Drake738293d2000-12-21 17:25:07 +00001674 PyModule_AddStringConstant(m, "EXPAT_VERSION",
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001675 XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001676 {
1677 XML_Expat_Version info = XML_ExpatVersionInfo();
1678 PyModule_AddObject(m, "version_info",
1679 Py_BuildValue("(iii)", info.major,
1680 info.minor, info.micro));
1681 }
Fred Drake0582df92000-07-12 04:49:00 +00001682 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001683 compiled, this should check and set native_encoding
1684 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001685 */
Fred Drake93adb692000-09-23 04:55:48 +00001686 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001687
Fred Drake93adb692000-09-23 04:55:48 +00001688 d = PyModule_GetDict(m);
Christian Heimes7a5457b2016-09-09 00:13:35 +02001689 if (d == NULL) {
1690 Py_DECREF(m);
1691 return NULL;
1692 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001693 errors_module = PyDict_GetItemWithError(d, errmod_name);
1694 if (errors_module == NULL && !PyErr_Occurred()) {
Fred Drakecde79132001-04-25 16:01:30 +00001695 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001696 if (errors_module != NULL) {
Eric Snow3f9eee62017-09-15 16:35:20 -06001697 _PyImport_SetModule(errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001698 /* gives away the reference to errors_module */
1699 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001700 }
1701 }
Fred Drake6f987622000-08-25 18:03:30 +00001702 Py_DECREF(errmod_name);
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001703 model_module = PyDict_GetItemWithError(d, modelmod_name);
1704 if (model_module == NULL && !PyErr_Occurred()) {
Fred Drakecde79132001-04-25 16:01:30 +00001705 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001706 if (model_module != NULL) {
Eric Snow3f9eee62017-09-15 16:35:20 -06001707 _PyImport_SetModule(modelmod_name, model_module);
Fred Drake85d835f2001-02-08 15:39:08 +00001708 /* gives away the reference to model_module */
1709 PyModule_AddObject(m, "model", model_module);
1710 }
1711 }
1712 Py_DECREF(modelmod_name);
Christian Heimes7a5457b2016-09-09 00:13:35 +02001713 if (errors_module == NULL || model_module == NULL) {
Fred Drake85d835f2001-02-08 15:39:08 +00001714 /* Don't core dump later! */
Christian Heimes7a5457b2016-09-09 00:13:35 +02001715 Py_DECREF(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001716 return NULL;
Christian Heimes7a5457b2016-09-09 00:13:35 +02001717 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001718
Martin v. Löwisc847f402003-01-21 11:09:21 +00001719#if XML_COMBINED_VERSION > 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001720 {
1721 const XML_Feature *features = XML_GetFeatureList();
1722 PyObject *list = PyList_New(0);
1723 if (list == NULL)
1724 /* just ignore it */
1725 PyErr_Clear();
1726 else {
1727 int i = 0;
1728 for (; features[i].feature != XML_FEATURE_END; ++i) {
1729 int ok;
1730 PyObject *item = Py_BuildValue("si", features[i].name,
1731 features[i].value);
1732 if (item == NULL) {
1733 Py_DECREF(list);
1734 list = NULL;
1735 break;
1736 }
1737 ok = PyList_Append(list, item);
1738 Py_DECREF(item);
1739 if (ok < 0) {
1740 PyErr_Clear();
1741 break;
1742 }
1743 }
1744 if (list != NULL)
1745 PyModule_AddObject(m, "features", list);
1746 }
1747 }
Martin v. Löwisc847f402003-01-21 11:09:21 +00001748#endif
Fred Drake6f987622000-08-25 18:03:30 +00001749
Georg Brandlb4dac712010-10-15 14:46:48 +00001750 codes_dict = PyDict_New();
1751 rev_codes_dict = PyDict_New();
1752 if (codes_dict == NULL || rev_codes_dict == NULL) {
1753 Py_XDECREF(codes_dict);
1754 Py_XDECREF(rev_codes_dict);
1755 return NULL;
1756 }
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001757
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001758#define MYCONST(name) \
Georg Brandlb4dac712010-10-15 14:46:48 +00001759 if (PyModule_AddStringConstant(errors_module, #name, \
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001760 XML_ErrorString(name)) < 0) \
Georg Brandlb4dac712010-10-15 14:46:48 +00001761 return NULL; \
1762 tmpnum = PyLong_FromLong(name); \
1763 if (tmpnum == NULL) return NULL; \
1764 res = PyDict_SetItemString(codes_dict, \
1765 XML_ErrorString(name), tmpnum); \
1766 if (res < 0) return NULL; \
1767 tmpstr = PyUnicode_FromString(XML_ErrorString(name)); \
1768 if (tmpstr == NULL) return NULL; \
1769 res = PyDict_SetItem(rev_codes_dict, tmpnum, tmpstr); \
1770 Py_DECREF(tmpstr); \
1771 Py_DECREF(tmpnum); \
1772 if (res < 0) return NULL; \
Fred Drake7bd9f412000-07-04 23:51:31 +00001773
Fred Drake0582df92000-07-12 04:49:00 +00001774 MYCONST(XML_ERROR_NO_MEMORY);
1775 MYCONST(XML_ERROR_SYNTAX);
1776 MYCONST(XML_ERROR_NO_ELEMENTS);
1777 MYCONST(XML_ERROR_INVALID_TOKEN);
1778 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1779 MYCONST(XML_ERROR_PARTIAL_CHAR);
1780 MYCONST(XML_ERROR_TAG_MISMATCH);
1781 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1782 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1783 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1784 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1785 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1786 MYCONST(XML_ERROR_ASYNC_ENTITY);
1787 MYCONST(XML_ERROR_BAD_CHAR_REF);
1788 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1789 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1790 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1791 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1792 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001793 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1794 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1795 MYCONST(XML_ERROR_NOT_STANDALONE);
Fred Drake283b6702004-08-04 22:28:16 +00001796 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1797 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1798 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1799 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1800 /* Added in Expat 1.95.7. */
1801 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1802 /* Added in Expat 1.95.8. */
1803 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1804 MYCONST(XML_ERROR_INCOMPLETE_PE);
1805 MYCONST(XML_ERROR_XML_DECL);
1806 MYCONST(XML_ERROR_TEXT_DECL);
1807 MYCONST(XML_ERROR_PUBLICID);
1808 MYCONST(XML_ERROR_SUSPENDED);
1809 MYCONST(XML_ERROR_NOT_SUSPENDED);
1810 MYCONST(XML_ERROR_ABORTED);
1811 MYCONST(XML_ERROR_FINISHED);
1812 MYCONST(XML_ERROR_SUSPEND_PE);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001813
Georg Brandlb4dac712010-10-15 14:46:48 +00001814 if (PyModule_AddStringConstant(errors_module, "__doc__",
1815 "Constants used to describe "
1816 "error conditions.") < 0)
1817 return NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001818
Georg Brandlb4dac712010-10-15 14:46:48 +00001819 if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0)
1820 return NULL;
1821 if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0)
1822 return NULL;
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001823
Fred Drake93adb692000-09-23 04:55:48 +00001824#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001825
Fred Drake85d835f2001-02-08 15:39:08 +00001826#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001827 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1828 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1829 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00001830#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001831
Fred Drake85d835f2001-02-08 15:39:08 +00001832#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1833 PyModule_AddStringConstant(model_module, "__doc__",
1834 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001835
Fred Drake85d835f2001-02-08 15:39:08 +00001836 MYCONST(XML_CTYPE_EMPTY);
1837 MYCONST(XML_CTYPE_ANY);
1838 MYCONST(XML_CTYPE_MIXED);
1839 MYCONST(XML_CTYPE_NAME);
1840 MYCONST(XML_CTYPE_CHOICE);
1841 MYCONST(XML_CTYPE_SEQ);
1842
1843 MYCONST(XML_CQUANT_NONE);
1844 MYCONST(XML_CQUANT_OPT);
1845 MYCONST(XML_CQUANT_REP);
1846 MYCONST(XML_CQUANT_PLUS);
1847#undef MYCONST
Fredrik Lundhc3345042005-12-13 19:49:55 +00001848
1849 /* initialize pyexpat dispatch table */
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001850 capi.size = sizeof(capi);
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001851 capi.magic = PyExpat_CAPI_MAGIC;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001852 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
1853 capi.MINOR_VERSION = XML_MINOR_VERSION;
1854 capi.MICRO_VERSION = XML_MICRO_VERSION;
1855 capi.ErrorString = XML_ErrorString;
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001856 capi.GetErrorCode = XML_GetErrorCode;
1857 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
1858 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001859 capi.Parse = XML_Parse;
1860 capi.ParserCreate_MM = XML_ParserCreate_MM;
1861 capi.ParserFree = XML_ParserFree;
1862 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
1863 capi.SetCommentHandler = XML_SetCommentHandler;
1864 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1865 capi.SetElementHandler = XML_SetElementHandler;
1866 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1867 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1868 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1869 capi.SetUserData = XML_SetUserData;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03001870 capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03001871 capi.SetEncoding = XML_SetEncoding;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001872 capi.DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
Christian Heimescb5778f2018-09-18 14:38:58 +02001873#if XML_COMBINED_VERSION >= 20100
1874 capi.SetHashSalt = XML_SetHashSalt;
1875#else
1876 capi.SetHashSalt = NULL;
1877#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001878
Benjamin Petersonb173f782009-05-05 22:31:58 +00001879 /* export using capsule */
1880 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001881 if (capi_object)
1882 PyModule_AddObject(m, "expat_CAPI", capi_object);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001883 return m;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001884}
1885
Fred Drake6f987622000-08-25 18:03:30 +00001886static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001887clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00001888{
Fred Drakecde79132001-04-25 16:01:30 +00001889 int i = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001890
Fred Drake71b63ff2002-06-28 22:29:01 +00001891 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001892 if (initial)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001893 self->handlers[i] = NULL;
1894 else {
Serhiy Storchaka1ed017a2015-12-27 15:51:32 +02001895 Py_CLEAR(self->handlers[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001896 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00001897 }
Fred Drakecde79132001-04-25 16:01:30 +00001898 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001899}
1900
Tim Peters0c322792002-07-17 16:49:03 +00001901static struct HandlerInfo handler_info[] = {
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001902
1903#define HANDLER_INFO(name) \
1904 {#name, (xmlhandlersetter)XML_Set##name, (xmlhandler)my_##name},
1905
1906 HANDLER_INFO(StartElementHandler)
1907 HANDLER_INFO(EndElementHandler)
1908 HANDLER_INFO(ProcessingInstructionHandler)
1909 HANDLER_INFO(CharacterDataHandler)
1910 HANDLER_INFO(UnparsedEntityDeclHandler)
1911 HANDLER_INFO(NotationDeclHandler)
1912 HANDLER_INFO(StartNamespaceDeclHandler)
1913 HANDLER_INFO(EndNamespaceDeclHandler)
1914 HANDLER_INFO(CommentHandler)
1915 HANDLER_INFO(StartCdataSectionHandler)
1916 HANDLER_INFO(EndCdataSectionHandler)
1917 HANDLER_INFO(DefaultHandler)
1918 HANDLER_INFO(DefaultHandlerExpand)
1919 HANDLER_INFO(NotStandaloneHandler)
1920 HANDLER_INFO(ExternalEntityRefHandler)
1921 HANDLER_INFO(StartDoctypeDeclHandler)
1922 HANDLER_INFO(EndDoctypeDeclHandler)
1923 HANDLER_INFO(EntityDeclHandler)
1924 HANDLER_INFO(XmlDeclHandler)
1925 HANDLER_INFO(ElementDeclHandler)
1926 HANDLER_INFO(AttlistDeclHandler)
Martin v. Löwisc847f402003-01-21 11:09:21 +00001927#if XML_COMBINED_VERSION >= 19504
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001928 HANDLER_INFO(SkippedEntityHandler)
Martin v. Löwisc847f402003-01-21 11:09:21 +00001929#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001930
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001931#undef HANDLER_INFO
1932
Fred Drake0582df92000-07-12 04:49:00 +00001933 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001934};