blob: ec684638ead1182e6839fd9980d9474ca8623aae [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Victor Stinner4a21e572020-04-15 02:35:41 +02004#include "structmember.h" // PyMemberDef
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00005#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00006#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00007
Fredrik Lundhc3345042005-12-13 19:49:55 +00008#include "pyexpat.h"
9
Brett Cannond0aeda82014-08-22 14:23:20 -040010/* Do not emit Clinic output to a file as that wreaks havoc with conditionally
11 included methods. */
12/*[clinic input]
13module pyexpat
14[clinic start generated code]*/
15/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/
16
Martin v. Löwisc847f402003-01-21 11:09:21 +000017#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
18
Christian Heimesfa535f52013-07-07 17:35:11 +020019static XML_Memory_Handling_Suite ExpatMemoryHandler = {
20 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
21
/* Index of each handler slot.  The values are used to index the
   per-parser `handlers` array (see have_handler()), so the order of the
   enumerators must not change. */
enum HandlerTypes {
    /* Document content */
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,

    /* Declarations, namespaces, comments and CDATA sections */
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,

    /* Default handlers and entity handling */
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,

    /* DTD related declarations */
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only compiled in when XML_COMBINED_VERSION is at least 19504. */
    SkippedEntity,
#endif
    /* Trailing enumerator; keeps the list well formed whether or not the
       conditional entry above is compiled in. */
    _DummyDecl
};
49
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -060050typedef struct {
51 PyTypeObject *xml_parse_type;
52 PyObject *error;
53} pyexpat_state;
54
55static inline pyexpat_state*
56pyexpat_get_state(PyObject *module)
57{
58 void *state = PyModule_GetState(module);
59 assert(state != NULL);
60 return (pyexpat_state *)state;
61}
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000062
63/* ----------------------------------------------------- */
64
65/* Declarations for objects of type xmlparser */
66
67typedef struct {
Fred Drake0582df92000-07-12 04:49:00 +000068 PyObject_HEAD
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000069
Fred Drake0582df92000-07-12 04:49:00 +000070 XML_Parser itself;
Fred Drake85d835f2001-02-08 15:39:08 +000071 int ordered_attributes; /* Return attributes as a list. */
72 int specified_attributes; /* Report only specified attributes. */
Fred Drakebd6101c2001-02-14 18:29:45 +000073 int in_callback; /* Is a callback active? */
Martin v. Löwis069dde22003-01-21 10:58:18 +000074 int ns_prefixes; /* Namespace-triplets mode? */
Fred Drake2a3d7db2002-06-28 22:56:48 +000075 XML_Char *buffer; /* Buffer used when accumulating characters */
76 /* NULL if not enabled */
77 int buffer_size; /* Size of buffer, in XML_Char units */
78 int buffer_used; /* Buffer units in use */
Fred Drakeb91a36b2002-06-27 19:40:48 +000079 PyObject *intern; /* Dictionary to intern strings */
Fred Drake0582df92000-07-12 04:49:00 +000080 PyObject **handlers;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000081} xmlparseobject;
82
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030083#include "clinic/pyexpat.c.h"
84
Fred Drake2a3d7db2002-06-28 22:56:48 +000085#define CHARACTER_DATA_BUFFER_SIZE 8192
86
Fred Drake117ac852002-09-24 16:24:54 +000087typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000088typedef void* xmlhandler;
89
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +000090struct HandlerInfo {
Fred Drake0582df92000-07-12 04:49:00 +000091 const char *name;
92 xmlhandlersetter setter;
93 xmlhandler handler;
Serhiy Storchaka55f82492018-10-19 18:00:51 +030094 PyGetSetDef getset;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000095};
96
Jeremy Hylton938ace62002-07-17 16:30:39 +000097static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000098
Fred Drakebd6101c2001-02-14 18:29:45 +000099/* Set an integer attribute on the error object; return true on success,
100 * false on an exception.
101 */
102static int
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200103set_error_attr(PyObject *err, const char *name, int value)
Fred Drakebd6101c2001-02-14 18:29:45 +0000104{
Christian Heimes217cfd12007-12-02 14:31:20 +0000105 PyObject *v = PyLong_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +0000106
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000107 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
108 Py_XDECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +0000109 return 0;
110 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000111 Py_DECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +0000112 return 1;
113}
114
115/* Build and set an Expat exception, including positioning
116 * information. Always returns NULL.
117 */
Fred Drake85d835f2001-02-08 15:39:08 +0000118static PyObject *
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600119set_error(pyexpat_state *state, xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000120{
121 PyObject *err;
Victor Stinner499dfcf2011-03-21 13:26:24 +0100122 PyObject *buffer;
Fred Drake85d835f2001-02-08 15:39:08 +0000123 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000124 int lineno = XML_GetErrorLineNumber(parser);
125 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000126
Victor Stinner499dfcf2011-03-21 13:26:24 +0100127 buffer = PyUnicode_FromFormat("%s: line %i, column %i",
128 XML_ErrorString(code), lineno, column);
129 if (buffer == NULL)
130 return NULL;
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600131 err = PyObject_CallOneArg(state->error, buffer);
Victor Stinner499dfcf2011-03-21 13:26:24 +0100132 Py_DECREF(buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000133 if ( err != NULL
134 && set_error_attr(err, "code", code)
135 && set_error_attr(err, "offset", column)
136 && set_error_attr(err, "lineno", lineno)) {
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600137 PyErr_SetObject(state->error, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000138 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000139 Py_XDECREF(err);
Fred Drake85d835f2001-02-08 15:39:08 +0000140 return NULL;
141}
142
Fred Drake71b63ff2002-06-28 22:29:01 +0000143static int
144have_handler(xmlparseobject *self, int type)
145{
146 PyObject *handler = self->handlers[type];
147 return handler != NULL;
148}
149
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000150/* Convert a string of XML_Chars into a Unicode string.
151 Returns None if str is a null pointer. */
152
Fred Drake0582df92000-07-12 04:49:00 +0000153static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000154conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000155{
Fred Drake71b63ff2002-06-28 22:29:01 +0000156 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000157 and hence in UTF-8. */
158 /* UTF-8 from Expat, Unicode desired */
159 if (str == NULL) {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200160 Py_RETURN_NONE;
Fred Drake0582df92000-07-12 04:49:00 +0000161 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000162 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000163}
164
Fred Drake0582df92000-07-12 04:49:00 +0000165static PyObject *
166conv_string_len_to_unicode(const XML_Char *str, int len)
167{
Fred Drake71b63ff2002-06-28 22:29:01 +0000168 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000169 and hence in UTF-8. */
170 /* UTF-8 from Expat, Unicode desired */
171 if (str == NULL) {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200172 Py_RETURN_NONE;
Fred Drake0582df92000-07-12 04:49:00 +0000173 }
Fred Drake6f987622000-08-25 18:03:30 +0000174 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000175}
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000176
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000177/* Callback routines */
178
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000179static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000180
Martin v. Löwis069dde22003-01-21 10:58:18 +0000181/* This handler is used when an error has been detected, in the hope
182 that actual parsing can be terminated early. This will only help
183 if an external entity reference is encountered. */
184static int
185error_external_entity_ref_handler(XML_Parser parser,
186 const XML_Char *context,
187 const XML_Char *base,
188 const XML_Char *systemId,
189 const XML_Char *publicId)
190{
191 return 0;
192}
193
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000194/* Dummy character data handler used when an error (exception) has
195 been detected, and the actual parsing can be terminated early.
196 This is needed since character data handler can't be safely removed
197 from within the character data handler, but can be replaced. It is
198 used only from the character data handler trampoline, and must be
199 used right after `flag_error()` is called. */
200static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201noop_character_data_handler(void *userData, const XML_Char *data, int len)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000202{
203 /* Do nothing. */
204}
205
Fred Drake6f987622000-08-25 18:03:30 +0000206static void
207flag_error(xmlparseobject *self)
208{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000209 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000210 XML_SetExternalEntityRefHandler(self->itself,
211 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000212}
213
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000214static PyObject*
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200215call_with_frame(const char *funcname, int lineno, PyObject* func, PyObject* args,
Fred Drake39689c52004-08-13 03:12:57 +0000216 xmlparseobject *self)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000217{
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200218 PyObject *res;
Fred Drakebd6101c2001-02-14 18:29:45 +0000219
Jeroen Demeyer1dbd0842019-07-11 17:57:32 +0200220 res = PyObject_Call(func, args, NULL);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000221 if (res == NULL) {
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200222 _PyTraceback_Add(funcname, __FILE__, lineno);
Fred Drake39689c52004-08-13 03:12:57 +0000223 XML_StopParser(self->itself, XML_FALSE);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000224 }
Fred Drakebd6101c2001-02-14 18:29:45 +0000225 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000226}
227
Fred Drakeb91a36b2002-06-27 19:40:48 +0000228static PyObject*
229string_intern(xmlparseobject *self, const char* str)
230{
Guido van Rossum4ca94712007-07-23 17:42:32 +0000231 PyObject *result = conv_string_to_unicode(str);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000232 PyObject *value;
Neal Norwitz484d9a42005-09-30 04:46:49 +0000233 /* result can be NULL if the unicode conversion failed. */
234 if (!result)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000236 if (!self->intern)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 return result;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200238 value = PyDict_GetItemWithError(self->intern, result);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000239 if (!value) {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200240 if (!PyErr_Occurred() &&
241 PyDict_SetItem(self->intern, result, result) == 0)
242 {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000243 return result;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200244 }
Zackery Spytz68def052018-10-19 00:57:38 -0600245 else {
246 Py_DECREF(result);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000247 return NULL;
Zackery Spytz68def052018-10-19 00:57:38 -0600248 }
Fred Drakeb91a36b2002-06-27 19:40:48 +0000249 }
250 Py_INCREF(value);
251 Py_DECREF(result);
252 return value;
253}
254
Fred Drake2a3d7db2002-06-28 22:56:48 +0000255/* Return 0 on success, -1 on exception.
256 * flag_error() will be called before return if needed.
257 */
258static int
259call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
260{
261 PyObject *args;
262 PyObject *temp;
263
Georg Brandlc01537f2010-10-15 16:26:08 +0000264 if (!have_handler(self, CharacterData))
265 return -1;
266
Fred Drake2a3d7db2002-06-28 22:56:48 +0000267 args = PyTuple_New(1);
268 if (args == NULL)
269 return -1;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000270 temp = (conv_string_len_to_unicode(buffer, len));
Fred Drake2a3d7db2002-06-28 22:56:48 +0000271 if (temp == NULL) {
272 Py_DECREF(args);
273 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000274 XML_SetCharacterDataHandler(self->itself,
275 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000276 return -1;
277 }
278 PyTuple_SET_ITEM(args, 0, temp);
279 /* temp is now a borrowed reference; consider it unused. */
280 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200281 temp = call_with_frame("CharacterData", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000282 self->handlers[CharacterData], args, self);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000283 /* temp is an owned reference again, or NULL */
284 self->in_callback = 0;
285 Py_DECREF(args);
286 if (temp == NULL) {
287 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000288 XML_SetCharacterDataHandler(self->itself,
289 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000290 return -1;
291 }
292 Py_DECREF(temp);
293 return 0;
294}
295
296static int
297flush_character_buffer(xmlparseobject *self)
298{
299 int rc;
300 if (self->buffer == NULL || self->buffer_used == 0)
301 return 0;
302 rc = call_character_handler(self, self->buffer, self->buffer_used);
303 self->buffer_used = 0;
304 return rc;
305}
306
307static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000308my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000309{
310 xmlparseobject *self = (xmlparseobject *) userData;
Victor Stinner9e09c262013-07-18 23:17:01 +0200311
312 if (PyErr_Occurred())
313 return;
314
Fred Drake2a3d7db2002-06-28 22:56:48 +0000315 if (self->buffer == NULL)
316 call_character_handler(self, data, len);
317 else {
318 if ((self->buffer_used + len) > self->buffer_size) {
319 if (flush_character_buffer(self) < 0)
320 return;
321 /* handler might have changed; drop the rest on the floor
322 * if there isn't a handler anymore
323 */
324 if (!have_handler(self, CharacterData))
325 return;
326 }
327 if (len > self->buffer_size) {
328 call_character_handler(self, data, len);
329 self->buffer_used = 0;
330 }
331 else {
332 memcpy(self->buffer + self->buffer_used,
333 data, len * sizeof(XML_Char));
334 self->buffer_used += len;
335 }
336 }
337}
338
Fred Drake85d835f2001-02-08 15:39:08 +0000339static void
340my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000341 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000342{
343 xmlparseobject *self = (xmlparseobject *)userData;
344
Fred Drake71b63ff2002-06-28 22:29:01 +0000345 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000346 PyObject *container, *rv, *args;
347 int i, max;
348
Victor Stinner9e09c262013-07-18 23:17:01 +0200349 if (PyErr_Occurred())
350 return;
351
Fred Drake2a3d7db2002-06-28 22:56:48 +0000352 if (flush_character_buffer(self) < 0)
353 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000354 /* Set max to the number of slots filled in atts[]; max/2 is
355 * the number of attributes we need to process.
356 */
357 if (self->specified_attributes) {
358 max = XML_GetSpecifiedAttributeCount(self->itself);
359 }
360 else {
361 max = 0;
362 while (atts[max] != NULL)
363 max += 2;
364 }
365 /* Build the container. */
366 if (self->ordered_attributes)
367 container = PyList_New(max);
368 else
369 container = PyDict_New();
370 if (container == NULL) {
371 flag_error(self);
372 return;
373 }
374 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000375 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000376 PyObject *v;
377 if (n == NULL) {
378 flag_error(self);
379 Py_DECREF(container);
380 return;
381 }
Guido van Rossum4ca94712007-07-23 17:42:32 +0000382 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
Fred Drake85d835f2001-02-08 15:39:08 +0000383 if (v == NULL) {
384 flag_error(self);
385 Py_DECREF(container);
386 Py_DECREF(n);
387 return;
388 }
389 if (self->ordered_attributes) {
390 PyList_SET_ITEM(container, i, n);
391 PyList_SET_ITEM(container, i+1, v);
392 }
393 else if (PyDict_SetItem(container, n, v)) {
394 flag_error(self);
395 Py_DECREF(n);
396 Py_DECREF(v);
Zackery Spytz68def052018-10-19 00:57:38 -0600397 Py_DECREF(container);
Fred Drake85d835f2001-02-08 15:39:08 +0000398 return;
399 }
400 else {
401 Py_DECREF(n);
402 Py_DECREF(v);
403 }
404 }
Neal Norwitz484d9a42005-09-30 04:46:49 +0000405 args = string_intern(self, name);
Fred Drake85d835f2001-02-08 15:39:08 +0000406 if (args == NULL) {
407 Py_DECREF(container);
408 return;
409 }
Zackery Spytz68def052018-10-19 00:57:38 -0600410 args = Py_BuildValue("(NN)", args, container);
411 if (args == NULL) {
412 return;
413 }
Fred Drake85d835f2001-02-08 15:39:08 +0000414 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000415 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200416 rv = call_with_frame("StartElement", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000417 self->handlers[StartElement], args, self);
Fred Drakebd6101c2001-02-14 18:29:45 +0000418 self->in_callback = 0;
419 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000420 if (rv == NULL) {
421 flag_error(self);
422 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000423 }
Fred Drake85d835f2001-02-08 15:39:08 +0000424 Py_DECREF(rv);
425 }
426}
427
/* Generate the Expat callback `my_<NAME>Handler`.
 *
 *   RC            C return type of the generated function
 *   NAME          enum HandlerTypes member; also the name used in tracebacks
 *   PARAMS        parenthesised C parameter list
 *   INIT          extra declaration/statement placed after the locals
 *   PARAM_FORMAT  parenthesised argument list for Py_BuildValue
 *   CONVERSION    statement executed with the handler result in `rv`
 *   RETURN        expression returned on every path
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing (beyond returning RETURN) when no
 * handler is set or an exception is pending.  Otherwise it flushes the
 * character buffer, builds the arguments, calls the Python handler and
 * calls flag_error() if building the arguments or the call fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
        RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (PyErr_Occurred()) { \
            return RETURN; \
        } \
        if (flush_character_buffer(self) < 0) { \
            return RETURN; \
        } \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (args == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        self->in_callback = 1; \
        rv = call_with_frame(#NAME,__LINE__, \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}
458
/* RC_HANDLER specialisation for callbacks returning void: no extra
   initialisation, no result conversion, and the parser object is taken
   from the `userData` parameter. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;, \
        (xmlparseobject *)userData)

/* RC_HANDLER specialisation for callbacks returning int: `rc` starts
   at 0 and is replaced by PyLong_AsLong() of the handler result when
   the handler call succeeds. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
        rc = PyLong_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000467
Fred Drake71b63ff2002-06-28 22:29:01 +0000468VOID_HANDLER(EndElement,
469 (void *userData, const XML_Char *name),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000470 ("(N)", string_intern(self, name)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000471
Fred Drake6f987622000-08-25 18:03:30 +0000472VOID_HANDLER(ProcessingInstruction,
Fred Drake71b63ff2002-06-28 22:29:01 +0000473 (void *userData,
474 const XML_Char *target,
Fred Drake85d835f2001-02-08 15:39:08 +0000475 const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000476 ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000477
Fred Drake6f987622000-08-25 18:03:30 +0000478VOID_HANDLER(UnparsedEntityDecl,
Fred Drake71b63ff2002-06-28 22:29:01 +0000479 (void *userData,
Fred Drake85d835f2001-02-08 15:39:08 +0000480 const XML_Char *entityName,
481 const XML_Char *base,
482 const XML_Char *systemId,
483 const XML_Char *publicId,
484 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000485 ("(NNNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000486 string_intern(self, entityName), string_intern(self, base),
487 string_intern(self, systemId), string_intern(self, publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000488 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000489
Fred Drake85d835f2001-02-08 15:39:08 +0000490VOID_HANDLER(EntityDecl,
491 (void *userData,
492 const XML_Char *entityName,
493 int is_parameter_entity,
494 const XML_Char *value,
495 int value_length,
496 const XML_Char *base,
497 const XML_Char *systemId,
498 const XML_Char *publicId,
499 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000500 ("NiNNNNN",
501 string_intern(self, entityName), is_parameter_entity,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000502 (conv_string_len_to_unicode(value, value_length)),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000503 string_intern(self, base), string_intern(self, systemId),
504 string_intern(self, publicId),
505 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000506
507VOID_HANDLER(XmlDecl,
508 (void *userData,
509 const XML_Char *version,
510 const XML_Char *encoding,
511 int standalone),
512 ("(O&O&i)",
Guido van Rossum4ca94712007-07-23 17:42:32 +0000513 conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
Fred Drake85d835f2001-02-08 15:39:08 +0000514 standalone))
515
516static PyObject *
517conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000518 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000519{
520 PyObject *result = NULL;
521 PyObject *children = PyTuple_New(model->numchildren);
522 int i;
523
524 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000525 assert(model->numchildren < INT_MAX);
526 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000527 PyObject *child = conv_content_model(&model->children[i],
528 conv_string);
529 if (child == NULL) {
530 Py_XDECREF(children);
531 return NULL;
532 }
533 PyTuple_SET_ITEM(children, i, child);
534 }
535 result = Py_BuildValue("(iiO&N)",
536 model->type, model->quant,
537 conv_string,model->name, children);
538 }
539 return result;
540}
541
Fred Drake06dd8cf2003-02-02 03:54:17 +0000542static void
543my_ElementDeclHandler(void *userData,
544 const XML_Char *name,
545 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000546{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000547 xmlparseobject *self = (xmlparseobject *)userData;
548 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000549
Fred Drake06dd8cf2003-02-02 03:54:17 +0000550 if (have_handler(self, ElementDecl)) {
551 PyObject *rv = NULL;
552 PyObject *modelobj, *nameobj;
553
Victor Stinner9e09c262013-07-18 23:17:01 +0200554 if (PyErr_Occurred())
555 return;
556
Fred Drake06dd8cf2003-02-02 03:54:17 +0000557 if (flush_character_buffer(self) < 0)
558 goto finally;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000559 modelobj = conv_content_model(model, (conv_string_to_unicode));
Fred Drake06dd8cf2003-02-02 03:54:17 +0000560 if (modelobj == NULL) {
561 flag_error(self);
562 goto finally;
563 }
564 nameobj = string_intern(self, name);
565 if (nameobj == NULL) {
566 Py_DECREF(modelobj);
567 flag_error(self);
568 goto finally;
569 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000570 args = Py_BuildValue("NN", nameobj, modelobj);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000571 if (args == NULL) {
Fred Drake06dd8cf2003-02-02 03:54:17 +0000572 flag_error(self);
573 goto finally;
574 }
575 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200576 rv = call_with_frame("ElementDecl", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000577 self->handlers[ElementDecl], args, self);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000578 self->in_callback = 0;
579 if (rv == NULL) {
580 flag_error(self);
581 goto finally;
582 }
583 Py_DECREF(rv);
584 }
585 finally:
586 Py_XDECREF(args);
587 XML_FreeContentModel(self->itself, model);
588 return;
589}
Fred Drake85d835f2001-02-08 15:39:08 +0000590
591VOID_HANDLER(AttlistDecl,
592 (void *userData,
593 const XML_Char *elname,
594 const XML_Char *attname,
595 const XML_Char *att_type,
596 const XML_Char *dflt,
597 int isrequired),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000598 ("(NNO&O&i)",
599 string_intern(self, elname), string_intern(self, attname),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000600 conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
Fred Drake85d835f2001-02-08 15:39:08 +0000601 isrequired))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000602
Martin v. Löwisc847f402003-01-21 11:09:21 +0000603#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +0000604VOID_HANDLER(SkippedEntity,
605 (void *userData,
606 const XML_Char *entityName,
607 int is_parameter_entity),
608 ("Ni",
609 string_intern(self, entityName), is_parameter_entity))
Martin v. Löwisc847f402003-01-21 11:09:21 +0000610#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +0000611
Fred Drake71b63ff2002-06-28 22:29:01 +0000612VOID_HANDLER(NotationDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000613 (void *userData,
614 const XML_Char *notationName,
615 const XML_Char *base,
616 const XML_Char *systemId,
617 const XML_Char *publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000618 ("(NNNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000619 string_intern(self, notationName), string_intern(self, base),
620 string_intern(self, systemId), string_intern(self, publicId)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000621
Fred Drake6f987622000-08-25 18:03:30 +0000622VOID_HANDLER(StartNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000623 (void *userData,
624 const XML_Char *prefix,
625 const XML_Char *uri),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000626 ("(NN)",
627 string_intern(self, prefix), string_intern(self, uri)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000628
Fred Drake6f987622000-08-25 18:03:30 +0000629VOID_HANDLER(EndNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000630 (void *userData,
631 const XML_Char *prefix),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000632 ("(N)", string_intern(self, prefix)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000633
Fred Drake6f987622000-08-25 18:03:30 +0000634VOID_HANDLER(Comment,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000635 (void *userData, const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000636 ("(O&)", conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000637
Fred Drake6f987622000-08-25 18:03:30 +0000638VOID_HANDLER(StartCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000639 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000640 ("()"))
Fred Drake71b63ff2002-06-28 22:29:01 +0000641
Fred Drake6f987622000-08-25 18:03:30 +0000642VOID_HANDLER(EndCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000643 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000644 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000645
Fred Drake6f987622000-08-25 18:03:30 +0000646VOID_HANDLER(Default,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000647 (void *userData, const XML_Char *s, int len),
648 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000649
Fred Drake6f987622000-08-25 18:03:30 +0000650VOID_HANDLER(DefaultHandlerExpand,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000651 (void *userData, const XML_Char *s, int len),
652 ("(N)", (conv_string_len_to_unicode(s,len))))
Serhiy Storchaka55f82492018-10-19 18:00:51 +0300653#define my_DefaultHandlerExpand my_DefaultHandlerExpandHandler
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000654
Fred Drake71b63ff2002-06-28 22:29:01 +0000655INT_HANDLER(NotStandalone,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000656 (void *userData),
657 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000658
Fred Drake6f987622000-08-25 18:03:30 +0000659RC_HANDLER(int, ExternalEntityRef,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000660 (XML_Parser parser,
661 const XML_Char *context,
662 const XML_Char *base,
663 const XML_Char *systemId,
664 const XML_Char *publicId),
665 int rc=0;,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000666 ("(O&NNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000667 conv_string_to_unicode ,context, string_intern(self, base),
668 string_intern(self, systemId), string_intern(self, publicId)),
669 rc = PyLong_AsLong(rv);, rc,
670 XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000671
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000672/* XXX UnknownEncodingHandler */
673
Fred Drake85d835f2001-02-08 15:39:08 +0000674VOID_HANDLER(StartDoctypeDecl,
675 (void *userData, const XML_Char *doctypeName,
676 const XML_Char *sysid, const XML_Char *pubid,
677 int has_internal_subset),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000678 ("(NNNi)", string_intern(self, doctypeName),
679 string_intern(self, sysid), string_intern(self, pubid),
Fred Drake85d835f2001-02-08 15:39:08 +0000680 has_internal_subset))
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000681
682VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000683
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000684/* ---------------------------------------------------------------- */
Brett Cannond0aeda82014-08-22 14:23:20 -0400685/*[clinic input]
686class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype"
687[clinic start generated code]*/
688/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2393162385232e1c]*/
689
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000690
Fred Drake71b63ff2002-06-28 22:29:01 +0000691static PyObject *
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600692get_parse_result(pyexpat_state *state, xmlparseobject *self, int rv)
Fred Drake71b63ff2002-06-28 22:29:01 +0000693{
694 if (PyErr_Occurred()) {
695 return NULL;
696 }
697 if (rv == 0) {
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600698 return set_error(state, self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000699 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000700 if (flush_character_buffer(self) < 0) {
701 return NULL;
702 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000703 return PyLong_FromLong(rv);
Fred Drake71b63ff2002-06-28 22:29:01 +0000704}
705
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200706#define MAX_CHUNK_SIZE (1 << 20)
707
Brett Cannond0aeda82014-08-22 14:23:20 -0400708/*[clinic input]
709pyexpat.xmlparser.Parse
710
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600711 cls: defining_class
Brett Cannond0aeda82014-08-22 14:23:20 -0400712 data: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200713 isfinal: bool(accept={int}) = False
Brett Cannond0aeda82014-08-22 14:23:20 -0400714 /
715
716Parse XML data.
717
718`isfinal' should be true at end of input.
719[clinic start generated code]*/
720
Brett Cannond0aeda82014-08-22 14:23:20 -0400721static PyObject *
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600722pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyTypeObject *cls,
723 PyObject *data, int isfinal)
724/*[clinic end generated code: output=8faffe07fe1f862a input=fc97f833558ca715]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400725{
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200726 const char *s;
727 Py_ssize_t slen;
728 Py_buffer view;
729 int rc;
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600730 pyexpat_state *state = PyType_GetModuleState(cls);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000731
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200732 if (PyUnicode_Check(data)) {
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200733 view.buf = NULL;
Serhiy Storchaka36b365c2013-02-04 18:28:01 +0200734 s = PyUnicode_AsUTF8AndSize(data, &slen);
735 if (s == NULL)
736 return NULL;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200737 /* Explicitly set UTF-8 encoding. Return code ignored. */
738 (void)XML_SetEncoding(self->itself, "utf-8");
739 }
740 else {
741 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
742 return NULL;
743 s = view.buf;
744 slen = view.len;
745 }
746
747 while (slen > MAX_CHUNK_SIZE) {
748 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
749 if (!rc)
750 goto done;
751 s += MAX_CHUNK_SIZE;
752 slen -= MAX_CHUNK_SIZE;
753 }
Serhiy Storchakafad85aa2015-11-07 15:42:38 +0200754 Py_BUILD_ASSERT(MAX_CHUNK_SIZE <= INT_MAX);
755 assert(slen <= INT_MAX);
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +0300756 rc = XML_Parse(self->itself, s, (int)slen, isfinal);
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200757
758done:
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600759 if (view.buf != NULL) {
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200760 PyBuffer_Release(&view);
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600761 }
762 return get_parse_result(state, self, rc);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000763}
764
Fred Drakeca1f4262000-09-21 20:10:23 +0000765/* File reading copied from cPickle */
766
/* Number of bytes requested from the file's read() method, and from
   XML_GetBuffer(), on each iteration of ParseFile(). */
#define BUF_SIZE 2048
768
Fred Drake0582df92000-07-12 04:49:00 +0000769static int
770readinst(char *buf, int buf_size, PyObject *meth)
771{
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000772 PyObject *str;
773 Py_ssize_t len;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200774 const char *ptr;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000775
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000776 str = PyObject_CallFunction(meth, "n", buf_size);
Martin v. Löwis9171f022004-10-13 19:50:11 +0000777 if (str == NULL)
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000778 goto error;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000779
Christian Heimes72b710a2008-05-26 13:28:38 +0000780 if (PyBytes_Check(str))
781 ptr = PyBytes_AS_STRING(str);
Christian Heimes9c4756e2008-05-26 13:22:05 +0000782 else if (PyByteArray_Check(str))
783 ptr = PyByteArray_AS_STRING(str);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000784 else {
Fred Drake71b63ff2002-06-28 22:29:01 +0000785 PyErr_Format(PyExc_TypeError,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000786 "read() did not return a bytes object (type=%.400s)",
Christian Heimes90aa7642007-12-19 02:45:37 +0000787 Py_TYPE(str)->tp_name);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000788 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000789 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000790 len = Py_SIZE(str);
Fred Drake0582df92000-07-12 04:49:00 +0000791 if (len > buf_size) {
792 PyErr_Format(PyExc_ValueError,
793 "read() returned too much data: "
Victor Stinner9d6f9362011-01-04 22:00:04 +0000794 "%i bytes requested, %zd returned",
Fred Drake0582df92000-07-12 04:49:00 +0000795 buf_size, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000796 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000797 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000798 memcpy(buf, ptr, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000799 Py_DECREF(str);
800 /* len <= buf_size <= INT_MAX */
Victor Stinner0fcab4a2011-01-04 12:59:15 +0000801 return (int)len;
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000802
803error:
804 Py_XDECREF(str);
805 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000806}
807
Brett Cannond0aeda82014-08-22 14:23:20 -0400808/*[clinic input]
809pyexpat.xmlparser.ParseFile
810
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600811 cls: defining_class
Brett Cannond0aeda82014-08-22 14:23:20 -0400812 file: object
813 /
814
815Parse XML data from file-like object.
816[clinic start generated code]*/
817
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000818static PyObject *
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600819pyexpat_xmlparser_ParseFile_impl(xmlparseobject *self, PyTypeObject *cls,
820 PyObject *file)
821/*[clinic end generated code: output=34780a094c8ca3ae input=ba4bc9c541684793]*/
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000822{
Fred Drake0582df92000-07-12 04:49:00 +0000823 int rv = 1;
Fred Drake0582df92000-07-12 04:49:00 +0000824 PyObject *readmethod = NULL;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200825 _Py_IDENTIFIER(read);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000826
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600827 pyexpat_state *state = PyType_GetModuleState(cls);
828
Serhiy Storchaka41c57b32019-09-01 12:03:39 +0300829 if (_PyObject_LookupAttrId(file, &PyId_read, &readmethod) < 0) {
830 return NULL;
831 }
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000832 if (readmethod == NULL) {
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000833 PyErr_SetString(PyExc_TypeError,
834 "argument must have 'read' attribute");
835 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000836 }
837 for (;;) {
838 int bytes_read;
839 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
Fred Drake7b6caff2003-07-21 17:05:56 +0000840 if (buf == NULL) {
Fred Drakef239c6d2003-07-21 17:22:43 +0000841 Py_XDECREF(readmethod);
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600842 return get_parse_result(state, self, 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000843 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000844
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000845 bytes_read = readinst(buf, BUF_SIZE, readmethod);
846 if (bytes_read < 0) {
847 Py_DECREF(readmethod);
848 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000849 }
850 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000851 if (PyErr_Occurred()) {
852 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000853 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000854 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000855
Fred Drake0582df92000-07-12 04:49:00 +0000856 if (!rv || bytes_read == 0)
857 break;
858 }
Fred Drake7b6caff2003-07-21 17:05:56 +0000859 Py_XDECREF(readmethod);
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600860 return get_parse_result(state, self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000861}
862
Brett Cannond0aeda82014-08-22 14:23:20 -0400863/*[clinic input]
864pyexpat.xmlparser.SetBase
865
866 base: str
867 /
868
869Set the base URL for the parser.
870[clinic start generated code]*/
871
Brett Cannond0aeda82014-08-22 14:23:20 -0400872static PyObject *
873pyexpat_xmlparser_SetBase_impl(xmlparseobject *self, const char *base)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300874/*[clinic end generated code: output=c212ddceb607b539 input=c684e5de895ee1a8]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400875{
Fred Drake0582df92000-07-12 04:49:00 +0000876 if (!XML_SetBase(self->itself, base)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000877 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000878 }
Brett Cannond0aeda82014-08-22 14:23:20 -0400879 Py_RETURN_NONE;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000880}
881
Brett Cannond0aeda82014-08-22 14:23:20 -0400882/*[clinic input]
883pyexpat.xmlparser.GetBase
884
885Return base URL string for the parser.
886[clinic start generated code]*/
887
Brett Cannond0aeda82014-08-22 14:23:20 -0400888static PyObject *
889pyexpat_xmlparser_GetBase_impl(xmlparseobject *self)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300890/*[clinic end generated code: output=2886cb21f9a8739a input=918d71c38009620e]*/
Fred Drake0582df92000-07-12 04:49:00 +0000891{
Fred Drake0582df92000-07-12 04:49:00 +0000892 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000893}
894
Brett Cannond0aeda82014-08-22 14:23:20 -0400895/*[clinic input]
896pyexpat.xmlparser.GetInputContext
897
898Return the untranslated text of the input that caused the current event.
899
900If the event was generated by a large amount of text (such as a start tag
901for an element with many attributes), not all of the text may be available.
902[clinic start generated code]*/
903
Brett Cannond0aeda82014-08-22 14:23:20 -0400904static PyObject *
905pyexpat_xmlparser_GetInputContext_impl(xmlparseobject *self)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300906/*[clinic end generated code: output=a88026d683fc22cc input=034df8712db68379]*/
Fred Drakebd6101c2001-02-14 18:29:45 +0000907{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000908 if (self->in_callback) {
909 int offset, size;
910 const char *buffer
911 = XML_GetInputContext(self->itself, &offset, &size);
Fred Drakebd6101c2001-02-14 18:29:45 +0000912
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000913 if (buffer != NULL)
Christian Heimes72b710a2008-05-26 13:28:38 +0000914 return PyBytes_FromStringAndSize(buffer + offset,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000915 size - offset);
916 else
917 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000918 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000919 else
920 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000921}
Fred Drakebd6101c2001-02-14 18:29:45 +0000922
Brett Cannond0aeda82014-08-22 14:23:20 -0400923/*[clinic input]
924pyexpat.xmlparser.ExternalEntityParserCreate
925
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600926 cls: defining_class
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700927 context: str(accept={str, NoneType})
Brett Cannond0aeda82014-08-22 14:23:20 -0400928 encoding: str = NULL
929 /
930
931Create a parser for parsing an external entity based on the information passed to the ExternalEntityRefHandler.
932[clinic start generated code]*/
933
Brett Cannond0aeda82014-08-22 14:23:20 -0400934static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400935pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600936 PyTypeObject *cls,
Larry Hastings89964c42015-04-14 18:07:59 -0400937 const char *context,
938 const char *encoding)
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600939/*[clinic end generated code: output=01d4472b49cb3f92 input=ec70c6b9e6e9619a]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400940{
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000941 xmlparseobject *new_parser;
942 int i;
943
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600944 pyexpat_state *state = PyType_GetModuleState(cls);
945
946 new_parser = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
947 if (new_parser == NULL) {
Fred Drake85d835f2001-02-08 15:39:08 +0000948 return NULL;
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600949 }
950
Fred Drake2a3d7db2002-06-28 22:56:48 +0000951 new_parser->buffer_size = self->buffer_size;
952 new_parser->buffer_used = 0;
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000953 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000954 new_parser->ordered_attributes = self->ordered_attributes;
955 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +0000956 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +0000957 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000958 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000959 encoding);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000960 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000961 new_parser->intern = self->intern;
962 Py_XINCREF(new_parser->intern);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000963
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000964 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +0200965 new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000966 if (new_parser->buffer == NULL) {
967 Py_DECREF(new_parser);
968 return PyErr_NoMemory();
969 }
970 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000971 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +0000972 Py_DECREF(new_parser);
973 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000974 }
975
976 XML_SetUserData(new_parser->itself, (void *)new_parser);
977
978 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +0000979 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +0000980 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000981
Serhiy Storchaka1a1ff292015-02-16 13:28:22 +0200982 new_parser->handlers = PyMem_New(PyObject *, i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000983 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +0000984 Py_DECREF(new_parser);
985 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000986 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000987 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000988
989 /* then copy handlers from self */
990 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +0000991 PyObject *handler = self->handlers[i];
992 if (handler != NULL) {
993 Py_INCREF(handler);
994 new_parser->handlers[i] = handler;
995 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +0000996 handler_info[i].handler);
997 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000998 }
Victor Stinner1b184552019-10-08 00:09:31 +0200999
1000 PyObject_GC_Track(new_parser);
Fred Drake71b63ff2002-06-28 22:29:01 +00001001 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001002}
1003
Brett Cannond0aeda82014-08-22 14:23:20 -04001004/*[clinic input]
1005pyexpat.xmlparser.SetParamEntityParsing
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001006
Brett Cannond0aeda82014-08-22 14:23:20 -04001007 flag: int
1008 /
1009
1010Controls parsing of parameter entities (including the external DTD subset).
1011
1012Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,
1013XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and
1014XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag
1015was successful.
1016[clinic start generated code]*/
1017
Brett Cannond0aeda82014-08-22 14:23:20 -04001018static PyObject *
1019pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject *self, int flag)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001020/*[clinic end generated code: output=18668ee8e760d64c input=8aea19b4b15e9af1]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001021{
1022 flag = XML_SetParamEntityParsing(self->itself, flag);
Christian Heimes217cfd12007-12-02 14:31:20 +00001023 return PyLong_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001024}
1025
Martin v. Löwisc847f402003-01-21 11:09:21 +00001026
1027#if XML_COMBINED_VERSION >= 19505
Brett Cannond0aeda82014-08-22 14:23:20 -04001028/*[clinic input]
1029pyexpat.xmlparser.UseForeignDTD
1030
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001031 cls: defining_class
Brett Cannond0aeda82014-08-22 14:23:20 -04001032 flag: bool = True
1033 /
1034
1035Allows the application to provide an artificial external subset if one is not specified as part of the document instance.
1036
1037This readily allows the use of a 'default' document type controlled by the
1038application, while still getting the advantage of providing document type
1039information to the parser. 'flag' defaults to True if not provided.
1040[clinic start generated code]*/
1041
Brett Cannond0aeda82014-08-22 14:23:20 -04001042static PyObject *
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001043pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls,
1044 int flag)
1045/*[clinic end generated code: output=d7d98252bd25a20f input=23440ecb0573fb29]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001046{
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001047 pyexpat_state *state = PyType_GetModuleState(cls);
Martin v. Löwis069dde22003-01-21 10:58:18 +00001048 enum XML_Error rc;
Brett Cannond0aeda82014-08-22 14:23:20 -04001049
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001050 rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
Martin v. Löwis069dde22003-01-21 10:58:18 +00001051 if (rc != XML_ERROR_NONE) {
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001052 return set_error(state, self, rc);
Martin v. Löwis069dde22003-01-21 10:58:18 +00001053 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001054 Py_RETURN_NONE;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001055}
Martin v. Löwisc847f402003-01-21 11:09:21 +00001056#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001057
/* Method table for xmlparser objects.  Each *_METHODDEF macro is defined
   outside this part of the file (presumably by the Argument Clinic output
   for the corresponding *_impl function above).  UseForeignDTD is listed
   only when building against Expat 1.95.5 or later. */
static struct PyMethodDef xmlparse_methods[] = {
    PYEXPAT_XMLPARSER_PARSE_METHODDEF
    PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
    PYEXPAT_XMLPARSER_SETBASE_METHODDEF
    PYEXPAT_XMLPARSER_GETBASE_METHODDEF
    PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
    PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
    PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
#if XML_COMBINED_VERSION >= 19505
    PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
#endif
    {NULL, NULL}  /* sentinel */
};
1071
1072/* ---------- */
1073
1074
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001075
Fred Drake71b63ff2002-06-28 22:29:01 +00001076/* pyexpat international encoding support.
1077 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001078*/
1079
Fred Drake71b63ff2002-06-28 22:29:01 +00001080static int
1081PyUnknownEncodingHandler(void *encodingHandlerData,
1082 const XML_Char *name,
1083 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001084{
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001085 static unsigned char template_buffer[256] = {0};
1086 PyObject* u;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001087 int i;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001088 const void *data;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001089 unsigned int kind;
Fred Drake71b63ff2002-06-28 22:29:01 +00001090
Victor Stinner9e09c262013-07-18 23:17:01 +02001091 if (PyErr_Occurred())
1092 return XML_STATUS_ERROR;
1093
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001094 if (template_buffer[1] == 0) {
1095 for (i = 0; i < 256; i++)
1096 template_buffer[i] = i;
Tim Peters63cb99e2001-02-17 18:12:50 +00001097 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001098
1099 u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
Christian Heimesb5821552013-06-29 20:43:13 +02001100 if (u == NULL || PyUnicode_READY(u)) {
Christian Heimes72172422013-06-29 21:49:27 +02001101 Py_XDECREF(u);
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001102 return XML_STATUS_ERROR;
Christian Heimesb5821552013-06-29 20:43:13 +02001103 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001104
1105 if (PyUnicode_GET_LENGTH(u) != 256) {
1106 Py_DECREF(u);
1107 PyErr_SetString(PyExc_ValueError,
1108 "multi-byte encodings are not supported");
1109 return XML_STATUS_ERROR;
1110 }
1111
1112 kind = PyUnicode_KIND(u);
1113 data = PyUnicode_DATA(u);
1114 for (i = 0; i < 256; i++) {
1115 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1116 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1117 info->map[i] = ch;
1118 else
1119 info->map[i] = -1;
1120 }
1121
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001122 info->data = NULL;
1123 info->convert = NULL;
1124 info->release = NULL;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001125 Py_DECREF(u);
1126
1127 return XML_STATUS_OK;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001128}
1129
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001130
1131static PyObject *
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001132newxmlparseobject(pyexpat_state *state, const char *encoding,
1133 const char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001134{
1135 int i;
1136 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001137
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001138 self = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
Fred Drake0582df92000-07-12 04:49:00 +00001139 if (self == NULL)
1140 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001141
Fred Drake2a3d7db2002-06-28 22:56:48 +00001142 self->buffer = NULL;
1143 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1144 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001145 self->ordered_attributes = 0;
1146 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001147 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001148 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001149 self->handlers = NULL;
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001150 self->intern = intern;
1151 Py_XINCREF(self->intern);
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001152
Christian Heimesfa535f52013-07-07 17:35:11 +02001153 /* namespace_separator is either NULL or contains one char + \0 */
1154 self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
1155 namespace_separator);
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001156 if (self->itself == NULL) {
1157 PyErr_SetString(PyExc_RuntimeError,
1158 "XML_ParserCreate failed");
1159 Py_DECREF(self);
1160 return NULL;
1161 }
Victor Stinner23ec4b52017-06-15 00:54:36 +02001162#if XML_COMBINED_VERSION >= 20100
1163 /* This feature was added upstream in libexpat 2.1.0. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001164 XML_SetHashSalt(self->itself,
Christian Heimes985ecdc2013-11-20 11:46:18 +01001165 (unsigned long)_Py_HashSecret.expat.hashsalt);
Gregory P. Smith25227712012-03-14 18:10:37 -07001166#endif
Fred Drake0582df92000-07-12 04:49:00 +00001167 XML_SetUserData(self->itself, (void *)self);
Fred Drake7c75bf22002-07-01 14:02:31 +00001168 XML_SetUnknownEncodingHandler(self->itself,
1169 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001170
Fred Drake2a3d7db2002-06-28 22:56:48 +00001171 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001172 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001173
Serhiy Storchaka1a1ff292015-02-16 13:28:22 +02001174 self->handlers = PyMem_New(PyObject *, i);
Fred Drake7c75bf22002-07-01 14:02:31 +00001175 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001176 Py_DECREF(self);
1177 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001178 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001179 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001180
Victor Stinner1b184552019-10-08 00:09:31 +02001181 PyObject_GC_Track(self);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001182 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001183}
1184
Miss Islington (bot)0bf05002021-05-27 08:26:15 -07001185static int
1186xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1187{
1188 for (int i = 0; handler_info[i].name != NULL; i++) {
1189 Py_VISIT(op->handlers[i]);
1190 }
1191 Py_VISIT(Py_TYPE(op));
1192 return 0;
1193}
1194
/* Drop the references held by the parser object: the handlers (through
   clear_handlers(op, 0)) and the intern dictionary.  Always returns 0.
   Called from xmlparse_dealloc(); presumably also installed as the type's
   tp_clear slot -- confirm against the type definition. */
static int
xmlparse_clear(xmlparseobject *op)
{
    clear_handlers(op, 0);
    Py_CLEAR(op->intern);
    return 0;
}
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001202
1203static void
Fred Drake0582df92000-07-12 04:49:00 +00001204xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001205{
Martin v. Löwis894258c2001-09-23 10:20:10 +00001206 PyObject_GC_UnTrack(self);
Fred Drake85d835f2001-02-08 15:39:08 +00001207 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001208 XML_ParserFree(self->itself);
1209 self->itself = NULL;
Miss Islington (bot)0bf05002021-05-27 08:26:15 -07001210 (void)xmlparse_clear(self);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001211
Fred Drake85d835f2001-02-08 15:39:08 +00001212 if (self->handlers != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001213 PyMem_Free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001214 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001215 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001216 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001217 PyMem_Free(self->buffer);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001218 self->buffer = NULL;
1219 }
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001220 PyTypeObject *tp = Py_TYPE(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001221 PyObject_GC_Del(self);
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001222 Py_DECREF(tp);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001223}
1224
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001225
1226static PyObject *
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001227xmlparse_handler_getter(xmlparseobject *self, struct HandlerInfo *hi)
Fred Drake71b63ff2002-06-28 22:29:01 +00001228{
Victor Stinner28f468c2018-11-22 13:21:43 +01001229 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1230 int handlernum = (int)(hi - handler_info);
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001231 PyObject *result = self->handlers[handlernum];
1232 if (result == NULL)
1233 result = Py_None;
Fred Drake71b63ff2002-06-28 22:29:01 +00001234 Py_INCREF(result);
1235 return result;
1236}
1237
Fred Drake6f987622000-08-25 18:03:30 +00001238static int
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001239xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *hi)
Fred Drake0582df92000-07-12 04:49:00 +00001240{
Victor Stinner28f468c2018-11-22 13:21:43 +01001241 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1242 int handlernum = (int)(hi - handler_info);
Fred Drake85d835f2001-02-08 15:39:08 +00001243 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001244 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1245 return -1;
1246 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001247 if (handlernum == CharacterData) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001248 /* If we're changing the character data handler, flush all
1249 * cached data with the old handler. Not sure there's a
1250 * "right" thing to do, though, but this probably won't
1251 * happen.
1252 */
1253 if (flush_character_buffer(self) < 0)
1254 return -1;
1255 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001256
1257 xmlhandler c_handler = NULL;
1258 if (v == Py_None) {
1259 /* If this is the character data handler, and a character
1260 data handler is already active, we need to be more
1261 careful. What we can safely do is replace the existing
1262 character data handler callback function with a no-op
1263 function that will refuse to call Python. The downside
1264 is that this doesn't completely remove the character
1265 data handler from the C layer if there's any callback
1266 active, so Expat does a little more work than it
1267 otherwise would, but that's really an odd case. A more
1268 elaborate system of handlers and state could remove the
1269 C handler more effectively. */
1270 if (handlernum == CharacterData && self->in_callback)
1271 c_handler = noop_character_data_handler;
1272 v = NULL;
1273 }
1274 else if (v != NULL) {
1275 Py_INCREF(v);
1276 c_handler = handler_info[handlernum].handler;
1277 }
1278 Py_XSETREF(self->handlers[handlernum], v);
1279 handler_info[handlernum].setter(self->itself, c_handler);
1280 return 0;
1281}
1282
/* INT_GETTER(name) defines xmlparse_<name>_getter(), an attribute getter
   that returns XML_Get<name>() of the underlying Expat parser, cast to
   long, as a Python int. */
#define INT_GETTER(name) \
    static PyObject * \
    xmlparse_##name##_getter(xmlparseobject *self, void *closure) \
    { \
        return PyLong_FromLong((long) XML_Get##name(self->itself)); \
    }
INT_GETTER(ErrorCode)
INT_GETTER(ErrorLineNumber)
INT_GETTER(ErrorColumnNumber)
INT_GETTER(ErrorByteIndex)
INT_GETTER(CurrentLineNumber)
INT_GETTER(CurrentColumnNumber)
INT_GETTER(CurrentByteIndex)

#undef INT_GETTER
1298
1299static PyObject *
1300xmlparse_buffer_text_getter(xmlparseobject *self, void *closure)
1301{
1302 return PyBool_FromLong(self->buffer != NULL);
1303}
1304
1305static int
1306xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
1307{
1308 if (v == NULL) {
1309 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1310 return -1;
1311 }
1312 int b = PyObject_IsTrue(v);
1313 if (b < 0)
1314 return -1;
1315 if (b) {
1316 if (self->buffer == NULL) {
1317 self->buffer = PyMem_Malloc(self->buffer_size);
1318 if (self->buffer == NULL) {
1319 PyErr_NoMemory();
1320 return -1;
1321 }
1322 self->buffer_used = 0;
1323 }
1324 }
1325 else if (self->buffer != NULL) {
1326 if (flush_character_buffer(self) < 0)
1327 return -1;
1328 PyMem_Free(self->buffer);
1329 self->buffer = NULL;
1330 }
1331 return 0;
1332}
1333
1334static PyObject *
1335xmlparse_buffer_size_getter(xmlparseobject *self, void *closure)
1336{
1337 return PyLong_FromLong((long) self->buffer_size);
1338}
1339
1340static int
1341xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
1342{
1343 if (v == NULL) {
1344 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1345 return -1;
1346 }
1347 long new_buffer_size;
1348 if (!PyLong_Check(v)) {
1349 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1350 return -1;
1351 }
1352
1353 new_buffer_size = PyLong_AsLong(v);
1354 if (new_buffer_size <= 0) {
1355 if (!PyErr_Occurred())
1356 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1357 return -1;
1358 }
1359
1360 /* trivial case -- no change */
1361 if (new_buffer_size == self->buffer_size) {
Fred Drake6f987622000-08-25 18:03:30 +00001362 return 0;
1363 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001364
1365 /* check maximum */
1366 if (new_buffer_size > INT_MAX) {
1367 char errmsg[100];
1368 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1369 PyErr_SetString(PyExc_ValueError, errmsg);
1370 return -1;
1371 }
1372
1373 if (self->buffer != NULL) {
1374 /* there is already a buffer */
1375 if (self->buffer_used != 0) {
1376 if (flush_character_buffer(self) < 0) {
1377 return -1;
1378 }
1379 }
1380 /* free existing buffer */
1381 PyMem_Free(self->buffer);
1382 }
1383 self->buffer = PyMem_Malloc(new_buffer_size);
1384 if (self->buffer == NULL) {
1385 PyErr_NoMemory();
1386 return -1;
1387 }
1388 self->buffer_size = new_buffer_size;
1389 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001390}
1391
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001392static PyObject *
1393xmlparse_buffer_used_getter(xmlparseobject *self, void *closure)
1394{
1395 return PyLong_FromLong((long) self->buffer_used);
1396}
1397
1398static PyObject *
1399xmlparse_namespace_prefixes_getter(xmlparseobject *self, void *closure)
1400{
1401 return PyBool_FromLong(self->ns_prefixes);
1402}
1403
1404static int
1405xmlparse_namespace_prefixes_setter(xmlparseobject *self, PyObject *v, void *closure)
1406{
1407 if (v == NULL) {
1408 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1409 return -1;
1410 }
1411 int b = PyObject_IsTrue(v);
1412 if (b < 0)
1413 return -1;
1414 self->ns_prefixes = b;
1415 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1416 return 0;
1417}
1418
1419static PyObject *
1420xmlparse_ordered_attributes_getter(xmlparseobject *self, void *closure)
1421{
1422 return PyBool_FromLong(self->ordered_attributes);
1423}
1424
1425static int
1426xmlparse_ordered_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1427{
1428 if (v == NULL) {
1429 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1430 return -1;
1431 }
1432 int b = PyObject_IsTrue(v);
1433 if (b < 0)
1434 return -1;
1435 self->ordered_attributes = b;
1436 return 0;
1437}
1438
1439static PyObject *
1440xmlparse_specified_attributes_getter(xmlparseobject *self, void *closure)
1441{
1442 return PyBool_FromLong((long) self->specified_attributes);
1443}
1444
1445static int
1446xmlparse_specified_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1447{
1448 if (v == NULL) {
1449 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1450 return -1;
1451 }
1452 int b = PyObject_IsTrue(v);
1453 if (b < 0)
1454 return -1;
1455 self->specified_attributes = b;
1456 return 0;
1457}
1458
1459static PyMemberDef xmlparse_members[] = {
1460 {"intern", T_OBJECT, offsetof(xmlparseobject, intern), READONLY, NULL},
1461 {NULL}
1462};
1463
1464#define XMLPARSE_GETTER_DEF(name) \
1465 {#name, (getter)xmlparse_##name##_getter, NULL, NULL},
1466#define XMLPARSE_GETTER_SETTER_DEF(name) \
1467 {#name, (getter)xmlparse_##name##_getter, \
1468 (setter)xmlparse_##name##_setter, NULL},
1469
1470static PyGetSetDef xmlparse_getsetlist[] = {
1471 XMLPARSE_GETTER_DEF(ErrorCode)
1472 XMLPARSE_GETTER_DEF(ErrorLineNumber)
1473 XMLPARSE_GETTER_DEF(ErrorColumnNumber)
1474 XMLPARSE_GETTER_DEF(ErrorByteIndex)
1475 XMLPARSE_GETTER_DEF(CurrentLineNumber)
1476 XMLPARSE_GETTER_DEF(CurrentColumnNumber)
1477 XMLPARSE_GETTER_DEF(CurrentByteIndex)
1478 XMLPARSE_GETTER_SETTER_DEF(buffer_size)
1479 XMLPARSE_GETTER_SETTER_DEF(buffer_text)
1480 XMLPARSE_GETTER_DEF(buffer_used)
1481 XMLPARSE_GETTER_SETTER_DEF(namespace_prefixes)
1482 XMLPARSE_GETTER_SETTER_DEF(ordered_attributes)
1483 XMLPARSE_GETTER_SETTER_DEF(specified_attributes)
1484 {NULL},
1485};
1486
1487#undef XMLPARSE_GETTER_DEF
1488#undef XMLPARSE_GETTER_SETTER_DEF
1489
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001490PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001491
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001492static PyType_Slot _xml_parse_type_spec_slots[] = {
1493 {Py_tp_dealloc, xmlparse_dealloc},
1494 {Py_tp_doc, (void *)Xmlparsetype__doc__},
1495 {Py_tp_traverse, xmlparse_traverse},
1496 {Py_tp_clear, xmlparse_clear},
1497 {Py_tp_methods, xmlparse_methods},
1498 {Py_tp_members, xmlparse_members},
1499 {Py_tp_getset, xmlparse_getsetlist},
1500 {0, 0}
1501};
1502
1503static PyType_Spec _xml_parse_type_spec = {
1504 .name = "pyexpat.xmlparser",
1505 .basicsize = sizeof(xmlparseobject),
Erlend Egeberg Aasland9746cda2021-04-30 16:04:57 +02001506 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
Miss Islington (bot)7297d742021-06-17 03:19:44 -07001507 Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE),
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001508 .slots = _xml_parse_type_spec_slots,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001509};
1510
1511/* End of code for xmlparser objects */
1512/* -------------------------------------------------------- */
1513
Brett Cannond0aeda82014-08-22 14:23:20 -04001514/*[clinic input]
1515pyexpat.ParserCreate
1516
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001517 encoding: str(accept={str, NoneType}) = None
1518 namespace_separator: str(accept={str, NoneType}) = None
Brett Cannond0aeda82014-08-22 14:23:20 -04001519 intern: object = NULL
1520
1521Return a new XML parser object.
1522[clinic start generated code]*/
1523
Brett Cannond0aeda82014-08-22 14:23:20 -04001524static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001525pyexpat_ParserCreate_impl(PyObject *module, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04001526 const char *namespace_separator, PyObject *intern)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001527/*[clinic end generated code: output=295c0cf01ab1146c input=e8da8e8d7122cb5d]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001528{
Fred Drakeb91a36b2002-06-27 19:40:48 +00001529 PyObject *result;
1530 int intern_decref = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001531
Fred Drakecde79132001-04-25 16:01:30 +00001532 if (namespace_separator != NULL
1533 && strlen(namespace_separator) > 1) {
1534 PyErr_SetString(PyExc_ValueError,
1535 "namespace_separator must be at most one"
1536 " character, omitted, or None");
1537 return NULL;
1538 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001539 /* Explicitly passing None means no interning is desired.
1540 Not passing anything means that a new dictionary is used. */
1541 if (intern == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001542 intern = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001543 else if (intern == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001544 intern = PyDict_New();
1545 if (!intern)
1546 return NULL;
1547 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001548 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001549 else if (!PyDict_Check(intern)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001550 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1551 return NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001552 }
1553
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001554 pyexpat_state *state = pyexpat_get_state(module);
1555 result = newxmlparseobject(state, encoding, namespace_separator, intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001556 if (intern_decref) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001557 Py_DECREF(intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001558 }
1559 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001560}
1561
Brett Cannond0aeda82014-08-22 14:23:20 -04001562/*[clinic input]
1563pyexpat.ErrorString
1564
1565 code: long
1566 /
1567
1568Returns string error for given number.
1569[clinic start generated code]*/
1570
Brett Cannond0aeda82014-08-22 14:23:20 -04001571static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001572pyexpat_ErrorString_impl(PyObject *module, long code)
1573/*[clinic end generated code: output=2feae50d166f2174 input=cc67de010d9e62b3]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001574{
Fred Drake0582df92000-07-12 04:49:00 +00001575 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001576}
1577
1578/* List of methods defined in the module */
1579
1580static struct PyMethodDef pyexpat_methods[] = {
Brett Cannond0aeda82014-08-22 14:23:20 -04001581 PYEXPAT_PARSERCREATE_METHODDEF
1582 PYEXPAT_ERRORSTRING_METHODDEF
1583 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001584};
1585
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001586/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001587
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001588PyDoc_STRVAR(pyexpat_module_documentation,
1589"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001590
Fred Drakecde79132001-04-25 16:01:30 +00001591/* Initialization function for the module */
1592
/* Name of the module; a definition supplied before this point wins. */
#if !defined(MODULE_NAME)
#  define MODULE_NAME "pyexpat"
#endif
1596
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001597static int init_handler_descrs(pyexpat_state *state)
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001598{
1599 int i;
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001600 assert(!PyType_HasFeature(state->xml_parse_type, Py_TPFLAGS_VALID_VERSION_TAG));
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001601 for (i = 0; handler_info[i].name != NULL; i++) {
1602 struct HandlerInfo *hi = &handler_info[i];
1603 hi->getset.name = hi->name;
1604 hi->getset.get = (getter)xmlparse_handler_getter;
1605 hi->getset.set = (setter)xmlparse_handler_setter;
1606 hi->getset.closure = &handler_info[i];
1607
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001608 PyObject *descr = PyDescr_NewGetSet(state->xml_parse_type, &hi->getset);
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001609 if (descr == NULL)
1610 return -1;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001611
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001612 if (PyDict_SetDefault(state->xml_parse_type->tp_dict, PyDescr_NAME(descr), descr) == NULL) {
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001613 Py_DECREF(descr);
1614 return -1;
1615 }
1616 Py_DECREF(descr);
1617 }
1618 return 0;
1619}
1620
Mohamed Koubaa71842182020-11-04 11:37:23 -06001621static PyObject *
1622add_submodule(PyObject *mod, const char *fullname)
Fred Drake0582df92000-07-12 04:49:00 +00001623{
Mohamed Koubaa71842182020-11-04 11:37:23 -06001624 const char *name = strrchr(fullname, '.') + 1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001625
Mohamed Koubaa71842182020-11-04 11:37:23 -06001626 PyObject *submodule = PyModule_New(fullname);
1627 if (submodule == NULL) {
Martin v. Löwis1a214512008-06-11 05:26:20 +00001628 return NULL;
Christian Heimes7a5457b2016-09-09 00:13:35 +02001629 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001630
Mohamed Koubaa71842182020-11-04 11:37:23 -06001631 PyObject *mod_name = PyUnicode_FromString(fullname);
1632 if (mod_name == NULL) {
1633 Py_DECREF(submodule);
1634 return NULL;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001635 }
Fred Drake6f987622000-08-25 18:03:30 +00001636
Mohamed Koubaa71842182020-11-04 11:37:23 -06001637 if (_PyImport_SetModule(mod_name, submodule) < 0) {
1638 Py_DECREF(submodule);
1639 Py_DECREF(mod_name);
1640 return NULL;
1641 }
1642 Py_DECREF(mod_name);
1643
1644 /* gives away the reference to the submodule */
1645 if (PyModule_AddObject(mod, name, submodule) < 0) {
1646 Py_DECREF(submodule);
1647 return NULL;
1648 }
1649
1650 return submodule;
1651}
1652
1653static int
1654add_error(PyObject *errors_module, PyObject *codes_dict,
1655 PyObject *rev_codes_dict, const char *name, int value)
1656{
1657 const char *error_string = XML_ErrorString(value);
1658 if (PyModule_AddStringConstant(errors_module, name, error_string) < 0) {
1659 return -1;
1660 }
1661
1662 PyObject *num = PyLong_FromLong(value);
1663 if (num == NULL) {
1664 return -1;
1665 }
1666
1667 if (PyDict_SetItemString(codes_dict, error_string, num) < 0) {
1668 Py_DECREF(num);
1669 return -1;
1670 }
1671
1672 PyObject *str = PyUnicode_FromString(error_string);
1673 if (str == NULL) {
1674 Py_DECREF(num);
1675 return -1;
1676 }
1677
1678 int res = PyDict_SetItem(rev_codes_dict, num, str);
1679 Py_DECREF(str);
1680 Py_DECREF(num);
1681 if (res < 0) {
1682 return -1;
1683 }
1684
1685 return 0;
1686}
1687
1688static int
1689add_errors_module(PyObject *mod)
1690{
1691 PyObject *errors_module = add_submodule(mod, MODULE_NAME ".errors");
1692 if (errors_module == NULL) {
1693 return -1;
1694 }
1695
1696 PyObject *codes_dict = PyDict_New();
1697 PyObject *rev_codes_dict = PyDict_New();
Georg Brandlb4dac712010-10-15 14:46:48 +00001698 if (codes_dict == NULL || rev_codes_dict == NULL) {
Mohamed Koubaa71842182020-11-04 11:37:23 -06001699 goto error;
Georg Brandlb4dac712010-10-15 14:46:48 +00001700 }
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001701
Mohamed Koubaa71842182020-11-04 11:37:23 -06001702#define ADD_CONST(name) do { \
1703 if (add_error(errors_module, codes_dict, rev_codes_dict, \
1704 #name, name) < 0) { \
1705 goto error; \
1706 } \
1707 } while(0)
Fred Drake7bd9f412000-07-04 23:51:31 +00001708
Mohamed Koubaa71842182020-11-04 11:37:23 -06001709 ADD_CONST(XML_ERROR_NO_MEMORY);
1710 ADD_CONST(XML_ERROR_SYNTAX);
1711 ADD_CONST(XML_ERROR_NO_ELEMENTS);
1712 ADD_CONST(XML_ERROR_INVALID_TOKEN);
1713 ADD_CONST(XML_ERROR_UNCLOSED_TOKEN);
1714 ADD_CONST(XML_ERROR_PARTIAL_CHAR);
1715 ADD_CONST(XML_ERROR_TAG_MISMATCH);
1716 ADD_CONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1717 ADD_CONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1718 ADD_CONST(XML_ERROR_PARAM_ENTITY_REF);
1719 ADD_CONST(XML_ERROR_UNDEFINED_ENTITY);
1720 ADD_CONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1721 ADD_CONST(XML_ERROR_ASYNC_ENTITY);
1722 ADD_CONST(XML_ERROR_BAD_CHAR_REF);
1723 ADD_CONST(XML_ERROR_BINARY_ENTITY_REF);
1724 ADD_CONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1725 ADD_CONST(XML_ERROR_MISPLACED_XML_PI);
1726 ADD_CONST(XML_ERROR_UNKNOWN_ENCODING);
1727 ADD_CONST(XML_ERROR_INCORRECT_ENCODING);
1728 ADD_CONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1729 ADD_CONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1730 ADD_CONST(XML_ERROR_NOT_STANDALONE);
1731 ADD_CONST(XML_ERROR_UNEXPECTED_STATE);
1732 ADD_CONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1733 ADD_CONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1734 ADD_CONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
Fred Drake283b6702004-08-04 22:28:16 +00001735 /* Added in Expat 1.95.7. */
Mohamed Koubaa71842182020-11-04 11:37:23 -06001736 ADD_CONST(XML_ERROR_UNBOUND_PREFIX);
Fred Drake283b6702004-08-04 22:28:16 +00001737 /* Added in Expat 1.95.8. */
Mohamed Koubaa71842182020-11-04 11:37:23 -06001738 ADD_CONST(XML_ERROR_UNDECLARING_PREFIX);
1739 ADD_CONST(XML_ERROR_INCOMPLETE_PE);
1740 ADD_CONST(XML_ERROR_XML_DECL);
1741 ADD_CONST(XML_ERROR_TEXT_DECL);
1742 ADD_CONST(XML_ERROR_PUBLICID);
1743 ADD_CONST(XML_ERROR_SUSPENDED);
1744 ADD_CONST(XML_ERROR_NOT_SUSPENDED);
1745 ADD_CONST(XML_ERROR_ABORTED);
1746 ADD_CONST(XML_ERROR_FINISHED);
1747 ADD_CONST(XML_ERROR_SUSPEND_PE);
1748#undef ADD_CONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001749
Georg Brandlb4dac712010-10-15 14:46:48 +00001750 if (PyModule_AddStringConstant(errors_module, "__doc__",
1751 "Constants used to describe "
Mohamed Koubaa71842182020-11-04 11:37:23 -06001752 "error conditions.") < 0) {
1753 goto error;
1754 }
Fred Drake85d835f2001-02-08 15:39:08 +00001755
Mohamed Koubaa71842182020-11-04 11:37:23 -06001756 Py_INCREF(codes_dict);
1757 if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0) {
1758 Py_DECREF(codes_dict);
1759 goto error;
1760 }
1761 Py_CLEAR(codes_dict);
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001762
Mohamed Koubaa71842182020-11-04 11:37:23 -06001763 Py_INCREF(rev_codes_dict);
1764 if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0) {
1765 Py_DECREF(rev_codes_dict);
1766 goto error;
1767 }
1768 Py_CLEAR(rev_codes_dict);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001769
Mohamed Koubaa71842182020-11-04 11:37:23 -06001770 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001771
Mohamed Koubaa71842182020-11-04 11:37:23 -06001772error:
1773 Py_XDECREF(codes_dict);
1774 Py_XDECREF(rev_codes_dict);
1775 return -1;
1776}
1777
1778static int
1779add_model_module(PyObject *mod)
1780{
1781 PyObject *model_module = add_submodule(mod, MODULE_NAME ".model");
1782 if (model_module == NULL) {
1783 return -1;
1784 }
1785
1786#define MYCONST(c) do { \
1787 if (PyModule_AddIntConstant(model_module, #c, c) < 0) { \
1788 return -1; \
1789 } \
1790 } while(0)
1791
1792 if (PyModule_AddStringConstant(
1793 model_module, "__doc__",
1794 "Constants used to interpret content model information.") < 0) {
1795 return -1;
1796 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001797
Fred Drake85d835f2001-02-08 15:39:08 +00001798 MYCONST(XML_CTYPE_EMPTY);
1799 MYCONST(XML_CTYPE_ANY);
1800 MYCONST(XML_CTYPE_MIXED);
1801 MYCONST(XML_CTYPE_NAME);
1802 MYCONST(XML_CTYPE_CHOICE);
1803 MYCONST(XML_CTYPE_SEQ);
1804
1805 MYCONST(XML_CQUANT_NONE);
1806 MYCONST(XML_CQUANT_OPT);
1807 MYCONST(XML_CQUANT_REP);
1808 MYCONST(XML_CQUANT_PLUS);
1809#undef MYCONST
Mohamed Koubaa71842182020-11-04 11:37:23 -06001810 return 0;
1811}
Fredrik Lundhc3345042005-12-13 19:49:55 +00001812
#if XML_COMBINED_VERSION > 19505
/* Add a "features" attribute to `mod`: a list of (name, value) tuples,
   one per entry returned by XML_GetFeatureList().

   Returns 0 on success, -1 with an exception set on failure. */
static int
add_features(PyObject *mod)
{
    PyObject *list = PyList_New(0);
    if (list == NULL) {
        return -1;
    }

    const XML_Feature *features = XML_GetFeatureList();
    for (size_t i = 0; features[i].feature != XML_FEATURE_END; ++i) {
        /* XML_Feature.value is a long, so it must be passed with the "l"
           format; "i" would read it through va_arg as an int. */
        PyObject *item = Py_BuildValue("sl", features[i].name,
                                       (long)features[i].value);
        if (item == NULL) {
            goto error;
        }
        int ok = PyList_Append(list, item);
        Py_DECREF(item);
        if (ok < 0) {
            goto error;
        }
    }
    /* PyModule_AddObject() takes over our reference only on success. */
    if (PyModule_AddObject(mod, "features", list) < 0) {
        goto error;
    }
    return 0;

error:
    Py_DECREF(list);
    return -1;
}
#endif
1845
Hai Shi7c83eaa2021-01-03 23:47:44 +08001846static void
1847pyexpat_destructor(PyObject *op)
1848{
1849 void *p = PyCapsule_GetPointer(op, PyExpat_CAPSULE_NAME);
1850 PyMem_Free(p);
1851}
1852
Mohamed Koubaa71842182020-11-04 11:37:23 -06001853static int
1854pyexpat_exec(PyObject *mod)
1855{
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001856 pyexpat_state *state = pyexpat_get_state(mod);
1857 state->xml_parse_type = (PyTypeObject *)PyType_FromModuleAndSpec(
1858 mod, &_xml_parse_type_spec, NULL);
1859
1860 if (state->xml_parse_type == NULL) {
Mohamed Koubaa71842182020-11-04 11:37:23 -06001861 return -1;
1862 }
1863
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001864 if (init_handler_descrs(state) < 0) {
1865 return -1;
1866 }
1867 state->error = PyErr_NewException("xml.parsers.expat.ExpatError",
1868 NULL, NULL);
1869 if (state->error == NULL) {
Mohamed Koubaa71842182020-11-04 11:37:23 -06001870 return -1;
1871 }
1872
1873 /* Add some symbolic constants to the module */
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001874
1875 if (PyModule_AddObjectRef(mod, "error", state->error) < 0) {
Mohamed Koubaa71842182020-11-04 11:37:23 -06001876 return -1;
1877 }
1878
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001879 if (PyModule_AddObjectRef(mod, "ExpatError", state->error) < 0) {
Mohamed Koubaa71842182020-11-04 11:37:23 -06001880 return -1;
1881 }
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001882
1883 if (PyModule_AddObjectRef(mod, "XMLParserType",
1884 (PyObject *) state->xml_parse_type) < 0) {
Mohamed Koubaa71842182020-11-04 11:37:23 -06001885 return -1;
1886 }
1887
1888 if (PyModule_AddStringConstant(mod, "EXPAT_VERSION",
1889 XML_ExpatVersion()) < 0) {
1890 return -1;
1891 }
1892 {
1893 XML_Expat_Version info = XML_ExpatVersionInfo();
1894 PyObject *versionInfo = Py_BuildValue("(iii)",
1895 info.major,
1896 info.minor,
1897 info.micro);
1898 if (PyModule_AddObject(mod, "version_info", versionInfo) < 0) {
1899 Py_DECREF(versionInfo);
1900 return -1;
1901 }
1902 }
1903 /* XXX When Expat supports some way of figuring out how it was
1904 compiled, this should check and set native_encoding
1905 appropriately.
1906 */
1907 if (PyModule_AddStringConstant(mod, "native_encoding", "UTF-8") < 0) {
1908 return -1;
1909 }
1910
1911 if (add_errors_module(mod) < 0) {
1912 return -1;
1913 }
1914
1915 if (add_model_module(mod) < 0) {
1916 return -1;
1917 }
1918
1919#if XML_COMBINED_VERSION > 19505
1920 if (add_features(mod) < 0) {
1921 return -1;
1922 }
1923#endif
1924
1925#define MYCONST(c) do { \
1926 if (PyModule_AddIntConstant(mod, #c, c) < 0) { \
1927 return -1; \
1928 } \
1929 } while(0)
1930
1931 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1932 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1933 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
1934#undef MYCONST
1935
Hai Shi7c83eaa2021-01-03 23:47:44 +08001936 struct PyExpat_CAPI *capi = PyMem_Calloc(1, sizeof(struct PyExpat_CAPI));
1937 if (capi == NULL) {
1938 PyErr_NoMemory();
1939 return -1;
1940 }
Fredrik Lundhc3345042005-12-13 19:49:55 +00001941 /* initialize pyexpat dispatch table */
Hai Shi7c83eaa2021-01-03 23:47:44 +08001942 capi->size = sizeof(*capi);
1943 capi->magic = PyExpat_CAPI_MAGIC;
1944 capi->MAJOR_VERSION = XML_MAJOR_VERSION;
1945 capi->MINOR_VERSION = XML_MINOR_VERSION;
1946 capi->MICRO_VERSION = XML_MICRO_VERSION;
1947 capi->ErrorString = XML_ErrorString;
1948 capi->GetErrorCode = XML_GetErrorCode;
1949 capi->GetErrorColumnNumber = XML_GetErrorColumnNumber;
1950 capi->GetErrorLineNumber = XML_GetErrorLineNumber;
1951 capi->Parse = XML_Parse;
1952 capi->ParserCreate_MM = XML_ParserCreate_MM;
1953 capi->ParserFree = XML_ParserFree;
1954 capi->SetCharacterDataHandler = XML_SetCharacterDataHandler;
1955 capi->SetCommentHandler = XML_SetCommentHandler;
1956 capi->SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1957 capi->SetElementHandler = XML_SetElementHandler;
1958 capi->SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1959 capi->SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1960 capi->SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1961 capi->SetUserData = XML_SetUserData;
1962 capi->SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
1963 capi->SetEncoding = XML_SetEncoding;
1964 capi->DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
Christian Heimescb5778f2018-09-18 14:38:58 +02001965#if XML_COMBINED_VERSION >= 20100
Hai Shi7c83eaa2021-01-03 23:47:44 +08001966 capi->SetHashSalt = XML_SetHashSalt;
Christian Heimescb5778f2018-09-18 14:38:58 +02001967#else
Hai Shi7c83eaa2021-01-03 23:47:44 +08001968 capi->SetHashSalt = NULL;
Christian Heimescb5778f2018-09-18 14:38:58 +02001969#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001970
Benjamin Petersonb173f782009-05-05 22:31:58 +00001971 /* export using capsule */
Hai Shi7c83eaa2021-01-03 23:47:44 +08001972 PyObject *capi_object = PyCapsule_New(capi, PyExpat_CAPSULE_NAME,
1973 pyexpat_destructor);
Mohamed Koubaa71842182020-11-04 11:37:23 -06001974 if (capi_object == NULL) {
Hai Shi7c83eaa2021-01-03 23:47:44 +08001975 PyMem_Free(capi);
Mohamed Koubaa71842182020-11-04 11:37:23 -06001976 return -1;
1977 }
1978
1979 if (PyModule_AddObject(mod, "expat_CAPI", capi_object) < 0) {
1980 Py_DECREF(capi_object);
1981 return -1;
1982 }
1983
1984 return 0;
1985}
1986
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001987static int
1988pyexpat_traverse(PyObject *module, visitproc visit, void *arg)
1989{
1990 pyexpat_state *state = pyexpat_get_state(module);
1991 Py_VISIT(state->xml_parse_type);
1992 Py_VISIT(state->error);
1993 return 0;
1994}
1995
1996static int
1997pyexpat_clear(PyObject *module)
1998{
1999 pyexpat_state *state = pyexpat_get_state(module);
2000 Py_CLEAR(state->xml_parse_type);
2001 Py_CLEAR(state->error);
2002 return 0;
2003}
2004
2005static void
2006pyexpat_free(void *module)
2007{
2008 pyexpat_clear((PyObject *)module);
2009}
2010
2011static PyModuleDef_Slot pyexpat_slots[] = {
2012 {Py_mod_exec, pyexpat_exec},
2013 {0, NULL}
2014};
2015
Mohamed Koubaa71842182020-11-04 11:37:23 -06002016static struct PyModuleDef pyexpatmodule = {
2017 PyModuleDef_HEAD_INIT,
2018 .m_name = MODULE_NAME,
2019 .m_doc = pyexpat_module_documentation,
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06002020 .m_size = sizeof(pyexpat_state),
Mohamed Koubaa71842182020-11-04 11:37:23 -06002021 .m_methods = pyexpat_methods,
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06002022 .m_slots = pyexpat_slots,
2023 .m_traverse = pyexpat_traverse,
2024 .m_clear = pyexpat_clear,
2025 .m_free = pyexpat_free
Mohamed Koubaa71842182020-11-04 11:37:23 -06002026};
2027
2028PyMODINIT_FUNC
2029PyInit_pyexpat(void)
2030{
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06002031 return PyModuleDef_Init(&pyexpatmodule);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002032}
2033
Fred Drake6f987622000-08-25 18:03:30 +00002034static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00002035clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00002036{
Fred Drakecde79132001-04-25 16:01:30 +00002037 int i = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002038
Fred Drake71b63ff2002-06-28 22:29:01 +00002039 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00002040 if (initial)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002041 self->handlers[i] = NULL;
2042 else {
Serhiy Storchaka1ed017a2015-12-27 15:51:32 +02002043 Py_CLEAR(self->handlers[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002044 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00002045 }
Fred Drakecde79132001-04-25 16:01:30 +00002046 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002047}
2048
Tim Peters0c322792002-07-17 16:49:03 +00002049static struct HandlerInfo handler_info[] = {
Serhiy Storchaka55f82492018-10-19 18:00:51 +03002050
2051#define HANDLER_INFO(name) \
2052 {#name, (xmlhandlersetter)XML_Set##name, (xmlhandler)my_##name},
2053
2054 HANDLER_INFO(StartElementHandler)
2055 HANDLER_INFO(EndElementHandler)
2056 HANDLER_INFO(ProcessingInstructionHandler)
2057 HANDLER_INFO(CharacterDataHandler)
2058 HANDLER_INFO(UnparsedEntityDeclHandler)
2059 HANDLER_INFO(NotationDeclHandler)
2060 HANDLER_INFO(StartNamespaceDeclHandler)
2061 HANDLER_INFO(EndNamespaceDeclHandler)
2062 HANDLER_INFO(CommentHandler)
2063 HANDLER_INFO(StartCdataSectionHandler)
2064 HANDLER_INFO(EndCdataSectionHandler)
2065 HANDLER_INFO(DefaultHandler)
2066 HANDLER_INFO(DefaultHandlerExpand)
2067 HANDLER_INFO(NotStandaloneHandler)
2068 HANDLER_INFO(ExternalEntityRefHandler)
2069 HANDLER_INFO(StartDoctypeDeclHandler)
2070 HANDLER_INFO(EndDoctypeDeclHandler)
2071 HANDLER_INFO(EntityDeclHandler)
2072 HANDLER_INFO(XmlDeclHandler)
2073 HANDLER_INFO(ElementDeclHandler)
2074 HANDLER_INFO(AttlistDeclHandler)
Martin v. Löwisc847f402003-01-21 11:09:21 +00002075#if XML_COMBINED_VERSION >= 19504
Serhiy Storchaka55f82492018-10-19 18:00:51 +03002076 HANDLER_INFO(SkippedEntityHandler)
Martin v. Löwisc847f402003-01-21 11:09:21 +00002077#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002078
Serhiy Storchaka55f82492018-10-19 18:00:51 +03002079#undef HANDLER_INFO
2080
Fred Drake0582df92000-07-12 04:49:00 +00002081 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002082};