blob: 7f727a86f5f47db7810b581842758df2a8150d7f [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Victor Stinner4a21e572020-04-15 02:35:41 +02004#include "structmember.h" // PyMemberDef
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00005#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00006#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00007
Fredrik Lundhc3345042005-12-13 19:49:55 +00008#include "pyexpat.h"
9
/* Do not emit Clinic output to a file as that wreaks havoc with conditionally
   included methods. */
12/*[clinic input]
13module pyexpat
14[clinic start generated code]*/
15/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/
16
/* Expat's major/minor/micro version numbers folded into one integer
   (major*10000 + minor*100 + micro) so they can be compared in a single
   #if test, e.g. "#if XML_COMBINED_VERSION >= 19504". */
#define XML_COMBINED_VERSION \
    (10000 * XML_MAJOR_VERSION + 100 * XML_MINOR_VERSION + XML_MICRO_VERSION)
18
Christian Heimesfa535f52013-07-07 17:35:11 +020019static XML_Memory_Handling_Suite ExpatMemoryHandler = {
20 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
21
/* Identifiers for the callbacks a parser object can carry.  Each enumerator
   is used as an index into the parser's `handlers` array (for example
   self->handlers[StartElement]). */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only compiled in when XML_COMBINED_VERSION is at least 19504. */
    SkippedEntity,
#endif
    /* Trailing enumerator; always last.  NOTE(review): no use of it is
       visible in this part of the file. */
    _DummyDecl
};
49
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -060050typedef struct {
51 PyTypeObject *xml_parse_type;
52 PyObject *error;
53} pyexpat_state;
54
55static inline pyexpat_state*
56pyexpat_get_state(PyObject *module)
57{
58 void *state = PyModule_GetState(module);
59 assert(state != NULL);
60 return (pyexpat_state *)state;
61}
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000062
63/* ----------------------------------------------------- */
64
/* Declarations for objects of type xmlparser */
66
67typedef struct {
Fred Drake0582df92000-07-12 04:49:00 +000068 PyObject_HEAD
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000069
Fred Drake0582df92000-07-12 04:49:00 +000070 XML_Parser itself;
Fred Drake85d835f2001-02-08 15:39:08 +000071 int ordered_attributes; /* Return attributes as a list. */
72 int specified_attributes; /* Report only specified attributes. */
Fred Drakebd6101c2001-02-14 18:29:45 +000073 int in_callback; /* Is a callback active? */
Martin v. Löwis069dde22003-01-21 10:58:18 +000074 int ns_prefixes; /* Namespace-triplets mode? */
Fred Drake2a3d7db2002-06-28 22:56:48 +000075 XML_Char *buffer; /* Buffer used when accumulating characters */
76 /* NULL if not enabled */
77 int buffer_size; /* Size of buffer, in XML_Char units */
78 int buffer_used; /* Buffer units in use */
Fred Drakeb91a36b2002-06-27 19:40:48 +000079 PyObject *intern; /* Dictionary to intern strings */
Fred Drake0582df92000-07-12 04:49:00 +000080 PyObject **handlers;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000081} xmlparseobject;
82
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030083#include "clinic/pyexpat.c.h"
84
/* Size constant for the character-data accumulation buffer.
   NOTE(review): presumably the initial xmlparseobject.buffer_size, counted in
   XML_Char units; the use site is outside this part of the file. */
#define CHARACTER_DATA_BUFFER_SIZE 8192
86
Fred Drake117ac852002-09-24 16:24:54 +000087typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000088typedef void* xmlhandler;
89
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +000090struct HandlerInfo {
Fred Drake0582df92000-07-12 04:49:00 +000091 const char *name;
92 xmlhandlersetter setter;
93 xmlhandler handler;
Serhiy Storchaka55f82492018-10-19 18:00:51 +030094 PyGetSetDef getset;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000095};
96
Jeremy Hylton938ace62002-07-17 16:30:39 +000097static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000098
Fred Drakebd6101c2001-02-14 18:29:45 +000099/* Set an integer attribute on the error object; return true on success,
100 * false on an exception.
101 */
102static int
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200103set_error_attr(PyObject *err, const char *name, int value)
Fred Drakebd6101c2001-02-14 18:29:45 +0000104{
Christian Heimes217cfd12007-12-02 14:31:20 +0000105 PyObject *v = PyLong_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +0000106
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000107 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
108 Py_XDECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +0000109 return 0;
110 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000111 Py_DECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +0000112 return 1;
113}
114
115/* Build and set an Expat exception, including positioning
116 * information. Always returns NULL.
117 */
Fred Drake85d835f2001-02-08 15:39:08 +0000118static PyObject *
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600119set_error(pyexpat_state *state, xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000120{
121 PyObject *err;
Victor Stinner499dfcf2011-03-21 13:26:24 +0100122 PyObject *buffer;
Fred Drake85d835f2001-02-08 15:39:08 +0000123 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000124 int lineno = XML_GetErrorLineNumber(parser);
125 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000126
Victor Stinner499dfcf2011-03-21 13:26:24 +0100127 buffer = PyUnicode_FromFormat("%s: line %i, column %i",
128 XML_ErrorString(code), lineno, column);
129 if (buffer == NULL)
130 return NULL;
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600131 err = PyObject_CallOneArg(state->error, buffer);
Victor Stinner499dfcf2011-03-21 13:26:24 +0100132 Py_DECREF(buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000133 if ( err != NULL
134 && set_error_attr(err, "code", code)
135 && set_error_attr(err, "offset", column)
136 && set_error_attr(err, "lineno", lineno)) {
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600137 PyErr_SetObject(state->error, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000138 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000139 Py_XDECREF(err);
Fred Drake85d835f2001-02-08 15:39:08 +0000140 return NULL;
141}
142
Fred Drake71b63ff2002-06-28 22:29:01 +0000143static int
144have_handler(xmlparseobject *self, int type)
145{
146 PyObject *handler = self->handlers[type];
147 return handler != NULL;
148}
149
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000150/* Convert a string of XML_Chars into a Unicode string.
151 Returns None if str is a null pointer. */
152
Fred Drake0582df92000-07-12 04:49:00 +0000153static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000154conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000155{
Fred Drake71b63ff2002-06-28 22:29:01 +0000156 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000157 and hence in UTF-8. */
158 /* UTF-8 from Expat, Unicode desired */
159 if (str == NULL) {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200160 Py_RETURN_NONE;
Fred Drake0582df92000-07-12 04:49:00 +0000161 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000162 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000163}
164
Fred Drake0582df92000-07-12 04:49:00 +0000165static PyObject *
166conv_string_len_to_unicode(const XML_Char *str, int len)
167{
Fred Drake71b63ff2002-06-28 22:29:01 +0000168 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000169 and hence in UTF-8. */
170 /* UTF-8 from Expat, Unicode desired */
171 if (str == NULL) {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200172 Py_RETURN_NONE;
Fred Drake0582df92000-07-12 04:49:00 +0000173 }
Fred Drake6f987622000-08-25 18:03:30 +0000174 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000175}
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000176
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000177/* Callback routines */
178
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000179static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000180
Martin v. Löwis069dde22003-01-21 10:58:18 +0000181/* This handler is used when an error has been detected, in the hope
182 that actual parsing can be terminated early. This will only help
183 if an external entity reference is encountered. */
184static int
185error_external_entity_ref_handler(XML_Parser parser,
186 const XML_Char *context,
187 const XML_Char *base,
188 const XML_Char *systemId,
189 const XML_Char *publicId)
190{
191 return 0;
192}
193
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000194/* Dummy character data handler used when an error (exception) has
195 been detected, and the actual parsing can be terminated early.
196 This is needed since character data handler can't be safely removed
197 from within the character data handler, but can be replaced. It is
198 used only from the character data handler trampoline, and must be
199 used right after `flag_error()` is called. */
200static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201noop_character_data_handler(void *userData, const XML_Char *data, int len)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000202{
203 /* Do nothing. */
204}
205
Fred Drake6f987622000-08-25 18:03:30 +0000206static void
207flag_error(xmlparseobject *self)
208{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000209 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000210 XML_SetExternalEntityRefHandler(self->itself,
211 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000212}
213
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000214static PyObject*
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200215call_with_frame(const char *funcname, int lineno, PyObject* func, PyObject* args,
Fred Drake39689c52004-08-13 03:12:57 +0000216 xmlparseobject *self)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000217{
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200218 PyObject *res;
Fred Drakebd6101c2001-02-14 18:29:45 +0000219
Jeroen Demeyer1dbd0842019-07-11 17:57:32 +0200220 res = PyObject_Call(func, args, NULL);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000221 if (res == NULL) {
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200222 _PyTraceback_Add(funcname, __FILE__, lineno);
Fred Drake39689c52004-08-13 03:12:57 +0000223 XML_StopParser(self->itself, XML_FALSE);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000224 }
Fred Drakebd6101c2001-02-14 18:29:45 +0000225 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000226}
227
Fred Drakeb91a36b2002-06-27 19:40:48 +0000228static PyObject*
229string_intern(xmlparseobject *self, const char* str)
230{
Guido van Rossum4ca94712007-07-23 17:42:32 +0000231 PyObject *result = conv_string_to_unicode(str);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000232 PyObject *value;
Neal Norwitz484d9a42005-09-30 04:46:49 +0000233 /* result can be NULL if the unicode conversion failed. */
234 if (!result)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000236 if (!self->intern)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 return result;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200238 value = PyDict_GetItemWithError(self->intern, result);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000239 if (!value) {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200240 if (!PyErr_Occurred() &&
241 PyDict_SetItem(self->intern, result, result) == 0)
242 {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000243 return result;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200244 }
Zackery Spytz68def052018-10-19 00:57:38 -0600245 else {
246 Py_DECREF(result);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000247 return NULL;
Zackery Spytz68def052018-10-19 00:57:38 -0600248 }
Fred Drakeb91a36b2002-06-27 19:40:48 +0000249 }
250 Py_INCREF(value);
251 Py_DECREF(result);
252 return value;
253}
254
Fred Drake2a3d7db2002-06-28 22:56:48 +0000255/* Return 0 on success, -1 on exception.
256 * flag_error() will be called before return if needed.
257 */
258static int
259call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
260{
261 PyObject *args;
262 PyObject *temp;
263
Georg Brandlc01537f2010-10-15 16:26:08 +0000264 if (!have_handler(self, CharacterData))
265 return -1;
266
Fred Drake2a3d7db2002-06-28 22:56:48 +0000267 args = PyTuple_New(1);
268 if (args == NULL)
269 return -1;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000270 temp = (conv_string_len_to_unicode(buffer, len));
Fred Drake2a3d7db2002-06-28 22:56:48 +0000271 if (temp == NULL) {
272 Py_DECREF(args);
273 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000274 XML_SetCharacterDataHandler(self->itself,
275 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000276 return -1;
277 }
278 PyTuple_SET_ITEM(args, 0, temp);
279 /* temp is now a borrowed reference; consider it unused. */
280 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200281 temp = call_with_frame("CharacterData", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000282 self->handlers[CharacterData], args, self);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000283 /* temp is an owned reference again, or NULL */
284 self->in_callback = 0;
285 Py_DECREF(args);
286 if (temp == NULL) {
287 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000288 XML_SetCharacterDataHandler(self->itself,
289 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000290 return -1;
291 }
292 Py_DECREF(temp);
293 return 0;
294}
295
296static int
297flush_character_buffer(xmlparseobject *self)
298{
299 int rc;
300 if (self->buffer == NULL || self->buffer_used == 0)
301 return 0;
302 rc = call_character_handler(self, self->buffer, self->buffer_used);
303 self->buffer_used = 0;
304 return rc;
305}
306
307static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000308my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000309{
310 xmlparseobject *self = (xmlparseobject *) userData;
Victor Stinner9e09c262013-07-18 23:17:01 +0200311
312 if (PyErr_Occurred())
313 return;
314
Fred Drake2a3d7db2002-06-28 22:56:48 +0000315 if (self->buffer == NULL)
316 call_character_handler(self, data, len);
317 else {
318 if ((self->buffer_used + len) > self->buffer_size) {
319 if (flush_character_buffer(self) < 0)
320 return;
321 /* handler might have changed; drop the rest on the floor
322 * if there isn't a handler anymore
323 */
324 if (!have_handler(self, CharacterData))
325 return;
326 }
327 if (len > self->buffer_size) {
328 call_character_handler(self, data, len);
329 self->buffer_used = 0;
330 }
331 else {
332 memcpy(self->buffer + self->buffer_used,
333 data, len * sizeof(XML_Char));
334 self->buffer_used += len;
335 }
336 }
337}
338
Fred Drake85d835f2001-02-08 15:39:08 +0000339static void
340my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000341 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000342{
343 xmlparseobject *self = (xmlparseobject *)userData;
344
Fred Drake71b63ff2002-06-28 22:29:01 +0000345 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000346 PyObject *container, *rv, *args;
347 int i, max;
348
Victor Stinner9e09c262013-07-18 23:17:01 +0200349 if (PyErr_Occurred())
350 return;
351
Fred Drake2a3d7db2002-06-28 22:56:48 +0000352 if (flush_character_buffer(self) < 0)
353 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000354 /* Set max to the number of slots filled in atts[]; max/2 is
355 * the number of attributes we need to process.
356 */
357 if (self->specified_attributes) {
358 max = XML_GetSpecifiedAttributeCount(self->itself);
359 }
360 else {
361 max = 0;
362 while (atts[max] != NULL)
363 max += 2;
364 }
365 /* Build the container. */
366 if (self->ordered_attributes)
367 container = PyList_New(max);
368 else
369 container = PyDict_New();
370 if (container == NULL) {
371 flag_error(self);
372 return;
373 }
374 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000375 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000376 PyObject *v;
377 if (n == NULL) {
378 flag_error(self);
379 Py_DECREF(container);
380 return;
381 }
Guido van Rossum4ca94712007-07-23 17:42:32 +0000382 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
Fred Drake85d835f2001-02-08 15:39:08 +0000383 if (v == NULL) {
384 flag_error(self);
385 Py_DECREF(container);
386 Py_DECREF(n);
387 return;
388 }
389 if (self->ordered_attributes) {
390 PyList_SET_ITEM(container, i, n);
391 PyList_SET_ITEM(container, i+1, v);
392 }
393 else if (PyDict_SetItem(container, n, v)) {
394 flag_error(self);
395 Py_DECREF(n);
396 Py_DECREF(v);
Zackery Spytz68def052018-10-19 00:57:38 -0600397 Py_DECREF(container);
Fred Drake85d835f2001-02-08 15:39:08 +0000398 return;
399 }
400 else {
401 Py_DECREF(n);
402 Py_DECREF(v);
403 }
404 }
Neal Norwitz484d9a42005-09-30 04:46:49 +0000405 args = string_intern(self, name);
Fred Drake85d835f2001-02-08 15:39:08 +0000406 if (args == NULL) {
407 Py_DECREF(container);
408 return;
409 }
Zackery Spytz68def052018-10-19 00:57:38 -0600410 args = Py_BuildValue("(NN)", args, container);
411 if (args == NULL) {
412 return;
413 }
Fred Drake85d835f2001-02-08 15:39:08 +0000414 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000415 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200416 rv = call_with_frame("StartElement", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000417 self->handlers[StartElement], args, self);
Fred Drakebd6101c2001-02-14 18:29:45 +0000418 self->in_callback = 0;
419 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000420 if (rv == NULL) {
421 flag_error(self);
422 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000423 }
Fred Drake85d835f2001-02-08 15:39:08 +0000424 Py_DECREF(rv);
425 }
426}
427
/* Template that defines the Expat callback `my_<NAME>Handler`, which
 * forwards to the Python handler stored in self->handlers[NAME].
 *
 *   RC            C return type of the callback
 *   NAME          enum HandlerTypes member; also the traceback name
 *   PARAMS        parenthesised C parameter list of the callback
 *   INIT          extra local declarations (for example "int rc=0;")
 *   PARAM_FORMAT  parenthesised Py_BuildValue() argument list that builds the
 *                 Python call arguments; may refer to `self` and PARAMS
 *   CONVERSION    statement run on the successful result `rv`
 *   RETURN        expression returned on every path (may be empty)
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing if no handler is installed or an
 * exception is already pending.  It flushes buffered character data before
 * the call, and calls flag_error() if building the arguments or the handler
 * call fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                   RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS \
{ \
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
    \
    if (have_handler(self, NAME)) { \
        if (PyErr_Occurred()) { \
            return RETURN; \
        } \
        if (flush_character_buffer(self) < 0) { \
            return RETURN; \
        } \
        args = Py_BuildValue PARAM_FORMAT ; \
        if (args == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        self->in_callback = 1; \
        rv = call_with_frame(#NAME, __LINE__, \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}
458
/* RC_HANDLER for callbacks that return nothing and receive the parser object
   as `userData`.  The handler's Python return value is discarded. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;, \
               (xmlparseobject *)userData)

/* RC_HANDLER for callbacks that return an int and receive the parser object
   as `userData`.  The result starts at 0 and is replaced by
   PyLong_AsLong() of the handler's return value after a successful call. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
               rc = PyLong_AsLong(rv);, rc, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000467
Fred Drake71b63ff2002-06-28 22:29:01 +0000468VOID_HANDLER(EndElement,
469 (void *userData, const XML_Char *name),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000470 ("(N)", string_intern(self, name)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000471
Fred Drake6f987622000-08-25 18:03:30 +0000472VOID_HANDLER(ProcessingInstruction,
Fred Drake71b63ff2002-06-28 22:29:01 +0000473 (void *userData,
474 const XML_Char *target,
Fred Drake85d835f2001-02-08 15:39:08 +0000475 const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000476 ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000477
Fred Drake6f987622000-08-25 18:03:30 +0000478VOID_HANDLER(UnparsedEntityDecl,
Fred Drake71b63ff2002-06-28 22:29:01 +0000479 (void *userData,
Fred Drake85d835f2001-02-08 15:39:08 +0000480 const XML_Char *entityName,
481 const XML_Char *base,
482 const XML_Char *systemId,
483 const XML_Char *publicId,
484 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000485 ("(NNNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000486 string_intern(self, entityName), string_intern(self, base),
487 string_intern(self, systemId), string_intern(self, publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000488 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000489
Fred Drake85d835f2001-02-08 15:39:08 +0000490VOID_HANDLER(EntityDecl,
491 (void *userData,
492 const XML_Char *entityName,
493 int is_parameter_entity,
494 const XML_Char *value,
495 int value_length,
496 const XML_Char *base,
497 const XML_Char *systemId,
498 const XML_Char *publicId,
499 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000500 ("NiNNNNN",
501 string_intern(self, entityName), is_parameter_entity,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000502 (conv_string_len_to_unicode(value, value_length)),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000503 string_intern(self, base), string_intern(self, systemId),
504 string_intern(self, publicId),
505 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000506
507VOID_HANDLER(XmlDecl,
508 (void *userData,
509 const XML_Char *version,
510 const XML_Char *encoding,
511 int standalone),
512 ("(O&O&i)",
Guido van Rossum4ca94712007-07-23 17:42:32 +0000513 conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
Fred Drake85d835f2001-02-08 15:39:08 +0000514 standalone))
515
516static PyObject *
517conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000518 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000519{
520 PyObject *result = NULL;
521 PyObject *children = PyTuple_New(model->numchildren);
522 int i;
523
524 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000525 assert(model->numchildren < INT_MAX);
526 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000527 PyObject *child = conv_content_model(&model->children[i],
528 conv_string);
529 if (child == NULL) {
530 Py_XDECREF(children);
531 return NULL;
532 }
533 PyTuple_SET_ITEM(children, i, child);
534 }
535 result = Py_BuildValue("(iiO&N)",
536 model->type, model->quant,
537 conv_string,model->name, children);
538 }
539 return result;
540}
541
Fred Drake06dd8cf2003-02-02 03:54:17 +0000542static void
543my_ElementDeclHandler(void *userData,
544 const XML_Char *name,
545 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000546{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000547 xmlparseobject *self = (xmlparseobject *)userData;
548 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000549
Fred Drake06dd8cf2003-02-02 03:54:17 +0000550 if (have_handler(self, ElementDecl)) {
551 PyObject *rv = NULL;
552 PyObject *modelobj, *nameobj;
553
Victor Stinner9e09c262013-07-18 23:17:01 +0200554 if (PyErr_Occurred())
555 return;
556
Fred Drake06dd8cf2003-02-02 03:54:17 +0000557 if (flush_character_buffer(self) < 0)
558 goto finally;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000559 modelobj = conv_content_model(model, (conv_string_to_unicode));
Fred Drake06dd8cf2003-02-02 03:54:17 +0000560 if (modelobj == NULL) {
561 flag_error(self);
562 goto finally;
563 }
564 nameobj = string_intern(self, name);
565 if (nameobj == NULL) {
566 Py_DECREF(modelobj);
567 flag_error(self);
568 goto finally;
569 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000570 args = Py_BuildValue("NN", nameobj, modelobj);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000571 if (args == NULL) {
Fred Drake06dd8cf2003-02-02 03:54:17 +0000572 flag_error(self);
573 goto finally;
574 }
575 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200576 rv = call_with_frame("ElementDecl", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000577 self->handlers[ElementDecl], args, self);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000578 self->in_callback = 0;
579 if (rv == NULL) {
580 flag_error(self);
581 goto finally;
582 }
583 Py_DECREF(rv);
584 }
585 finally:
586 Py_XDECREF(args);
587 XML_FreeContentModel(self->itself, model);
588 return;
589}
Fred Drake85d835f2001-02-08 15:39:08 +0000590
591VOID_HANDLER(AttlistDecl,
592 (void *userData,
593 const XML_Char *elname,
594 const XML_Char *attname,
595 const XML_Char *att_type,
596 const XML_Char *dflt,
597 int isrequired),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000598 ("(NNO&O&i)",
599 string_intern(self, elname), string_intern(self, attname),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000600 conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
Fred Drake85d835f2001-02-08 15:39:08 +0000601 isrequired))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000602
Martin v. Löwisc847f402003-01-21 11:09:21 +0000603#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +0000604VOID_HANDLER(SkippedEntity,
605 (void *userData,
606 const XML_Char *entityName,
607 int is_parameter_entity),
608 ("Ni",
609 string_intern(self, entityName), is_parameter_entity))
Martin v. Löwisc847f402003-01-21 11:09:21 +0000610#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +0000611
Fred Drake71b63ff2002-06-28 22:29:01 +0000612VOID_HANDLER(NotationDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000613 (void *userData,
614 const XML_Char *notationName,
615 const XML_Char *base,
616 const XML_Char *systemId,
617 const XML_Char *publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000618 ("(NNNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000619 string_intern(self, notationName), string_intern(self, base),
620 string_intern(self, systemId), string_intern(self, publicId)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000621
Fred Drake6f987622000-08-25 18:03:30 +0000622VOID_HANDLER(StartNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000623 (void *userData,
624 const XML_Char *prefix,
625 const XML_Char *uri),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000626 ("(NN)",
627 string_intern(self, prefix), string_intern(self, uri)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000628
Fred Drake6f987622000-08-25 18:03:30 +0000629VOID_HANDLER(EndNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000630 (void *userData,
631 const XML_Char *prefix),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000632 ("(N)", string_intern(self, prefix)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000633
Fred Drake6f987622000-08-25 18:03:30 +0000634VOID_HANDLER(Comment,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000635 (void *userData, const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000636 ("(O&)", conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000637
Fred Drake6f987622000-08-25 18:03:30 +0000638VOID_HANDLER(StartCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000639 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000640 ("()"))
Fred Drake71b63ff2002-06-28 22:29:01 +0000641
Fred Drake6f987622000-08-25 18:03:30 +0000642VOID_HANDLER(EndCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000643 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000644 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000645
Fred Drake6f987622000-08-25 18:03:30 +0000646VOID_HANDLER(Default,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000647 (void *userData, const XML_Char *s, int len),
648 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000649
Fred Drake6f987622000-08-25 18:03:30 +0000650VOID_HANDLER(DefaultHandlerExpand,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000651 (void *userData, const XML_Char *s, int len),
652 ("(N)", (conv_string_len_to_unicode(s,len))))
Serhiy Storchaka55f82492018-10-19 18:00:51 +0300653#define my_DefaultHandlerExpand my_DefaultHandlerExpandHandler
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000654
Fred Drake71b63ff2002-06-28 22:29:01 +0000655INT_HANDLER(NotStandalone,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000656 (void *userData),
657 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000658
Fred Drake6f987622000-08-25 18:03:30 +0000659RC_HANDLER(int, ExternalEntityRef,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000660 (XML_Parser parser,
661 const XML_Char *context,
662 const XML_Char *base,
663 const XML_Char *systemId,
664 const XML_Char *publicId),
665 int rc=0;,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000666 ("(O&NNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000667 conv_string_to_unicode ,context, string_intern(self, base),
668 string_intern(self, systemId), string_intern(self, publicId)),
669 rc = PyLong_AsLong(rv);, rc,
670 XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000671
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000672/* XXX UnknownEncodingHandler */
673
Fred Drake85d835f2001-02-08 15:39:08 +0000674VOID_HANDLER(StartDoctypeDecl,
675 (void *userData, const XML_Char *doctypeName,
676 const XML_Char *sysid, const XML_Char *pubid,
677 int has_internal_subset),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000678 ("(NNNi)", string_intern(self, doctypeName),
679 string_intern(self, sysid), string_intern(self, pubid),
Fred Drake85d835f2001-02-08 15:39:08 +0000680 has_internal_subset))
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000681
682VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000683
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000684/* ---------------------------------------------------------------- */
Brett Cannond0aeda82014-08-22 14:23:20 -0400685/*[clinic input]
686class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype"
687[clinic start generated code]*/
688/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2393162385232e1c]*/
689
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000690
Fred Drake71b63ff2002-06-28 22:29:01 +0000691static PyObject *
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600692get_parse_result(pyexpat_state *state, xmlparseobject *self, int rv)
Fred Drake71b63ff2002-06-28 22:29:01 +0000693{
694 if (PyErr_Occurred()) {
695 return NULL;
696 }
697 if (rv == 0) {
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600698 return set_error(state, self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000699 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000700 if (flush_character_buffer(self) < 0) {
701 return NULL;
702 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000703 return PyLong_FromLong(rv);
Fred Drake71b63ff2002-06-28 22:29:01 +0000704}
705
/* Largest number of bytes passed to a single XML_Parse() call (1 MiB);
   longer input is fed to the parser in slices of this size. */
#define MAX_CHUNK_SIZE (1 << 20)
707
Brett Cannond0aeda82014-08-22 14:23:20 -0400708/*[clinic input]
709pyexpat.xmlparser.Parse
710
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600711 cls: defining_class
Brett Cannond0aeda82014-08-22 14:23:20 -0400712 data: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200713 isfinal: bool(accept={int}) = False
Brett Cannond0aeda82014-08-22 14:23:20 -0400714 /
715
716Parse XML data.
717
718`isfinal' should be true at end of input.
719[clinic start generated code]*/
720
Brett Cannond0aeda82014-08-22 14:23:20 -0400721static PyObject *
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600722pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyTypeObject *cls,
723 PyObject *data, int isfinal)
724/*[clinic end generated code: output=8faffe07fe1f862a input=fc97f833558ca715]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400725{
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200726 const char *s;
727 Py_ssize_t slen;
728 Py_buffer view;
729 int rc;
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600730 pyexpat_state *state = PyType_GetModuleState(cls);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000731
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200732 if (PyUnicode_Check(data)) {
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200733 view.buf = NULL;
Serhiy Storchaka36b365c2013-02-04 18:28:01 +0200734 s = PyUnicode_AsUTF8AndSize(data, &slen);
735 if (s == NULL)
736 return NULL;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200737 /* Explicitly set UTF-8 encoding. Return code ignored. */
738 (void)XML_SetEncoding(self->itself, "utf-8");
739 }
740 else {
741 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
742 return NULL;
743 s = view.buf;
744 slen = view.len;
745 }
746
747 while (slen > MAX_CHUNK_SIZE) {
748 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
749 if (!rc)
750 goto done;
751 s += MAX_CHUNK_SIZE;
752 slen -= MAX_CHUNK_SIZE;
753 }
Serhiy Storchakafad85aa2015-11-07 15:42:38 +0200754 Py_BUILD_ASSERT(MAX_CHUNK_SIZE <= INT_MAX);
755 assert(slen <= INT_MAX);
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +0300756 rc = XML_Parse(self->itself, s, (int)slen, isfinal);
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200757
758done:
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600759 if (view.buf != NULL) {
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200760 PyBuffer_Release(&view);
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600761 }
762 return get_parse_result(state, self, rc);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000763}
764
Fred Drakeca1f4262000-09-21 20:10:23 +0000765/* File reading copied from cPickle */
766
/* Number of bytes ParseFile() asks XML_GetBuffer() for, and the size it
   passes to readinst(), on every iteration. */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000767#define BUF_SIZE 2048
768
Fred Drake0582df92000-07-12 04:49:00 +0000769static int
770readinst(char *buf, int buf_size, PyObject *meth)
771{
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000772 PyObject *str;
773 Py_ssize_t len;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200774 const char *ptr;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000775
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000776 str = PyObject_CallFunction(meth, "n", buf_size);
Martin v. Löwis9171f022004-10-13 19:50:11 +0000777 if (str == NULL)
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000778 goto error;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000779
Christian Heimes72b710a2008-05-26 13:28:38 +0000780 if (PyBytes_Check(str))
781 ptr = PyBytes_AS_STRING(str);
Christian Heimes9c4756e2008-05-26 13:22:05 +0000782 else if (PyByteArray_Check(str))
783 ptr = PyByteArray_AS_STRING(str);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000784 else {
Fred Drake71b63ff2002-06-28 22:29:01 +0000785 PyErr_Format(PyExc_TypeError,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000786 "read() did not return a bytes object (type=%.400s)",
Christian Heimes90aa7642007-12-19 02:45:37 +0000787 Py_TYPE(str)->tp_name);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000788 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000789 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000790 len = Py_SIZE(str);
Fred Drake0582df92000-07-12 04:49:00 +0000791 if (len > buf_size) {
792 PyErr_Format(PyExc_ValueError,
793 "read() returned too much data: "
Victor Stinner9d6f9362011-01-04 22:00:04 +0000794 "%i bytes requested, %zd returned",
Fred Drake0582df92000-07-12 04:49:00 +0000795 buf_size, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000796 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000797 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000798 memcpy(buf, ptr, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000799 Py_DECREF(str);
800 /* len <= buf_size <= INT_MAX */
Victor Stinner0fcab4a2011-01-04 12:59:15 +0000801 return (int)len;
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000802
803error:
804 Py_XDECREF(str);
805 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000806}
807
Brett Cannond0aeda82014-08-22 14:23:20 -0400808/*[clinic input]
809pyexpat.xmlparser.ParseFile
810
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600811 cls: defining_class
Brett Cannond0aeda82014-08-22 14:23:20 -0400812 file: object
813 /
814
815Parse XML data from file-like object.
816[clinic start generated code]*/
817
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000818static PyObject *
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600819pyexpat_xmlparser_ParseFile_impl(xmlparseobject *self, PyTypeObject *cls,
820 PyObject *file)
821/*[clinic end generated code: output=34780a094c8ca3ae input=ba4bc9c541684793]*/
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000822{
Fred Drake0582df92000-07-12 04:49:00 +0000823 int rv = 1;
Fred Drake0582df92000-07-12 04:49:00 +0000824 PyObject *readmethod = NULL;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200825 _Py_IDENTIFIER(read);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000826
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600827 pyexpat_state *state = PyType_GetModuleState(cls);
828
Serhiy Storchaka41c57b32019-09-01 12:03:39 +0300829 if (_PyObject_LookupAttrId(file, &PyId_read, &readmethod) < 0) {
830 return NULL;
831 }
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000832 if (readmethod == NULL) {
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000833 PyErr_SetString(PyExc_TypeError,
834 "argument must have 'read' attribute");
835 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000836 }
837 for (;;) {
838 int bytes_read;
839 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
Fred Drake7b6caff2003-07-21 17:05:56 +0000840 if (buf == NULL) {
Fred Drakef239c6d2003-07-21 17:22:43 +0000841 Py_XDECREF(readmethod);
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600842 return get_parse_result(state, self, 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000843 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000844
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000845 bytes_read = readinst(buf, BUF_SIZE, readmethod);
846 if (bytes_read < 0) {
847 Py_DECREF(readmethod);
848 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000849 }
850 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000851 if (PyErr_Occurred()) {
852 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000853 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000854 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000855
Fred Drake0582df92000-07-12 04:49:00 +0000856 if (!rv || bytes_read == 0)
857 break;
858 }
Fred Drake7b6caff2003-07-21 17:05:56 +0000859 Py_XDECREF(readmethod);
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600860 return get_parse_result(state, self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000861}
862
Brett Cannond0aeda82014-08-22 14:23:20 -0400863/*[clinic input]
864pyexpat.xmlparser.SetBase
865
866 base: str
867 /
868
869Set the base URL for the parser.
870[clinic start generated code]*/
871
Brett Cannond0aeda82014-08-22 14:23:20 -0400872static PyObject *
873pyexpat_xmlparser_SetBase_impl(xmlparseobject *self, const char *base)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300874/*[clinic end generated code: output=c212ddceb607b539 input=c684e5de895ee1a8]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400875{
Fred Drake0582df92000-07-12 04:49:00 +0000876 if (!XML_SetBase(self->itself, base)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000877 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000878 }
Brett Cannond0aeda82014-08-22 14:23:20 -0400879 Py_RETURN_NONE;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000880}
881
Brett Cannond0aeda82014-08-22 14:23:20 -0400882/*[clinic input]
883pyexpat.xmlparser.GetBase
884
885Return base URL string for the parser.
886[clinic start generated code]*/
887
Brett Cannond0aeda82014-08-22 14:23:20 -0400888static PyObject *
889pyexpat_xmlparser_GetBase_impl(xmlparseobject *self)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300890/*[clinic end generated code: output=2886cb21f9a8739a input=918d71c38009620e]*/
Fred Drake0582df92000-07-12 04:49:00 +0000891{
Fred Drake0582df92000-07-12 04:49:00 +0000892 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000893}
894
Brett Cannond0aeda82014-08-22 14:23:20 -0400895/*[clinic input]
896pyexpat.xmlparser.GetInputContext
897
898Return the untranslated text of the input that caused the current event.
899
900If the event was generated by a large amount of text (such as a start tag
901for an element with many attributes), not all of the text may be available.
902[clinic start generated code]*/
903
Brett Cannond0aeda82014-08-22 14:23:20 -0400904static PyObject *
905pyexpat_xmlparser_GetInputContext_impl(xmlparseobject *self)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300906/*[clinic end generated code: output=a88026d683fc22cc input=034df8712db68379]*/
Fred Drakebd6101c2001-02-14 18:29:45 +0000907{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000908 if (self->in_callback) {
909 int offset, size;
910 const char *buffer
911 = XML_GetInputContext(self->itself, &offset, &size);
Fred Drakebd6101c2001-02-14 18:29:45 +0000912
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000913 if (buffer != NULL)
Christian Heimes72b710a2008-05-26 13:28:38 +0000914 return PyBytes_FromStringAndSize(buffer + offset,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000915 size - offset);
916 else
917 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000918 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000919 else
920 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000921}
Fred Drakebd6101c2001-02-14 18:29:45 +0000922
Brett Cannond0aeda82014-08-22 14:23:20 -0400923/*[clinic input]
924pyexpat.xmlparser.ExternalEntityParserCreate
925
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600926 cls: defining_class
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700927 context: str(accept={str, NoneType})
Brett Cannond0aeda82014-08-22 14:23:20 -0400928 encoding: str = NULL
929 /
930
931Create a parser for parsing an external entity based on the information passed to the ExternalEntityRefHandler.
932[clinic start generated code]*/
933
Brett Cannond0aeda82014-08-22 14:23:20 -0400934static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400935pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600936 PyTypeObject *cls,
Larry Hastings89964c42015-04-14 18:07:59 -0400937 const char *context,
938 const char *encoding)
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600939/*[clinic end generated code: output=01d4472b49cb3f92 input=ec70c6b9e6e9619a]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400940{
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000941 xmlparseobject *new_parser;
942 int i;
943
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600944 pyexpat_state *state = PyType_GetModuleState(cls);
945
946 new_parser = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
947 if (new_parser == NULL) {
Fred Drake85d835f2001-02-08 15:39:08 +0000948 return NULL;
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600949 }
950
Fred Drake2a3d7db2002-06-28 22:56:48 +0000951 new_parser->buffer_size = self->buffer_size;
952 new_parser->buffer_used = 0;
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000953 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000954 new_parser->ordered_attributes = self->ordered_attributes;
955 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +0000956 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +0000957 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000958 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000959 encoding);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000960 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000961 new_parser->intern = self->intern;
962 Py_XINCREF(new_parser->intern);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000963
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000964 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +0200965 new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000966 if (new_parser->buffer == NULL) {
967 Py_DECREF(new_parser);
968 return PyErr_NoMemory();
969 }
970 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000971 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +0000972 Py_DECREF(new_parser);
973 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000974 }
975
976 XML_SetUserData(new_parser->itself, (void *)new_parser);
977
978 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +0000979 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +0000980 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000981
Serhiy Storchaka1a1ff292015-02-16 13:28:22 +0200982 new_parser->handlers = PyMem_New(PyObject *, i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000983 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +0000984 Py_DECREF(new_parser);
985 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000986 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000987 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000988
989 /* then copy handlers from self */
990 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +0000991 PyObject *handler = self->handlers[i];
992 if (handler != NULL) {
993 Py_INCREF(handler);
994 new_parser->handlers[i] = handler;
995 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +0000996 handler_info[i].handler);
997 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000998 }
Victor Stinner1b184552019-10-08 00:09:31 +0200999
1000 PyObject_GC_Track(new_parser);
Fred Drake71b63ff2002-06-28 22:29:01 +00001001 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001002}
1003
Brett Cannond0aeda82014-08-22 14:23:20 -04001004/*[clinic input]
1005pyexpat.xmlparser.SetParamEntityParsing
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001006
Brett Cannond0aeda82014-08-22 14:23:20 -04001007 flag: int
1008 /
1009
1010Controls parsing of parameter entities (including the external DTD subset).
1011
1012Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,
1013XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and
1014XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag
1015was successful.
1016[clinic start generated code]*/
1017
Brett Cannond0aeda82014-08-22 14:23:20 -04001018static PyObject *
1019pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject *self, int flag)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001020/*[clinic end generated code: output=18668ee8e760d64c input=8aea19b4b15e9af1]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001021{
1022 flag = XML_SetParamEntityParsing(self->itself, flag);
Christian Heimes217cfd12007-12-02 14:31:20 +00001023 return PyLong_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001024}
1025
Martin v. Löwisc847f402003-01-21 11:09:21 +00001026
1027#if XML_COMBINED_VERSION >= 19505
Brett Cannond0aeda82014-08-22 14:23:20 -04001028/*[clinic input]
1029pyexpat.xmlparser.UseForeignDTD
1030
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001031 cls: defining_class
Brett Cannond0aeda82014-08-22 14:23:20 -04001032 flag: bool = True
1033 /
1034
1035Allows the application to provide an artificial external subset if one is not specified as part of the document instance.
1036
1037This readily allows the use of a 'default' document type controlled by the
1038application, while still getting the advantage of providing document type
1039information to the parser. 'flag' defaults to True if not provided.
1040[clinic start generated code]*/
1041
Brett Cannond0aeda82014-08-22 14:23:20 -04001042static PyObject *
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001043pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls,
1044 int flag)
1045/*[clinic end generated code: output=d7d98252bd25a20f input=23440ecb0573fb29]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001046{
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001047 pyexpat_state *state = PyType_GetModuleState(cls);
Martin v. Löwis069dde22003-01-21 10:58:18 +00001048 enum XML_Error rc;
Brett Cannond0aeda82014-08-22 14:23:20 -04001049
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001050 rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
Martin v. Löwis069dde22003-01-21 10:58:18 +00001051 if (rc != XML_ERROR_NONE) {
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001052 return set_error(state, self, rc);
Martin v. Löwis069dde22003-01-21 10:58:18 +00001053 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001054 Py_RETURN_NONE;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001055}
Martin v. Löwisc847f402003-01-21 11:09:21 +00001056#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001057
/* Method table for the xmlparser type.  Each entry is a *_METHODDEF macro
   for one of the pyexpat.xmlparser methods implemented above; the
   UseForeignDTD entry is compiled in under the same XML_COMBINED_VERSION
   guard as its implementation.  The table ends with a {NULL, NULL}
   sentinel. */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001058static struct PyMethodDef xmlparse_methods[] = {
Brett Cannond0aeda82014-08-22 14:23:20 -04001059 PYEXPAT_XMLPARSER_PARSE_METHODDEF
1060 PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
1061 PYEXPAT_XMLPARSER_SETBASE_METHODDEF
1062 PYEXPAT_XMLPARSER_GETBASE_METHODDEF
1063 PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
1064 PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
1065 PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
Martin v. Löwisc847f402003-01-21 11:09:21 +00001066#if XML_COMBINED_VERSION >= 19505
Brett Cannond0aeda82014-08-22 14:23:20 -04001067 PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
Martin v. Löwisc847f402003-01-21 11:09:21 +00001068#endif
Brett Cannond0aeda82014-08-22 14:23:20 -04001069 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001070};
1071
1072/* ---------- */
1073
1074
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001075
Fred Drake71b63ff2002-06-28 22:29:01 +00001076/* pyexpat international encoding support.
1077 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001078*/
1079
Fred Drake71b63ff2002-06-28 22:29:01 +00001080static int
1081PyUnknownEncodingHandler(void *encodingHandlerData,
1082 const XML_Char *name,
1083 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001084{
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001085 static unsigned char template_buffer[256] = {0};
1086 PyObject* u;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001087 int i;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001088 const void *data;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001089 unsigned int kind;
Fred Drake71b63ff2002-06-28 22:29:01 +00001090
Victor Stinner9e09c262013-07-18 23:17:01 +02001091 if (PyErr_Occurred())
1092 return XML_STATUS_ERROR;
1093
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001094 if (template_buffer[1] == 0) {
1095 for (i = 0; i < 256; i++)
1096 template_buffer[i] = i;
Tim Peters63cb99e2001-02-17 18:12:50 +00001097 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001098
1099 u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
Christian Heimesb5821552013-06-29 20:43:13 +02001100 if (u == NULL || PyUnicode_READY(u)) {
Christian Heimes72172422013-06-29 21:49:27 +02001101 Py_XDECREF(u);
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001102 return XML_STATUS_ERROR;
Christian Heimesb5821552013-06-29 20:43:13 +02001103 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001104
1105 if (PyUnicode_GET_LENGTH(u) != 256) {
1106 Py_DECREF(u);
1107 PyErr_SetString(PyExc_ValueError,
1108 "multi-byte encodings are not supported");
1109 return XML_STATUS_ERROR;
1110 }
1111
1112 kind = PyUnicode_KIND(u);
1113 data = PyUnicode_DATA(u);
1114 for (i = 0; i < 256; i++) {
1115 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1116 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1117 info->map[i] = ch;
1118 else
1119 info->map[i] = -1;
1120 }
1121
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001122 info->data = NULL;
1123 info->convert = NULL;
1124 info->release = NULL;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001125 Py_DECREF(u);
1126
1127 return XML_STATUS_OK;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001128}
1129
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001130
1131static PyObject *
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001132newxmlparseobject(pyexpat_state *state, const char *encoding,
1133 const char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001134{
1135 int i;
1136 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001137
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001138 self = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
Fred Drake0582df92000-07-12 04:49:00 +00001139 if (self == NULL)
1140 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001141
Fred Drake2a3d7db2002-06-28 22:56:48 +00001142 self->buffer = NULL;
1143 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1144 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001145 self->ordered_attributes = 0;
1146 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001147 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001148 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001149 self->handlers = NULL;
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001150 self->intern = intern;
1151 Py_XINCREF(self->intern);
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001152
Christian Heimesfa535f52013-07-07 17:35:11 +02001153 /* namespace_separator is either NULL or contains one char + \0 */
1154 self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
1155 namespace_separator);
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001156 if (self->itself == NULL) {
1157 PyErr_SetString(PyExc_RuntimeError,
1158 "XML_ParserCreate failed");
1159 Py_DECREF(self);
1160 return NULL;
1161 }
Victor Stinner23ec4b52017-06-15 00:54:36 +02001162#if XML_COMBINED_VERSION >= 20100
1163 /* This feature was added upstream in libexpat 2.1.0. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001164 XML_SetHashSalt(self->itself,
Christian Heimes985ecdc2013-11-20 11:46:18 +01001165 (unsigned long)_Py_HashSecret.expat.hashsalt);
Gregory P. Smith25227712012-03-14 18:10:37 -07001166#endif
Fred Drake0582df92000-07-12 04:49:00 +00001167 XML_SetUserData(self->itself, (void *)self);
Fred Drake7c75bf22002-07-01 14:02:31 +00001168 XML_SetUnknownEncodingHandler(self->itself,
1169 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001170
Fred Drake2a3d7db2002-06-28 22:56:48 +00001171 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001172 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001173
Serhiy Storchaka1a1ff292015-02-16 13:28:22 +02001174 self->handlers = PyMem_New(PyObject *, i);
Fred Drake7c75bf22002-07-01 14:02:31 +00001175 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001176 Py_DECREF(self);
1177 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001178 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001179 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001180
Victor Stinner1b184552019-10-08 00:09:31 +02001181 PyObject_GC_Track(self);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001182 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001183}
1184
1185
1186static void
Fred Drake0582df92000-07-12 04:49:00 +00001187xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001188{
Fred Drake0582df92000-07-12 04:49:00 +00001189 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001190 PyObject_GC_UnTrack(self);
Fred Drake85d835f2001-02-08 15:39:08 +00001191 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001192 XML_ParserFree(self->itself);
1193 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001194
Fred Drake85d835f2001-02-08 15:39:08 +00001195 if (self->handlers != NULL) {
Serhiy Storchaka1ed017a2015-12-27 15:51:32 +02001196 for (i = 0; handler_info[i].name != NULL; i++)
1197 Py_CLEAR(self->handlers[i]);
Victor Stinnerb6404912013-07-07 16:21:41 +02001198 PyMem_Free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001199 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001200 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001201 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001202 PyMem_Free(self->buffer);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001203 self->buffer = NULL;
1204 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001205 Py_XDECREF(self->intern);
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001206 PyTypeObject *tp = Py_TYPE(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001207 PyObject_GC_Del(self);
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001208 Py_DECREF(tp);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001209}
1210
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001211
1212static PyObject *
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001213xmlparse_handler_getter(xmlparseobject *self, struct HandlerInfo *hi)
Fred Drake71b63ff2002-06-28 22:29:01 +00001214{
Victor Stinner28f468c2018-11-22 13:21:43 +01001215 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1216 int handlernum = (int)(hi - handler_info);
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001217 PyObject *result = self->handlers[handlernum];
1218 if (result == NULL)
1219 result = Py_None;
Fred Drake71b63ff2002-06-28 22:29:01 +00001220 Py_INCREF(result);
1221 return result;
1222}
1223
Fred Drake6f987622000-08-25 18:03:30 +00001224static int
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001225xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *hi)
Fred Drake0582df92000-07-12 04:49:00 +00001226{
Victor Stinner28f468c2018-11-22 13:21:43 +01001227 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1228 int handlernum = (int)(hi - handler_info);
Fred Drake85d835f2001-02-08 15:39:08 +00001229 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001230 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1231 return -1;
1232 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001233 if (handlernum == CharacterData) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001234 /* If we're changing the character data handler, flush all
1235 * cached data with the old handler. Not sure there's a
1236 * "right" thing to do, though, but this probably won't
1237 * happen.
1238 */
1239 if (flush_character_buffer(self) < 0)
1240 return -1;
1241 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001242
1243 xmlhandler c_handler = NULL;
1244 if (v == Py_None) {
1245 /* If this is the character data handler, and a character
1246 data handler is already active, we need to be more
1247 careful. What we can safely do is replace the existing
1248 character data handler callback function with a no-op
1249 function that will refuse to call Python. The downside
1250 is that this doesn't completely remove the character
1251 data handler from the C layer if there's any callback
1252 active, so Expat does a little more work than it
1253 otherwise would, but that's really an odd case. A more
1254 elaborate system of handlers and state could remove the
1255 C handler more effectively. */
1256 if (handlernum == CharacterData && self->in_callback)
1257 c_handler = noop_character_data_handler;
1258 v = NULL;
1259 }
1260 else if (v != NULL) {
1261 Py_INCREF(v);
1262 c_handler = handler_info[handlernum].handler;
1263 }
1264 Py_XSETREF(self->handlers[handlernum], v);
1265 handler_info[handlernum].setter(self->itself, c_handler);
1266 return 0;
1267}
1268
/* INT_GETTER(name) defines xmlparse_<name>_getter, which returns the value
   of XML_Get<name>() for the wrapped parser, cast to long, as a Python int.
   It is instantiated below for the error and current-position queries and
   undefined again afterwards. */
1269#define INT_GETTER(name) \
1270 static PyObject * \
1271 xmlparse_##name##_getter(xmlparseobject *self, void *closure) \
1272 { \
1273 return PyLong_FromLong((long) XML_Get##name(self->itself)); \
1274 }
1275INT_GETTER(ErrorCode)
1276INT_GETTER(ErrorLineNumber)
1277INT_GETTER(ErrorColumnNumber)
1278INT_GETTER(ErrorByteIndex)
1279INT_GETTER(CurrentLineNumber)
1280INT_GETTER(CurrentColumnNumber)
1281INT_GETTER(CurrentByteIndex)
1282
1283#undef INT_GETTER
1284
1285static PyObject *
1286xmlparse_buffer_text_getter(xmlparseobject *self, void *closure)
1287{
1288 return PyBool_FromLong(self->buffer != NULL);
1289}
1290
1291static int
1292xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
1293{
1294 if (v == NULL) {
1295 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1296 return -1;
1297 }
1298 int b = PyObject_IsTrue(v);
1299 if (b < 0)
1300 return -1;
1301 if (b) {
1302 if (self->buffer == NULL) {
1303 self->buffer = PyMem_Malloc(self->buffer_size);
1304 if (self->buffer == NULL) {
1305 PyErr_NoMemory();
1306 return -1;
1307 }
1308 self->buffer_used = 0;
1309 }
1310 }
1311 else if (self->buffer != NULL) {
1312 if (flush_character_buffer(self) < 0)
1313 return -1;
1314 PyMem_Free(self->buffer);
1315 self->buffer = NULL;
1316 }
1317 return 0;
1318}
1319
1320static PyObject *
1321xmlparse_buffer_size_getter(xmlparseobject *self, void *closure)
1322{
1323 return PyLong_FromLong((long) self->buffer_size);
1324}
1325
1326static int
1327xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
1328{
1329 if (v == NULL) {
1330 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1331 return -1;
1332 }
1333 long new_buffer_size;
1334 if (!PyLong_Check(v)) {
1335 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1336 return -1;
1337 }
1338
1339 new_buffer_size = PyLong_AsLong(v);
1340 if (new_buffer_size <= 0) {
1341 if (!PyErr_Occurred())
1342 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1343 return -1;
1344 }
1345
1346 /* trivial case -- no change */
1347 if (new_buffer_size == self->buffer_size) {
Fred Drake6f987622000-08-25 18:03:30 +00001348 return 0;
1349 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001350
1351 /* check maximum */
1352 if (new_buffer_size > INT_MAX) {
1353 char errmsg[100];
1354 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1355 PyErr_SetString(PyExc_ValueError, errmsg);
1356 return -1;
1357 }
1358
1359 if (self->buffer != NULL) {
1360 /* there is already a buffer */
1361 if (self->buffer_used != 0) {
1362 if (flush_character_buffer(self) < 0) {
1363 return -1;
1364 }
1365 }
1366 /* free existing buffer */
1367 PyMem_Free(self->buffer);
1368 }
1369 self->buffer = PyMem_Malloc(new_buffer_size);
1370 if (self->buffer == NULL) {
1371 PyErr_NoMemory();
1372 return -1;
1373 }
1374 self->buffer_size = new_buffer_size;
1375 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001376}
1377
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001378static PyObject *
1379xmlparse_buffer_used_getter(xmlparseobject *self, void *closure)
1380{
1381 return PyLong_FromLong((long) self->buffer_used);
1382}
1383
1384static PyObject *
1385xmlparse_namespace_prefixes_getter(xmlparseobject *self, void *closure)
1386{
1387 return PyBool_FromLong(self->ns_prefixes);
1388}
1389
1390static int
1391xmlparse_namespace_prefixes_setter(xmlparseobject *self, PyObject *v, void *closure)
1392{
1393 if (v == NULL) {
1394 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1395 return -1;
1396 }
1397 int b = PyObject_IsTrue(v);
1398 if (b < 0)
1399 return -1;
1400 self->ns_prefixes = b;
1401 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1402 return 0;
1403}
1404
1405static PyObject *
1406xmlparse_ordered_attributes_getter(xmlparseobject *self, void *closure)
1407{
1408 return PyBool_FromLong(self->ordered_attributes);
1409}
1410
1411static int
1412xmlparse_ordered_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1413{
1414 if (v == NULL) {
1415 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1416 return -1;
1417 }
1418 int b = PyObject_IsTrue(v);
1419 if (b < 0)
1420 return -1;
1421 self->ordered_attributes = b;
1422 return 0;
1423}
1424
1425static PyObject *
1426xmlparse_specified_attributes_getter(xmlparseobject *self, void *closure)
1427{
1428 return PyBool_FromLong((long) self->specified_attributes);
1429}
1430
1431static int
1432xmlparse_specified_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1433{
1434 if (v == NULL) {
1435 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1436 return -1;
1437 }
1438 int b = PyObject_IsTrue(v);
1439 if (b < 0)
1440 return -1;
1441 self->specified_attributes = b;
1442 return 0;
1443}
1444
1445static PyMemberDef xmlparse_members[] = {
1446 {"intern", T_OBJECT, offsetof(xmlparseobject, intern), READONLY, NULL},
1447 {NULL}
1448};
1449
1450#define XMLPARSE_GETTER_DEF(name) \
1451 {#name, (getter)xmlparse_##name##_getter, NULL, NULL},
1452#define XMLPARSE_GETTER_SETTER_DEF(name) \
1453 {#name, (getter)xmlparse_##name##_getter, \
1454 (setter)xmlparse_##name##_setter, NULL},
1455
1456static PyGetSetDef xmlparse_getsetlist[] = {
1457 XMLPARSE_GETTER_DEF(ErrorCode)
1458 XMLPARSE_GETTER_DEF(ErrorLineNumber)
1459 XMLPARSE_GETTER_DEF(ErrorColumnNumber)
1460 XMLPARSE_GETTER_DEF(ErrorByteIndex)
1461 XMLPARSE_GETTER_DEF(CurrentLineNumber)
1462 XMLPARSE_GETTER_DEF(CurrentColumnNumber)
1463 XMLPARSE_GETTER_DEF(CurrentByteIndex)
1464 XMLPARSE_GETTER_SETTER_DEF(buffer_size)
1465 XMLPARSE_GETTER_SETTER_DEF(buffer_text)
1466 XMLPARSE_GETTER_DEF(buffer_used)
1467 XMLPARSE_GETTER_SETTER_DEF(namespace_prefixes)
1468 XMLPARSE_GETTER_SETTER_DEF(ordered_attributes)
1469 XMLPARSE_GETTER_SETTER_DEF(specified_attributes)
1470 {NULL},
1471};
1472
1473#undef XMLPARSE_GETTER_DEF
1474#undef XMLPARSE_GETTER_SETTER_DEF
1475
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001476static int
1477xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1478{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001479 int i;
1480 for (i = 0; handler_info[i].name != NULL; i++)
1481 Py_VISIT(op->handlers[i]);
Fred Drakecde79132001-04-25 16:01:30 +00001482 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001483}
1484
1485static int
1486xmlparse_clear(xmlparseobject *op)
1487{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001488 clear_handlers(op, 0);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001489 Py_CLEAR(op->intern);
Fred Drakecde79132001-04-25 16:01:30 +00001490 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001491}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001492
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001493PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001494
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001495static PyType_Slot _xml_parse_type_spec_slots[] = {
1496 {Py_tp_dealloc, xmlparse_dealloc},
1497 {Py_tp_doc, (void *)Xmlparsetype__doc__},
1498 {Py_tp_traverse, xmlparse_traverse},
1499 {Py_tp_clear, xmlparse_clear},
1500 {Py_tp_methods, xmlparse_methods},
1501 {Py_tp_members, xmlparse_members},
1502 {Py_tp_getset, xmlparse_getsetlist},
1503 {0, 0}
1504};
1505
1506static PyType_Spec _xml_parse_type_spec = {
1507 .name = "pyexpat.xmlparser",
1508 .basicsize = sizeof(xmlparseobject),
Erlend Egeberg Aasland9746cda2021-04-30 16:04:57 +02001509 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
1510 Py_TPFLAGS_DISALLOW_INSTANTIATION),
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001511 .slots = _xml_parse_type_spec_slots,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001512};
1513
1514/* End of code for xmlparser objects */
1515/* -------------------------------------------------------- */
1516
Brett Cannond0aeda82014-08-22 14:23:20 -04001517/*[clinic input]
1518pyexpat.ParserCreate
1519
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001520 encoding: str(accept={str, NoneType}) = None
1521 namespace_separator: str(accept={str, NoneType}) = None
Brett Cannond0aeda82014-08-22 14:23:20 -04001522 intern: object = NULL
1523
1524Return a new XML parser object.
1525[clinic start generated code]*/
1526
Brett Cannond0aeda82014-08-22 14:23:20 -04001527static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001528pyexpat_ParserCreate_impl(PyObject *module, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04001529 const char *namespace_separator, PyObject *intern)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001530/*[clinic end generated code: output=295c0cf01ab1146c input=e8da8e8d7122cb5d]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001531{
Fred Drakeb91a36b2002-06-27 19:40:48 +00001532 PyObject *result;
1533 int intern_decref = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001534
Fred Drakecde79132001-04-25 16:01:30 +00001535 if (namespace_separator != NULL
1536 && strlen(namespace_separator) > 1) {
1537 PyErr_SetString(PyExc_ValueError,
1538 "namespace_separator must be at most one"
1539 " character, omitted, or None");
1540 return NULL;
1541 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001542 /* Explicitly passing None means no interning is desired.
1543 Not passing anything means that a new dictionary is used. */
1544 if (intern == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001545 intern = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001546 else if (intern == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001547 intern = PyDict_New();
1548 if (!intern)
1549 return NULL;
1550 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001551 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001552 else if (!PyDict_Check(intern)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001553 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1554 return NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001555 }
1556
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001557 pyexpat_state *state = pyexpat_get_state(module);
1558 result = newxmlparseobject(state, encoding, namespace_separator, intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001559 if (intern_decref) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001560 Py_DECREF(intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001561 }
1562 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001563}
1564
Brett Cannond0aeda82014-08-22 14:23:20 -04001565/*[clinic input]
1566pyexpat.ErrorString
1567
1568 code: long
1569 /
1570
1571Returns string error for given number.
1572[clinic start generated code]*/
1573
Brett Cannond0aeda82014-08-22 14:23:20 -04001574static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001575pyexpat_ErrorString_impl(PyObject *module, long code)
1576/*[clinic end generated code: output=2feae50d166f2174 input=cc67de010d9e62b3]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001577{
Fred Drake0582df92000-07-12 04:49:00 +00001578 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001579}
1580
1581/* List of methods defined in the module */
1582
1583static struct PyMethodDef pyexpat_methods[] = {
Brett Cannond0aeda82014-08-22 14:23:20 -04001584 PYEXPAT_PARSERCREATE_METHODDEF
1585 PYEXPAT_ERRORSTRING_METHODDEF
1586 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001587};
1588
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001589/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001590
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001591PyDoc_STRVAR(pyexpat_module_documentation,
1592"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001593
Fred Drakecde79132001-04-25 16:01:30 +00001594/* Initialization function for the module */
1595
/* Name of the extension module; may be predefined by the build. */
#if !defined(MODULE_NAME)
#  define MODULE_NAME "pyexpat"
#endif
1599
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001600static int init_handler_descrs(pyexpat_state *state)
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001601{
1602 int i;
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001603 assert(!PyType_HasFeature(state->xml_parse_type, Py_TPFLAGS_VALID_VERSION_TAG));
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001604 for (i = 0; handler_info[i].name != NULL; i++) {
1605 struct HandlerInfo *hi = &handler_info[i];
1606 hi->getset.name = hi->name;
1607 hi->getset.get = (getter)xmlparse_handler_getter;
1608 hi->getset.set = (setter)xmlparse_handler_setter;
1609 hi->getset.closure = &handler_info[i];
1610
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001611 PyObject *descr = PyDescr_NewGetSet(state->xml_parse_type, &hi->getset);
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001612 if (descr == NULL)
1613 return -1;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001614
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001615 if (PyDict_SetDefault(state->xml_parse_type->tp_dict, PyDescr_NAME(descr), descr) == NULL) {
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001616 Py_DECREF(descr);
1617 return -1;
1618 }
1619 Py_DECREF(descr);
1620 }
1621 return 0;
1622}
1623
Mohamed Koubaa71842182020-11-04 11:37:23 -06001624static PyObject *
1625add_submodule(PyObject *mod, const char *fullname)
Fred Drake0582df92000-07-12 04:49:00 +00001626{
Mohamed Koubaa71842182020-11-04 11:37:23 -06001627 const char *name = strrchr(fullname, '.') + 1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001628
Mohamed Koubaa71842182020-11-04 11:37:23 -06001629 PyObject *submodule = PyModule_New(fullname);
1630 if (submodule == NULL) {
Martin v. Löwis1a214512008-06-11 05:26:20 +00001631 return NULL;
Christian Heimes7a5457b2016-09-09 00:13:35 +02001632 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001633
Mohamed Koubaa71842182020-11-04 11:37:23 -06001634 PyObject *mod_name = PyUnicode_FromString(fullname);
1635 if (mod_name == NULL) {
1636 Py_DECREF(submodule);
1637 return NULL;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001638 }
Fred Drake6f987622000-08-25 18:03:30 +00001639
Mohamed Koubaa71842182020-11-04 11:37:23 -06001640 if (_PyImport_SetModule(mod_name, submodule) < 0) {
1641 Py_DECREF(submodule);
1642 Py_DECREF(mod_name);
1643 return NULL;
1644 }
1645 Py_DECREF(mod_name);
1646
1647 /* gives away the reference to the submodule */
1648 if (PyModule_AddObject(mod, name, submodule) < 0) {
1649 Py_DECREF(submodule);
1650 return NULL;
1651 }
1652
1653 return submodule;
1654}
1655
1656static int
1657add_error(PyObject *errors_module, PyObject *codes_dict,
1658 PyObject *rev_codes_dict, const char *name, int value)
1659{
1660 const char *error_string = XML_ErrorString(value);
1661 if (PyModule_AddStringConstant(errors_module, name, error_string) < 0) {
1662 return -1;
1663 }
1664
1665 PyObject *num = PyLong_FromLong(value);
1666 if (num == NULL) {
1667 return -1;
1668 }
1669
1670 if (PyDict_SetItemString(codes_dict, error_string, num) < 0) {
1671 Py_DECREF(num);
1672 return -1;
1673 }
1674
1675 PyObject *str = PyUnicode_FromString(error_string);
1676 if (str == NULL) {
1677 Py_DECREF(num);
1678 return -1;
1679 }
1680
1681 int res = PyDict_SetItem(rev_codes_dict, num, str);
1682 Py_DECREF(str);
1683 Py_DECREF(num);
1684 if (res < 0) {
1685 return -1;
1686 }
1687
1688 return 0;
1689}
1690
1691static int
1692add_errors_module(PyObject *mod)
1693{
1694 PyObject *errors_module = add_submodule(mod, MODULE_NAME ".errors");
1695 if (errors_module == NULL) {
1696 return -1;
1697 }
1698
1699 PyObject *codes_dict = PyDict_New();
1700 PyObject *rev_codes_dict = PyDict_New();
Georg Brandlb4dac712010-10-15 14:46:48 +00001701 if (codes_dict == NULL || rev_codes_dict == NULL) {
Mohamed Koubaa71842182020-11-04 11:37:23 -06001702 goto error;
Georg Brandlb4dac712010-10-15 14:46:48 +00001703 }
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001704
Mohamed Koubaa71842182020-11-04 11:37:23 -06001705#define ADD_CONST(name) do { \
1706 if (add_error(errors_module, codes_dict, rev_codes_dict, \
1707 #name, name) < 0) { \
1708 goto error; \
1709 } \
1710 } while(0)
Fred Drake7bd9f412000-07-04 23:51:31 +00001711
Mohamed Koubaa71842182020-11-04 11:37:23 -06001712 ADD_CONST(XML_ERROR_NO_MEMORY);
1713 ADD_CONST(XML_ERROR_SYNTAX);
1714 ADD_CONST(XML_ERROR_NO_ELEMENTS);
1715 ADD_CONST(XML_ERROR_INVALID_TOKEN);
1716 ADD_CONST(XML_ERROR_UNCLOSED_TOKEN);
1717 ADD_CONST(XML_ERROR_PARTIAL_CHAR);
1718 ADD_CONST(XML_ERROR_TAG_MISMATCH);
1719 ADD_CONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1720 ADD_CONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1721 ADD_CONST(XML_ERROR_PARAM_ENTITY_REF);
1722 ADD_CONST(XML_ERROR_UNDEFINED_ENTITY);
1723 ADD_CONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1724 ADD_CONST(XML_ERROR_ASYNC_ENTITY);
1725 ADD_CONST(XML_ERROR_BAD_CHAR_REF);
1726 ADD_CONST(XML_ERROR_BINARY_ENTITY_REF);
1727 ADD_CONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1728 ADD_CONST(XML_ERROR_MISPLACED_XML_PI);
1729 ADD_CONST(XML_ERROR_UNKNOWN_ENCODING);
1730 ADD_CONST(XML_ERROR_INCORRECT_ENCODING);
1731 ADD_CONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1732 ADD_CONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1733 ADD_CONST(XML_ERROR_NOT_STANDALONE);
1734 ADD_CONST(XML_ERROR_UNEXPECTED_STATE);
1735 ADD_CONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1736 ADD_CONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1737 ADD_CONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
Fred Drake283b6702004-08-04 22:28:16 +00001738 /* Added in Expat 1.95.7. */
Mohamed Koubaa71842182020-11-04 11:37:23 -06001739 ADD_CONST(XML_ERROR_UNBOUND_PREFIX);
Fred Drake283b6702004-08-04 22:28:16 +00001740 /* Added in Expat 1.95.8. */
Mohamed Koubaa71842182020-11-04 11:37:23 -06001741 ADD_CONST(XML_ERROR_UNDECLARING_PREFIX);
1742 ADD_CONST(XML_ERROR_INCOMPLETE_PE);
1743 ADD_CONST(XML_ERROR_XML_DECL);
1744 ADD_CONST(XML_ERROR_TEXT_DECL);
1745 ADD_CONST(XML_ERROR_PUBLICID);
1746 ADD_CONST(XML_ERROR_SUSPENDED);
1747 ADD_CONST(XML_ERROR_NOT_SUSPENDED);
1748 ADD_CONST(XML_ERROR_ABORTED);
1749 ADD_CONST(XML_ERROR_FINISHED);
1750 ADD_CONST(XML_ERROR_SUSPEND_PE);
1751#undef ADD_CONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001752
Georg Brandlb4dac712010-10-15 14:46:48 +00001753 if (PyModule_AddStringConstant(errors_module, "__doc__",
1754 "Constants used to describe "
Mohamed Koubaa71842182020-11-04 11:37:23 -06001755 "error conditions.") < 0) {
1756 goto error;
1757 }
Fred Drake85d835f2001-02-08 15:39:08 +00001758
Mohamed Koubaa71842182020-11-04 11:37:23 -06001759 Py_INCREF(codes_dict);
1760 if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0) {
1761 Py_DECREF(codes_dict);
1762 goto error;
1763 }
1764 Py_CLEAR(codes_dict);
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001765
Mohamed Koubaa71842182020-11-04 11:37:23 -06001766 Py_INCREF(rev_codes_dict);
1767 if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0) {
1768 Py_DECREF(rev_codes_dict);
1769 goto error;
1770 }
1771 Py_CLEAR(rev_codes_dict);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001772
Mohamed Koubaa71842182020-11-04 11:37:23 -06001773 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001774
Mohamed Koubaa71842182020-11-04 11:37:23 -06001775error:
1776 Py_XDECREF(codes_dict);
1777 Py_XDECREF(rev_codes_dict);
1778 return -1;
1779}
1780
1781static int
1782add_model_module(PyObject *mod)
1783{
1784 PyObject *model_module = add_submodule(mod, MODULE_NAME ".model");
1785 if (model_module == NULL) {
1786 return -1;
1787 }
1788
1789#define MYCONST(c) do { \
1790 if (PyModule_AddIntConstant(model_module, #c, c) < 0) { \
1791 return -1; \
1792 } \
1793 } while(0)
1794
1795 if (PyModule_AddStringConstant(
1796 model_module, "__doc__",
1797 "Constants used to interpret content model information.") < 0) {
1798 return -1;
1799 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001800
Fred Drake85d835f2001-02-08 15:39:08 +00001801 MYCONST(XML_CTYPE_EMPTY);
1802 MYCONST(XML_CTYPE_ANY);
1803 MYCONST(XML_CTYPE_MIXED);
1804 MYCONST(XML_CTYPE_NAME);
1805 MYCONST(XML_CTYPE_CHOICE);
1806 MYCONST(XML_CTYPE_SEQ);
1807
1808 MYCONST(XML_CQUANT_NONE);
1809 MYCONST(XML_CQUANT_OPT);
1810 MYCONST(XML_CQUANT_REP);
1811 MYCONST(XML_CQUANT_PLUS);
1812#undef MYCONST
Mohamed Koubaa71842182020-11-04 11:37:23 -06001813 return 0;
1814}
Fredrik Lundhc3345042005-12-13 19:49:55 +00001815
#if XML_COMBINED_VERSION > 19505
/* Add a "features" attribute to `mod`: a list of (name, value) tuples, one
   per entry returned by XML_GetFeatureList() up to the XML_FEATURE_END
   terminator.

   Returns 0 on success, -1 with an exception set on failure. */
static int
add_features(PyObject *mod)
{
    PyObject *list = PyList_New(0);
    if (list == NULL) {
        return -1;
    }

    const XML_Feature *features = XML_GetFeatureList();
    for (size_t i = 0; features[i].feature != XML_FEATURE_END; ++i) {
        /* XML_Feature.value is a long, so it is passed with the "l" format
           unit; the cast keeps argument and format unit in agreement. */
        PyObject *item = Py_BuildValue("sl", features[i].name,
                                       (long)features[i].value);
        if (item == NULL) {
            goto error;
        }
        int ok = PyList_Append(list, item);
        Py_DECREF(item);
        if (ok < 0) {
            goto error;
        }
    }

    /* On success the module takes over the reference to the list. */
    if (PyModule_AddObject(mod, "features", list) < 0) {
        goto error;
    }
    return 0;

error:
    Py_DECREF(list);
    return -1;
}
#endif
1848
Hai Shi7c83eaa2021-01-03 23:47:44 +08001849static void
1850pyexpat_destructor(PyObject *op)
1851{
1852 void *p = PyCapsule_GetPointer(op, PyExpat_CAPSULE_NAME);
1853 PyMem_Free(p);
1854}
1855
Mohamed Koubaa71842182020-11-04 11:37:23 -06001856static int
1857pyexpat_exec(PyObject *mod)
1858{
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001859 pyexpat_state *state = pyexpat_get_state(mod);
1860 state->xml_parse_type = (PyTypeObject *)PyType_FromModuleAndSpec(
1861 mod, &_xml_parse_type_spec, NULL);
1862
1863 if (state->xml_parse_type == NULL) {
Mohamed Koubaa71842182020-11-04 11:37:23 -06001864 return -1;
1865 }
1866
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001867 if (init_handler_descrs(state) < 0) {
1868 return -1;
1869 }
1870 state->error = PyErr_NewException("xml.parsers.expat.ExpatError",
1871 NULL, NULL);
1872 if (state->error == NULL) {
Mohamed Koubaa71842182020-11-04 11:37:23 -06001873 return -1;
1874 }
1875
1876 /* Add some symbolic constants to the module */
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001877
1878 if (PyModule_AddObjectRef(mod, "error", state->error) < 0) {
Mohamed Koubaa71842182020-11-04 11:37:23 -06001879 return -1;
1880 }
1881
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001882 if (PyModule_AddObjectRef(mod, "ExpatError", state->error) < 0) {
Mohamed Koubaa71842182020-11-04 11:37:23 -06001883 return -1;
1884 }
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001885
1886 if (PyModule_AddObjectRef(mod, "XMLParserType",
1887 (PyObject *) state->xml_parse_type) < 0) {
Mohamed Koubaa71842182020-11-04 11:37:23 -06001888 return -1;
1889 }
1890
1891 if (PyModule_AddStringConstant(mod, "EXPAT_VERSION",
1892 XML_ExpatVersion()) < 0) {
1893 return -1;
1894 }
1895 {
1896 XML_Expat_Version info = XML_ExpatVersionInfo();
1897 PyObject *versionInfo = Py_BuildValue("(iii)",
1898 info.major,
1899 info.minor,
1900 info.micro);
1901 if (PyModule_AddObject(mod, "version_info", versionInfo) < 0) {
1902 Py_DECREF(versionInfo);
1903 return -1;
1904 }
1905 }
1906 /* XXX When Expat supports some way of figuring out how it was
1907 compiled, this should check and set native_encoding
1908 appropriately.
1909 */
1910 if (PyModule_AddStringConstant(mod, "native_encoding", "UTF-8") < 0) {
1911 return -1;
1912 }
1913
1914 if (add_errors_module(mod) < 0) {
1915 return -1;
1916 }
1917
1918 if (add_model_module(mod) < 0) {
1919 return -1;
1920 }
1921
1922#if XML_COMBINED_VERSION > 19505
1923 if (add_features(mod) < 0) {
1924 return -1;
1925 }
1926#endif
1927
1928#define MYCONST(c) do { \
1929 if (PyModule_AddIntConstant(mod, #c, c) < 0) { \
1930 return -1; \
1931 } \
1932 } while(0)
1933
1934 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1935 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1936 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
1937#undef MYCONST
1938
Hai Shi7c83eaa2021-01-03 23:47:44 +08001939 struct PyExpat_CAPI *capi = PyMem_Calloc(1, sizeof(struct PyExpat_CAPI));
1940 if (capi == NULL) {
1941 PyErr_NoMemory();
1942 return -1;
1943 }
Fredrik Lundhc3345042005-12-13 19:49:55 +00001944 /* initialize pyexpat dispatch table */
Hai Shi7c83eaa2021-01-03 23:47:44 +08001945 capi->size = sizeof(*capi);
1946 capi->magic = PyExpat_CAPI_MAGIC;
1947 capi->MAJOR_VERSION = XML_MAJOR_VERSION;
1948 capi->MINOR_VERSION = XML_MINOR_VERSION;
1949 capi->MICRO_VERSION = XML_MICRO_VERSION;
1950 capi->ErrorString = XML_ErrorString;
1951 capi->GetErrorCode = XML_GetErrorCode;
1952 capi->GetErrorColumnNumber = XML_GetErrorColumnNumber;
1953 capi->GetErrorLineNumber = XML_GetErrorLineNumber;
1954 capi->Parse = XML_Parse;
1955 capi->ParserCreate_MM = XML_ParserCreate_MM;
1956 capi->ParserFree = XML_ParserFree;
1957 capi->SetCharacterDataHandler = XML_SetCharacterDataHandler;
1958 capi->SetCommentHandler = XML_SetCommentHandler;
1959 capi->SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1960 capi->SetElementHandler = XML_SetElementHandler;
1961 capi->SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1962 capi->SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1963 capi->SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1964 capi->SetUserData = XML_SetUserData;
1965 capi->SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
1966 capi->SetEncoding = XML_SetEncoding;
1967 capi->DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
Christian Heimescb5778f2018-09-18 14:38:58 +02001968#if XML_COMBINED_VERSION >= 20100
Hai Shi7c83eaa2021-01-03 23:47:44 +08001969 capi->SetHashSalt = XML_SetHashSalt;
Christian Heimescb5778f2018-09-18 14:38:58 +02001970#else
Hai Shi7c83eaa2021-01-03 23:47:44 +08001971 capi->SetHashSalt = NULL;
Christian Heimescb5778f2018-09-18 14:38:58 +02001972#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001973
Benjamin Petersonb173f782009-05-05 22:31:58 +00001974 /* export using capsule */
Hai Shi7c83eaa2021-01-03 23:47:44 +08001975 PyObject *capi_object = PyCapsule_New(capi, PyExpat_CAPSULE_NAME,
1976 pyexpat_destructor);
Mohamed Koubaa71842182020-11-04 11:37:23 -06001977 if (capi_object == NULL) {
Hai Shi7c83eaa2021-01-03 23:47:44 +08001978 PyMem_Free(capi);
Mohamed Koubaa71842182020-11-04 11:37:23 -06001979 return -1;
1980 }
1981
1982 if (PyModule_AddObject(mod, "expat_CAPI", capi_object) < 0) {
1983 Py_DECREF(capi_object);
1984 return -1;
1985 }
1986
1987 return 0;
1988}
1989
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001990static int
1991pyexpat_traverse(PyObject *module, visitproc visit, void *arg)
1992{
1993 pyexpat_state *state = pyexpat_get_state(module);
1994 Py_VISIT(state->xml_parse_type);
1995 Py_VISIT(state->error);
1996 return 0;
1997}
1998
1999static int
2000pyexpat_clear(PyObject *module)
2001{
2002 pyexpat_state *state = pyexpat_get_state(module);
2003 Py_CLEAR(state->xml_parse_type);
2004 Py_CLEAR(state->error);
2005 return 0;
2006}
2007
2008static void
2009pyexpat_free(void *module)
2010{
2011 pyexpat_clear((PyObject *)module);
2012}
2013
2014static PyModuleDef_Slot pyexpat_slots[] = {
2015 {Py_mod_exec, pyexpat_exec},
2016 {0, NULL}
2017};
2018
Mohamed Koubaa71842182020-11-04 11:37:23 -06002019static struct PyModuleDef pyexpatmodule = {
2020 PyModuleDef_HEAD_INIT,
2021 .m_name = MODULE_NAME,
2022 .m_doc = pyexpat_module_documentation,
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06002023 .m_size = sizeof(pyexpat_state),
Mohamed Koubaa71842182020-11-04 11:37:23 -06002024 .m_methods = pyexpat_methods,
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06002025 .m_slots = pyexpat_slots,
2026 .m_traverse = pyexpat_traverse,
2027 .m_clear = pyexpat_clear,
2028 .m_free = pyexpat_free
Mohamed Koubaa71842182020-11-04 11:37:23 -06002029};
2030
2031PyMODINIT_FUNC
2032PyInit_pyexpat(void)
2033{
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06002034 return PyModuleDef_Init(&pyexpatmodule);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002035}
2036
Fred Drake6f987622000-08-25 18:03:30 +00002037static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00002038clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00002039{
Fred Drakecde79132001-04-25 16:01:30 +00002040 int i = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002041
Fred Drake71b63ff2002-06-28 22:29:01 +00002042 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00002043 if (initial)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002044 self->handlers[i] = NULL;
2045 else {
Serhiy Storchaka1ed017a2015-12-27 15:51:32 +02002046 Py_CLEAR(self->handlers[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002047 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00002048 }
Fred Drakecde79132001-04-25 16:01:30 +00002049 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002050}
2051
Tim Peters0c322792002-07-17 16:49:03 +00002052static struct HandlerInfo handler_info[] = {
Serhiy Storchaka55f82492018-10-19 18:00:51 +03002053
2054#define HANDLER_INFO(name) \
2055 {#name, (xmlhandlersetter)XML_Set##name, (xmlhandler)my_##name},
2056
2057 HANDLER_INFO(StartElementHandler)
2058 HANDLER_INFO(EndElementHandler)
2059 HANDLER_INFO(ProcessingInstructionHandler)
2060 HANDLER_INFO(CharacterDataHandler)
2061 HANDLER_INFO(UnparsedEntityDeclHandler)
2062 HANDLER_INFO(NotationDeclHandler)
2063 HANDLER_INFO(StartNamespaceDeclHandler)
2064 HANDLER_INFO(EndNamespaceDeclHandler)
2065 HANDLER_INFO(CommentHandler)
2066 HANDLER_INFO(StartCdataSectionHandler)
2067 HANDLER_INFO(EndCdataSectionHandler)
2068 HANDLER_INFO(DefaultHandler)
2069 HANDLER_INFO(DefaultHandlerExpand)
2070 HANDLER_INFO(NotStandaloneHandler)
2071 HANDLER_INFO(ExternalEntityRefHandler)
2072 HANDLER_INFO(StartDoctypeDeclHandler)
2073 HANDLER_INFO(EndDoctypeDeclHandler)
2074 HANDLER_INFO(EntityDeclHandler)
2075 HANDLER_INFO(XmlDeclHandler)
2076 HANDLER_INFO(ElementDeclHandler)
2077 HANDLER_INFO(AttlistDeclHandler)
Martin v. Löwisc847f402003-01-21 11:09:21 +00002078#if XML_COMBINED_VERSION >= 19504
Serhiy Storchaka55f82492018-10-19 18:00:51 +03002079 HANDLER_INFO(SkippedEntityHandler)
Martin v. Löwisc847f402003-01-21 11:09:21 +00002080#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002081
Serhiy Storchaka55f82492018-10-19 18:00:51 +03002082#undef HANDLER_INFO
2083
Fred Drake0582df92000-07-12 04:49:00 +00002084 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002085};