blob: a13d340a3ea0f096bcbf78ce9abe66e24045b991 [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Victor Stinner4a21e572020-04-15 02:35:41 +02004#include "structmember.h" // PyMemberDef
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00005#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00006#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00007
Fredrik Lundhc3345042005-12-13 19:49:55 +00008#include "pyexpat.h"
9
Brett Cannond0aeda82014-08-22 14:23:20 -040010/* Do not emit Clinic output to a file as that wreaks havoc with conditionally
11 included methods. */
12/*[clinic input]
13module pyexpat
14[clinic start generated code]*/
15/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/
16
/* Expat version folded into one integer (major*10000 + minor*100 + micro)
 * so that it can be compared in preprocessor conditionals such as
 * `#if XML_COMBINED_VERSION >= 19504`.
 */
Martin v. Löwisc847f402003-01-21 11:09:21 +000017#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
18
/* Expat memory-handling suite built from PyObject_Malloc, PyObject_Realloc
 * and PyObject_Free.
 * NOTE(review): presumably handed to Expat when a parser is created -- the
 * use site is outside this part of the file; confirm.
 */
Christian Heimesfa535f52013-07-07 17:35:11 +020019static XML_Memory_Handling_Suite ExpatMemoryHandler = {
20 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
21
/* One constant per kind of callback.  The values are used as indices into
 * xmlparseobject.handlers (e.g. self->handlers[CharacterData]), and the
 * constant names are pasted into trampoline names by RC_HANDLER
 * (my_<NAME>Handler).
 *
 * SkippedEntity exists only when XML_COMBINED_VERSION >= 19504.
 * NOTE(review): _DummyDecl looks like an end-of-list sentinel; it is not
 * referenced in the visible part of the file -- confirm.
 */
Fred Drake0582df92000-07-12 04:49:00 +000022enum HandlerTypes {
23 StartElement,
24 EndElement,
25 ProcessingInstruction,
26 CharacterData,
27 UnparsedEntityDecl,
28 NotationDecl,
29 StartNamespaceDecl,
30 EndNamespaceDecl,
31 Comment,
32 StartCdataSection,
33 EndCdataSection,
34 Default,
35 DefaultHandlerExpand,
36 NotStandalone,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +000037 ExternalEntityRef,
38 StartDoctypeDecl,
39 EndDoctypeDecl,
Fred Drake85d835f2001-02-08 15:39:08 +000040 EntityDecl,
41 XmlDecl,
42 ElementDecl,
43 AttlistDecl,
Martin v. Löwisc847f402003-01-21 11:09:21 +000044#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +000045 SkippedEntity,
Martin v. Löwisc847f402003-01-21 11:09:21 +000046#endif
Fred Drake85d835f2001-02-08 15:39:08 +000047 _DummyDecl
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000048};
49
/* Per-module state, retrieved with pyexpat_get_state() or
 * PyType_GetModuleState().
 *
 * error: the exception class that set_error() instantiates and raises.
 * xml_parse_type: NOTE(review): presumably the xmlparser type object; it is
 *     not used in the visible part of the file -- confirm.
 */
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -060050typedef struct {
51 PyTypeObject *xml_parse_type;
52 PyObject *error;
53} pyexpat_state;
54
55static inline pyexpat_state*
56pyexpat_get_state(PyObject *module)
57{
58 void *state = PyModule_GetState(module);
59 assert(state != NULL);
60 return (pyexpat_state *)state;
61}
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000062
63/* ----------------------------------------------------- */
64
65/* Declarations for objects of type xmlparser */
66
/* Instance layout of a parser object: the wrapped Expat parser plus the
 * Python-side options, the optional character-data buffer, the string
 * interning dictionary and the table of Python callbacks.
 */
67typedef struct {
Fred Drake0582df92000-07-12 04:49:00 +000068 PyObject_HEAD
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000069
/* The underlying Expat parser; passed to every XML_* call. */
Fred Drake0582df92000-07-12 04:49:00 +000070 XML_Parser itself;
Fred Drake85d835f2001-02-08 15:39:08 +000071 int ordered_attributes; /* Return attributes as a list. */
72 int specified_attributes; /* Report only specified attributes. */
Fred Drakebd6101c2001-02-14 18:29:45 +000073 int in_callback; /* Is a callback active? */
Martin v. Löwis069dde22003-01-21 10:58:18 +000074 int ns_prefixes; /* Namespace-triplets mode? */
Fred Drake2a3d7db2002-06-28 22:56:48 +000075 XML_Char *buffer; /* Buffer used when accumulating characters */
76 /* NULL if not enabled */
77 int buffer_size; /* Size of buffer, in XML_Char units */
78 int buffer_used; /* Buffer units in use */
/* May be NULL, in which case string_intern() returns uninterned strings. */
Fred Drakeb91a36b2002-06-27 19:40:48 +000079 PyObject *intern; /* Dictionary to intern strings */
/* Python callables indexed by enum HandlerTypes; NULL means "not set". */
Fred Drake0582df92000-07-12 04:49:00 +000080 PyObject **handlers;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000081} xmlparseobject;
82
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030083#include "clinic/pyexpat.c.h"
84
/* NOTE(review): presumably the default size (in XML_Char units) of
 * xmlparseobject.buffer; the use site is outside this part of the file --
 * confirm.
 */
Fred Drake2a3d7db2002-06-28 22:56:48 +000085#define CHARACTER_DATA_BUFFER_SIZE 8192
86
/* Generic signature used to store an Expat "set handler" function:
 * it receives the parser and an untyped handler pointer.
 */
Fred Drake117ac852002-09-24 16:24:54 +000087typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
/* Untyped pointer to a C-level callback. */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000088typedef void* xmlhandler;
89
/* Description of one handler kind: its name, the function that installs
 * it, the C callback, and a PyGetSetDef.
 * NOTE(review): the table is filled in elsewhere in the file; presumably
 * `setter` is the matching XML_Set*Handler function and `handler` the
 * my_*Handler trampoline -- confirm against the initializer.
 */
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +000090struct HandlerInfo {
Fred Drake0582df92000-07-12 04:49:00 +000091 const char *name;
92 xmlhandlersetter setter;
93 xmlhandler handler;
Serhiy Storchaka55f82492018-10-19 18:00:51 +030094 PyGetSetDef getset;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000095};
96
/* Tentative definition of the handler table, with room for 64 entries. */
Jeremy Hylton938ace62002-07-17 16:30:39 +000097static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000098
Fred Drakebd6101c2001-02-14 18:29:45 +000099/* Set an integer attribute on the error object; return true on success,
100 * false on an exception.
101 */
102static int
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200103set_error_attr(PyObject *err, const char *name, int value)
Fred Drakebd6101c2001-02-14 18:29:45 +0000104{
Christian Heimes217cfd12007-12-02 14:31:20 +0000105 PyObject *v = PyLong_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +0000106
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000107 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
108 Py_XDECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +0000109 return 0;
110 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000111 Py_DECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +0000112 return 1;
113}
114
115/* Build and set an Expat exception, including positioning
116 * information. Always returns NULL.
117 */
Fred Drake85d835f2001-02-08 15:39:08 +0000118static PyObject *
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600119set_error(pyexpat_state *state, xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000120{
121 PyObject *err;
Victor Stinner499dfcf2011-03-21 13:26:24 +0100122 PyObject *buffer;
Fred Drake85d835f2001-02-08 15:39:08 +0000123 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000124 int lineno = XML_GetErrorLineNumber(parser);
125 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000126
Victor Stinner499dfcf2011-03-21 13:26:24 +0100127 buffer = PyUnicode_FromFormat("%s: line %i, column %i",
128 XML_ErrorString(code), lineno, column);
129 if (buffer == NULL)
130 return NULL;
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600131 err = PyObject_CallOneArg(state->error, buffer);
Victor Stinner499dfcf2011-03-21 13:26:24 +0100132 Py_DECREF(buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000133 if ( err != NULL
134 && set_error_attr(err, "code", code)
135 && set_error_attr(err, "offset", column)
136 && set_error_attr(err, "lineno", lineno)) {
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600137 PyErr_SetObject(state->error, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000138 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000139 Py_XDECREF(err);
Fred Drake85d835f2001-02-08 15:39:08 +0000140 return NULL;
141}
142
Fred Drake71b63ff2002-06-28 22:29:01 +0000143static int
144have_handler(xmlparseobject *self, int type)
145{
146 PyObject *handler = self->handlers[type];
147 return handler != NULL;
148}
149
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000150/* Convert a string of XML_Chars into a Unicode string.
151 Returns None if str is a null pointer. */
152
Fred Drake0582df92000-07-12 04:49:00 +0000153static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000154conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000155{
Fred Drake71b63ff2002-06-28 22:29:01 +0000156 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000157 and hence in UTF-8. */
158 /* UTF-8 from Expat, Unicode desired */
159 if (str == NULL) {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200160 Py_RETURN_NONE;
Fred Drake0582df92000-07-12 04:49:00 +0000161 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000162 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000163}
164
Fred Drake0582df92000-07-12 04:49:00 +0000165static PyObject *
166conv_string_len_to_unicode(const XML_Char *str, int len)
167{
Fred Drake71b63ff2002-06-28 22:29:01 +0000168 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000169 and hence in UTF-8. */
170 /* UTF-8 from Expat, Unicode desired */
171 if (str == NULL) {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200172 Py_RETURN_NONE;
Fred Drake0582df92000-07-12 04:49:00 +0000173 }
Fred Drake6f987622000-08-25 18:03:30 +0000174 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000175}
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000176
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000177/* Callback routines */
178
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000179static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000180
Martin v. Löwis069dde22003-01-21 10:58:18 +0000181/* This handler is used when an error has been detected, in the hope
182 that actual parsing can be terminated early. This will only help
183 if an external entity reference is encountered. */
184static int
185error_external_entity_ref_handler(XML_Parser parser,
186 const XML_Char *context,
187 const XML_Char *base,
188 const XML_Char *systemId,
189 const XML_Char *publicId)
190{
191 return 0;
192}
193
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000194/* Dummy character data handler used when an error (exception) has
195 been detected, and the actual parsing can be terminated early.
196 This is needed since character data handler can't be safely removed
197 from within the character data handler, but can be replaced. It is
198 used only from the character data handler trampoline, and must be
199 used right after `flag_error()` is called. */
200static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201noop_character_data_handler(void *userData, const XML_Char *data, int len)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000202{
203 /* Do nothing. */
204}
205
Fred Drake6f987622000-08-25 18:03:30 +0000206static void
207flag_error(xmlparseobject *self)
208{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000209 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000210 XML_SetExternalEntityRefHandler(self->itself,
211 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000212}
213
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000214static PyObject*
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200215call_with_frame(const char *funcname, int lineno, PyObject* func, PyObject* args,
Fred Drake39689c52004-08-13 03:12:57 +0000216 xmlparseobject *self)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000217{
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200218 PyObject *res;
Fred Drakebd6101c2001-02-14 18:29:45 +0000219
Jeroen Demeyer1dbd0842019-07-11 17:57:32 +0200220 res = PyObject_Call(func, args, NULL);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000221 if (res == NULL) {
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200222 _PyTraceback_Add(funcname, __FILE__, lineno);
Fred Drake39689c52004-08-13 03:12:57 +0000223 XML_StopParser(self->itself, XML_FALSE);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000224 }
Fred Drakebd6101c2001-02-14 18:29:45 +0000225 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000226}
227
Fred Drakeb91a36b2002-06-27 19:40:48 +0000228static PyObject*
229string_intern(xmlparseobject *self, const char* str)
230{
Guido van Rossum4ca94712007-07-23 17:42:32 +0000231 PyObject *result = conv_string_to_unicode(str);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000232 PyObject *value;
Neal Norwitz484d9a42005-09-30 04:46:49 +0000233 /* result can be NULL if the unicode conversion failed. */
234 if (!result)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000236 if (!self->intern)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 return result;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200238 value = PyDict_GetItemWithError(self->intern, result);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000239 if (!value) {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200240 if (!PyErr_Occurred() &&
241 PyDict_SetItem(self->intern, result, result) == 0)
242 {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000243 return result;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200244 }
Zackery Spytz68def052018-10-19 00:57:38 -0600245 else {
246 Py_DECREF(result);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000247 return NULL;
Zackery Spytz68def052018-10-19 00:57:38 -0600248 }
Fred Drakeb91a36b2002-06-27 19:40:48 +0000249 }
250 Py_INCREF(value);
251 Py_DECREF(result);
252 return value;
253}
254
Fred Drake2a3d7db2002-06-28 22:56:48 +0000255/* Return 0 on success, -1 on exception.
256 * flag_error() will be called before return if needed.
257 */
258static int
259call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
260{
261 PyObject *args;
262 PyObject *temp;
263
Georg Brandlc01537f2010-10-15 16:26:08 +0000264 if (!have_handler(self, CharacterData))
265 return -1;
266
Fred Drake2a3d7db2002-06-28 22:56:48 +0000267 args = PyTuple_New(1);
268 if (args == NULL)
269 return -1;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000270 temp = (conv_string_len_to_unicode(buffer, len));
Fred Drake2a3d7db2002-06-28 22:56:48 +0000271 if (temp == NULL) {
272 Py_DECREF(args);
273 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000274 XML_SetCharacterDataHandler(self->itself,
275 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000276 return -1;
277 }
278 PyTuple_SET_ITEM(args, 0, temp);
279 /* temp is now a borrowed reference; consider it unused. */
280 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200281 temp = call_with_frame("CharacterData", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000282 self->handlers[CharacterData], args, self);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000283 /* temp is an owned reference again, or NULL */
284 self->in_callback = 0;
285 Py_DECREF(args);
286 if (temp == NULL) {
287 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000288 XML_SetCharacterDataHandler(self->itself,
289 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000290 return -1;
291 }
292 Py_DECREF(temp);
293 return 0;
294}
295
296static int
297flush_character_buffer(xmlparseobject *self)
298{
299 int rc;
300 if (self->buffer == NULL || self->buffer_used == 0)
301 return 0;
302 rc = call_character_handler(self, self->buffer, self->buffer_used);
303 self->buffer_used = 0;
304 return rc;
305}
306
307static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000308my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000309{
310 xmlparseobject *self = (xmlparseobject *) userData;
Victor Stinner9e09c262013-07-18 23:17:01 +0200311
312 if (PyErr_Occurred())
313 return;
314
Fred Drake2a3d7db2002-06-28 22:56:48 +0000315 if (self->buffer == NULL)
316 call_character_handler(self, data, len);
317 else {
318 if ((self->buffer_used + len) > self->buffer_size) {
319 if (flush_character_buffer(self) < 0)
320 return;
321 /* handler might have changed; drop the rest on the floor
322 * if there isn't a handler anymore
323 */
324 if (!have_handler(self, CharacterData))
325 return;
326 }
327 if (len > self->buffer_size) {
328 call_character_handler(self, data, len);
329 self->buffer_used = 0;
330 }
331 else {
332 memcpy(self->buffer + self->buffer_used,
333 data, len * sizeof(XML_Char));
334 self->buffer_used += len;
335 }
336 }
337}
338
Fred Drake85d835f2001-02-08 15:39:08 +0000339static void
340my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000341 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000342{
343 xmlparseobject *self = (xmlparseobject *)userData;
344
Fred Drake71b63ff2002-06-28 22:29:01 +0000345 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000346 PyObject *container, *rv, *args;
347 int i, max;
348
Victor Stinner9e09c262013-07-18 23:17:01 +0200349 if (PyErr_Occurred())
350 return;
351
Fred Drake2a3d7db2002-06-28 22:56:48 +0000352 if (flush_character_buffer(self) < 0)
353 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000354 /* Set max to the number of slots filled in atts[]; max/2 is
355 * the number of attributes we need to process.
356 */
357 if (self->specified_attributes) {
358 max = XML_GetSpecifiedAttributeCount(self->itself);
359 }
360 else {
361 max = 0;
362 while (atts[max] != NULL)
363 max += 2;
364 }
365 /* Build the container. */
366 if (self->ordered_attributes)
367 container = PyList_New(max);
368 else
369 container = PyDict_New();
370 if (container == NULL) {
371 flag_error(self);
372 return;
373 }
374 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000375 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000376 PyObject *v;
377 if (n == NULL) {
378 flag_error(self);
379 Py_DECREF(container);
380 return;
381 }
Guido van Rossum4ca94712007-07-23 17:42:32 +0000382 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
Fred Drake85d835f2001-02-08 15:39:08 +0000383 if (v == NULL) {
384 flag_error(self);
385 Py_DECREF(container);
386 Py_DECREF(n);
387 return;
388 }
389 if (self->ordered_attributes) {
390 PyList_SET_ITEM(container, i, n);
391 PyList_SET_ITEM(container, i+1, v);
392 }
393 else if (PyDict_SetItem(container, n, v)) {
394 flag_error(self);
395 Py_DECREF(n);
396 Py_DECREF(v);
Zackery Spytz68def052018-10-19 00:57:38 -0600397 Py_DECREF(container);
Fred Drake85d835f2001-02-08 15:39:08 +0000398 return;
399 }
400 else {
401 Py_DECREF(n);
402 Py_DECREF(v);
403 }
404 }
Neal Norwitz484d9a42005-09-30 04:46:49 +0000405 args = string_intern(self, name);
Fred Drake85d835f2001-02-08 15:39:08 +0000406 if (args == NULL) {
407 Py_DECREF(container);
408 return;
409 }
Zackery Spytz68def052018-10-19 00:57:38 -0600410 args = Py_BuildValue("(NN)", args, container);
411 if (args == NULL) {
412 return;
413 }
Fred Drake85d835f2001-02-08 15:39:08 +0000414 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000415 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200416 rv = call_with_frame("StartElement", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000417 self->handlers[StartElement], args, self);
Fred Drakebd6101c2001-02-14 18:29:45 +0000418 self->in_callback = 0;
419 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000420 if (rv == NULL) {
421 flag_error(self);
422 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000423 }
Fred Drake85d835f2001-02-08 15:39:08 +0000424 Py_DECREF(rv);
425 }
426}
427
/* Generate the C trampoline my_<NAME>Handler for handler kind NAME.
 *
 *   RC            return type of the generated function
 *   NAME          enum HandlerTypes constant; also pasted into the function
 *                 name and stringified for the traceback entry
 *   PARAMS        parenthesised parameter list of the generated function
 *   INIT          statements emitted after the local declarations
 *   PARAM_FORMAT  parenthesised argument list for Py_BuildValue
 *   CONVERSION    statements run on the handler's result `rv` before it is
 *                 released
 *   RETURN        expression returned on every path (empty for void)
 *   GETUSERDATA   expression that yields the xmlparseobject pointer
 *
 * The generated function does nothing but `return RETURN` when no handler
 * of this kind is set, when a Python exception is pending, or when flushing
 * the character buffer fails.  Otherwise it builds the argument tuple, calls
 * the Python handler with in_callback set, and calls flag_error() if
 * building the arguments or the call fails.
 */
428#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
429 RETURN, GETUSERDATA) \
430static RC \
431my_##NAME##Handler PARAMS {\
432 xmlparseobject *self = GETUSERDATA ; \
433 PyObject *args = NULL; \
434 PyObject *rv = NULL; \
435 INIT \
436\
Fred Drake71b63ff2002-06-28 22:29:01 +0000437 if (have_handler(self, NAME)) { \
Victor Stinner9e09c262013-07-18 23:17:01 +0200438 if (PyErr_Occurred()) \
439 return RETURN; \
Fred Drake2a3d7db2002-06-28 22:56:48 +0000440 if (flush_character_buffer(self) < 0) \
441 return RETURN; \
Fred Drake85d835f2001-02-08 15:39:08 +0000442 args = Py_BuildValue PARAM_FORMAT ;\
Martin v. Löwis1d7c55f2001-11-10 13:57:55 +0000443 if (!args) { flag_error(self); return RETURN;} \
Fred Drakebd6101c2001-02-14 18:29:45 +0000444 self->in_callback = 1; \
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200445 rv = call_with_frame(#NAME,__LINE__, \
Fred Drake39689c52004-08-13 03:12:57 +0000446 self->handlers[NAME], args, self); \
Fred Drakebd6101c2001-02-14 18:29:45 +0000447 self->in_callback = 0; \
Fred Drake85d835f2001-02-08 15:39:08 +0000448 Py_DECREF(args); \
449 if (rv == NULL) { \
450 flag_error(self); \
451 return RETURN; \
452 } \
453 CONVERSION \
454 Py_DECREF(rv); \
455 } \
456 return RETURN; \
457}
458
/* RC_HANDLER specialisation for callbacks that return nothing.  The parser
 * object is taken from the `userData` parameter, so PARAMS must declare it.
 */
Fred Drake6f987622000-08-25 18:03:30 +0000459#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000460 RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
461 (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000462
/* RC_HANDLER specialisation for callbacks that return an int.  The result
 * `rc` starts at 0 and is replaced by PyLong_AsLong() of the Python
 * handler's return value; it stays 0 on every early-return path.
 * NOTE(review): a failing PyLong_AsLong() is not checked here, so a
 * non-integer return value would yield -1 with an exception pending.
 */
Fred Drake6f987622000-08-25 18:03:30 +0000463#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000464 RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
465 rc = PyLong_AsLong(rv);, rc, \
466 (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000467
/* Trampolines generated with VOID_HANDLER.  Each forwards the arguments of
 * its Expat callback to the Python handler of the same kind.  In the format
 * strings, "N" passes an object that has already been built (here by
 * string_intern() or conv_string_len_to_unicode()), "O&" converts a C string
 * with the converter given before it, and "i" passes a C int.
 */

/* EndElement(name): the element name is interned. */
Fred Drake71b63ff2002-06-28 22:29:01 +0000468VOID_HANDLER(EndElement,
469 (void *userData, const XML_Char *name),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000470 ("(N)", string_intern(self, name)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000471
/* ProcessingInstruction(target, data): only the target is interned. */
Fred Drake6f987622000-08-25 18:03:30 +0000472VOID_HANDLER(ProcessingInstruction,
Fred Drake71b63ff2002-06-28 22:29:01 +0000473 (void *userData,
474 const XML_Char *target,
Fred Drake85d835f2001-02-08 15:39:08 +0000475 const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000476 ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000477
/* UnparsedEntityDecl(entityName, base, systemId, publicId, notationName):
 * all five strings are interned.
 */
Fred Drake6f987622000-08-25 18:03:30 +0000478VOID_HANDLER(UnparsedEntityDecl,
Fred Drake71b63ff2002-06-28 22:29:01 +0000479 (void *userData,
Fred Drake85d835f2001-02-08 15:39:08 +0000480 const XML_Char *entityName,
481 const XML_Char *base,
482 const XML_Char *systemId,
483 const XML_Char *publicId,
484 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000485 ("(NNNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000486 string_intern(self, entityName), string_intern(self, base),
487 string_intern(self, systemId), string_intern(self, publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000488 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000489
/* EntityDecl(entityName, is_parameter_entity, value, base, systemId,
 * publicId, notationName): `value` is converted using its explicit length
 * `value_length`; the other strings are interned.
 */
Fred Drake85d835f2001-02-08 15:39:08 +0000490VOID_HANDLER(EntityDecl,
491 (void *userData,
492 const XML_Char *entityName,
493 int is_parameter_entity,
494 const XML_Char *value,
495 int value_length,
496 const XML_Char *base,
497 const XML_Char *systemId,
498 const XML_Char *publicId,
499 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000500 ("NiNNNNN",
501 string_intern(self, entityName), is_parameter_entity,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000502 (conv_string_len_to_unicode(value, value_length)),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000503 string_intern(self, base), string_intern(self, systemId),
504 string_intern(self, publicId),
505 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000506
/* XmlDecl(version, encoding, standalone): neither string is interned. */
507VOID_HANDLER(XmlDecl,
508 (void *userData,
509 const XML_Char *version,
510 const XML_Char *encoding,
511 int standalone),
512 ("(O&O&i)",
Guido van Rossum4ca94712007-07-23 17:42:32 +0000513 conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
Fred Drake85d835f2001-02-08 15:39:08 +0000514 standalone))
515
516static PyObject *
517conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000518 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000519{
520 PyObject *result = NULL;
521 PyObject *children = PyTuple_New(model->numchildren);
522 int i;
523
524 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000525 assert(model->numchildren < INT_MAX);
526 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000527 PyObject *child = conv_content_model(&model->children[i],
528 conv_string);
529 if (child == NULL) {
530 Py_XDECREF(children);
531 return NULL;
532 }
533 PyTuple_SET_ITEM(children, i, child);
534 }
535 result = Py_BuildValue("(iiO&N)",
536 model->type, model->quant,
537 conv_string,model->name, children);
538 }
539 return result;
540}
541
Fred Drake06dd8cf2003-02-02 03:54:17 +0000542static void
543my_ElementDeclHandler(void *userData,
544 const XML_Char *name,
545 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000546{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000547 xmlparseobject *self = (xmlparseobject *)userData;
548 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000549
Fred Drake06dd8cf2003-02-02 03:54:17 +0000550 if (have_handler(self, ElementDecl)) {
551 PyObject *rv = NULL;
552 PyObject *modelobj, *nameobj;
553
Victor Stinner9e09c262013-07-18 23:17:01 +0200554 if (PyErr_Occurred())
555 return;
556
Fred Drake06dd8cf2003-02-02 03:54:17 +0000557 if (flush_character_buffer(self) < 0)
558 goto finally;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000559 modelobj = conv_content_model(model, (conv_string_to_unicode));
Fred Drake06dd8cf2003-02-02 03:54:17 +0000560 if (modelobj == NULL) {
561 flag_error(self);
562 goto finally;
563 }
564 nameobj = string_intern(self, name);
565 if (nameobj == NULL) {
566 Py_DECREF(modelobj);
567 flag_error(self);
568 goto finally;
569 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000570 args = Py_BuildValue("NN", nameobj, modelobj);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000571 if (args == NULL) {
Fred Drake06dd8cf2003-02-02 03:54:17 +0000572 flag_error(self);
573 goto finally;
574 }
575 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200576 rv = call_with_frame("ElementDecl", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000577 self->handlers[ElementDecl], args, self);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000578 self->in_callback = 0;
579 if (rv == NULL) {
580 flag_error(self);
581 goto finally;
582 }
583 Py_DECREF(rv);
584 }
585 finally:
586 Py_XDECREF(args);
587 XML_FreeContentModel(self->itself, model);
588 return;
589}
Fred Drake85d835f2001-02-08 15:39:08 +0000590
/* More trampolines generated with VOID_HANDLER / INT_HANDLER / RC_HANDLER.
 * In the format strings, "N" passes an object that has already been built,
 * "O&" converts a C string with the converter given before it, "i" passes a
 * C int, and "()" passes no arguments.
 */

/* AttlistDecl(elname, attname, att_type, dflt, isrequired): the element and
 * attribute names are interned; the type and default are not.
 */
591VOID_HANDLER(AttlistDecl,
592 (void *userData,
593 const XML_Char *elname,
594 const XML_Char *attname,
595 const XML_Char *att_type,
596 const XML_Char *dflt,
597 int isrequired),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000598 ("(NNO&O&i)",
599 string_intern(self, elname), string_intern(self, attname),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000600 conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
Fred Drake85d835f2001-02-08 15:39:08 +0000601 isrequired))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000602
/* SkippedEntity(entityName, is_parameter_entity): compiled only when
 * XML_COMBINED_VERSION >= 19504, matching the enum constant.
 */
Martin v. Löwisc847f402003-01-21 11:09:21 +0000603#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +0000604VOID_HANDLER(SkippedEntity,
605 (void *userData,
606 const XML_Char *entityName,
607 int is_parameter_entity),
608 ("Ni",
609 string_intern(self, entityName), is_parameter_entity))
Martin v. Löwisc847f402003-01-21 11:09:21 +0000610#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +0000611
/* NotationDecl(notationName, base, systemId, publicId): all interned. */
Fred Drake71b63ff2002-06-28 22:29:01 +0000612VOID_HANDLER(NotationDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000613 (void *userData,
614 const XML_Char *notationName,
615 const XML_Char *base,
616 const XML_Char *systemId,
617 const XML_Char *publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000618 ("(NNNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000619 string_intern(self, notationName), string_intern(self, base),
620 string_intern(self, systemId), string_intern(self, publicId)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000621
/* StartNamespaceDecl(prefix, uri): both interned. */
Fred Drake6f987622000-08-25 18:03:30 +0000622VOID_HANDLER(StartNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000623 (void *userData,
624 const XML_Char *prefix,
625 const XML_Char *uri),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000626 ("(NN)",
627 string_intern(self, prefix), string_intern(self, uri)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000628
/* EndNamespaceDecl(prefix): interned. */
Fred Drake6f987622000-08-25 18:03:30 +0000629VOID_HANDLER(EndNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000630 (void *userData,
631 const XML_Char *prefix),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000632 ("(N)", string_intern(self, prefix)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000633
/* Comment(data): not interned. */
Fred Drake6f987622000-08-25 18:03:30 +0000634VOID_HANDLER(Comment,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000635 (void *userData, const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000636 ("(O&)", conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000637
/* StartCdataSection() / EndCdataSection(): no arguments. */
Fred Drake6f987622000-08-25 18:03:30 +0000638VOID_HANDLER(StartCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000639 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000640 ("()"))
Fred Drake71b63ff2002-06-28 22:29:01 +0000641
Fred Drake6f987622000-08-25 18:03:30 +0000642VOID_HANDLER(EndCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000643 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000644 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000645
/* Default(data) / DefaultHandlerExpand(data): the text is converted using
 * its explicit length `len`.
 */
Fred Drake6f987622000-08-25 18:03:30 +0000646VOID_HANDLER(Default,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000647 (void *userData, const XML_Char *s, int len),
648 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000649
Fred Drake6f987622000-08-25 18:03:30 +0000650VOID_HANDLER(DefaultHandlerExpand,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000651 (void *userData, const XML_Char *s, int len),
652 ("(N)", (conv_string_len_to_unicode(s,len))))
/* Alias for the function generated just above, which is named
 * my_DefaultHandlerExpandHandler.
 * NOTE(review): presumably needed by code that builds trampoline names from
 * the handler name elsewhere in the file -- confirm.
 */
Serhiy Storchaka55f82492018-10-19 18:00:51 +0300653#define my_DefaultHandlerExpand my_DefaultHandlerExpandHandler
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000654
/* NotStandalone(): returns the Python handler's result converted to int,
 * or 0 if the handler was not called or failed.
 */
Fred Drake71b63ff2002-06-28 22:29:01 +0000655INT_HANDLER(NotStandalone,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000656 (void *userData),
657 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000658
/* ExternalEntityRef(context, base, systemId, publicId): uses RC_HANDLER
 * directly because this callback receives the XML_Parser instead of
 * userData; the parser object is obtained with XML_GetUserData(parser).
 * Returns the handler's result converted to int, 0 by default.
 */
Fred Drake6f987622000-08-25 18:03:30 +0000659RC_HANDLER(int, ExternalEntityRef,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000660 (XML_Parser parser,
661 const XML_Char *context,
662 const XML_Char *base,
663 const XML_Char *systemId,
664 const XML_Char *publicId),
665 int rc=0;,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000666 ("(O&NNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000667 conv_string_to_unicode ,context, string_intern(self, base),
668 string_intern(self, systemId), string_intern(self, publicId)),
669 rc = PyLong_AsLong(rv);, rc,
670 XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000671
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000672/* XXX UnknownEncodingHandler */
673
/* StartDoctypeDecl(doctypeName, sysid, pubid, has_internal_subset): the
 * three strings are interned.
 */
Fred Drake85d835f2001-02-08 15:39:08 +0000674VOID_HANDLER(StartDoctypeDecl,
675 (void *userData, const XML_Char *doctypeName,
676 const XML_Char *sysid, const XML_Char *pubid,
677 int has_internal_subset),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000678 ("(NNNi)", string_intern(self, doctypeName),
679 string_intern(self, sysid), string_intern(self, pubid),
Fred Drake85d835f2001-02-08 15:39:08 +0000680 has_internal_subset))
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000681
/* EndDoctypeDecl(): no arguments. */
682VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000683
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000684/* ---------------------------------------------------------------- */
Brett Cannond0aeda82014-08-22 14:23:20 -0400685/*[clinic input]
686class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype"
687[clinic start generated code]*/
688/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2393162385232e1c]*/
689
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000690
Fred Drake71b63ff2002-06-28 22:29:01 +0000691static PyObject *
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600692get_parse_result(pyexpat_state *state, xmlparseobject *self, int rv)
Fred Drake71b63ff2002-06-28 22:29:01 +0000693{
694 if (PyErr_Occurred()) {
695 return NULL;
696 }
697 if (rv == 0) {
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600698 return set_error(state, self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000699 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000700 if (flush_character_buffer(self) < 0) {
701 return NULL;
702 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000703 return PyLong_FromLong(rv);
Fred Drake71b63ff2002-06-28 22:29:01 +0000704}
705
/* Largest number of bytes passed to a single XML_Parse() call.  Parse()
 * feeds longer input in pieces of this size and statically asserts that
 * the value fits in an int.
 */
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200706#define MAX_CHUNK_SIZE (1 << 20)
707
Brett Cannond0aeda82014-08-22 14:23:20 -0400708/*[clinic input]
709pyexpat.xmlparser.Parse
710
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600711 cls: defining_class
Brett Cannond0aeda82014-08-22 14:23:20 -0400712 data: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200713 isfinal: bool(accept={int}) = False
Brett Cannond0aeda82014-08-22 14:23:20 -0400714 /
715
716Parse XML data.
717
718`isfinal' should be true at end of input.
719[clinic start generated code]*/
720
Brett Cannond0aeda82014-08-22 14:23:20 -0400721static PyObject *
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600722pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyTypeObject *cls,
723 PyObject *data, int isfinal)
724/*[clinic end generated code: output=8faffe07fe1f862a input=fc97f833558ca715]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400725{
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200726 const char *s;
727 Py_ssize_t slen;
728 Py_buffer view;
729 int rc;
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600730 pyexpat_state *state = PyType_GetModuleState(cls);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000731
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200732 if (PyUnicode_Check(data)) {
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200733 view.buf = NULL;
Serhiy Storchaka36b365c2013-02-04 18:28:01 +0200734 s = PyUnicode_AsUTF8AndSize(data, &slen);
735 if (s == NULL)
736 return NULL;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200737 /* Explicitly set UTF-8 encoding. Return code ignored. */
738 (void)XML_SetEncoding(self->itself, "utf-8");
739 }
740 else {
741 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
742 return NULL;
743 s = view.buf;
744 slen = view.len;
745 }
746
747 while (slen > MAX_CHUNK_SIZE) {
748 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
749 if (!rc)
750 goto done;
751 s += MAX_CHUNK_SIZE;
752 slen -= MAX_CHUNK_SIZE;
753 }
Serhiy Storchakafad85aa2015-11-07 15:42:38 +0200754 Py_BUILD_ASSERT(MAX_CHUNK_SIZE <= INT_MAX);
755 assert(slen <= INT_MAX);
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +0300756 rc = XML_Parse(self->itself, s, (int)slen, isfinal);
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200757
758done:
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600759 if (view.buf != NULL) {
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200760 PyBuffer_Release(&view);
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600761 }
762 return get_parse_result(state, self, rc);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000763}
764
Fred Drakeca1f4262000-09-21 20:10:23 +0000765/* File reading copied from cPickle */
766
/* Number of bytes requested from the file object's read() per iteration
   of ParseFile(). */
#define BUF_SIZE 2048
768
Fred Drake0582df92000-07-12 04:49:00 +0000769static int
770readinst(char *buf, int buf_size, PyObject *meth)
771{
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000772 PyObject *str;
773 Py_ssize_t len;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200774 const char *ptr;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000775
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000776 str = PyObject_CallFunction(meth, "n", buf_size);
Martin v. Löwis9171f022004-10-13 19:50:11 +0000777 if (str == NULL)
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000778 goto error;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000779
Christian Heimes72b710a2008-05-26 13:28:38 +0000780 if (PyBytes_Check(str))
781 ptr = PyBytes_AS_STRING(str);
Christian Heimes9c4756e2008-05-26 13:22:05 +0000782 else if (PyByteArray_Check(str))
783 ptr = PyByteArray_AS_STRING(str);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000784 else {
Fred Drake71b63ff2002-06-28 22:29:01 +0000785 PyErr_Format(PyExc_TypeError,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000786 "read() did not return a bytes object (type=%.400s)",
Christian Heimes90aa7642007-12-19 02:45:37 +0000787 Py_TYPE(str)->tp_name);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000788 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000789 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000790 len = Py_SIZE(str);
Fred Drake0582df92000-07-12 04:49:00 +0000791 if (len > buf_size) {
792 PyErr_Format(PyExc_ValueError,
793 "read() returned too much data: "
Victor Stinner9d6f9362011-01-04 22:00:04 +0000794 "%i bytes requested, %zd returned",
Fred Drake0582df92000-07-12 04:49:00 +0000795 buf_size, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000796 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000797 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000798 memcpy(buf, ptr, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000799 Py_DECREF(str);
800 /* len <= buf_size <= INT_MAX */
Victor Stinner0fcab4a2011-01-04 12:59:15 +0000801 return (int)len;
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000802
803error:
804 Py_XDECREF(str);
805 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000806}
807
Brett Cannond0aeda82014-08-22 14:23:20 -0400808/*[clinic input]
809pyexpat.xmlparser.ParseFile
810
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600811 cls: defining_class
Brett Cannond0aeda82014-08-22 14:23:20 -0400812 file: object
813 /
814
815Parse XML data from file-like object.
816[clinic start generated code]*/
817
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000818static PyObject *
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600819pyexpat_xmlparser_ParseFile_impl(xmlparseobject *self, PyTypeObject *cls,
820 PyObject *file)
821/*[clinic end generated code: output=34780a094c8ca3ae input=ba4bc9c541684793]*/
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000822{
Fred Drake0582df92000-07-12 04:49:00 +0000823 int rv = 1;
Fred Drake0582df92000-07-12 04:49:00 +0000824 PyObject *readmethod = NULL;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200825 _Py_IDENTIFIER(read);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000826
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600827 pyexpat_state *state = PyType_GetModuleState(cls);
828
Serhiy Storchaka41c57b32019-09-01 12:03:39 +0300829 if (_PyObject_LookupAttrId(file, &PyId_read, &readmethod) < 0) {
830 return NULL;
831 }
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000832 if (readmethod == NULL) {
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000833 PyErr_SetString(PyExc_TypeError,
834 "argument must have 'read' attribute");
835 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000836 }
837 for (;;) {
838 int bytes_read;
839 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
Fred Drake7b6caff2003-07-21 17:05:56 +0000840 if (buf == NULL) {
Fred Drakef239c6d2003-07-21 17:22:43 +0000841 Py_XDECREF(readmethod);
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600842 return get_parse_result(state, self, 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000843 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000844
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000845 bytes_read = readinst(buf, BUF_SIZE, readmethod);
846 if (bytes_read < 0) {
847 Py_DECREF(readmethod);
848 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000849 }
850 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000851 if (PyErr_Occurred()) {
852 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000853 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000854 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000855
Fred Drake0582df92000-07-12 04:49:00 +0000856 if (!rv || bytes_read == 0)
857 break;
858 }
Fred Drake7b6caff2003-07-21 17:05:56 +0000859 Py_XDECREF(readmethod);
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600860 return get_parse_result(state, self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000861}
862
Brett Cannond0aeda82014-08-22 14:23:20 -0400863/*[clinic input]
864pyexpat.xmlparser.SetBase
865
866 base: str
867 /
868
869Set the base URL for the parser.
870[clinic start generated code]*/
871
Brett Cannond0aeda82014-08-22 14:23:20 -0400872static PyObject *
873pyexpat_xmlparser_SetBase_impl(xmlparseobject *self, const char *base)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300874/*[clinic end generated code: output=c212ddceb607b539 input=c684e5de895ee1a8]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400875{
Fred Drake0582df92000-07-12 04:49:00 +0000876 if (!XML_SetBase(self->itself, base)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000877 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000878 }
Brett Cannond0aeda82014-08-22 14:23:20 -0400879 Py_RETURN_NONE;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000880}
881
Brett Cannond0aeda82014-08-22 14:23:20 -0400882/*[clinic input]
883pyexpat.xmlparser.GetBase
884
885Return base URL string for the parser.
886[clinic start generated code]*/
887
Brett Cannond0aeda82014-08-22 14:23:20 -0400888static PyObject *
889pyexpat_xmlparser_GetBase_impl(xmlparseobject *self)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300890/*[clinic end generated code: output=2886cb21f9a8739a input=918d71c38009620e]*/
Fred Drake0582df92000-07-12 04:49:00 +0000891{
Fred Drake0582df92000-07-12 04:49:00 +0000892 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000893}
894
Brett Cannond0aeda82014-08-22 14:23:20 -0400895/*[clinic input]
896pyexpat.xmlparser.GetInputContext
897
898Return the untranslated text of the input that caused the current event.
899
900If the event was generated by a large amount of text (such as a start tag
901for an element with many attributes), not all of the text may be available.
902[clinic start generated code]*/
903
Brett Cannond0aeda82014-08-22 14:23:20 -0400904static PyObject *
905pyexpat_xmlparser_GetInputContext_impl(xmlparseobject *self)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300906/*[clinic end generated code: output=a88026d683fc22cc input=034df8712db68379]*/
Fred Drakebd6101c2001-02-14 18:29:45 +0000907{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000908 if (self->in_callback) {
909 int offset, size;
910 const char *buffer
911 = XML_GetInputContext(self->itself, &offset, &size);
Fred Drakebd6101c2001-02-14 18:29:45 +0000912
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000913 if (buffer != NULL)
Christian Heimes72b710a2008-05-26 13:28:38 +0000914 return PyBytes_FromStringAndSize(buffer + offset,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000915 size - offset);
916 else
917 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000918 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000919 else
920 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000921}
Fred Drakebd6101c2001-02-14 18:29:45 +0000922
Brett Cannond0aeda82014-08-22 14:23:20 -0400923/*[clinic input]
924pyexpat.xmlparser.ExternalEntityParserCreate
925
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600926 cls: defining_class
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700927 context: str(accept={str, NoneType})
Brett Cannond0aeda82014-08-22 14:23:20 -0400928 encoding: str = NULL
929 /
930
931Create a parser for parsing an external entity based on the information passed to the ExternalEntityRefHandler.
932[clinic start generated code]*/
933
Brett Cannond0aeda82014-08-22 14:23:20 -0400934static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400935pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600936 PyTypeObject *cls,
Larry Hastings89964c42015-04-14 18:07:59 -0400937 const char *context,
938 const char *encoding)
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600939/*[clinic end generated code: output=01d4472b49cb3f92 input=ec70c6b9e6e9619a]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400940{
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000941 xmlparseobject *new_parser;
942 int i;
943
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600944 pyexpat_state *state = PyType_GetModuleState(cls);
945
946 new_parser = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
947 if (new_parser == NULL) {
Fred Drake85d835f2001-02-08 15:39:08 +0000948 return NULL;
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -0600949 }
950
Fred Drake2a3d7db2002-06-28 22:56:48 +0000951 new_parser->buffer_size = self->buffer_size;
952 new_parser->buffer_used = 0;
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000953 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000954 new_parser->ordered_attributes = self->ordered_attributes;
955 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +0000956 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +0000957 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000958 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000959 encoding);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000960 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000961 new_parser->intern = self->intern;
962 Py_XINCREF(new_parser->intern);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000963
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000964 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +0200965 new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000966 if (new_parser->buffer == NULL) {
967 Py_DECREF(new_parser);
968 return PyErr_NoMemory();
969 }
970 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000971 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +0000972 Py_DECREF(new_parser);
973 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000974 }
975
976 XML_SetUserData(new_parser->itself, (void *)new_parser);
977
978 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +0000979 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +0000980 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000981
Serhiy Storchaka1a1ff292015-02-16 13:28:22 +0200982 new_parser->handlers = PyMem_New(PyObject *, i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000983 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +0000984 Py_DECREF(new_parser);
985 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000986 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000987 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000988
989 /* then copy handlers from self */
990 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +0000991 PyObject *handler = self->handlers[i];
992 if (handler != NULL) {
993 Py_INCREF(handler);
994 new_parser->handlers[i] = handler;
995 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +0000996 handler_info[i].handler);
997 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000998 }
Victor Stinner1b184552019-10-08 00:09:31 +0200999
1000 PyObject_GC_Track(new_parser);
Fred Drake71b63ff2002-06-28 22:29:01 +00001001 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001002}
1003
Brett Cannond0aeda82014-08-22 14:23:20 -04001004/*[clinic input]
1005pyexpat.xmlparser.SetParamEntityParsing
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001006
Brett Cannond0aeda82014-08-22 14:23:20 -04001007 flag: int
1008 /
1009
1010Controls parsing of parameter entities (including the external DTD subset).
1011
1012Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,
1013XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and
1014XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag
1015was successful.
1016[clinic start generated code]*/
1017
Brett Cannond0aeda82014-08-22 14:23:20 -04001018static PyObject *
1019pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject *self, int flag)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001020/*[clinic end generated code: output=18668ee8e760d64c input=8aea19b4b15e9af1]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001021{
1022 flag = XML_SetParamEntityParsing(self->itself, flag);
Christian Heimes217cfd12007-12-02 14:31:20 +00001023 return PyLong_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001024}
1025
Martin v. Löwisc847f402003-01-21 11:09:21 +00001026
1027#if XML_COMBINED_VERSION >= 19505
Brett Cannond0aeda82014-08-22 14:23:20 -04001028/*[clinic input]
1029pyexpat.xmlparser.UseForeignDTD
1030
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001031 cls: defining_class
Brett Cannond0aeda82014-08-22 14:23:20 -04001032 flag: bool = True
1033 /
1034
1035Allows the application to provide an artificial external subset if one is not specified as part of the document instance.
1036
1037This readily allows the use of a 'default' document type controlled by the
1038application, while still getting the advantage of providing document type
1039information to the parser. 'flag' defaults to True if not provided.
1040[clinic start generated code]*/
1041
Brett Cannond0aeda82014-08-22 14:23:20 -04001042static PyObject *
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001043pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls,
1044 int flag)
1045/*[clinic end generated code: output=d7d98252bd25a20f input=23440ecb0573fb29]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001046{
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001047 pyexpat_state *state = PyType_GetModuleState(cls);
Martin v. Löwis069dde22003-01-21 10:58:18 +00001048 enum XML_Error rc;
Brett Cannond0aeda82014-08-22 14:23:20 -04001049
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001050 rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
Martin v. Löwis069dde22003-01-21 10:58:18 +00001051 if (rc != XML_ERROR_NONE) {
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001052 return set_error(state, self, rc);
Martin v. Löwis069dde22003-01-21 10:58:18 +00001053 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001054 Py_RETURN_NONE;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001055}
Martin v. Löwisc847f402003-01-21 11:09:21 +00001056#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001057
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001058static struct PyMethodDef xmlparse_methods[] = {
Brett Cannond0aeda82014-08-22 14:23:20 -04001059 PYEXPAT_XMLPARSER_PARSE_METHODDEF
1060 PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
1061 PYEXPAT_XMLPARSER_SETBASE_METHODDEF
1062 PYEXPAT_XMLPARSER_GETBASE_METHODDEF
1063 PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
1064 PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
1065 PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
Martin v. Löwisc847f402003-01-21 11:09:21 +00001066#if XML_COMBINED_VERSION >= 19505
Brett Cannond0aeda82014-08-22 14:23:20 -04001067 PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
Martin v. Löwisc847f402003-01-21 11:09:21 +00001068#endif
Brett Cannond0aeda82014-08-22 14:23:20 -04001069 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001070};
1071
1072/* ---------- */
1073
1074
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001075
Fred Drake71b63ff2002-06-28 22:29:01 +00001076/* pyexpat international encoding support.
1077 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001078*/
1079
Fred Drake71b63ff2002-06-28 22:29:01 +00001080static int
1081PyUnknownEncodingHandler(void *encodingHandlerData,
1082 const XML_Char *name,
1083 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001084{
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001085 static unsigned char template_buffer[256] = {0};
1086 PyObject* u;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001087 int i;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001088 const void *data;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001089 unsigned int kind;
Fred Drake71b63ff2002-06-28 22:29:01 +00001090
Victor Stinner9e09c262013-07-18 23:17:01 +02001091 if (PyErr_Occurred())
1092 return XML_STATUS_ERROR;
1093
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001094 if (template_buffer[1] == 0) {
1095 for (i = 0; i < 256; i++)
1096 template_buffer[i] = i;
Tim Peters63cb99e2001-02-17 18:12:50 +00001097 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001098
1099 u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
Christian Heimesb5821552013-06-29 20:43:13 +02001100 if (u == NULL || PyUnicode_READY(u)) {
Christian Heimes72172422013-06-29 21:49:27 +02001101 Py_XDECREF(u);
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001102 return XML_STATUS_ERROR;
Christian Heimesb5821552013-06-29 20:43:13 +02001103 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001104
1105 if (PyUnicode_GET_LENGTH(u) != 256) {
1106 Py_DECREF(u);
1107 PyErr_SetString(PyExc_ValueError,
1108 "multi-byte encodings are not supported");
1109 return XML_STATUS_ERROR;
1110 }
1111
1112 kind = PyUnicode_KIND(u);
1113 data = PyUnicode_DATA(u);
1114 for (i = 0; i < 256; i++) {
1115 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1116 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1117 info->map[i] = ch;
1118 else
1119 info->map[i] = -1;
1120 }
1121
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001122 info->data = NULL;
1123 info->convert = NULL;
1124 info->release = NULL;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001125 Py_DECREF(u);
1126
1127 return XML_STATUS_OK;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001128}
1129
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001130
1131static PyObject *
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001132newxmlparseobject(pyexpat_state *state, const char *encoding,
1133 const char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001134{
1135 int i;
1136 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001137
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001138 self = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
Fred Drake0582df92000-07-12 04:49:00 +00001139 if (self == NULL)
1140 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001141
Fred Drake2a3d7db2002-06-28 22:56:48 +00001142 self->buffer = NULL;
1143 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1144 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001145 self->ordered_attributes = 0;
1146 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001147 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001148 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001149 self->handlers = NULL;
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001150 self->intern = intern;
1151 Py_XINCREF(self->intern);
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001152
Christian Heimesfa535f52013-07-07 17:35:11 +02001153 /* namespace_separator is either NULL or contains one char + \0 */
1154 self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
1155 namespace_separator);
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001156 if (self->itself == NULL) {
1157 PyErr_SetString(PyExc_RuntimeError,
1158 "XML_ParserCreate failed");
1159 Py_DECREF(self);
1160 return NULL;
1161 }
Victor Stinner23ec4b52017-06-15 00:54:36 +02001162#if XML_COMBINED_VERSION >= 20100
1163 /* This feature was added upstream in libexpat 2.1.0. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001164 XML_SetHashSalt(self->itself,
Christian Heimes985ecdc2013-11-20 11:46:18 +01001165 (unsigned long)_Py_HashSecret.expat.hashsalt);
Gregory P. Smith25227712012-03-14 18:10:37 -07001166#endif
Fred Drake0582df92000-07-12 04:49:00 +00001167 XML_SetUserData(self->itself, (void *)self);
Fred Drake7c75bf22002-07-01 14:02:31 +00001168 XML_SetUnknownEncodingHandler(self->itself,
1169 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001170
Fred Drake2a3d7db2002-06-28 22:56:48 +00001171 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001172 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001173
Serhiy Storchaka1a1ff292015-02-16 13:28:22 +02001174 self->handlers = PyMem_New(PyObject *, i);
Fred Drake7c75bf22002-07-01 14:02:31 +00001175 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001176 Py_DECREF(self);
1177 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001178 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001179 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001180
Victor Stinner1b184552019-10-08 00:09:31 +02001181 PyObject_GC_Track(self);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001182 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001183}
1184
1185
1186static void
Fred Drake0582df92000-07-12 04:49:00 +00001187xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001188{
Fred Drake0582df92000-07-12 04:49:00 +00001189 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001190 PyObject_GC_UnTrack(self);
Fred Drake85d835f2001-02-08 15:39:08 +00001191 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001192 XML_ParserFree(self->itself);
1193 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001194
Fred Drake85d835f2001-02-08 15:39:08 +00001195 if (self->handlers != NULL) {
Serhiy Storchaka1ed017a2015-12-27 15:51:32 +02001196 for (i = 0; handler_info[i].name != NULL; i++)
1197 Py_CLEAR(self->handlers[i]);
Victor Stinnerb6404912013-07-07 16:21:41 +02001198 PyMem_Free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001199 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001200 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001201 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001202 PyMem_Free(self->buffer);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001203 self->buffer = NULL;
1204 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001205 Py_XDECREF(self->intern);
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001206 PyTypeObject *tp = Py_TYPE(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001207 PyObject_GC_Del(self);
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001208 Py_DECREF(tp);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001209}
1210
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001211
1212static PyObject *
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001213xmlparse_handler_getter(xmlparseobject *self, struct HandlerInfo *hi)
Fred Drake71b63ff2002-06-28 22:29:01 +00001214{
Victor Stinner28f468c2018-11-22 13:21:43 +01001215 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1216 int handlernum = (int)(hi - handler_info);
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001217 PyObject *result = self->handlers[handlernum];
1218 if (result == NULL)
1219 result = Py_None;
Fred Drake71b63ff2002-06-28 22:29:01 +00001220 Py_INCREF(result);
1221 return result;
1222}
1223
Fred Drake6f987622000-08-25 18:03:30 +00001224static int
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001225xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *hi)
Fred Drake0582df92000-07-12 04:49:00 +00001226{
Victor Stinner28f468c2018-11-22 13:21:43 +01001227 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1228 int handlernum = (int)(hi - handler_info);
Fred Drake85d835f2001-02-08 15:39:08 +00001229 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001230 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1231 return -1;
1232 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001233 if (handlernum == CharacterData) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001234 /* If we're changing the character data handler, flush all
1235 * cached data with the old handler. Not sure there's a
1236 * "right" thing to do, though, but this probably won't
1237 * happen.
1238 */
1239 if (flush_character_buffer(self) < 0)
1240 return -1;
1241 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001242
1243 xmlhandler c_handler = NULL;
1244 if (v == Py_None) {
1245 /* If this is the character data handler, and a character
1246 data handler is already active, we need to be more
1247 careful. What we can safely do is replace the existing
1248 character data handler callback function with a no-op
1249 function that will refuse to call Python. The downside
1250 is that this doesn't completely remove the character
1251 data handler from the C layer if there's any callback
1252 active, so Expat does a little more work than it
1253 otherwise would, but that's really an odd case. A more
1254 elaborate system of handlers and state could remove the
1255 C handler more effectively. */
1256 if (handlernum == CharacterData && self->in_callback)
1257 c_handler = noop_character_data_handler;
1258 v = NULL;
1259 }
1260 else if (v != NULL) {
1261 Py_INCREF(v);
1262 c_handler = handler_info[handlernum].handler;
1263 }
1264 Py_XSETREF(self->handlers[handlernum], v);
1265 handler_info[handlernum].setter(self->itself, c_handler);
1266 return 0;
1267}
1268
1269#define INT_GETTER(name) \
1270 static PyObject * \
1271 xmlparse_##name##_getter(xmlparseobject *self, void *closure) \
1272 { \
1273 return PyLong_FromLong((long) XML_Get##name(self->itself)); \
1274 }
1275INT_GETTER(ErrorCode)
1276INT_GETTER(ErrorLineNumber)
1277INT_GETTER(ErrorColumnNumber)
1278INT_GETTER(ErrorByteIndex)
1279INT_GETTER(CurrentLineNumber)
1280INT_GETTER(CurrentColumnNumber)
1281INT_GETTER(CurrentByteIndex)
1282
1283#undef INT_GETTER
1284
1285static PyObject *
1286xmlparse_buffer_text_getter(xmlparseobject *self, void *closure)
1287{
1288 return PyBool_FromLong(self->buffer != NULL);
1289}
1290
1291static int
1292xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
1293{
1294 if (v == NULL) {
1295 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1296 return -1;
1297 }
1298 int b = PyObject_IsTrue(v);
1299 if (b < 0)
1300 return -1;
1301 if (b) {
1302 if (self->buffer == NULL) {
1303 self->buffer = PyMem_Malloc(self->buffer_size);
1304 if (self->buffer == NULL) {
1305 PyErr_NoMemory();
1306 return -1;
1307 }
1308 self->buffer_used = 0;
1309 }
1310 }
1311 else if (self->buffer != NULL) {
1312 if (flush_character_buffer(self) < 0)
1313 return -1;
1314 PyMem_Free(self->buffer);
1315 self->buffer = NULL;
1316 }
1317 return 0;
1318}
1319
1320static PyObject *
1321xmlparse_buffer_size_getter(xmlparseobject *self, void *closure)
1322{
1323 return PyLong_FromLong((long) self->buffer_size);
1324}
1325
1326static int
1327xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
1328{
1329 if (v == NULL) {
1330 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1331 return -1;
1332 }
1333 long new_buffer_size;
1334 if (!PyLong_Check(v)) {
1335 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1336 return -1;
1337 }
1338
1339 new_buffer_size = PyLong_AsLong(v);
1340 if (new_buffer_size <= 0) {
1341 if (!PyErr_Occurred())
1342 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1343 return -1;
1344 }
1345
1346 /* trivial case -- no change */
1347 if (new_buffer_size == self->buffer_size) {
Fred Drake6f987622000-08-25 18:03:30 +00001348 return 0;
1349 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001350
1351 /* check maximum */
1352 if (new_buffer_size > INT_MAX) {
1353 char errmsg[100];
1354 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1355 PyErr_SetString(PyExc_ValueError, errmsg);
1356 return -1;
1357 }
1358
1359 if (self->buffer != NULL) {
1360 /* there is already a buffer */
1361 if (self->buffer_used != 0) {
1362 if (flush_character_buffer(self) < 0) {
1363 return -1;
1364 }
1365 }
1366 /* free existing buffer */
1367 PyMem_Free(self->buffer);
1368 }
1369 self->buffer = PyMem_Malloc(new_buffer_size);
1370 if (self->buffer == NULL) {
1371 PyErr_NoMemory();
1372 return -1;
1373 }
1374 self->buffer_size = new_buffer_size;
1375 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001376}
1377
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001378static PyObject *
1379xmlparse_buffer_used_getter(xmlparseobject *self, void *closure)
1380{
1381 return PyLong_FromLong((long) self->buffer_used);
1382}
1383
1384static PyObject *
1385xmlparse_namespace_prefixes_getter(xmlparseobject *self, void *closure)
1386{
1387 return PyBool_FromLong(self->ns_prefixes);
1388}
1389
1390static int
1391xmlparse_namespace_prefixes_setter(xmlparseobject *self, PyObject *v, void *closure)
1392{
1393 if (v == NULL) {
1394 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1395 return -1;
1396 }
1397 int b = PyObject_IsTrue(v);
1398 if (b < 0)
1399 return -1;
1400 self->ns_prefixes = b;
1401 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1402 return 0;
1403}
1404
1405static PyObject *
1406xmlparse_ordered_attributes_getter(xmlparseobject *self, void *closure)
1407{
1408 return PyBool_FromLong(self->ordered_attributes);
1409}
1410
1411static int
1412xmlparse_ordered_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1413{
1414 if (v == NULL) {
1415 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1416 return -1;
1417 }
1418 int b = PyObject_IsTrue(v);
1419 if (b < 0)
1420 return -1;
1421 self->ordered_attributes = b;
1422 return 0;
1423}
1424
1425static PyObject *
1426xmlparse_specified_attributes_getter(xmlparseobject *self, void *closure)
1427{
1428 return PyBool_FromLong((long) self->specified_attributes);
1429}
1430
1431static int
1432xmlparse_specified_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1433{
1434 if (v == NULL) {
1435 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1436 return -1;
1437 }
1438 int b = PyObject_IsTrue(v);
1439 if (b < 0)
1440 return -1;
1441 self->specified_attributes = b;
1442 return 0;
1443}
1444
1445static PyMemberDef xmlparse_members[] = {
1446 {"intern", T_OBJECT, offsetof(xmlparseobject, intern), READONLY, NULL},
1447 {NULL}
1448};
1449
1450#define XMLPARSE_GETTER_DEF(name) \
1451 {#name, (getter)xmlparse_##name##_getter, NULL, NULL},
1452#define XMLPARSE_GETTER_SETTER_DEF(name) \
1453 {#name, (getter)xmlparse_##name##_getter, \
1454 (setter)xmlparse_##name##_setter, NULL},
1455
1456static PyGetSetDef xmlparse_getsetlist[] = {
1457 XMLPARSE_GETTER_DEF(ErrorCode)
1458 XMLPARSE_GETTER_DEF(ErrorLineNumber)
1459 XMLPARSE_GETTER_DEF(ErrorColumnNumber)
1460 XMLPARSE_GETTER_DEF(ErrorByteIndex)
1461 XMLPARSE_GETTER_DEF(CurrentLineNumber)
1462 XMLPARSE_GETTER_DEF(CurrentColumnNumber)
1463 XMLPARSE_GETTER_DEF(CurrentByteIndex)
1464 XMLPARSE_GETTER_SETTER_DEF(buffer_size)
1465 XMLPARSE_GETTER_SETTER_DEF(buffer_text)
1466 XMLPARSE_GETTER_DEF(buffer_used)
1467 XMLPARSE_GETTER_SETTER_DEF(namespace_prefixes)
1468 XMLPARSE_GETTER_SETTER_DEF(ordered_attributes)
1469 XMLPARSE_GETTER_SETTER_DEF(specified_attributes)
1470 {NULL},
1471};
1472
1473#undef XMLPARSE_GETTER_DEF
1474#undef XMLPARSE_GETTER_SETTER_DEF
1475
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001476static int
1477xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1478{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001479 int i;
1480 for (i = 0; handler_info[i].name != NULL; i++)
1481 Py_VISIT(op->handlers[i]);
Fred Drakecde79132001-04-25 16:01:30 +00001482 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001483}
1484
1485static int
1486xmlparse_clear(xmlparseobject *op)
1487{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001488 clear_handlers(op, 0);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001489 Py_CLEAR(op->intern);
Fred Drakecde79132001-04-25 16:01:30 +00001490 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001491}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001492
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001493PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001494
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001495static PyType_Slot _xml_parse_type_spec_slots[] = {
1496 {Py_tp_dealloc, xmlparse_dealloc},
1497 {Py_tp_doc, (void *)Xmlparsetype__doc__},
1498 {Py_tp_traverse, xmlparse_traverse},
1499 {Py_tp_clear, xmlparse_clear},
1500 {Py_tp_methods, xmlparse_methods},
1501 {Py_tp_members, xmlparse_members},
1502 {Py_tp_getset, xmlparse_getsetlist},
1503 {0, 0}
1504};
1505
1506static PyType_Spec _xml_parse_type_spec = {
1507 .name = "pyexpat.xmlparser",
1508 .basicsize = sizeof(xmlparseobject),
1509 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
1510 .slots = _xml_parse_type_spec_slots,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001511};
1512
1513/* End of code for xmlparser objects */
1514/* -------------------------------------------------------- */
1515
Brett Cannond0aeda82014-08-22 14:23:20 -04001516/*[clinic input]
1517pyexpat.ParserCreate
1518
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001519 encoding: str(accept={str, NoneType}) = None
1520 namespace_separator: str(accept={str, NoneType}) = None
Brett Cannond0aeda82014-08-22 14:23:20 -04001521 intern: object = NULL
1522
1523Return a new XML parser object.
1524[clinic start generated code]*/
1525
Brett Cannond0aeda82014-08-22 14:23:20 -04001526static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001527pyexpat_ParserCreate_impl(PyObject *module, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04001528 const char *namespace_separator, PyObject *intern)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001529/*[clinic end generated code: output=295c0cf01ab1146c input=e8da8e8d7122cb5d]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001530{
Fred Drakeb91a36b2002-06-27 19:40:48 +00001531 PyObject *result;
1532 int intern_decref = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001533
Fred Drakecde79132001-04-25 16:01:30 +00001534 if (namespace_separator != NULL
1535 && strlen(namespace_separator) > 1) {
1536 PyErr_SetString(PyExc_ValueError,
1537 "namespace_separator must be at most one"
1538 " character, omitted, or None");
1539 return NULL;
1540 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001541 /* Explicitly passing None means no interning is desired.
1542 Not passing anything means that a new dictionary is used. */
1543 if (intern == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001544 intern = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001545 else if (intern == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001546 intern = PyDict_New();
1547 if (!intern)
1548 return NULL;
1549 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001550 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001551 else if (!PyDict_Check(intern)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001552 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1553 return NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001554 }
1555
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001556 pyexpat_state *state = pyexpat_get_state(module);
1557 result = newxmlparseobject(state, encoding, namespace_separator, intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001558 if (intern_decref) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001559 Py_DECREF(intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001560 }
1561 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001562}
1563
Brett Cannond0aeda82014-08-22 14:23:20 -04001564/*[clinic input]
1565pyexpat.ErrorString
1566
1567 code: long
1568 /
1569
1570Returns string error for given number.
1571[clinic start generated code]*/
1572
Brett Cannond0aeda82014-08-22 14:23:20 -04001573static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001574pyexpat_ErrorString_impl(PyObject *module, long code)
1575/*[clinic end generated code: output=2feae50d166f2174 input=cc67de010d9e62b3]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001576{
Fred Drake0582df92000-07-12 04:49:00 +00001577 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001578}
1579
1580/* List of methods defined in the module */
1581
1582static struct PyMethodDef pyexpat_methods[] = {
Brett Cannond0aeda82014-08-22 14:23:20 -04001583 PYEXPAT_PARSERCREATE_METHODDEF
1584 PYEXPAT_ERRORSTRING_METHODDEF
1585 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001586};
1587
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001588/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001589
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001590PyDoc_STRVAR(pyexpat_module_documentation,
1591"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001592
Fred Drakecde79132001-04-25 16:01:30 +00001593/* Initialization function for the module */
1594
/* Default module name, used unless MODULE_NAME was already defined. */
#if !defined(MODULE_NAME)
#  define MODULE_NAME "pyexpat"
#endif
1598
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001599static int init_handler_descrs(pyexpat_state *state)
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001600{
1601 int i;
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001602 assert(!PyType_HasFeature(state->xml_parse_type, Py_TPFLAGS_VALID_VERSION_TAG));
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001603 for (i = 0; handler_info[i].name != NULL; i++) {
1604 struct HandlerInfo *hi = &handler_info[i];
1605 hi->getset.name = hi->name;
1606 hi->getset.get = (getter)xmlparse_handler_getter;
1607 hi->getset.set = (setter)xmlparse_handler_setter;
1608 hi->getset.closure = &handler_info[i];
1609
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001610 PyObject *descr = PyDescr_NewGetSet(state->xml_parse_type, &hi->getset);
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001611 if (descr == NULL)
1612 return -1;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001613
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001614 if (PyDict_SetDefault(state->xml_parse_type->tp_dict, PyDescr_NAME(descr), descr) == NULL) {
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001615 Py_DECREF(descr);
1616 return -1;
1617 }
1618 Py_DECREF(descr);
1619 }
1620 return 0;
1621}
1622
Mohamed Koubaa71842182020-11-04 11:37:23 -06001623static PyObject *
1624add_submodule(PyObject *mod, const char *fullname)
Fred Drake0582df92000-07-12 04:49:00 +00001625{
Mohamed Koubaa71842182020-11-04 11:37:23 -06001626 const char *name = strrchr(fullname, '.') + 1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001627
Mohamed Koubaa71842182020-11-04 11:37:23 -06001628 PyObject *submodule = PyModule_New(fullname);
1629 if (submodule == NULL) {
Martin v. Löwis1a214512008-06-11 05:26:20 +00001630 return NULL;
Christian Heimes7a5457b2016-09-09 00:13:35 +02001631 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001632
Mohamed Koubaa71842182020-11-04 11:37:23 -06001633 PyObject *mod_name = PyUnicode_FromString(fullname);
1634 if (mod_name == NULL) {
1635 Py_DECREF(submodule);
1636 return NULL;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001637 }
Fred Drake6f987622000-08-25 18:03:30 +00001638
Mohamed Koubaa71842182020-11-04 11:37:23 -06001639 if (_PyImport_SetModule(mod_name, submodule) < 0) {
1640 Py_DECREF(submodule);
1641 Py_DECREF(mod_name);
1642 return NULL;
1643 }
1644 Py_DECREF(mod_name);
1645
1646 /* gives away the reference to the submodule */
1647 if (PyModule_AddObject(mod, name, submodule) < 0) {
1648 Py_DECREF(submodule);
1649 return NULL;
1650 }
1651
1652 return submodule;
1653}
1654
1655static int
1656add_error(PyObject *errors_module, PyObject *codes_dict,
1657 PyObject *rev_codes_dict, const char *name, int value)
1658{
1659 const char *error_string = XML_ErrorString(value);
1660 if (PyModule_AddStringConstant(errors_module, name, error_string) < 0) {
1661 return -1;
1662 }
1663
1664 PyObject *num = PyLong_FromLong(value);
1665 if (num == NULL) {
1666 return -1;
1667 }
1668
1669 if (PyDict_SetItemString(codes_dict, error_string, num) < 0) {
1670 Py_DECREF(num);
1671 return -1;
1672 }
1673
1674 PyObject *str = PyUnicode_FromString(error_string);
1675 if (str == NULL) {
1676 Py_DECREF(num);
1677 return -1;
1678 }
1679
1680 int res = PyDict_SetItem(rev_codes_dict, num, str);
1681 Py_DECREF(str);
1682 Py_DECREF(num);
1683 if (res < 0) {
1684 return -1;
1685 }
1686
1687 return 0;
1688}
1689
1690static int
1691add_errors_module(PyObject *mod)
1692{
1693 PyObject *errors_module = add_submodule(mod, MODULE_NAME ".errors");
1694 if (errors_module == NULL) {
1695 return -1;
1696 }
1697
1698 PyObject *codes_dict = PyDict_New();
1699 PyObject *rev_codes_dict = PyDict_New();
Georg Brandlb4dac712010-10-15 14:46:48 +00001700 if (codes_dict == NULL || rev_codes_dict == NULL) {
Mohamed Koubaa71842182020-11-04 11:37:23 -06001701 goto error;
Georg Brandlb4dac712010-10-15 14:46:48 +00001702 }
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001703
Mohamed Koubaa71842182020-11-04 11:37:23 -06001704#define ADD_CONST(name) do { \
1705 if (add_error(errors_module, codes_dict, rev_codes_dict, \
1706 #name, name) < 0) { \
1707 goto error; \
1708 } \
1709 } while(0)
Fred Drake7bd9f412000-07-04 23:51:31 +00001710
Mohamed Koubaa71842182020-11-04 11:37:23 -06001711 ADD_CONST(XML_ERROR_NO_MEMORY);
1712 ADD_CONST(XML_ERROR_SYNTAX);
1713 ADD_CONST(XML_ERROR_NO_ELEMENTS);
1714 ADD_CONST(XML_ERROR_INVALID_TOKEN);
1715 ADD_CONST(XML_ERROR_UNCLOSED_TOKEN);
1716 ADD_CONST(XML_ERROR_PARTIAL_CHAR);
1717 ADD_CONST(XML_ERROR_TAG_MISMATCH);
1718 ADD_CONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1719 ADD_CONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1720 ADD_CONST(XML_ERROR_PARAM_ENTITY_REF);
1721 ADD_CONST(XML_ERROR_UNDEFINED_ENTITY);
1722 ADD_CONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1723 ADD_CONST(XML_ERROR_ASYNC_ENTITY);
1724 ADD_CONST(XML_ERROR_BAD_CHAR_REF);
1725 ADD_CONST(XML_ERROR_BINARY_ENTITY_REF);
1726 ADD_CONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1727 ADD_CONST(XML_ERROR_MISPLACED_XML_PI);
1728 ADD_CONST(XML_ERROR_UNKNOWN_ENCODING);
1729 ADD_CONST(XML_ERROR_INCORRECT_ENCODING);
1730 ADD_CONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1731 ADD_CONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1732 ADD_CONST(XML_ERROR_NOT_STANDALONE);
1733 ADD_CONST(XML_ERROR_UNEXPECTED_STATE);
1734 ADD_CONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1735 ADD_CONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1736 ADD_CONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
Fred Drake283b6702004-08-04 22:28:16 +00001737 /* Added in Expat 1.95.7. */
Mohamed Koubaa71842182020-11-04 11:37:23 -06001738 ADD_CONST(XML_ERROR_UNBOUND_PREFIX);
Fred Drake283b6702004-08-04 22:28:16 +00001739 /* Added in Expat 1.95.8. */
Mohamed Koubaa71842182020-11-04 11:37:23 -06001740 ADD_CONST(XML_ERROR_UNDECLARING_PREFIX);
1741 ADD_CONST(XML_ERROR_INCOMPLETE_PE);
1742 ADD_CONST(XML_ERROR_XML_DECL);
1743 ADD_CONST(XML_ERROR_TEXT_DECL);
1744 ADD_CONST(XML_ERROR_PUBLICID);
1745 ADD_CONST(XML_ERROR_SUSPENDED);
1746 ADD_CONST(XML_ERROR_NOT_SUSPENDED);
1747 ADD_CONST(XML_ERROR_ABORTED);
1748 ADD_CONST(XML_ERROR_FINISHED);
1749 ADD_CONST(XML_ERROR_SUSPEND_PE);
1750#undef ADD_CONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001751
Georg Brandlb4dac712010-10-15 14:46:48 +00001752 if (PyModule_AddStringConstant(errors_module, "__doc__",
1753 "Constants used to describe "
Mohamed Koubaa71842182020-11-04 11:37:23 -06001754 "error conditions.") < 0) {
1755 goto error;
1756 }
Fred Drake85d835f2001-02-08 15:39:08 +00001757
Mohamed Koubaa71842182020-11-04 11:37:23 -06001758 Py_INCREF(codes_dict);
1759 if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0) {
1760 Py_DECREF(codes_dict);
1761 goto error;
1762 }
1763 Py_CLEAR(codes_dict);
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001764
Mohamed Koubaa71842182020-11-04 11:37:23 -06001765 Py_INCREF(rev_codes_dict);
1766 if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0) {
1767 Py_DECREF(rev_codes_dict);
1768 goto error;
1769 }
1770 Py_CLEAR(rev_codes_dict);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001771
Mohamed Koubaa71842182020-11-04 11:37:23 -06001772 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001773
Mohamed Koubaa71842182020-11-04 11:37:23 -06001774error:
1775 Py_XDECREF(codes_dict);
1776 Py_XDECREF(rev_codes_dict);
1777 return -1;
1778}
1779
1780static int
1781add_model_module(PyObject *mod)
1782{
1783 PyObject *model_module = add_submodule(mod, MODULE_NAME ".model");
1784 if (model_module == NULL) {
1785 return -1;
1786 }
1787
1788#define MYCONST(c) do { \
1789 if (PyModule_AddIntConstant(model_module, #c, c) < 0) { \
1790 return -1; \
1791 } \
1792 } while(0)
1793
1794 if (PyModule_AddStringConstant(
1795 model_module, "__doc__",
1796 "Constants used to interpret content model information.") < 0) {
1797 return -1;
1798 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001799
Fred Drake85d835f2001-02-08 15:39:08 +00001800 MYCONST(XML_CTYPE_EMPTY);
1801 MYCONST(XML_CTYPE_ANY);
1802 MYCONST(XML_CTYPE_MIXED);
1803 MYCONST(XML_CTYPE_NAME);
1804 MYCONST(XML_CTYPE_CHOICE);
1805 MYCONST(XML_CTYPE_SEQ);
1806
1807 MYCONST(XML_CQUANT_NONE);
1808 MYCONST(XML_CQUANT_OPT);
1809 MYCONST(XML_CQUANT_REP);
1810 MYCONST(XML_CQUANT_PLUS);
1811#undef MYCONST
Mohamed Koubaa71842182020-11-04 11:37:23 -06001812 return 0;
1813}
Fredrik Lundhc3345042005-12-13 19:49:55 +00001814
#if XML_COMBINED_VERSION > 19505
/* Add the `features` attribute to `mod`: a list of (name, value) tuples
   built from Expat's XML_GetFeatureList().

   XML_Feature.value is declared `long int`, so it is converted with the
   "l" format unit; passing a long under "i" is a varargs type mismatch.

   Returns 0 on success, -1 on failure. */
static int
add_features(PyObject *mod)
{
    PyObject *list = PyList_New(0);
    if (list == NULL) {
        return -1;
    }

    const XML_Feature *features = XML_GetFeatureList();
    for (size_t i = 0; features[i].feature != XML_FEATURE_END; ++i) {
        PyObject *item = Py_BuildValue("sl", features[i].name,
                                       features[i].value);
        if (item == NULL) {
            goto error;
        }
        int ok = PyList_Append(list, item);
        Py_DECREF(item);
        if (ok < 0) {
            goto error;
        }
    }
    /* On success the module takes over our reference to the list. */
    if (PyModule_AddObject(mod, "features", list) < 0) {
        goto error;
    }
    return 0;

error:
    Py_DECREF(list);
    return -1;
}
#endif
1847
Hai Shi7c83eaa2021-01-03 23:47:44 +08001848static void
1849pyexpat_destructor(PyObject *op)
1850{
1851 void *p = PyCapsule_GetPointer(op, PyExpat_CAPSULE_NAME);
1852 PyMem_Free(p);
1853}
1854
Mohamed Koubaa71842182020-11-04 11:37:23 -06001855static int
1856pyexpat_exec(PyObject *mod)
1857{
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001858 pyexpat_state *state = pyexpat_get_state(mod);
1859 state->xml_parse_type = (PyTypeObject *)PyType_FromModuleAndSpec(
1860 mod, &_xml_parse_type_spec, NULL);
1861
1862 if (state->xml_parse_type == NULL) {
Mohamed Koubaa71842182020-11-04 11:37:23 -06001863 return -1;
1864 }
1865
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001866 if (init_handler_descrs(state) < 0) {
1867 return -1;
1868 }
1869 state->error = PyErr_NewException("xml.parsers.expat.ExpatError",
1870 NULL, NULL);
1871 if (state->error == NULL) {
Mohamed Koubaa71842182020-11-04 11:37:23 -06001872 return -1;
1873 }
1874
1875 /* Add some symbolic constants to the module */
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001876
1877 if (PyModule_AddObjectRef(mod, "error", state->error) < 0) {
Mohamed Koubaa71842182020-11-04 11:37:23 -06001878 return -1;
1879 }
1880
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001881 if (PyModule_AddObjectRef(mod, "ExpatError", state->error) < 0) {
Mohamed Koubaa71842182020-11-04 11:37:23 -06001882 return -1;
1883 }
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001884
1885 if (PyModule_AddObjectRef(mod, "XMLParserType",
1886 (PyObject *) state->xml_parse_type) < 0) {
Mohamed Koubaa71842182020-11-04 11:37:23 -06001887 return -1;
1888 }
1889
1890 if (PyModule_AddStringConstant(mod, "EXPAT_VERSION",
1891 XML_ExpatVersion()) < 0) {
1892 return -1;
1893 }
1894 {
1895 XML_Expat_Version info = XML_ExpatVersionInfo();
1896 PyObject *versionInfo = Py_BuildValue("(iii)",
1897 info.major,
1898 info.minor,
1899 info.micro);
1900 if (PyModule_AddObject(mod, "version_info", versionInfo) < 0) {
1901 Py_DECREF(versionInfo);
1902 return -1;
1903 }
1904 }
1905 /* XXX When Expat supports some way of figuring out how it was
1906 compiled, this should check and set native_encoding
1907 appropriately.
1908 */
1909 if (PyModule_AddStringConstant(mod, "native_encoding", "UTF-8") < 0) {
1910 return -1;
1911 }
1912
1913 if (add_errors_module(mod) < 0) {
1914 return -1;
1915 }
1916
1917 if (add_model_module(mod) < 0) {
1918 return -1;
1919 }
1920
1921#if XML_COMBINED_VERSION > 19505
1922 if (add_features(mod) < 0) {
1923 return -1;
1924 }
1925#endif
1926
1927#define MYCONST(c) do { \
1928 if (PyModule_AddIntConstant(mod, #c, c) < 0) { \
1929 return -1; \
1930 } \
1931 } while(0)
1932
1933 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1934 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1935 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
1936#undef MYCONST
1937
Hai Shi7c83eaa2021-01-03 23:47:44 +08001938 struct PyExpat_CAPI *capi = PyMem_Calloc(1, sizeof(struct PyExpat_CAPI));
1939 if (capi == NULL) {
1940 PyErr_NoMemory();
1941 return -1;
1942 }
Fredrik Lundhc3345042005-12-13 19:49:55 +00001943 /* initialize pyexpat dispatch table */
Hai Shi7c83eaa2021-01-03 23:47:44 +08001944 capi->size = sizeof(*capi);
1945 capi->magic = PyExpat_CAPI_MAGIC;
1946 capi->MAJOR_VERSION = XML_MAJOR_VERSION;
1947 capi->MINOR_VERSION = XML_MINOR_VERSION;
1948 capi->MICRO_VERSION = XML_MICRO_VERSION;
1949 capi->ErrorString = XML_ErrorString;
1950 capi->GetErrorCode = XML_GetErrorCode;
1951 capi->GetErrorColumnNumber = XML_GetErrorColumnNumber;
1952 capi->GetErrorLineNumber = XML_GetErrorLineNumber;
1953 capi->Parse = XML_Parse;
1954 capi->ParserCreate_MM = XML_ParserCreate_MM;
1955 capi->ParserFree = XML_ParserFree;
1956 capi->SetCharacterDataHandler = XML_SetCharacterDataHandler;
1957 capi->SetCommentHandler = XML_SetCommentHandler;
1958 capi->SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1959 capi->SetElementHandler = XML_SetElementHandler;
1960 capi->SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1961 capi->SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1962 capi->SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1963 capi->SetUserData = XML_SetUserData;
1964 capi->SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
1965 capi->SetEncoding = XML_SetEncoding;
1966 capi->DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
Christian Heimescb5778f2018-09-18 14:38:58 +02001967#if XML_COMBINED_VERSION >= 20100
Hai Shi7c83eaa2021-01-03 23:47:44 +08001968 capi->SetHashSalt = XML_SetHashSalt;
Christian Heimescb5778f2018-09-18 14:38:58 +02001969#else
Hai Shi7c83eaa2021-01-03 23:47:44 +08001970 capi->SetHashSalt = NULL;
Christian Heimescb5778f2018-09-18 14:38:58 +02001971#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001972
Benjamin Petersonb173f782009-05-05 22:31:58 +00001973 /* export using capsule */
Hai Shi7c83eaa2021-01-03 23:47:44 +08001974 PyObject *capi_object = PyCapsule_New(capi, PyExpat_CAPSULE_NAME,
1975 pyexpat_destructor);
Mohamed Koubaa71842182020-11-04 11:37:23 -06001976 if (capi_object == NULL) {
Hai Shi7c83eaa2021-01-03 23:47:44 +08001977 PyMem_Free(capi);
Mohamed Koubaa71842182020-11-04 11:37:23 -06001978 return -1;
1979 }
1980
1981 if (PyModule_AddObject(mod, "expat_CAPI", capi_object) < 0) {
1982 Py_DECREF(capi_object);
1983 return -1;
1984 }
1985
1986 return 0;
1987}
1988
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06001989static int
1990pyexpat_traverse(PyObject *module, visitproc visit, void *arg)
1991{
1992 pyexpat_state *state = pyexpat_get_state(module);
1993 Py_VISIT(state->xml_parse_type);
1994 Py_VISIT(state->error);
1995 return 0;
1996}
1997
1998static int
1999pyexpat_clear(PyObject *module)
2000{
2001 pyexpat_state *state = pyexpat_get_state(module);
2002 Py_CLEAR(state->xml_parse_type);
2003 Py_CLEAR(state->error);
2004 return 0;
2005}
2006
2007static void
2008pyexpat_free(void *module)
2009{
2010 pyexpat_clear((PyObject *)module);
2011}
2012
2013static PyModuleDef_Slot pyexpat_slots[] = {
2014 {Py_mod_exec, pyexpat_exec},
2015 {0, NULL}
2016};
2017
Mohamed Koubaa71842182020-11-04 11:37:23 -06002018static struct PyModuleDef pyexpatmodule = {
2019 PyModuleDef_HEAD_INIT,
2020 .m_name = MODULE_NAME,
2021 .m_doc = pyexpat_module_documentation,
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06002022 .m_size = sizeof(pyexpat_state),
Mohamed Koubaa71842182020-11-04 11:37:23 -06002023 .m_methods = pyexpat_methods,
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06002024 .m_slots = pyexpat_slots,
2025 .m_traverse = pyexpat_traverse,
2026 .m_clear = pyexpat_clear,
2027 .m_free = pyexpat_free
Mohamed Koubaa71842182020-11-04 11:37:23 -06002028};
2029
2030PyMODINIT_FUNC
2031PyInit_pyexpat(void)
2032{
Mohamed Koubaac8a87ad2021-01-04 08:34:26 -06002033 return PyModuleDef_Init(&pyexpatmodule);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002034}
2035
Fred Drake6f987622000-08-25 18:03:30 +00002036static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00002037clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00002038{
Fred Drakecde79132001-04-25 16:01:30 +00002039 int i = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002040
Fred Drake71b63ff2002-06-28 22:29:01 +00002041 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00002042 if (initial)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002043 self->handlers[i] = NULL;
2044 else {
Serhiy Storchaka1ed017a2015-12-27 15:51:32 +02002045 Py_CLEAR(self->handlers[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002046 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00002047 }
Fred Drakecde79132001-04-25 16:01:30 +00002048 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002049}
2050
Tim Peters0c322792002-07-17 16:49:03 +00002051static struct HandlerInfo handler_info[] = {
Serhiy Storchaka55f82492018-10-19 18:00:51 +03002052
2053#define HANDLER_INFO(name) \
2054 {#name, (xmlhandlersetter)XML_Set##name, (xmlhandler)my_##name},
2055
2056 HANDLER_INFO(StartElementHandler)
2057 HANDLER_INFO(EndElementHandler)
2058 HANDLER_INFO(ProcessingInstructionHandler)
2059 HANDLER_INFO(CharacterDataHandler)
2060 HANDLER_INFO(UnparsedEntityDeclHandler)
2061 HANDLER_INFO(NotationDeclHandler)
2062 HANDLER_INFO(StartNamespaceDeclHandler)
2063 HANDLER_INFO(EndNamespaceDeclHandler)
2064 HANDLER_INFO(CommentHandler)
2065 HANDLER_INFO(StartCdataSectionHandler)
2066 HANDLER_INFO(EndCdataSectionHandler)
2067 HANDLER_INFO(DefaultHandler)
2068 HANDLER_INFO(DefaultHandlerExpand)
2069 HANDLER_INFO(NotStandaloneHandler)
2070 HANDLER_INFO(ExternalEntityRefHandler)
2071 HANDLER_INFO(StartDoctypeDeclHandler)
2072 HANDLER_INFO(EndDoctypeDeclHandler)
2073 HANDLER_INFO(EntityDeclHandler)
2074 HANDLER_INFO(XmlDeclHandler)
2075 HANDLER_INFO(ElementDeclHandler)
2076 HANDLER_INFO(AttlistDeclHandler)
Martin v. Löwisc847f402003-01-21 11:09:21 +00002077#if XML_COMBINED_VERSION >= 19504
Serhiy Storchaka55f82492018-10-19 18:00:51 +03002078 HANDLER_INFO(SkippedEntityHandler)
Martin v. Löwisc847f402003-01-21 11:09:21 +00002079#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002080
Serhiy Storchaka55f82492018-10-19 18:00:51 +03002081#undef HANDLER_INFO
2082
Fred Drake0582df92000-07-12 04:49:00 +00002083 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002084};