blob: 10d5aedf1cdb6e75370dbe8ca59eb2948792ecbb [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Serhiy Storchaka55f82492018-10-19 18:00:51 +03004#include "structmember.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00005#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00006#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00007
Fredrik Lundhc3345042005-12-13 19:49:55 +00008#include "pyexpat.h"
9
Brett Cannond0aeda82014-08-22 14:23:20 -040010/* Do not emit Clinic output to a file as that wreaks havoc with conditionally
11 included methods. */
12/*[clinic input]
13module pyexpat
14[clinic start generated code]*/
15/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/
16
/* Expat's major/minor/micro version folded into one integer
   (major*10000 + minor*100 + micro) so it can be compared in
   preprocessor conditionals. */
#define XML_COMBINED_VERSION \
    (10000*XML_MAJOR_VERSION + 100*XML_MINOR_VERSION + XML_MICRO_VERSION)
18
Christian Heimesfa535f52013-07-07 17:35:11 +020019static XML_Memory_Handling_Suite ExpatMemoryHandler = {
20 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
21
/* Index of each callback kind in a parser's `handlers` array.
   The numeric values are positional, so the order of the enumerators
   must not be changed. */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only present when building against Expat 1.95.4 or newer. */
    SkippedEntity,
#endif
    _DummyDecl
};
49
50static PyObject *ErrorObject;
51
52/* ----------------------------------------------------- */
53
54/* Declarations for objects of type xmlparser */
55
56typedef struct {
Fred Drake0582df92000-07-12 04:49:00 +000057 PyObject_HEAD
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000058
Fred Drake0582df92000-07-12 04:49:00 +000059 XML_Parser itself;
Fred Drake85d835f2001-02-08 15:39:08 +000060 int ordered_attributes; /* Return attributes as a list. */
61 int specified_attributes; /* Report only specified attributes. */
Fred Drakebd6101c2001-02-14 18:29:45 +000062 int in_callback; /* Is a callback active? */
Martin v. Löwis069dde22003-01-21 10:58:18 +000063 int ns_prefixes; /* Namespace-triplets mode? */
Fred Drake2a3d7db2002-06-28 22:56:48 +000064 XML_Char *buffer; /* Buffer used when accumulating characters */
65 /* NULL if not enabled */
66 int buffer_size; /* Size of buffer, in XML_Char units */
67 int buffer_used; /* Buffer units in use */
Fred Drakeb91a36b2002-06-27 19:40:48 +000068 PyObject *intern; /* Dictionary to intern strings */
Fred Drake0582df92000-07-12 04:49:00 +000069 PyObject **handlers;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000070} xmlparseobject;
71
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030072#include "clinic/pyexpat.c.h"
73
Fred Drake2a3d7db2002-06-28 22:56:48 +000074#define CHARACTER_DATA_BUFFER_SIZE 8192
75
Jeremy Hylton938ace62002-07-17 16:30:39 +000076static PyTypeObject Xmlparsetype;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000077
Fred Drake117ac852002-09-24 16:24:54 +000078typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000079typedef void* xmlhandler;
80
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +000081struct HandlerInfo {
Fred Drake0582df92000-07-12 04:49:00 +000082 const char *name;
83 xmlhandlersetter setter;
84 xmlhandler handler;
Serhiy Storchaka55f82492018-10-19 18:00:51 +030085 PyGetSetDef getset;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000086};
87
Jeremy Hylton938ace62002-07-17 16:30:39 +000088static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000089
Fred Drakebd6101c2001-02-14 18:29:45 +000090/* Set an integer attribute on the error object; return true on success,
91 * false on an exception.
92 */
93static int
Serhiy Storchakaef1585e2015-12-25 20:01:53 +020094set_error_attr(PyObject *err, const char *name, int value)
Fred Drakebd6101c2001-02-14 18:29:45 +000095{
Christian Heimes217cfd12007-12-02 14:31:20 +000096 PyObject *v = PyLong_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +000097
Neal Norwitz2f5e9902006-03-08 06:36:45 +000098 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
99 Py_XDECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +0000100 return 0;
101 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000102 Py_DECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +0000103 return 1;
104}
105
106/* Build and set an Expat exception, including positioning
107 * information. Always returns NULL.
108 */
Fred Drake85d835f2001-02-08 15:39:08 +0000109static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000110set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000111{
112 PyObject *err;
Victor Stinner499dfcf2011-03-21 13:26:24 +0100113 PyObject *buffer;
Fred Drake85d835f2001-02-08 15:39:08 +0000114 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000115 int lineno = XML_GetErrorLineNumber(parser);
116 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000117
Victor Stinner499dfcf2011-03-21 13:26:24 +0100118 buffer = PyUnicode_FromFormat("%s: line %i, column %i",
119 XML_ErrorString(code), lineno, column);
120 if (buffer == NULL)
121 return NULL;
Victor Stinner7bfb42d2016-12-05 17:04:32 +0100122 err = PyObject_CallFunctionObjArgs(ErrorObject, buffer, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +0100123 Py_DECREF(buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000124 if ( err != NULL
125 && set_error_attr(err, "code", code)
126 && set_error_attr(err, "offset", column)
127 && set_error_attr(err, "lineno", lineno)) {
128 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000129 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000130 Py_XDECREF(err);
Fred Drake85d835f2001-02-08 15:39:08 +0000131 return NULL;
132}
133
Fred Drake71b63ff2002-06-28 22:29:01 +0000134static int
135have_handler(xmlparseobject *self, int type)
136{
137 PyObject *handler = self->handlers[type];
138 return handler != NULL;
139}
140
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000141/* Convert a string of XML_Chars into a Unicode string.
142 Returns None if str is a null pointer. */
143
Fred Drake0582df92000-07-12 04:49:00 +0000144static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000145conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000146{
Fred Drake71b63ff2002-06-28 22:29:01 +0000147 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000148 and hence in UTF-8. */
149 /* UTF-8 from Expat, Unicode desired */
150 if (str == NULL) {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200151 Py_RETURN_NONE;
Fred Drake0582df92000-07-12 04:49:00 +0000152 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000153 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000154}
155
Fred Drake0582df92000-07-12 04:49:00 +0000156static PyObject *
157conv_string_len_to_unicode(const XML_Char *str, int len)
158{
Fred Drake71b63ff2002-06-28 22:29:01 +0000159 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000160 and hence in UTF-8. */
161 /* UTF-8 from Expat, Unicode desired */
162 if (str == NULL) {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200163 Py_RETURN_NONE;
Fred Drake0582df92000-07-12 04:49:00 +0000164 }
Fred Drake6f987622000-08-25 18:03:30 +0000165 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000166}
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000167
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000168/* Callback routines */
169
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000170static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000171
Martin v. Löwis069dde22003-01-21 10:58:18 +0000172/* This handler is used when an error has been detected, in the hope
173 that actual parsing can be terminated early. This will only help
174 if an external entity reference is encountered. */
175static int
176error_external_entity_ref_handler(XML_Parser parser,
177 const XML_Char *context,
178 const XML_Char *base,
179 const XML_Char *systemId,
180 const XML_Char *publicId)
181{
182 return 0;
183}
184
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000185/* Dummy character data handler used when an error (exception) has
186 been detected, and the actual parsing can be terminated early.
187 This is needed since character data handler can't be safely removed
188 from within the character data handler, but can be replaced. It is
189 used only from the character data handler trampoline, and must be
190 used right after `flag_error()` is called. */
191static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000192noop_character_data_handler(void *userData, const XML_Char *data, int len)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000193{
194 /* Do nothing. */
195}
196
Fred Drake6f987622000-08-25 18:03:30 +0000197static void
198flag_error(xmlparseobject *self)
199{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000200 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000201 XML_SetExternalEntityRefHandler(self->itself,
202 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000203}
204
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000205static PyObject*
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200206call_with_frame(const char *funcname, int lineno, PyObject* func, PyObject* args,
Fred Drake39689c52004-08-13 03:12:57 +0000207 xmlparseobject *self)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000208{
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200209 PyObject *res;
Fred Drakebd6101c2001-02-14 18:29:45 +0000210
Fred Drakebd6101c2001-02-14 18:29:45 +0000211 res = PyEval_CallObject(func, args);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000212 if (res == NULL) {
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200213 _PyTraceback_Add(funcname, __FILE__, lineno);
Fred Drake39689c52004-08-13 03:12:57 +0000214 XML_StopParser(self->itself, XML_FALSE);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000215 }
Fred Drakebd6101c2001-02-14 18:29:45 +0000216 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000217}
218
Fred Drakeb91a36b2002-06-27 19:40:48 +0000219static PyObject*
220string_intern(xmlparseobject *self, const char* str)
221{
Guido van Rossum4ca94712007-07-23 17:42:32 +0000222 PyObject *result = conv_string_to_unicode(str);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000223 PyObject *value;
Neal Norwitz484d9a42005-09-30 04:46:49 +0000224 /* result can be NULL if the unicode conversion failed. */
225 if (!result)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000226 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000227 if (!self->intern)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000229 value = PyDict_GetItem(self->intern, result);
230 if (!value) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000231 if (PyDict_SetItem(self->intern, result, result) == 0)
Fred Drakeb91a36b2002-06-27 19:40:48 +0000232 return result;
Zackery Spytz68def052018-10-19 00:57:38 -0600233 else {
234 Py_DECREF(result);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000235 return NULL;
Zackery Spytz68def052018-10-19 00:57:38 -0600236 }
Fred Drakeb91a36b2002-06-27 19:40:48 +0000237 }
238 Py_INCREF(value);
239 Py_DECREF(result);
240 return value;
241}
242
Fred Drake2a3d7db2002-06-28 22:56:48 +0000243/* Return 0 on success, -1 on exception.
244 * flag_error() will be called before return if needed.
245 */
246static int
247call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
248{
249 PyObject *args;
250 PyObject *temp;
251
Georg Brandlc01537f2010-10-15 16:26:08 +0000252 if (!have_handler(self, CharacterData))
253 return -1;
254
Fred Drake2a3d7db2002-06-28 22:56:48 +0000255 args = PyTuple_New(1);
256 if (args == NULL)
257 return -1;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000258 temp = (conv_string_len_to_unicode(buffer, len));
Fred Drake2a3d7db2002-06-28 22:56:48 +0000259 if (temp == NULL) {
260 Py_DECREF(args);
261 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000262 XML_SetCharacterDataHandler(self->itself,
263 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000264 return -1;
265 }
266 PyTuple_SET_ITEM(args, 0, temp);
267 /* temp is now a borrowed reference; consider it unused. */
268 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200269 temp = call_with_frame("CharacterData", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000270 self->handlers[CharacterData], args, self);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000271 /* temp is an owned reference again, or NULL */
272 self->in_callback = 0;
273 Py_DECREF(args);
274 if (temp == NULL) {
275 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000276 XML_SetCharacterDataHandler(self->itself,
277 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000278 return -1;
279 }
280 Py_DECREF(temp);
281 return 0;
282}
283
284static int
285flush_character_buffer(xmlparseobject *self)
286{
287 int rc;
288 if (self->buffer == NULL || self->buffer_used == 0)
289 return 0;
290 rc = call_character_handler(self, self->buffer, self->buffer_used);
291 self->buffer_used = 0;
292 return rc;
293}
294
295static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000297{
298 xmlparseobject *self = (xmlparseobject *) userData;
Victor Stinner9e09c262013-07-18 23:17:01 +0200299
300 if (PyErr_Occurred())
301 return;
302
Fred Drake2a3d7db2002-06-28 22:56:48 +0000303 if (self->buffer == NULL)
304 call_character_handler(self, data, len);
305 else {
306 if ((self->buffer_used + len) > self->buffer_size) {
307 if (flush_character_buffer(self) < 0)
308 return;
309 /* handler might have changed; drop the rest on the floor
310 * if there isn't a handler anymore
311 */
312 if (!have_handler(self, CharacterData))
313 return;
314 }
315 if (len > self->buffer_size) {
316 call_character_handler(self, data, len);
317 self->buffer_used = 0;
318 }
319 else {
320 memcpy(self->buffer + self->buffer_used,
321 data, len * sizeof(XML_Char));
322 self->buffer_used += len;
323 }
324 }
325}
326
Fred Drake85d835f2001-02-08 15:39:08 +0000327static void
328my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000329 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000330{
331 xmlparseobject *self = (xmlparseobject *)userData;
332
Fred Drake71b63ff2002-06-28 22:29:01 +0000333 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000334 PyObject *container, *rv, *args;
335 int i, max;
336
Victor Stinner9e09c262013-07-18 23:17:01 +0200337 if (PyErr_Occurred())
338 return;
339
Fred Drake2a3d7db2002-06-28 22:56:48 +0000340 if (flush_character_buffer(self) < 0)
341 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000342 /* Set max to the number of slots filled in atts[]; max/2 is
343 * the number of attributes we need to process.
344 */
345 if (self->specified_attributes) {
346 max = XML_GetSpecifiedAttributeCount(self->itself);
347 }
348 else {
349 max = 0;
350 while (atts[max] != NULL)
351 max += 2;
352 }
353 /* Build the container. */
354 if (self->ordered_attributes)
355 container = PyList_New(max);
356 else
357 container = PyDict_New();
358 if (container == NULL) {
359 flag_error(self);
360 return;
361 }
362 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000363 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000364 PyObject *v;
365 if (n == NULL) {
366 flag_error(self);
367 Py_DECREF(container);
368 return;
369 }
Guido van Rossum4ca94712007-07-23 17:42:32 +0000370 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
Fred Drake85d835f2001-02-08 15:39:08 +0000371 if (v == NULL) {
372 flag_error(self);
373 Py_DECREF(container);
374 Py_DECREF(n);
375 return;
376 }
377 if (self->ordered_attributes) {
378 PyList_SET_ITEM(container, i, n);
379 PyList_SET_ITEM(container, i+1, v);
380 }
381 else if (PyDict_SetItem(container, n, v)) {
382 flag_error(self);
383 Py_DECREF(n);
384 Py_DECREF(v);
Zackery Spytz68def052018-10-19 00:57:38 -0600385 Py_DECREF(container);
Fred Drake85d835f2001-02-08 15:39:08 +0000386 return;
387 }
388 else {
389 Py_DECREF(n);
390 Py_DECREF(v);
391 }
392 }
Neal Norwitz484d9a42005-09-30 04:46:49 +0000393 args = string_intern(self, name);
Fred Drake85d835f2001-02-08 15:39:08 +0000394 if (args == NULL) {
395 Py_DECREF(container);
396 return;
397 }
Zackery Spytz68def052018-10-19 00:57:38 -0600398 args = Py_BuildValue("(NN)", args, container);
399 if (args == NULL) {
400 return;
401 }
Fred Drake85d835f2001-02-08 15:39:08 +0000402 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000403 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200404 rv = call_with_frame("StartElement", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000405 self->handlers[StartElement], args, self);
Fred Drakebd6101c2001-02-14 18:29:45 +0000406 self->in_callback = 0;
407 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000408 if (rv == NULL) {
409 flag_error(self);
410 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000411 }
Fred Drake85d835f2001-02-08 15:39:08 +0000412 Py_DECREF(rv);
413 }
414}
415
/* Generate the C callback `my_<NAME>Handler` for handler slot NAME.
 *
 *   RC            return type of the generated function
 *   PARAMS        parenthesised parameter list of the function
 *   INIT          extra declaration/statement placed after the locals
 *   PARAM_FORMAT  parenthesised argument list for Py_BuildValue; it may
 *                 refer to `self` and to the names in PARAMS
 *   CONVERSION    statement run on the handler's result `rv` before it
 *                 is released
 *   RETURN        expression returned on every exit path
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing (beyond returning RETURN) when no
 * Python handler is installed or an exception is already pending.
 * Otherwise it flushes buffered character data, builds the argument
 * tuple, calls the Python handler, and calls flag_error() if building
 * the arguments or the call itself fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                   RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS \
{ \
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (!have_handler(self, NAME)) \
        return RETURN; \
    if (PyErr_Occurred()) \
        return RETURN; \
    if (flush_character_buffer(self) < 0) \
        return RETURN; \
    args = Py_BuildValue PARAM_FORMAT ; \
    if (!args) { \
        flag_error(self); \
        return RETURN; \
    } \
    self->in_callback = 1; \
    rv = call_with_frame(#NAME, __LINE__, \
                         self->handlers[NAME], args, self); \
    self->in_callback = 0; \
    Py_DECREF(args); \
    if (rv == NULL) { \
        flag_error(self); \
        return RETURN; \
    } \
    CONVERSION \
    Py_DECREF(rv); \
    return RETURN; \
}
446
/* RC_HANDLER specialisation for callbacks that return nothing and take
   the parser object as a `userData` parameter: no extra local, no
   result conversion, plain `return`. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(void, NAME, PARAMS, \
               ;, PARAM_FORMAT, ;, ;, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000450
/* RC_HANDLER specialisation for callbacks that return an int and take
   the parser object as a `userData` parameter.  The result starts at 0
   and is replaced by the Python handler's return value converted with
   PyLong_AsLong(). */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(int, NAME, PARAMS, \
               int rc=0;, PARAM_FORMAT, \
               rc = PyLong_AsLong(rv);, rc, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000455
Fred Drake71b63ff2002-06-28 22:29:01 +0000456VOID_HANDLER(EndElement,
457 (void *userData, const XML_Char *name),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000458 ("(N)", string_intern(self, name)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000459
Fred Drake6f987622000-08-25 18:03:30 +0000460VOID_HANDLER(ProcessingInstruction,
Fred Drake71b63ff2002-06-28 22:29:01 +0000461 (void *userData,
462 const XML_Char *target,
Fred Drake85d835f2001-02-08 15:39:08 +0000463 const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000464 ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000465
Fred Drake6f987622000-08-25 18:03:30 +0000466VOID_HANDLER(UnparsedEntityDecl,
Fred Drake71b63ff2002-06-28 22:29:01 +0000467 (void *userData,
Fred Drake85d835f2001-02-08 15:39:08 +0000468 const XML_Char *entityName,
469 const XML_Char *base,
470 const XML_Char *systemId,
471 const XML_Char *publicId,
472 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000473 ("(NNNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000474 string_intern(self, entityName), string_intern(self, base),
475 string_intern(self, systemId), string_intern(self, publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000476 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000477
Fred Drake85d835f2001-02-08 15:39:08 +0000478VOID_HANDLER(EntityDecl,
479 (void *userData,
480 const XML_Char *entityName,
481 int is_parameter_entity,
482 const XML_Char *value,
483 int value_length,
484 const XML_Char *base,
485 const XML_Char *systemId,
486 const XML_Char *publicId,
487 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000488 ("NiNNNNN",
489 string_intern(self, entityName), is_parameter_entity,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000490 (conv_string_len_to_unicode(value, value_length)),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000491 string_intern(self, base), string_intern(self, systemId),
492 string_intern(self, publicId),
493 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000494
495VOID_HANDLER(XmlDecl,
496 (void *userData,
497 const XML_Char *version,
498 const XML_Char *encoding,
499 int standalone),
500 ("(O&O&i)",
Guido van Rossum4ca94712007-07-23 17:42:32 +0000501 conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
Fred Drake85d835f2001-02-08 15:39:08 +0000502 standalone))
503
504static PyObject *
505conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000506 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000507{
508 PyObject *result = NULL;
509 PyObject *children = PyTuple_New(model->numchildren);
510 int i;
511
512 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000513 assert(model->numchildren < INT_MAX);
514 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000515 PyObject *child = conv_content_model(&model->children[i],
516 conv_string);
517 if (child == NULL) {
518 Py_XDECREF(children);
519 return NULL;
520 }
521 PyTuple_SET_ITEM(children, i, child);
522 }
523 result = Py_BuildValue("(iiO&N)",
524 model->type, model->quant,
525 conv_string,model->name, children);
526 }
527 return result;
528}
529
Fred Drake06dd8cf2003-02-02 03:54:17 +0000530static void
531my_ElementDeclHandler(void *userData,
532 const XML_Char *name,
533 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000534{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000535 xmlparseobject *self = (xmlparseobject *)userData;
536 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000537
Fred Drake06dd8cf2003-02-02 03:54:17 +0000538 if (have_handler(self, ElementDecl)) {
539 PyObject *rv = NULL;
540 PyObject *modelobj, *nameobj;
541
Victor Stinner9e09c262013-07-18 23:17:01 +0200542 if (PyErr_Occurred())
543 return;
544
Fred Drake06dd8cf2003-02-02 03:54:17 +0000545 if (flush_character_buffer(self) < 0)
546 goto finally;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000547 modelobj = conv_content_model(model, (conv_string_to_unicode));
Fred Drake06dd8cf2003-02-02 03:54:17 +0000548 if (modelobj == NULL) {
549 flag_error(self);
550 goto finally;
551 }
552 nameobj = string_intern(self, name);
553 if (nameobj == NULL) {
554 Py_DECREF(modelobj);
555 flag_error(self);
556 goto finally;
557 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000558 args = Py_BuildValue("NN", nameobj, modelobj);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000559 if (args == NULL) {
Fred Drake06dd8cf2003-02-02 03:54:17 +0000560 flag_error(self);
561 goto finally;
562 }
563 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200564 rv = call_with_frame("ElementDecl", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000565 self->handlers[ElementDecl], args, self);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000566 self->in_callback = 0;
567 if (rv == NULL) {
568 flag_error(self);
569 goto finally;
570 }
571 Py_DECREF(rv);
572 }
573 finally:
574 Py_XDECREF(args);
575 XML_FreeContentModel(self->itself, model);
576 return;
577}
Fred Drake85d835f2001-02-08 15:39:08 +0000578
579VOID_HANDLER(AttlistDecl,
580 (void *userData,
581 const XML_Char *elname,
582 const XML_Char *attname,
583 const XML_Char *att_type,
584 const XML_Char *dflt,
585 int isrequired),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000586 ("(NNO&O&i)",
587 string_intern(self, elname), string_intern(self, attname),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000588 conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
Fred Drake85d835f2001-02-08 15:39:08 +0000589 isrequired))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000590
Martin v. Löwisc847f402003-01-21 11:09:21 +0000591#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +0000592VOID_HANDLER(SkippedEntity,
593 (void *userData,
594 const XML_Char *entityName,
595 int is_parameter_entity),
596 ("Ni",
597 string_intern(self, entityName), is_parameter_entity))
Martin v. Löwisc847f402003-01-21 11:09:21 +0000598#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +0000599
Fred Drake71b63ff2002-06-28 22:29:01 +0000600VOID_HANDLER(NotationDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000601 (void *userData,
602 const XML_Char *notationName,
603 const XML_Char *base,
604 const XML_Char *systemId,
605 const XML_Char *publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000606 ("(NNNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000607 string_intern(self, notationName), string_intern(self, base),
608 string_intern(self, systemId), string_intern(self, publicId)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000609
Fred Drake6f987622000-08-25 18:03:30 +0000610VOID_HANDLER(StartNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000611 (void *userData,
612 const XML_Char *prefix,
613 const XML_Char *uri),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000614 ("(NN)",
615 string_intern(self, prefix), string_intern(self, uri)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000616
Fred Drake6f987622000-08-25 18:03:30 +0000617VOID_HANDLER(EndNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000618 (void *userData,
619 const XML_Char *prefix),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000620 ("(N)", string_intern(self, prefix)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000621
Fred Drake6f987622000-08-25 18:03:30 +0000622VOID_HANDLER(Comment,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000623 (void *userData, const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000624 ("(O&)", conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000625
Fred Drake6f987622000-08-25 18:03:30 +0000626VOID_HANDLER(StartCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000627 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000628 ("()"))
Fred Drake71b63ff2002-06-28 22:29:01 +0000629
Fred Drake6f987622000-08-25 18:03:30 +0000630VOID_HANDLER(EndCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000631 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000632 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000633
Fred Drake6f987622000-08-25 18:03:30 +0000634VOID_HANDLER(Default,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000635 (void *userData, const XML_Char *s, int len),
636 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000637
Fred Drake6f987622000-08-25 18:03:30 +0000638VOID_HANDLER(DefaultHandlerExpand,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000639 (void *userData, const XML_Char *s, int len),
640 ("(N)", (conv_string_len_to_unicode(s,len))))
Serhiy Storchaka55f82492018-10-19 18:00:51 +0300641#define my_DefaultHandlerExpand my_DefaultHandlerExpandHandler
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000642
Fred Drake71b63ff2002-06-28 22:29:01 +0000643INT_HANDLER(NotStandalone,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000644 (void *userData),
645 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000646
Fred Drake6f987622000-08-25 18:03:30 +0000647RC_HANDLER(int, ExternalEntityRef,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000648 (XML_Parser parser,
649 const XML_Char *context,
650 const XML_Char *base,
651 const XML_Char *systemId,
652 const XML_Char *publicId),
653 int rc=0;,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000654 ("(O&NNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000655 conv_string_to_unicode ,context, string_intern(self, base),
656 string_intern(self, systemId), string_intern(self, publicId)),
657 rc = PyLong_AsLong(rv);, rc,
658 XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000659
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000660/* XXX UnknownEncodingHandler */
661
Fred Drake85d835f2001-02-08 15:39:08 +0000662VOID_HANDLER(StartDoctypeDecl,
663 (void *userData, const XML_Char *doctypeName,
664 const XML_Char *sysid, const XML_Char *pubid,
665 int has_internal_subset),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000666 ("(NNNi)", string_intern(self, doctypeName),
667 string_intern(self, sysid), string_intern(self, pubid),
Fred Drake85d835f2001-02-08 15:39:08 +0000668 has_internal_subset))
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000669
670VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000671
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000672/* ---------------------------------------------------------------- */
Brett Cannond0aeda82014-08-22 14:23:20 -0400673/*[clinic input]
674class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype"
675[clinic start generated code]*/
676/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2393162385232e1c]*/
677
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000678
Fred Drake71b63ff2002-06-28 22:29:01 +0000679static PyObject *
680get_parse_result(xmlparseobject *self, int rv)
681{
682 if (PyErr_Occurred()) {
683 return NULL;
684 }
685 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000686 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000687 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000688 if (flush_character_buffer(self) < 0) {
689 return NULL;
690 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000691 return PyLong_FromLong(rv);
Fred Drake71b63ff2002-06-28 22:29:01 +0000692}
693
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200694#define MAX_CHUNK_SIZE (1 << 20)
695
Brett Cannond0aeda82014-08-22 14:23:20 -0400696/*[clinic input]
697pyexpat.xmlparser.Parse
698
699 data: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200700 isfinal: bool(accept={int}) = False
Brett Cannond0aeda82014-08-22 14:23:20 -0400701 /
702
703Parse XML data.
704
705`isfinal' should be true at end of input.
706[clinic start generated code]*/
707
Brett Cannond0aeda82014-08-22 14:23:20 -0400708static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400709pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyObject *data,
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +0300710 int isfinal)
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200711/*[clinic end generated code: output=f4db843dd1f4ed4b input=eb616027bfa9847f]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400712{
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200713 const char *s;
714 Py_ssize_t slen;
715 Py_buffer view;
716 int rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000717
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200718 if (PyUnicode_Check(data)) {
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200719 view.buf = NULL;
Serhiy Storchaka36b365c2013-02-04 18:28:01 +0200720 s = PyUnicode_AsUTF8AndSize(data, &slen);
721 if (s == NULL)
722 return NULL;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200723 /* Explicitly set UTF-8 encoding. Return code ignored. */
724 (void)XML_SetEncoding(self->itself, "utf-8");
725 }
726 else {
727 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
728 return NULL;
729 s = view.buf;
730 slen = view.len;
731 }
732
733 while (slen > MAX_CHUNK_SIZE) {
734 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
735 if (!rc)
736 goto done;
737 s += MAX_CHUNK_SIZE;
738 slen -= MAX_CHUNK_SIZE;
739 }
Serhiy Storchakafad85aa2015-11-07 15:42:38 +0200740 Py_BUILD_ASSERT(MAX_CHUNK_SIZE <= INT_MAX);
741 assert(slen <= INT_MAX);
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +0300742 rc = XML_Parse(self->itself, s, (int)slen, isfinal);
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200743
744done:
745 if (view.buf != NULL)
746 PyBuffer_Release(&view);
747 return get_parse_result(self, rc);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000748}
749
Fred Drakeca1f4262000-09-21 20:10:23 +0000750/* File reading copied from cPickle */
751
/* Number of bytes requested from XML_GetBuffer() and from the file's read()
   method on each iteration of ParseFile(). */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000752#define BUF_SIZE 2048
753
Fred Drake0582df92000-07-12 04:49:00 +0000754static int
755readinst(char *buf, int buf_size, PyObject *meth)
756{
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000757 PyObject *str;
758 Py_ssize_t len;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200759 const char *ptr;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000760
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000761 str = PyObject_CallFunction(meth, "n", buf_size);
Martin v. Löwis9171f022004-10-13 19:50:11 +0000762 if (str == NULL)
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000763 goto error;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000764
Christian Heimes72b710a2008-05-26 13:28:38 +0000765 if (PyBytes_Check(str))
766 ptr = PyBytes_AS_STRING(str);
Christian Heimes9c4756e2008-05-26 13:22:05 +0000767 else if (PyByteArray_Check(str))
768 ptr = PyByteArray_AS_STRING(str);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000769 else {
Fred Drake71b63ff2002-06-28 22:29:01 +0000770 PyErr_Format(PyExc_TypeError,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000771 "read() did not return a bytes object (type=%.400s)",
Christian Heimes90aa7642007-12-19 02:45:37 +0000772 Py_TYPE(str)->tp_name);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000773 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000774 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000775 len = Py_SIZE(str);
Fred Drake0582df92000-07-12 04:49:00 +0000776 if (len > buf_size) {
777 PyErr_Format(PyExc_ValueError,
778 "read() returned too much data: "
Victor Stinner9d6f9362011-01-04 22:00:04 +0000779 "%i bytes requested, %zd returned",
Fred Drake0582df92000-07-12 04:49:00 +0000780 buf_size, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000781 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000782 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000783 memcpy(buf, ptr, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000784 Py_DECREF(str);
785 /* len <= buf_size <= INT_MAX */
Victor Stinner0fcab4a2011-01-04 12:59:15 +0000786 return (int)len;
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000787
788error:
789 Py_XDECREF(str);
790 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000791}
792
Brett Cannond0aeda82014-08-22 14:23:20 -0400793/*[clinic input]
794pyexpat.xmlparser.ParseFile
795
796 file: object
797 /
798
799Parse XML data from file-like object.
800[clinic start generated code]*/
801
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000802static PyObject *
Brett Cannond0aeda82014-08-22 14:23:20 -0400803pyexpat_xmlparser_ParseFile(xmlparseobject *self, PyObject *file)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300804/*[clinic end generated code: output=2adc6a13100cc42b input=fbb5a12b6038d735]*/
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000805{
Fred Drake0582df92000-07-12 04:49:00 +0000806 int rv = 1;
Fred Drake0582df92000-07-12 04:49:00 +0000807 PyObject *readmethod = NULL;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200808 _Py_IDENTIFIER(read);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000809
Brett Cannond0aeda82014-08-22 14:23:20 -0400810 readmethod = _PyObject_GetAttrId(file, &PyId_read);
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000811 if (readmethod == NULL) {
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000812 PyErr_SetString(PyExc_TypeError,
813 "argument must have 'read' attribute");
814 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000815 }
816 for (;;) {
817 int bytes_read;
818 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
Fred Drake7b6caff2003-07-21 17:05:56 +0000819 if (buf == NULL) {
Fred Drakef239c6d2003-07-21 17:22:43 +0000820 Py_XDECREF(readmethod);
Ned Deilye7d532f2014-03-27 16:39:58 -0700821 return get_parse_result(self, 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000822 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000823
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000824 bytes_read = readinst(buf, BUF_SIZE, readmethod);
825 if (bytes_read < 0) {
826 Py_DECREF(readmethod);
827 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000828 }
829 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000830 if (PyErr_Occurred()) {
831 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000832 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000833 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000834
Fred Drake0582df92000-07-12 04:49:00 +0000835 if (!rv || bytes_read == 0)
836 break;
837 }
Fred Drake7b6caff2003-07-21 17:05:56 +0000838 Py_XDECREF(readmethod);
Fred Drake71b63ff2002-06-28 22:29:01 +0000839 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000840}
841
Brett Cannond0aeda82014-08-22 14:23:20 -0400842/*[clinic input]
843pyexpat.xmlparser.SetBase
844
845 base: str
846 /
847
848Set the base URL for the parser.
849[clinic start generated code]*/
850
Brett Cannond0aeda82014-08-22 14:23:20 -0400851static PyObject *
852pyexpat_xmlparser_SetBase_impl(xmlparseobject *self, const char *base)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300853/*[clinic end generated code: output=c212ddceb607b539 input=c684e5de895ee1a8]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400854{
Fred Drake0582df92000-07-12 04:49:00 +0000855 if (!XML_SetBase(self->itself, base)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000856 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000857 }
Brett Cannond0aeda82014-08-22 14:23:20 -0400858 Py_RETURN_NONE;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000859}
860
Brett Cannond0aeda82014-08-22 14:23:20 -0400861/*[clinic input]
862pyexpat.xmlparser.GetBase
863
864Return base URL string for the parser.
865[clinic start generated code]*/
866
Brett Cannond0aeda82014-08-22 14:23:20 -0400867static PyObject *
868pyexpat_xmlparser_GetBase_impl(xmlparseobject *self)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300869/*[clinic end generated code: output=2886cb21f9a8739a input=918d71c38009620e]*/
Fred Drake0582df92000-07-12 04:49:00 +0000870{
Fred Drake0582df92000-07-12 04:49:00 +0000871 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000872}
873
Brett Cannond0aeda82014-08-22 14:23:20 -0400874/*[clinic input]
875pyexpat.xmlparser.GetInputContext
876
877Return the untranslated text of the input that caused the current event.
878
879If the event was generated by a large amount of text (such as a start tag
880for an element with many attributes), not all of the text may be available.
881[clinic start generated code]*/
882
Brett Cannond0aeda82014-08-22 14:23:20 -0400883static PyObject *
884pyexpat_xmlparser_GetInputContext_impl(xmlparseobject *self)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300885/*[clinic end generated code: output=a88026d683fc22cc input=034df8712db68379]*/
Fred Drakebd6101c2001-02-14 18:29:45 +0000886{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000887 if (self->in_callback) {
888 int offset, size;
889 const char *buffer
890 = XML_GetInputContext(self->itself, &offset, &size);
Fred Drakebd6101c2001-02-14 18:29:45 +0000891
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000892 if (buffer != NULL)
Christian Heimes72b710a2008-05-26 13:28:38 +0000893 return PyBytes_FromStringAndSize(buffer + offset,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000894 size - offset);
895 else
896 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000897 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000898 else
899 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000900}
Fred Drakebd6101c2001-02-14 18:29:45 +0000901
Brett Cannond0aeda82014-08-22 14:23:20 -0400902/*[clinic input]
903pyexpat.xmlparser.ExternalEntityParserCreate
904
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700905 context: str(accept={str, NoneType})
Brett Cannond0aeda82014-08-22 14:23:20 -0400906 encoding: str = NULL
907 /
908
909Create a parser for parsing an external entity based on the information passed to the ExternalEntityRefHandler.
910[clinic start generated code]*/
911
Brett Cannond0aeda82014-08-22 14:23:20 -0400912static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400913pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
914 const char *context,
915 const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700916/*[clinic end generated code: output=535cda9d7a0fbcd6 input=b906714cc122c322]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400917{
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000918 xmlparseobject *new_parser;
919 int i;
920
Martin v. Löwis894258c2001-09-23 10:20:10 +0000921 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake85d835f2001-02-08 15:39:08 +0000922 if (new_parser == NULL)
923 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +0000924 new_parser->buffer_size = self->buffer_size;
925 new_parser->buffer_used = 0;
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000926 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000927 new_parser->ordered_attributes = self->ordered_attributes;
928 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +0000929 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +0000930 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000931 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000932 encoding);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000933 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000934 new_parser->intern = self->intern;
935 Py_XINCREF(new_parser->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +0000936 PyObject_GC_Track(new_parser);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000937
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000938 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +0200939 new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000940 if (new_parser->buffer == NULL) {
941 Py_DECREF(new_parser);
942 return PyErr_NoMemory();
943 }
944 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000945 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +0000946 Py_DECREF(new_parser);
947 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000948 }
949
950 XML_SetUserData(new_parser->itself, (void *)new_parser);
951
952 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +0000953 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +0000954 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000955
Serhiy Storchaka1a1ff292015-02-16 13:28:22 +0200956 new_parser->handlers = PyMem_New(PyObject *, i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000957 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +0000958 Py_DECREF(new_parser);
959 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000960 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000961 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000962
963 /* then copy handlers from self */
964 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +0000965 PyObject *handler = self->handlers[i];
966 if (handler != NULL) {
967 Py_INCREF(handler);
968 new_parser->handlers[i] = handler;
969 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +0000970 handler_info[i].handler);
971 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000972 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000973 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000974}
975
Brett Cannond0aeda82014-08-22 14:23:20 -0400976/*[clinic input]
977pyexpat.xmlparser.SetParamEntityParsing
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000978
Brett Cannond0aeda82014-08-22 14:23:20 -0400979 flag: int
980 /
981
982Controls parsing of parameter entities (including the external DTD subset).
983
984Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,
985XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and
986XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag
987was successful.
988[clinic start generated code]*/
989
Brett Cannond0aeda82014-08-22 14:23:20 -0400990static PyObject *
991pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject *self, int flag)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300992/*[clinic end generated code: output=18668ee8e760d64c input=8aea19b4b15e9af1]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400993{
994 flag = XML_SetParamEntityParsing(self->itself, flag);
Christian Heimes217cfd12007-12-02 14:31:20 +0000995 return PyLong_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000996}
997
Martin v. Löwisc847f402003-01-21 11:09:21 +0000998
999#if XML_COMBINED_VERSION >= 19505
Brett Cannond0aeda82014-08-22 14:23:20 -04001000/*[clinic input]
1001pyexpat.xmlparser.UseForeignDTD
1002
1003 flag: bool = True
1004 /
1005
1006Allows the application to provide an artificial external subset if one is not specified as part of the document instance.
1007
1008This readily allows the use of a 'default' document type controlled by the
1009application, while still getting the advantage of providing document type
1010information to the parser. 'flag' defaults to True if not provided.
1011[clinic start generated code]*/
1012
Brett Cannond0aeda82014-08-22 14:23:20 -04001013static PyObject *
1014pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, int flag)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001015/*[clinic end generated code: output=cfaa9aa50bb0f65c input=78144c519d116a6e]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001016{
Martin v. Löwis069dde22003-01-21 10:58:18 +00001017 enum XML_Error rc;
Brett Cannond0aeda82014-08-22 14:23:20 -04001018
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001019 rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
Martin v. Löwis069dde22003-01-21 10:58:18 +00001020 if (rc != XML_ERROR_NONE) {
1021 return set_error(self, rc);
1022 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001023 Py_RETURN_NONE;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001024}
Martin v. Löwisc847f402003-01-21 11:09:21 +00001025#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001026
/* Method table for xmlparser objects.  Each entry is an Argument Clinic
   *_METHODDEF macro; UseForeignDTD is listed only when
   XML_COMBINED_VERSION >= 19505, matching the guard around its
   implementation.  The {NULL, NULL} entry terminates the table. */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001027static struct PyMethodDef xmlparse_methods[] = {
Brett Cannond0aeda82014-08-22 14:23:20 -04001028 PYEXPAT_XMLPARSER_PARSE_METHODDEF
1029 PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
1030 PYEXPAT_XMLPARSER_SETBASE_METHODDEF
1031 PYEXPAT_XMLPARSER_GETBASE_METHODDEF
1032 PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
1033 PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
1034 PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
Martin v. Löwisc847f402003-01-21 11:09:21 +00001035#if XML_COMBINED_VERSION >= 19505
Brett Cannond0aeda82014-08-22 14:23:20 -04001036 PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
Martin v. Löwisc847f402003-01-21 11:09:21 +00001037#endif
Brett Cannond0aeda82014-08-22 14:23:20 -04001038 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001039};
1040
1041/* ---------- */
1042
1043
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001044
Fred Drake71b63ff2002-06-28 22:29:01 +00001045/* pyexpat international encoding support.
1046 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001047*/
1048
Fred Drake71b63ff2002-06-28 22:29:01 +00001049static int
1050PyUnknownEncodingHandler(void *encodingHandlerData,
1051 const XML_Char *name,
1052 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001053{
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001054 static unsigned char template_buffer[256] = {0};
1055 PyObject* u;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001056 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001057 void *data;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001058 unsigned int kind;
Fred Drake71b63ff2002-06-28 22:29:01 +00001059
Victor Stinner9e09c262013-07-18 23:17:01 +02001060 if (PyErr_Occurred())
1061 return XML_STATUS_ERROR;
1062
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001063 if (template_buffer[1] == 0) {
1064 for (i = 0; i < 256; i++)
1065 template_buffer[i] = i;
Tim Peters63cb99e2001-02-17 18:12:50 +00001066 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001067
1068 u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
Christian Heimesb5821552013-06-29 20:43:13 +02001069 if (u == NULL || PyUnicode_READY(u)) {
Christian Heimes72172422013-06-29 21:49:27 +02001070 Py_XDECREF(u);
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001071 return XML_STATUS_ERROR;
Christian Heimesb5821552013-06-29 20:43:13 +02001072 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001073
1074 if (PyUnicode_GET_LENGTH(u) != 256) {
1075 Py_DECREF(u);
1076 PyErr_SetString(PyExc_ValueError,
1077 "multi-byte encodings are not supported");
1078 return XML_STATUS_ERROR;
1079 }
1080
1081 kind = PyUnicode_KIND(u);
1082 data = PyUnicode_DATA(u);
1083 for (i = 0; i < 256; i++) {
1084 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1085 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1086 info->map[i] = ch;
1087 else
1088 info->map[i] = -1;
1089 }
1090
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001091 info->data = NULL;
1092 info->convert = NULL;
1093 info->release = NULL;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001094 Py_DECREF(u);
1095
1096 return XML_STATUS_OK;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001097}
1098
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001099
1100static PyObject *
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001101newxmlparseobject(const char *encoding, const char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001102{
1103 int i;
1104 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001105
Martin v. Löwis894258c2001-09-23 10:20:10 +00001106 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake0582df92000-07-12 04:49:00 +00001107 if (self == NULL)
1108 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001109
Fred Drake2a3d7db2002-06-28 22:56:48 +00001110 self->buffer = NULL;
1111 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1112 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001113 self->ordered_attributes = 0;
1114 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001115 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001116 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001117 self->handlers = NULL;
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001118 self->intern = intern;
1119 Py_XINCREF(self->intern);
1120 PyObject_GC_Track(self);
1121
Christian Heimesfa535f52013-07-07 17:35:11 +02001122 /* namespace_separator is either NULL or contains one char + \0 */
1123 self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
1124 namespace_separator);
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001125 if (self->itself == NULL) {
1126 PyErr_SetString(PyExc_RuntimeError,
1127 "XML_ParserCreate failed");
1128 Py_DECREF(self);
1129 return NULL;
1130 }
Victor Stinner23ec4b52017-06-15 00:54:36 +02001131#if XML_COMBINED_VERSION >= 20100
1132 /* This feature was added upstream in libexpat 2.1.0. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001133 XML_SetHashSalt(self->itself,
Christian Heimes985ecdc2013-11-20 11:46:18 +01001134 (unsigned long)_Py_HashSecret.expat.hashsalt);
Gregory P. Smith25227712012-03-14 18:10:37 -07001135#endif
Fred Drake0582df92000-07-12 04:49:00 +00001136 XML_SetUserData(self->itself, (void *)self);
Fred Drake7c75bf22002-07-01 14:02:31 +00001137 XML_SetUnknownEncodingHandler(self->itself,
1138 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001139
Fred Drake2a3d7db2002-06-28 22:56:48 +00001140 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001141 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001142
Serhiy Storchaka1a1ff292015-02-16 13:28:22 +02001143 self->handlers = PyMem_New(PyObject *, i);
Fred Drake7c75bf22002-07-01 14:02:31 +00001144 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001145 Py_DECREF(self);
1146 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001147 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001148 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001149
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001150 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001151}
1152
1153
1154static void
Fred Drake0582df92000-07-12 04:49:00 +00001155xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001156{
Fred Drake0582df92000-07-12 04:49:00 +00001157 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001158 PyObject_GC_UnTrack(self);
Fred Drake85d835f2001-02-08 15:39:08 +00001159 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001160 XML_ParserFree(self->itself);
1161 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001162
Fred Drake85d835f2001-02-08 15:39:08 +00001163 if (self->handlers != NULL) {
Serhiy Storchaka1ed017a2015-12-27 15:51:32 +02001164 for (i = 0; handler_info[i].name != NULL; i++)
1165 Py_CLEAR(self->handlers[i]);
Victor Stinnerb6404912013-07-07 16:21:41 +02001166 PyMem_Free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001167 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001168 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001169 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001170 PyMem_Free(self->buffer);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001171 self->buffer = NULL;
1172 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001173 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001174 PyObject_GC_Del(self);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001175}
1176
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001177
1178static PyObject *
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001179xmlparse_handler_getter(xmlparseobject *self, struct HandlerInfo *hi)
Fred Drake71b63ff2002-06-28 22:29:01 +00001180{
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001181 int handlernum = hi - handler_info;
1182 PyObject *result = self->handlers[handlernum];
1183 if (result == NULL)
1184 result = Py_None;
Fred Drake71b63ff2002-06-28 22:29:01 +00001185 Py_INCREF(result);
1186 return result;
1187}
1188
Fred Drake6f987622000-08-25 18:03:30 +00001189static int
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001190xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *hi)
Fred Drake0582df92000-07-12 04:49:00 +00001191{
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001192 int handlernum = hi - handler_info;
Fred Drake85d835f2001-02-08 15:39:08 +00001193 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001194 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1195 return -1;
1196 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001197 if (handlernum == CharacterData) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001198 /* If we're changing the character data handler, flush all
1199 * cached data with the old handler. Not sure there's a
1200 * "right" thing to do, though, but this probably won't
1201 * happen.
1202 */
1203 if (flush_character_buffer(self) < 0)
1204 return -1;
1205 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001206
1207 xmlhandler c_handler = NULL;
1208 if (v == Py_None) {
1209 /* If this is the character data handler, and a character
1210 data handler is already active, we need to be more
1211 careful. What we can safely do is replace the existing
1212 character data handler callback function with a no-op
1213 function that will refuse to call Python. The downside
1214 is that this doesn't completely remove the character
1215 data handler from the C layer if there's any callback
1216 active, so Expat does a little more work than it
1217 otherwise would, but that's really an odd case. A more
1218 elaborate system of handlers and state could remove the
1219 C handler more effectively. */
1220 if (handlernum == CharacterData && self->in_callback)
1221 c_handler = noop_character_data_handler;
1222 v = NULL;
1223 }
1224 else if (v != NULL) {
1225 Py_INCREF(v);
1226 c_handler = handler_info[handlernum].handler;
1227 }
1228 Py_XSETREF(self->handlers[handlernum], v);
1229 handler_info[handlernum].setter(self->itself, c_handler);
1230 return 0;
1231}
1232
/* INT_GETTER(name) defines xmlparse_<name>_getter(), a getter that returns
   XML_Get<name>(self->itself) cast to long as a Python int.  It is
   instantiated for the error and current position queries below and then
   undefined. */
1233#define INT_GETTER(name) \
1234 static PyObject * \
1235 xmlparse_##name##_getter(xmlparseobject *self, void *closure) \
1236 { \
1237 return PyLong_FromLong((long) XML_Get##name(self->itself)); \
1238 }
1239INT_GETTER(ErrorCode)
1240INT_GETTER(ErrorLineNumber)
1241INT_GETTER(ErrorColumnNumber)
1242INT_GETTER(ErrorByteIndex)
1243INT_GETTER(CurrentLineNumber)
1244INT_GETTER(CurrentColumnNumber)
1245INT_GETTER(CurrentByteIndex)
1246
1247#undef INT_GETTER
1248
1249static PyObject *
1250xmlparse_buffer_text_getter(xmlparseobject *self, void *closure)
1251{
1252 return PyBool_FromLong(self->buffer != NULL);
1253}
1254
1255static int
1256xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
1257{
1258 if (v == NULL) {
1259 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1260 return -1;
1261 }
1262 int b = PyObject_IsTrue(v);
1263 if (b < 0)
1264 return -1;
1265 if (b) {
1266 if (self->buffer == NULL) {
1267 self->buffer = PyMem_Malloc(self->buffer_size);
1268 if (self->buffer == NULL) {
1269 PyErr_NoMemory();
1270 return -1;
1271 }
1272 self->buffer_used = 0;
1273 }
1274 }
1275 else if (self->buffer != NULL) {
1276 if (flush_character_buffer(self) < 0)
1277 return -1;
1278 PyMem_Free(self->buffer);
1279 self->buffer = NULL;
1280 }
1281 return 0;
1282}
1283
1284static PyObject *
1285xmlparse_buffer_size_getter(xmlparseobject *self, void *closure)
1286{
1287 return PyLong_FromLong((long) self->buffer_size);
1288}
1289
1290static int
1291xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
1292{
1293 if (v == NULL) {
1294 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1295 return -1;
1296 }
1297 long new_buffer_size;
1298 if (!PyLong_Check(v)) {
1299 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1300 return -1;
1301 }
1302
1303 new_buffer_size = PyLong_AsLong(v);
1304 if (new_buffer_size <= 0) {
1305 if (!PyErr_Occurred())
1306 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1307 return -1;
1308 }
1309
1310 /* trivial case -- no change */
1311 if (new_buffer_size == self->buffer_size) {
Fred Drake6f987622000-08-25 18:03:30 +00001312 return 0;
1313 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001314
1315 /* check maximum */
1316 if (new_buffer_size > INT_MAX) {
1317 char errmsg[100];
1318 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1319 PyErr_SetString(PyExc_ValueError, errmsg);
1320 return -1;
1321 }
1322
1323 if (self->buffer != NULL) {
1324 /* there is already a buffer */
1325 if (self->buffer_used != 0) {
1326 if (flush_character_buffer(self) < 0) {
1327 return -1;
1328 }
1329 }
1330 /* free existing buffer */
1331 PyMem_Free(self->buffer);
1332 }
1333 self->buffer = PyMem_Malloc(new_buffer_size);
1334 if (self->buffer == NULL) {
1335 PyErr_NoMemory();
1336 return -1;
1337 }
1338 self->buffer_size = new_buffer_size;
1339 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001340}
1341
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001342static PyObject *
1343xmlparse_buffer_used_getter(xmlparseobject *self, void *closure)
1344{
1345 return PyLong_FromLong((long) self->buffer_used);
1346}
1347
1348static PyObject *
1349xmlparse_namespace_prefixes_getter(xmlparseobject *self, void *closure)
1350{
1351 return PyBool_FromLong(self->ns_prefixes);
1352}
1353
1354static int
1355xmlparse_namespace_prefixes_setter(xmlparseobject *self, PyObject *v, void *closure)
1356{
1357 if (v == NULL) {
1358 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1359 return -1;
1360 }
1361 int b = PyObject_IsTrue(v);
1362 if (b < 0)
1363 return -1;
1364 self->ns_prefixes = b;
1365 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1366 return 0;
1367}
1368
1369static PyObject *
1370xmlparse_ordered_attributes_getter(xmlparseobject *self, void *closure)
1371{
1372 return PyBool_FromLong(self->ordered_attributes);
1373}
1374
1375static int
1376xmlparse_ordered_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1377{
1378 if (v == NULL) {
1379 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1380 return -1;
1381 }
1382 int b = PyObject_IsTrue(v);
1383 if (b < 0)
1384 return -1;
1385 self->ordered_attributes = b;
1386 return 0;
1387}
1388
1389static PyObject *
1390xmlparse_specified_attributes_getter(xmlparseobject *self, void *closure)
1391{
1392 return PyBool_FromLong((long) self->specified_attributes);
1393}
1394
1395static int
1396xmlparse_specified_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1397{
1398 if (v == NULL) {
1399 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1400 return -1;
1401 }
1402 int b = PyObject_IsTrue(v);
1403 if (b < 0)
1404 return -1;
1405 self->specified_attributes = b;
1406 return 0;
1407}
1408
/* Member table: exposes the `intern` field as a read-only object attribute.
   The {NULL} entry terminates the table. */
1409static PyMemberDef xmlparse_members[] = {
1410 {"intern", T_OBJECT, offsetof(xmlparseobject, intern), READONLY, NULL},
1411 {NULL}
1412};
1413
/* Getter/setter table for xmlparser attributes.
   XMLPARSE_GETTER_DEF(name) emits a read-only entry backed by
   xmlparse_<name>_getter; XMLPARSE_GETTER_SETTER_DEF(name) additionally wires
   xmlparse_<name>_setter.  Both helper macros are undefined right after the
   table.  The {NULL} entry terminates the table. */
1414#define XMLPARSE_GETTER_DEF(name) \
1415 {#name, (getter)xmlparse_##name##_getter, NULL, NULL},
1416#define XMLPARSE_GETTER_SETTER_DEF(name) \
1417 {#name, (getter)xmlparse_##name##_getter, \
1418 (setter)xmlparse_##name##_setter, NULL},
1419
1420static PyGetSetDef xmlparse_getsetlist[] = {
1421 XMLPARSE_GETTER_DEF(ErrorCode)
1422 XMLPARSE_GETTER_DEF(ErrorLineNumber)
1423 XMLPARSE_GETTER_DEF(ErrorColumnNumber)
1424 XMLPARSE_GETTER_DEF(ErrorByteIndex)
1425 XMLPARSE_GETTER_DEF(CurrentLineNumber)
1426 XMLPARSE_GETTER_DEF(CurrentColumnNumber)
1427 XMLPARSE_GETTER_DEF(CurrentByteIndex)
1428 XMLPARSE_GETTER_SETTER_DEF(buffer_size)
1429 XMLPARSE_GETTER_SETTER_DEF(buffer_text)
1430 XMLPARSE_GETTER_DEF(buffer_used)
1431 XMLPARSE_GETTER_SETTER_DEF(namespace_prefixes)
1432 XMLPARSE_GETTER_SETTER_DEF(ordered_attributes)
1433 XMLPARSE_GETTER_SETTER_DEF(specified_attributes)
1434 {NULL},
1435};
1436
1437#undef XMLPARSE_GETTER_DEF
1438#undef XMLPARSE_GETTER_SETTER_DEF
1439
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001440static int
1441xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1442{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001443 int i;
1444 for (i = 0; handler_info[i].name != NULL; i++)
1445 Py_VISIT(op->handlers[i]);
Fred Drakecde79132001-04-25 16:01:30 +00001446 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001447}
1448
1449static int
1450xmlparse_clear(xmlparseobject *op)
1451{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001452 clear_handlers(op, 0);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001453 Py_CLEAR(op->intern);
Fred Drakecde79132001-04-25 16:01:30 +00001454 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001455}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001456
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001457PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001458
1459static PyTypeObject Xmlparsetype = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001460 PyVarObject_HEAD_INIT(NULL, 0)
1461 "pyexpat.xmlparser", /*tp_name*/
Antoine Pitrou23683ef2011-01-04 00:00:31 +00001462 sizeof(xmlparseobject), /*tp_basicsize*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001463 0, /*tp_itemsize*/
1464 /* methods */
1465 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1466 (printfunc)0, /*tp_print*/
1467 0, /*tp_getattr*/
Alexander Belopolskye239d232010-12-08 23:31:48 +00001468 0, /*tp_setattr*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001469 0, /*tp_reserved*/
1470 (reprfunc)0, /*tp_repr*/
1471 0, /*tp_as_number*/
1472 0, /*tp_as_sequence*/
1473 0, /*tp_as_mapping*/
1474 (hashfunc)0, /*tp_hash*/
1475 (ternaryfunc)0, /*tp_call*/
1476 (reprfunc)0, /*tp_str*/
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001477 (getattrofunc)0, /* tp_getattro */
1478 (setattrofunc)0, /* tp_setattro */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001479 0, /* tp_as_buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001480 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001481 Xmlparsetype__doc__, /* tp_doc - Documentation string */
1482 (traverseproc)xmlparse_traverse, /* tp_traverse */
1483 (inquiry)xmlparse_clear, /* tp_clear */
1484 0, /* tp_richcompare */
1485 0, /* tp_weaklistoffset */
1486 0, /* tp_iter */
1487 0, /* tp_iternext */
1488 xmlparse_methods, /* tp_methods */
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001489 xmlparse_members, /* tp_members */
1490 xmlparse_getsetlist, /* tp_getset */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001491};
1492
1493/* End of code for xmlparser objects */
1494/* -------------------------------------------------------- */
1495
Brett Cannond0aeda82014-08-22 14:23:20 -04001496/*[clinic input]
1497pyexpat.ParserCreate
1498
Larry Hastingsdbfdc382015-05-04 06:59:46 -07001499 encoding: str(accept={str, NoneType}) = NULL
1500 namespace_separator: str(accept={str, NoneType}) = NULL
Brett Cannond0aeda82014-08-22 14:23:20 -04001501 intern: object = NULL
1502
1503Return a new XML parser object.
1504[clinic start generated code]*/
1505
Brett Cannond0aeda82014-08-22 14:23:20 -04001506static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001507pyexpat_ParserCreate_impl(PyObject *module, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04001508 const char *namespace_separator, PyObject *intern)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001509/*[clinic end generated code: output=295c0cf01ab1146c input=23d29704acad385d]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001510{
Fred Drakeb91a36b2002-06-27 19:40:48 +00001511 PyObject *result;
1512 int intern_decref = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001513
Fred Drakecde79132001-04-25 16:01:30 +00001514 if (namespace_separator != NULL
1515 && strlen(namespace_separator) > 1) {
1516 PyErr_SetString(PyExc_ValueError,
1517 "namespace_separator must be at most one"
1518 " character, omitted, or None");
1519 return NULL;
1520 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001521 /* Explicitly passing None means no interning is desired.
1522 Not passing anything means that a new dictionary is used. */
1523 if (intern == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001524 intern = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001525 else if (intern == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001526 intern = PyDict_New();
1527 if (!intern)
1528 return NULL;
1529 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001530 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001531 else if (!PyDict_Check(intern)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001532 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1533 return NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001534 }
1535
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001536 result = newxmlparseobject(encoding, namespace_separator, intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001537 if (intern_decref) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001538 Py_DECREF(intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001539 }
1540 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001541}
1542
Brett Cannond0aeda82014-08-22 14:23:20 -04001543/*[clinic input]
1544pyexpat.ErrorString
1545
1546 code: long
1547 /
1548
1549Returns string error for given number.
1550[clinic start generated code]*/
1551
Brett Cannond0aeda82014-08-22 14:23:20 -04001552static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001553pyexpat_ErrorString_impl(PyObject *module, long code)
1554/*[clinic end generated code: output=2feae50d166f2174 input=cc67de010d9e62b3]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001555{
Fred Drake0582df92000-07-12 04:49:00 +00001556 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001557}
1558
1559/* List of methods defined in the module */
1560
1561static struct PyMethodDef pyexpat_methods[] = {
Brett Cannond0aeda82014-08-22 14:23:20 -04001562 PYEXPAT_PARSERCREATE_METHODDEF
1563 PYEXPAT_ERRORSTRING_METHODDEF
1564 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001565};
1566
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001567/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001568
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001569PyDoc_STRVAR(pyexpat_module_documentation,
1570"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001571
Fred Drakecde79132001-04-25 16:01:30 +00001572/* Initialization function for the module */
1573
/* Both names may be predefined by the build; these are the defaults. */
#if !defined(MODULE_NAME)
#  define MODULE_NAME "pyexpat"
#endif

#if !defined(MODULE_INITFUNC)
#  define MODULE_INITFUNC PyInit_pyexpat
#endif
1581
Martin v. Löwis1a214512008-06-11 05:26:20 +00001582static struct PyModuleDef pyexpatmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001583 PyModuleDef_HEAD_INIT,
1584 MODULE_NAME,
1585 pyexpat_module_documentation,
1586 -1,
1587 pyexpat_methods,
1588 NULL,
1589 NULL,
1590 NULL,
1591 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001592};
1593
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001594static int init_handler_descrs(void)
1595{
1596 int i;
1597 assert(!PyType_HasFeature(&Xmlparsetype, Py_TPFLAGS_VALID_VERSION_TAG));
1598 for (i = 0; handler_info[i].name != NULL; i++) {
1599 struct HandlerInfo *hi = &handler_info[i];
1600 hi->getset.name = hi->name;
1601 hi->getset.get = (getter)xmlparse_handler_getter;
1602 hi->getset.set = (setter)xmlparse_handler_setter;
1603 hi->getset.closure = &handler_info[i];
1604
1605 PyObject *descr;
1606 if (PyDict_GetItemString(Xmlparsetype.tp_dict, hi->name))
1607 continue;
1608 descr = PyDescr_NewGetSet(&Xmlparsetype, &hi->getset);
1609
1610 if (descr == NULL)
1611 return -1;
1612 if (PyDict_SetItem(Xmlparsetype.tp_dict, PyDescr_NAME(descr), descr) < 0) {
1613 Py_DECREF(descr);
1614 return -1;
1615 }
1616 Py_DECREF(descr);
1617 }
1618 return 0;
1619}
1620
Martin v. Löwis069dde22003-01-21 10:58:18 +00001621PyMODINIT_FUNC
1622MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001623{
1624 PyObject *m, *d;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001625 PyObject *errmod_name = PyUnicode_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001626 PyObject *errors_module;
1627 PyObject *modelmod_name;
1628 PyObject *model_module;
Georg Brandlb4dac712010-10-15 14:46:48 +00001629 PyObject *tmpnum, *tmpstr;
1630 PyObject *codes_dict;
1631 PyObject *rev_codes_dict;
1632 int res;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001633 static struct PyExpat_CAPI capi;
Georg Brandlb4dac712010-10-15 14:46:48 +00001634 PyObject *capi_object;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001635
Fred Drake6f987622000-08-25 18:03:30 +00001636 if (errmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001637 return NULL;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001638 modelmod_name = PyUnicode_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001639 if (modelmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001640 return NULL;
Fred Drake6f987622000-08-25 18:03:30 +00001641
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001642 if (PyType_Ready(&Xmlparsetype) < 0 || init_handler_descrs() < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001644
Fred Drake0582df92000-07-12 04:49:00 +00001645 /* Create the module and add the functions */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001646 m = PyModule_Create(&pyexpatmodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001647 if (m == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001648 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001649
Fred Drake0582df92000-07-12 04:49:00 +00001650 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001651 if (ErrorObject == NULL) {
1652 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001653 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001654 if (ErrorObject == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001655 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001656 }
1657 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001658 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001659 Py_INCREF(ErrorObject);
1660 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001661 Py_INCREF(&Xmlparsetype);
1662 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001663
Fred Drake738293d2000-12-21 17:25:07 +00001664 PyModule_AddStringConstant(m, "EXPAT_VERSION",
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001665 XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001666 {
1667 XML_Expat_Version info = XML_ExpatVersionInfo();
1668 PyModule_AddObject(m, "version_info",
1669 Py_BuildValue("(iii)", info.major,
1670 info.minor, info.micro));
1671 }
Fred Drake0582df92000-07-12 04:49:00 +00001672 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001673 compiled, this should check and set native_encoding
1674 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001675 */
Fred Drake93adb692000-09-23 04:55:48 +00001676 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001677
Fred Drake93adb692000-09-23 04:55:48 +00001678 d = PyModule_GetDict(m);
Christian Heimes7a5457b2016-09-09 00:13:35 +02001679 if (d == NULL) {
1680 Py_DECREF(m);
1681 return NULL;
1682 }
Fred Drake6f987622000-08-25 18:03:30 +00001683 errors_module = PyDict_GetItem(d, errmod_name);
1684 if (errors_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001685 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001686 if (errors_module != NULL) {
Eric Snow3f9eee62017-09-15 16:35:20 -06001687 _PyImport_SetModule(errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001688 /* gives away the reference to errors_module */
1689 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001690 }
1691 }
Fred Drake6f987622000-08-25 18:03:30 +00001692 Py_DECREF(errmod_name);
Fred Drake85d835f2001-02-08 15:39:08 +00001693 model_module = PyDict_GetItem(d, modelmod_name);
1694 if (model_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001695 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001696 if (model_module != NULL) {
Eric Snow3f9eee62017-09-15 16:35:20 -06001697 _PyImport_SetModule(modelmod_name, model_module);
Fred Drake85d835f2001-02-08 15:39:08 +00001698 /* gives away the reference to model_module */
1699 PyModule_AddObject(m, "model", model_module);
1700 }
1701 }
1702 Py_DECREF(modelmod_name);
Christian Heimes7a5457b2016-09-09 00:13:35 +02001703 if (errors_module == NULL || model_module == NULL) {
Fred Drake85d835f2001-02-08 15:39:08 +00001704 /* Don't core dump later! */
Christian Heimes7a5457b2016-09-09 00:13:35 +02001705 Py_DECREF(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001706 return NULL;
Christian Heimes7a5457b2016-09-09 00:13:35 +02001707 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001708
Martin v. Löwisc847f402003-01-21 11:09:21 +00001709#if XML_COMBINED_VERSION > 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001710 {
1711 const XML_Feature *features = XML_GetFeatureList();
1712 PyObject *list = PyList_New(0);
1713 if (list == NULL)
1714 /* just ignore it */
1715 PyErr_Clear();
1716 else {
1717 int i = 0;
1718 for (; features[i].feature != XML_FEATURE_END; ++i) {
1719 int ok;
1720 PyObject *item = Py_BuildValue("si", features[i].name,
1721 features[i].value);
1722 if (item == NULL) {
1723 Py_DECREF(list);
1724 list = NULL;
1725 break;
1726 }
1727 ok = PyList_Append(list, item);
1728 Py_DECREF(item);
1729 if (ok < 0) {
1730 PyErr_Clear();
1731 break;
1732 }
1733 }
1734 if (list != NULL)
1735 PyModule_AddObject(m, "features", list);
1736 }
1737 }
Martin v. Löwisc847f402003-01-21 11:09:21 +00001738#endif
Fred Drake6f987622000-08-25 18:03:30 +00001739
Georg Brandlb4dac712010-10-15 14:46:48 +00001740 codes_dict = PyDict_New();
1741 rev_codes_dict = PyDict_New();
1742 if (codes_dict == NULL || rev_codes_dict == NULL) {
1743 Py_XDECREF(codes_dict);
1744 Py_XDECREF(rev_codes_dict);
1745 return NULL;
1746 }
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001747
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001748#define MYCONST(name) \
Georg Brandlb4dac712010-10-15 14:46:48 +00001749 if (PyModule_AddStringConstant(errors_module, #name, \
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001750 XML_ErrorString(name)) < 0) \
Georg Brandlb4dac712010-10-15 14:46:48 +00001751 return NULL; \
1752 tmpnum = PyLong_FromLong(name); \
1753 if (tmpnum == NULL) return NULL; \
1754 res = PyDict_SetItemString(codes_dict, \
1755 XML_ErrorString(name), tmpnum); \
1756 if (res < 0) return NULL; \
1757 tmpstr = PyUnicode_FromString(XML_ErrorString(name)); \
1758 if (tmpstr == NULL) return NULL; \
1759 res = PyDict_SetItem(rev_codes_dict, tmpnum, tmpstr); \
1760 Py_DECREF(tmpstr); \
1761 Py_DECREF(tmpnum); \
1762 if (res < 0) return NULL; \
Fred Drake7bd9f412000-07-04 23:51:31 +00001763
Fred Drake0582df92000-07-12 04:49:00 +00001764 MYCONST(XML_ERROR_NO_MEMORY);
1765 MYCONST(XML_ERROR_SYNTAX);
1766 MYCONST(XML_ERROR_NO_ELEMENTS);
1767 MYCONST(XML_ERROR_INVALID_TOKEN);
1768 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1769 MYCONST(XML_ERROR_PARTIAL_CHAR);
1770 MYCONST(XML_ERROR_TAG_MISMATCH);
1771 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1772 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1773 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1774 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1775 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1776 MYCONST(XML_ERROR_ASYNC_ENTITY);
1777 MYCONST(XML_ERROR_BAD_CHAR_REF);
1778 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1779 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1780 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1781 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1782 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001783 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1784 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1785 MYCONST(XML_ERROR_NOT_STANDALONE);
Fred Drake283b6702004-08-04 22:28:16 +00001786 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1787 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1788 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1789 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1790 /* Added in Expat 1.95.7. */
1791 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1792 /* Added in Expat 1.95.8. */
1793 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1794 MYCONST(XML_ERROR_INCOMPLETE_PE);
1795 MYCONST(XML_ERROR_XML_DECL);
1796 MYCONST(XML_ERROR_TEXT_DECL);
1797 MYCONST(XML_ERROR_PUBLICID);
1798 MYCONST(XML_ERROR_SUSPENDED);
1799 MYCONST(XML_ERROR_NOT_SUSPENDED);
1800 MYCONST(XML_ERROR_ABORTED);
1801 MYCONST(XML_ERROR_FINISHED);
1802 MYCONST(XML_ERROR_SUSPEND_PE);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001803
Georg Brandlb4dac712010-10-15 14:46:48 +00001804 if (PyModule_AddStringConstant(errors_module, "__doc__",
1805 "Constants used to describe "
1806 "error conditions.") < 0)
1807 return NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001808
Georg Brandlb4dac712010-10-15 14:46:48 +00001809 if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0)
1810 return NULL;
1811 if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0)
1812 return NULL;
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001813
Fred Drake93adb692000-09-23 04:55:48 +00001814#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001815
Fred Drake85d835f2001-02-08 15:39:08 +00001816#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001817 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1818 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1819 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00001820#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001821
Fred Drake85d835f2001-02-08 15:39:08 +00001822#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1823 PyModule_AddStringConstant(model_module, "__doc__",
1824 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001825
Fred Drake85d835f2001-02-08 15:39:08 +00001826 MYCONST(XML_CTYPE_EMPTY);
1827 MYCONST(XML_CTYPE_ANY);
1828 MYCONST(XML_CTYPE_MIXED);
1829 MYCONST(XML_CTYPE_NAME);
1830 MYCONST(XML_CTYPE_CHOICE);
1831 MYCONST(XML_CTYPE_SEQ);
1832
1833 MYCONST(XML_CQUANT_NONE);
1834 MYCONST(XML_CQUANT_OPT);
1835 MYCONST(XML_CQUANT_REP);
1836 MYCONST(XML_CQUANT_PLUS);
1837#undef MYCONST
Fredrik Lundhc3345042005-12-13 19:49:55 +00001838
1839 /* initialize pyexpat dispatch table */
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001840 capi.size = sizeof(capi);
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001841 capi.magic = PyExpat_CAPI_MAGIC;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001842 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
1843 capi.MINOR_VERSION = XML_MINOR_VERSION;
1844 capi.MICRO_VERSION = XML_MICRO_VERSION;
1845 capi.ErrorString = XML_ErrorString;
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001846 capi.GetErrorCode = XML_GetErrorCode;
1847 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
1848 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001849 capi.Parse = XML_Parse;
1850 capi.ParserCreate_MM = XML_ParserCreate_MM;
1851 capi.ParserFree = XML_ParserFree;
1852 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
1853 capi.SetCommentHandler = XML_SetCommentHandler;
1854 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1855 capi.SetElementHandler = XML_SetElementHandler;
1856 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1857 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1858 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1859 capi.SetUserData = XML_SetUserData;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03001860 capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03001861 capi.SetEncoding = XML_SetEncoding;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001862 capi.DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
Christian Heimescb5778f2018-09-18 14:38:58 +02001863#if XML_COMBINED_VERSION >= 20100
1864 capi.SetHashSalt = XML_SetHashSalt;
1865#else
1866 capi.SetHashSalt = NULL;
1867#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001868
Benjamin Petersonb173f782009-05-05 22:31:58 +00001869 /* export using capsule */
1870 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001871 if (capi_object)
1872 PyModule_AddObject(m, "expat_CAPI", capi_object);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001873 return m;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001874}
1875
Fred Drake6f987622000-08-25 18:03:30 +00001876static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001877clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00001878{
Fred Drakecde79132001-04-25 16:01:30 +00001879 int i = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001880
Fred Drake71b63ff2002-06-28 22:29:01 +00001881 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001882 if (initial)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001883 self->handlers[i] = NULL;
1884 else {
Serhiy Storchaka1ed017a2015-12-27 15:51:32 +02001885 Py_CLEAR(self->handlers[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001886 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00001887 }
Fred Drakecde79132001-04-25 16:01:30 +00001888 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001889}
1890
Tim Peters0c322792002-07-17 16:49:03 +00001891static struct HandlerInfo handler_info[] = {
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001892
1893#define HANDLER_INFO(name) \
1894 {#name, (xmlhandlersetter)XML_Set##name, (xmlhandler)my_##name},
1895
1896 HANDLER_INFO(StartElementHandler)
1897 HANDLER_INFO(EndElementHandler)
1898 HANDLER_INFO(ProcessingInstructionHandler)
1899 HANDLER_INFO(CharacterDataHandler)
1900 HANDLER_INFO(UnparsedEntityDeclHandler)
1901 HANDLER_INFO(NotationDeclHandler)
1902 HANDLER_INFO(StartNamespaceDeclHandler)
1903 HANDLER_INFO(EndNamespaceDeclHandler)
1904 HANDLER_INFO(CommentHandler)
1905 HANDLER_INFO(StartCdataSectionHandler)
1906 HANDLER_INFO(EndCdataSectionHandler)
1907 HANDLER_INFO(DefaultHandler)
1908 HANDLER_INFO(DefaultHandlerExpand)
1909 HANDLER_INFO(NotStandaloneHandler)
1910 HANDLER_INFO(ExternalEntityRefHandler)
1911 HANDLER_INFO(StartDoctypeDeclHandler)
1912 HANDLER_INFO(EndDoctypeDeclHandler)
1913 HANDLER_INFO(EntityDeclHandler)
1914 HANDLER_INFO(XmlDeclHandler)
1915 HANDLER_INFO(ElementDeclHandler)
1916 HANDLER_INFO(AttlistDeclHandler)
Martin v. Löwisc847f402003-01-21 11:09:21 +00001917#if XML_COMBINED_VERSION >= 19504
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001918 HANDLER_INFO(SkippedEntityHandler)
Martin v. Löwisc847f402003-01-21 11:09:21 +00001919#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001920
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001921#undef HANDLER_INFO
1922
Fred Drake0582df92000-07-12 04:49:00 +00001923 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001924};