blob: 9384081f9ffcb916feb7df0dc42e5308d988fe71 [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Serhiy Storchaka55f82492018-10-19 18:00:51 +03004#include "structmember.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00005#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00006#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00007
Fredrik Lundhc3345042005-12-13 19:49:55 +00008#include "pyexpat.h"
9
Brett Cannond0aeda82014-08-22 14:23:20 -040010/* Do not emit Clinic output to a file as that wreaks havoc with conditionally
11 included methods. */
12/*[clinic input]
13module pyexpat
14[clinic start generated code]*/
15/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/
16
Martin v. Löwisc847f402003-01-21 11:09:21 +000017#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
18
/* Memory-handling suite for Expat: the three slots are filled, in order,
   with PyObject_Malloc, PyObject_Realloc and PyObject_Free. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
21
/* Symbolic indices for the per-parser `handlers` array (see have_handler()
   and the my_*Handler trampolines, which index self->handlers[] with these
   names).  SkippedEntity only exists when the Expat headers report version
   1.95.4 or newer.  _DummyDecl is the trailing enumerator; presumably a
   sentinel -- it is not used as an index in this part of the file. */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    SkippedEntity,
#endif
    _DummyDecl
};
49
50static PyObject *ErrorObject;
51
52/* ----------------------------------------------------- */
53
54/* Declarations for objects of type xmlparser */
55
/* Instance layout of a pyexpat parser object: wraps one Expat XML_Parser
   together with the Python-level configuration flags, the optional
   character-data accumulation buffer and the table of Python callbacks. */
typedef struct {
    PyObject_HEAD

    XML_Parser itself;          /* The wrapped Expat parser */
    int ordered_attributes;     /* Return attributes as a list. */
    int specified_attributes;   /* Report only specified attributes. */
    int in_callback;            /* Is a callback active? */
    int ns_prefixes;            /* Namespace-triplets mode? */
    XML_Char *buffer;           /* Buffer used when accumulating characters */
                                /* NULL if not enabled */
    int buffer_size;            /* Size of buffer, in XML_Char units */
    int buffer_used;            /* Buffer units in use */
    PyObject *intern;           /* Dictionary to intern strings */
    PyObject **handlers;        /* Python callbacks, indexed by HandlerTypes;
                                   a NULL slot means "no handler" */
} xmlparseobject;
71
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030072#include "clinic/pyexpat.c.h"
73
Fred Drake2a3d7db2002-06-28 22:56:48 +000074#define CHARACTER_DATA_BUFFER_SIZE 8192
75
Jeremy Hylton938ace62002-07-17 16:30:39 +000076static PyTypeObject Xmlparsetype;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000077
/* Signature of a function that installs (or, presumably when `meth` is
   NULL, removes) a C-level callback on an Expat parser. */
typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
/* Type-erased pointer to one of the my_*Handler trampolines. */
typedef void* xmlhandler;

/* One entry of the handler_info table: the handler's name, the function
   that registers a callback for it, the C trampoline to register, and the
   getset descriptor associated with it. */
struct HandlerInfo {
    const char *name;
    xmlhandlersetter setter;
    xmlhandler handler;
    PyGetSetDef getset;
};
87
Jeremy Hylton938ace62002-07-17 16:30:39 +000088static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000089
Fred Drakebd6101c2001-02-14 18:29:45 +000090/* Set an integer attribute on the error object; return true on success,
91 * false on an exception.
92 */
93static int
Serhiy Storchakaef1585e2015-12-25 20:01:53 +020094set_error_attr(PyObject *err, const char *name, int value)
Fred Drakebd6101c2001-02-14 18:29:45 +000095{
Christian Heimes217cfd12007-12-02 14:31:20 +000096 PyObject *v = PyLong_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +000097
Neal Norwitz2f5e9902006-03-08 06:36:45 +000098 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
99 Py_XDECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +0000100 return 0;
101 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000102 Py_DECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +0000103 return 1;
104}
105
106/* Build and set an Expat exception, including positioning
107 * information. Always returns NULL.
108 */
Fred Drake85d835f2001-02-08 15:39:08 +0000109static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000110set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000111{
112 PyObject *err;
Victor Stinner499dfcf2011-03-21 13:26:24 +0100113 PyObject *buffer;
Fred Drake85d835f2001-02-08 15:39:08 +0000114 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000115 int lineno = XML_GetErrorLineNumber(parser);
116 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000117
Victor Stinner499dfcf2011-03-21 13:26:24 +0100118 buffer = PyUnicode_FromFormat("%s: line %i, column %i",
119 XML_ErrorString(code), lineno, column);
120 if (buffer == NULL)
121 return NULL;
Victor Stinner7bfb42d2016-12-05 17:04:32 +0100122 err = PyObject_CallFunctionObjArgs(ErrorObject, buffer, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +0100123 Py_DECREF(buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000124 if ( err != NULL
125 && set_error_attr(err, "code", code)
126 && set_error_attr(err, "offset", column)
127 && set_error_attr(err, "lineno", lineno)) {
128 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000129 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000130 Py_XDECREF(err);
Fred Drake85d835f2001-02-08 15:39:08 +0000131 return NULL;
132}
133
Fred Drake71b63ff2002-06-28 22:29:01 +0000134static int
135have_handler(xmlparseobject *self, int type)
136{
137 PyObject *handler = self->handlers[type];
138 return handler != NULL;
139}
140
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000141/* Convert a string of XML_Chars into a Unicode string.
142 Returns None if str is a null pointer. */
143
Fred Drake0582df92000-07-12 04:49:00 +0000144static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000145conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000146{
Fred Drake71b63ff2002-06-28 22:29:01 +0000147 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000148 and hence in UTF-8. */
149 /* UTF-8 from Expat, Unicode desired */
150 if (str == NULL) {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200151 Py_RETURN_NONE;
Fred Drake0582df92000-07-12 04:49:00 +0000152 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000153 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000154}
155
Fred Drake0582df92000-07-12 04:49:00 +0000156static PyObject *
157conv_string_len_to_unicode(const XML_Char *str, int len)
158{
Fred Drake71b63ff2002-06-28 22:29:01 +0000159 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000160 and hence in UTF-8. */
161 /* UTF-8 from Expat, Unicode desired */
162 if (str == NULL) {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200163 Py_RETURN_NONE;
Fred Drake0582df92000-07-12 04:49:00 +0000164 }
Fred Drake6f987622000-08-25 18:03:30 +0000165 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000166}
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000167
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000168/* Callback routines */
169
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000170static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000171
/* This handler is used when an error has been detected, in the hope
   that actual parsing can be terminated early.  This will only help
   if an external entity reference is encountered.

   It is installed by flag_error().  All arguments are ignored and the
   handler unconditionally returns 0. */
static int
error_external_entity_ref_handler(XML_Parser parser,
                                  const XML_Char *context,
                                  const XML_Char *base,
                                  const XML_Char *systemId,
                                  const XML_Char *publicId)
{
    return 0;
}
184
/* Dummy character data handler used when an error (exception) has
   been detected, and the actual parsing can be terminated early.
   This is needed since character data handler can't be safely removed
   from within the character data handler, but can be replaced. It is
   used only from the character data handler trampoline, and must be
   used right after `flag_error()` is called.

   All arguments are ignored. */
static void
noop_character_data_handler(void *userData, const XML_Char *data, int len)
{
    /* Do nothing. */
}
196
Fred Drake6f987622000-08-25 18:03:30 +0000197static void
198flag_error(xmlparseobject *self)
199{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000200 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000201 XML_SetExternalEntityRefHandler(self->itself,
202 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000203}
204
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000205static PyObject*
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200206call_with_frame(const char *funcname, int lineno, PyObject* func, PyObject* args,
Fred Drake39689c52004-08-13 03:12:57 +0000207 xmlparseobject *self)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000208{
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200209 PyObject *res;
Fred Drakebd6101c2001-02-14 18:29:45 +0000210
Fred Drakebd6101c2001-02-14 18:29:45 +0000211 res = PyEval_CallObject(func, args);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000212 if (res == NULL) {
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200213 _PyTraceback_Add(funcname, __FILE__, lineno);
Fred Drake39689c52004-08-13 03:12:57 +0000214 XML_StopParser(self->itself, XML_FALSE);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000215 }
Fred Drakebd6101c2001-02-14 18:29:45 +0000216 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000217}
218
Fred Drakeb91a36b2002-06-27 19:40:48 +0000219static PyObject*
220string_intern(xmlparseobject *self, const char* str)
221{
Guido van Rossum4ca94712007-07-23 17:42:32 +0000222 PyObject *result = conv_string_to_unicode(str);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000223 PyObject *value;
Neal Norwitz484d9a42005-09-30 04:46:49 +0000224 /* result can be NULL if the unicode conversion failed. */
225 if (!result)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000226 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000227 if (!self->intern)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000229 value = PyDict_GetItem(self->intern, result);
230 if (!value) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000231 if (PyDict_SetItem(self->intern, result, result) == 0)
Fred Drakeb91a36b2002-06-27 19:40:48 +0000232 return result;
Zackery Spytz68def052018-10-19 00:57:38 -0600233 else {
234 Py_DECREF(result);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000235 return NULL;
Zackery Spytz68def052018-10-19 00:57:38 -0600236 }
Fred Drakeb91a36b2002-06-27 19:40:48 +0000237 }
238 Py_INCREF(value);
239 Py_DECREF(result);
240 return value;
241}
242
Fred Drake2a3d7db2002-06-28 22:56:48 +0000243/* Return 0 on success, -1 on exception.
244 * flag_error() will be called before return if needed.
245 */
246static int
247call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
248{
249 PyObject *args;
250 PyObject *temp;
251
Georg Brandlc01537f2010-10-15 16:26:08 +0000252 if (!have_handler(self, CharacterData))
253 return -1;
254
Fred Drake2a3d7db2002-06-28 22:56:48 +0000255 args = PyTuple_New(1);
256 if (args == NULL)
257 return -1;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000258 temp = (conv_string_len_to_unicode(buffer, len));
Fred Drake2a3d7db2002-06-28 22:56:48 +0000259 if (temp == NULL) {
260 Py_DECREF(args);
261 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000262 XML_SetCharacterDataHandler(self->itself,
263 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000264 return -1;
265 }
266 PyTuple_SET_ITEM(args, 0, temp);
267 /* temp is now a borrowed reference; consider it unused. */
268 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200269 temp = call_with_frame("CharacterData", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000270 self->handlers[CharacterData], args, self);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000271 /* temp is an owned reference again, or NULL */
272 self->in_callback = 0;
273 Py_DECREF(args);
274 if (temp == NULL) {
275 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000276 XML_SetCharacterDataHandler(self->itself,
277 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000278 return -1;
279 }
280 Py_DECREF(temp);
281 return 0;
282}
283
284static int
285flush_character_buffer(xmlparseobject *self)
286{
287 int rc;
288 if (self->buffer == NULL || self->buffer_used == 0)
289 return 0;
290 rc = call_character_handler(self, self->buffer, self->buffer_used);
291 self->buffer_used = 0;
292 return rc;
293}
294
295static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000297{
298 xmlparseobject *self = (xmlparseobject *) userData;
Victor Stinner9e09c262013-07-18 23:17:01 +0200299
300 if (PyErr_Occurred())
301 return;
302
Fred Drake2a3d7db2002-06-28 22:56:48 +0000303 if (self->buffer == NULL)
304 call_character_handler(self, data, len);
305 else {
306 if ((self->buffer_used + len) > self->buffer_size) {
307 if (flush_character_buffer(self) < 0)
308 return;
309 /* handler might have changed; drop the rest on the floor
310 * if there isn't a handler anymore
311 */
312 if (!have_handler(self, CharacterData))
313 return;
314 }
315 if (len > self->buffer_size) {
316 call_character_handler(self, data, len);
317 self->buffer_used = 0;
318 }
319 else {
320 memcpy(self->buffer + self->buffer_used,
321 data, len * sizeof(XML_Char));
322 self->buffer_used += len;
323 }
324 }
325}
326
Fred Drake85d835f2001-02-08 15:39:08 +0000327static void
328my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000329 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000330{
331 xmlparseobject *self = (xmlparseobject *)userData;
332
Fred Drake71b63ff2002-06-28 22:29:01 +0000333 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000334 PyObject *container, *rv, *args;
335 int i, max;
336
Victor Stinner9e09c262013-07-18 23:17:01 +0200337 if (PyErr_Occurred())
338 return;
339
Fred Drake2a3d7db2002-06-28 22:56:48 +0000340 if (flush_character_buffer(self) < 0)
341 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000342 /* Set max to the number of slots filled in atts[]; max/2 is
343 * the number of attributes we need to process.
344 */
345 if (self->specified_attributes) {
346 max = XML_GetSpecifiedAttributeCount(self->itself);
347 }
348 else {
349 max = 0;
350 while (atts[max] != NULL)
351 max += 2;
352 }
353 /* Build the container. */
354 if (self->ordered_attributes)
355 container = PyList_New(max);
356 else
357 container = PyDict_New();
358 if (container == NULL) {
359 flag_error(self);
360 return;
361 }
362 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000363 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000364 PyObject *v;
365 if (n == NULL) {
366 flag_error(self);
367 Py_DECREF(container);
368 return;
369 }
Guido van Rossum4ca94712007-07-23 17:42:32 +0000370 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
Fred Drake85d835f2001-02-08 15:39:08 +0000371 if (v == NULL) {
372 flag_error(self);
373 Py_DECREF(container);
374 Py_DECREF(n);
375 return;
376 }
377 if (self->ordered_attributes) {
378 PyList_SET_ITEM(container, i, n);
379 PyList_SET_ITEM(container, i+1, v);
380 }
381 else if (PyDict_SetItem(container, n, v)) {
382 flag_error(self);
383 Py_DECREF(n);
384 Py_DECREF(v);
Zackery Spytz68def052018-10-19 00:57:38 -0600385 Py_DECREF(container);
Fred Drake85d835f2001-02-08 15:39:08 +0000386 return;
387 }
388 else {
389 Py_DECREF(n);
390 Py_DECREF(v);
391 }
392 }
Neal Norwitz484d9a42005-09-30 04:46:49 +0000393 args = string_intern(self, name);
Fred Drake85d835f2001-02-08 15:39:08 +0000394 if (args == NULL) {
395 Py_DECREF(container);
396 return;
397 }
Zackery Spytz68def052018-10-19 00:57:38 -0600398 args = Py_BuildValue("(NN)", args, container);
399 if (args == NULL) {
400 return;
401 }
Fred Drake85d835f2001-02-08 15:39:08 +0000402 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000403 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200404 rv = call_with_frame("StartElement", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000405 self->handlers[StartElement], args, self);
Fred Drakebd6101c2001-02-14 18:29:45 +0000406 self->in_callback = 0;
407 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000408 if (rv == NULL) {
409 flag_error(self);
410 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000411 }
Fred Drake85d835f2001-02-08 15:39:08 +0000412 Py_DECREF(rv);
413 }
414}
415
/* RC_HANDLER generates an Expat trampoline named my_<NAME>Handler.
 *
 *   RC            C return type of the trampoline
 *   NAME          HandlerTypes enumerator / callback name
 *   PARAMS        parenthesised C parameter list
 *   INIT          extra local declarations (or just `;`)
 *   PARAM_FORMAT  parenthesised Py_BuildValue() argument list
 *   CONVERSION    statement converting `rv` into the C result (or `;`)
 *   RETURN        expression to return (or `;` for void)
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function returns RETURN untouched when no callback is
 * installed, when an exception is pending, or when flushing buffered
 * character data fails.  If building the arguments or the callback itself
 * fails, flag_error() is called before returning.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
        RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (!have_handler(self, NAME)) \
        return RETURN; \
    if (PyErr_Occurred() || flush_character_buffer(self) < 0) \
        return RETURN; \
    args = Py_BuildValue PARAM_FORMAT ;\
    if (args == NULL) { \
        flag_error(self); \
        return RETURN; \
    } \
    self->in_callback = 1; \
    rv = call_with_frame(#NAME,__LINE__, \
                         self->handlers[NAME], args, self); \
    self->in_callback = 0; \
    Py_DECREF(args); \
    if (rv == NULL) { \
        flag_error(self); \
        return RETURN; \
    } \
    CONVERSION \
    Py_DECREF(rv); \
    return RETURN; \
}

/* Trampoline that returns nothing and ignores the callback's result. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
        (xmlparseobject *)userData)

/* Trampoline that returns the callback's result converted with
   PyLong_AsLong(); the result defaults to 0 when the callback is not run. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
                        rc = PyLong_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
/* EndElement callback: called with the interned element name. */
VOID_HANDLER(EndElement,
             (void *userData, const XML_Char *name),
             ("(N)", string_intern(self, name)))

/* ProcessingInstruction callback: called with (target, data); the target
   is interned, the data is converted without interning. */
VOID_HANDLER(ProcessingInstruction,
             (void *userData,
              const XML_Char *target,
              const XML_Char *data),
             ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))

/* UnparsedEntityDecl callback: called with
   (entityName, base, systemId, publicId, notationName), all interned. */
VOID_HANDLER(UnparsedEntityDecl,
             (void *userData,
              const XML_Char *entityName,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("(NNNNN)",
              string_intern(self, entityName), string_intern(self, base),
              string_intern(self, systemId), string_intern(self, publicId),
              string_intern(self, notationName)))

/* EntityDecl callback: called with (entityName, is_parameter_entity,
   value, base, systemId, publicId, notationName).  The value is decoded
   using its explicit length; the other strings are interned. */
VOID_HANDLER(EntityDecl,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity,
              const XML_Char *value,
              int value_length,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("NiNNNNN",
              string_intern(self, entityName), is_parameter_entity,
              (conv_string_len_to_unicode(value, value_length)),
              string_intern(self, base), string_intern(self, systemId),
              string_intern(self, publicId),
              string_intern(self, notationName)))

/* XmlDecl callback: called with (version, encoding, standalone). */
VOID_HANDLER(XmlDecl,
             (void *userData,
              const XML_Char *version,
              const XML_Char *encoding,
              int standalone),
             ("(O&O&i)",
              conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
              standalone))
503
504static PyObject *
505conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000506 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000507{
508 PyObject *result = NULL;
509 PyObject *children = PyTuple_New(model->numchildren);
510 int i;
511
512 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000513 assert(model->numchildren < INT_MAX);
514 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000515 PyObject *child = conv_content_model(&model->children[i],
516 conv_string);
517 if (child == NULL) {
518 Py_XDECREF(children);
519 return NULL;
520 }
521 PyTuple_SET_ITEM(children, i, child);
522 }
523 result = Py_BuildValue("(iiO&N)",
524 model->type, model->quant,
525 conv_string,model->name, children);
526 }
527 return result;
528}
529
Fred Drake06dd8cf2003-02-02 03:54:17 +0000530static void
531my_ElementDeclHandler(void *userData,
532 const XML_Char *name,
533 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000534{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000535 xmlparseobject *self = (xmlparseobject *)userData;
536 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000537
Fred Drake06dd8cf2003-02-02 03:54:17 +0000538 if (have_handler(self, ElementDecl)) {
539 PyObject *rv = NULL;
540 PyObject *modelobj, *nameobj;
541
Victor Stinner9e09c262013-07-18 23:17:01 +0200542 if (PyErr_Occurred())
543 return;
544
Fred Drake06dd8cf2003-02-02 03:54:17 +0000545 if (flush_character_buffer(self) < 0)
546 goto finally;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000547 modelobj = conv_content_model(model, (conv_string_to_unicode));
Fred Drake06dd8cf2003-02-02 03:54:17 +0000548 if (modelobj == NULL) {
549 flag_error(self);
550 goto finally;
551 }
552 nameobj = string_intern(self, name);
553 if (nameobj == NULL) {
554 Py_DECREF(modelobj);
555 flag_error(self);
556 goto finally;
557 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000558 args = Py_BuildValue("NN", nameobj, modelobj);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000559 if (args == NULL) {
Fred Drake06dd8cf2003-02-02 03:54:17 +0000560 flag_error(self);
561 goto finally;
562 }
563 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200564 rv = call_with_frame("ElementDecl", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000565 self->handlers[ElementDecl], args, self);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000566 self->in_callback = 0;
567 if (rv == NULL) {
568 flag_error(self);
569 goto finally;
570 }
571 Py_DECREF(rv);
572 }
573 finally:
574 Py_XDECREF(args);
575 XML_FreeContentModel(self->itself, model);
576 return;
577}
Fred Drake85d835f2001-02-08 15:39:08 +0000578
/* AttlistDecl callback: called with
   (elname, attname, att_type, dflt, isrequired); the element and attribute
   names are interned, the type and default are converted without
   interning. */
VOID_HANDLER(AttlistDecl,
             (void *userData,
              const XML_Char *elname,
              const XML_Char *attname,
              const XML_Char *att_type,
              const XML_Char *dflt,
              int isrequired),
             ("(NNO&O&i)",
              string_intern(self, elname), string_intern(self, attname),
              conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
              isrequired))

#if XML_COMBINED_VERSION >= 19504
/* SkippedEntity callback (only compiled for Expat 1.95.4 or newer):
   called with (entityName, is_parameter_entity). */
VOID_HANDLER(SkippedEntity,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity),
             ("Ni",
              string_intern(self, entityName), is_parameter_entity))
#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +0000599
/* NotationDecl callback: called with
   (notationName, base, systemId, publicId), all interned. */
VOID_HANDLER(NotationDecl,
                (void *userData,
                        const XML_Char *notationName,
                        const XML_Char *base,
                        const XML_Char *systemId,
                        const XML_Char *publicId),
                ("(NNNN)",
                 string_intern(self, notationName), string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)))

/* StartNamespaceDecl callback: called with (prefix, uri), both interned. */
VOID_HANDLER(StartNamespaceDecl,
                (void *userData,
                      const XML_Char *prefix,
                      const XML_Char *uri),
                ("(NN)",
                 string_intern(self, prefix), string_intern(self, uri)))

/* EndNamespaceDecl callback: called with the interned prefix. */
VOID_HANDLER(EndNamespaceDecl,
                (void *userData,
                    const XML_Char *prefix),
                ("(N)", string_intern(self, prefix)))

/* Comment callback: called with the comment text. */
VOID_HANDLER(Comment,
               (void *userData, const XML_Char *data),
                ("(O&)", conv_string_to_unicode ,data))

/* StartCdataSection callback: called without arguments. */
VOID_HANDLER(StartCdataSection,
               (void *userData),
                ("()"))

/* EndCdataSection callback: called without arguments. */
VOID_HANDLER(EndCdataSection,
               (void *userData),
                ("()"))

/* Default callback: called with the `len` units of text at `s`. */
VOID_HANDLER(Default,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))

/* DefaultHandlerExpand callback: called with the `len` units of text
   at `s`. */
VOID_HANDLER(DefaultHandlerExpand,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))
/* Alias for the generated name my_DefaultHandlerExpandHandler. */
#define my_DefaultHandlerExpand my_DefaultHandlerExpandHandler
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000642
/* NotStandalone callback: called without arguments; its result, converted
   with PyLong_AsLong(), is returned to Expat (0 if the callback did not
   run). */
INT_HANDLER(NotStandalone,
                (void *userData),
                ("()"))

/* ExternalEntityRef callback: called with
   (context, base, systemId, publicId).  Unlike the other trampolines the
   parser object is obtained through XML_GetUserData(parser), because this
   handler's first parameter is the XML_Parser rather than the user data.
   The callback's result, converted with PyLong_AsLong(), is returned to
   Expat (0 if the callback did not run). */
RC_HANDLER(int, ExternalEntityRef,
                (XML_Parser parser,
                    const XML_Char *context,
                    const XML_Char *base,
                    const XML_Char *systemId,
                    const XML_Char *publicId),
                int rc=0;,
                ("(O&NNN)",
                 conv_string_to_unicode ,context, string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)),
                rc = PyLong_AsLong(rv);, rc,
                XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000659
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000660/* XXX UnknownEncodingHandler */
661
/* StartDoctypeDecl callback: called with
   (doctypeName, sysid, pubid, has_internal_subset); the three strings
   are interned. */
VOID_HANDLER(StartDoctypeDecl,
             (void *userData, const XML_Char *doctypeName,
              const XML_Char *sysid, const XML_Char *pubid,
              int has_internal_subset),
             ("(NNNi)", string_intern(self, doctypeName),
              string_intern(self, sysid), string_intern(self, pubid),
              has_internal_subset))

/* EndDoctypeDecl callback: called without arguments. */
VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000671
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000672/* ---------------------------------------------------------------- */
Brett Cannond0aeda82014-08-22 14:23:20 -0400673/*[clinic input]
674class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype"
675[clinic start generated code]*/
676/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2393162385232e1c]*/
677
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000678
Fred Drake71b63ff2002-06-28 22:29:01 +0000679static PyObject *
680get_parse_result(xmlparseobject *self, int rv)
681{
682 if (PyErr_Occurred()) {
683 return NULL;
684 }
685 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000686 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000687 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000688 if (flush_character_buffer(self) < 0) {
689 return NULL;
690 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000691 return PyLong_FromLong(rv);
Fred Drake71b63ff2002-06-28 22:29:01 +0000692}
693
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200694#define MAX_CHUNK_SIZE (1 << 20)
695
Brett Cannond0aeda82014-08-22 14:23:20 -0400696/*[clinic input]
697pyexpat.xmlparser.Parse
698
699 data: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200700 isfinal: bool(accept={int}) = False
Brett Cannond0aeda82014-08-22 14:23:20 -0400701 /
702
703Parse XML data.
704
705`isfinal' should be true at end of input.
706[clinic start generated code]*/
707
Brett Cannond0aeda82014-08-22 14:23:20 -0400708static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400709pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyObject *data,
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +0300710 int isfinal)
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200711/*[clinic end generated code: output=f4db843dd1f4ed4b input=eb616027bfa9847f]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400712{
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200713 const char *s;
714 Py_ssize_t slen;
715 Py_buffer view;
716 int rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000717
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200718 if (PyUnicode_Check(data)) {
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200719 view.buf = NULL;
Serhiy Storchaka36b365c2013-02-04 18:28:01 +0200720 s = PyUnicode_AsUTF8AndSize(data, &slen);
721 if (s == NULL)
722 return NULL;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200723 /* Explicitly set UTF-8 encoding. Return code ignored. */
724 (void)XML_SetEncoding(self->itself, "utf-8");
725 }
726 else {
727 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
728 return NULL;
729 s = view.buf;
730 slen = view.len;
731 }
732
733 while (slen > MAX_CHUNK_SIZE) {
734 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
735 if (!rc)
736 goto done;
737 s += MAX_CHUNK_SIZE;
738 slen -= MAX_CHUNK_SIZE;
739 }
Serhiy Storchakafad85aa2015-11-07 15:42:38 +0200740 Py_BUILD_ASSERT(MAX_CHUNK_SIZE <= INT_MAX);
741 assert(slen <= INT_MAX);
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +0300742 rc = XML_Parse(self->itself, s, (int)slen, isfinal);
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200743
744done:
745 if (view.buf != NULL)
746 PyBuffer_Release(&view);
747 return get_parse_result(self, rc);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000748}
749
Fred Drakeca1f4262000-09-21 20:10:23 +0000750/* File reading copied from cPickle */
751
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000752#define BUF_SIZE 2048
753
Fred Drake0582df92000-07-12 04:49:00 +0000754static int
755readinst(char *buf, int buf_size, PyObject *meth)
756{
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000757 PyObject *str;
758 Py_ssize_t len;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200759 const char *ptr;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000760
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000761 str = PyObject_CallFunction(meth, "n", buf_size);
Martin v. Löwis9171f022004-10-13 19:50:11 +0000762 if (str == NULL)
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000763 goto error;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000764
Christian Heimes72b710a2008-05-26 13:28:38 +0000765 if (PyBytes_Check(str))
766 ptr = PyBytes_AS_STRING(str);
Christian Heimes9c4756e2008-05-26 13:22:05 +0000767 else if (PyByteArray_Check(str))
768 ptr = PyByteArray_AS_STRING(str);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000769 else {
Fred Drake71b63ff2002-06-28 22:29:01 +0000770 PyErr_Format(PyExc_TypeError,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000771 "read() did not return a bytes object (type=%.400s)",
Christian Heimes90aa7642007-12-19 02:45:37 +0000772 Py_TYPE(str)->tp_name);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000773 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000774 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000775 len = Py_SIZE(str);
Fred Drake0582df92000-07-12 04:49:00 +0000776 if (len > buf_size) {
777 PyErr_Format(PyExc_ValueError,
778 "read() returned too much data: "
Victor Stinner9d6f9362011-01-04 22:00:04 +0000779 "%i bytes requested, %zd returned",
Fred Drake0582df92000-07-12 04:49:00 +0000780 buf_size, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000781 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000782 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000783 memcpy(buf, ptr, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000784 Py_DECREF(str);
785 /* len <= buf_size <= INT_MAX */
Victor Stinner0fcab4a2011-01-04 12:59:15 +0000786 return (int)len;
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000787
788error:
789 Py_XDECREF(str);
790 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000791}
792
Brett Cannond0aeda82014-08-22 14:23:20 -0400793/*[clinic input]
794pyexpat.xmlparser.ParseFile
795
796 file: object
797 /
798
799Parse XML data from file-like object.
800[clinic start generated code]*/
801
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000802static PyObject *
Brett Cannond0aeda82014-08-22 14:23:20 -0400803pyexpat_xmlparser_ParseFile(xmlparseobject *self, PyObject *file)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300804/*[clinic end generated code: output=2adc6a13100cc42b input=fbb5a12b6038d735]*/
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000805{
Fred Drake0582df92000-07-12 04:49:00 +0000806 int rv = 1;
Fred Drake0582df92000-07-12 04:49:00 +0000807 PyObject *readmethod = NULL;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200808 _Py_IDENTIFIER(read);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000809
Brett Cannond0aeda82014-08-22 14:23:20 -0400810 readmethod = _PyObject_GetAttrId(file, &PyId_read);
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000811 if (readmethod == NULL) {
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000812 PyErr_SetString(PyExc_TypeError,
813 "argument must have 'read' attribute");
814 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000815 }
816 for (;;) {
817 int bytes_read;
818 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
Fred Drake7b6caff2003-07-21 17:05:56 +0000819 if (buf == NULL) {
Fred Drakef239c6d2003-07-21 17:22:43 +0000820 Py_XDECREF(readmethod);
Ned Deilye7d532f2014-03-27 16:39:58 -0700821 return get_parse_result(self, 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000822 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000823
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000824 bytes_read = readinst(buf, BUF_SIZE, readmethod);
825 if (bytes_read < 0) {
826 Py_DECREF(readmethod);
827 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000828 }
829 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000830 if (PyErr_Occurred()) {
831 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000832 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000833 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000834
Fred Drake0582df92000-07-12 04:49:00 +0000835 if (!rv || bytes_read == 0)
836 break;
837 }
Fred Drake7b6caff2003-07-21 17:05:56 +0000838 Py_XDECREF(readmethod);
Fred Drake71b63ff2002-06-28 22:29:01 +0000839 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000840}
841
Brett Cannond0aeda82014-08-22 14:23:20 -0400842/*[clinic input]
843pyexpat.xmlparser.SetBase
844
845 base: str
846 /
847
848Set the base URL for the parser.
849[clinic start generated code]*/
850
Brett Cannond0aeda82014-08-22 14:23:20 -0400851static PyObject *
852pyexpat_xmlparser_SetBase_impl(xmlparseobject *self, const char *base)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300853/*[clinic end generated code: output=c212ddceb607b539 input=c684e5de895ee1a8]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400854{
Fred Drake0582df92000-07-12 04:49:00 +0000855 if (!XML_SetBase(self->itself, base)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000856 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000857 }
Brett Cannond0aeda82014-08-22 14:23:20 -0400858 Py_RETURN_NONE;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000859}
860
Brett Cannond0aeda82014-08-22 14:23:20 -0400861/*[clinic input]
862pyexpat.xmlparser.GetBase
863
864Return base URL string for the parser.
865[clinic start generated code]*/
866
Brett Cannond0aeda82014-08-22 14:23:20 -0400867static PyObject *
868pyexpat_xmlparser_GetBase_impl(xmlparseobject *self)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300869/*[clinic end generated code: output=2886cb21f9a8739a input=918d71c38009620e]*/
Fred Drake0582df92000-07-12 04:49:00 +0000870{
Fred Drake0582df92000-07-12 04:49:00 +0000871 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000872}
873
Brett Cannond0aeda82014-08-22 14:23:20 -0400874/*[clinic input]
875pyexpat.xmlparser.GetInputContext
876
877Return the untranslated text of the input that caused the current event.
878
879If the event was generated by a large amount of text (such as a start tag
880for an element with many attributes), not all of the text may be available.
881[clinic start generated code]*/
882
Brett Cannond0aeda82014-08-22 14:23:20 -0400883static PyObject *
884pyexpat_xmlparser_GetInputContext_impl(xmlparseobject *self)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300885/*[clinic end generated code: output=a88026d683fc22cc input=034df8712db68379]*/
Fred Drakebd6101c2001-02-14 18:29:45 +0000886{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000887 if (self->in_callback) {
888 int offset, size;
889 const char *buffer
890 = XML_GetInputContext(self->itself, &offset, &size);
Fred Drakebd6101c2001-02-14 18:29:45 +0000891
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000892 if (buffer != NULL)
Christian Heimes72b710a2008-05-26 13:28:38 +0000893 return PyBytes_FromStringAndSize(buffer + offset,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000894 size - offset);
895 else
896 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000897 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000898 else
899 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000900}
Fred Drakebd6101c2001-02-14 18:29:45 +0000901
Brett Cannond0aeda82014-08-22 14:23:20 -0400902/*[clinic input]
903pyexpat.xmlparser.ExternalEntityParserCreate
904
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700905 context: str(accept={str, NoneType})
Brett Cannond0aeda82014-08-22 14:23:20 -0400906 encoding: str = NULL
907 /
908
909Create a parser for parsing an external entity based on the information passed to the ExternalEntityRefHandler.
910[clinic start generated code]*/
911
Brett Cannond0aeda82014-08-22 14:23:20 -0400912static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400913pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
914 const char *context,
915 const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700916/*[clinic end generated code: output=535cda9d7a0fbcd6 input=b906714cc122c322]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400917{
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000918 xmlparseobject *new_parser;
919 int i;
920
Martin v. Löwis894258c2001-09-23 10:20:10 +0000921 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake85d835f2001-02-08 15:39:08 +0000922 if (new_parser == NULL)
923 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +0000924 new_parser->buffer_size = self->buffer_size;
925 new_parser->buffer_used = 0;
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000926 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000927 new_parser->ordered_attributes = self->ordered_attributes;
928 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +0000929 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +0000930 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000931 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000932 encoding);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000933 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000934 new_parser->intern = self->intern;
935 Py_XINCREF(new_parser->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +0000936 PyObject_GC_Track(new_parser);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000937
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000938 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +0200939 new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000940 if (new_parser->buffer == NULL) {
941 Py_DECREF(new_parser);
942 return PyErr_NoMemory();
943 }
944 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000945 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +0000946 Py_DECREF(new_parser);
947 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000948 }
949
950 XML_SetUserData(new_parser->itself, (void *)new_parser);
951
952 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +0000953 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +0000954 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000955
Serhiy Storchaka1a1ff292015-02-16 13:28:22 +0200956 new_parser->handlers = PyMem_New(PyObject *, i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000957 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +0000958 Py_DECREF(new_parser);
959 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000960 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000961 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000962
963 /* then copy handlers from self */
964 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +0000965 PyObject *handler = self->handlers[i];
966 if (handler != NULL) {
967 Py_INCREF(handler);
968 new_parser->handlers[i] = handler;
969 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +0000970 handler_info[i].handler);
971 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000972 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000973 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000974}
975
Brett Cannond0aeda82014-08-22 14:23:20 -0400976/*[clinic input]
977pyexpat.xmlparser.SetParamEntityParsing
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000978
Brett Cannond0aeda82014-08-22 14:23:20 -0400979 flag: int
980 /
981
982Controls parsing of parameter entities (including the external DTD subset).
983
984Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,
985XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and
986XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag
987was successful.
988[clinic start generated code]*/
989
Brett Cannond0aeda82014-08-22 14:23:20 -0400990static PyObject *
991pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject *self, int flag)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300992/*[clinic end generated code: output=18668ee8e760d64c input=8aea19b4b15e9af1]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400993{
994 flag = XML_SetParamEntityParsing(self->itself, flag);
Christian Heimes217cfd12007-12-02 14:31:20 +0000995 return PyLong_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000996}
997
Martin v. Löwisc847f402003-01-21 11:09:21 +0000998
999#if XML_COMBINED_VERSION >= 19505
Brett Cannond0aeda82014-08-22 14:23:20 -04001000/*[clinic input]
1001pyexpat.xmlparser.UseForeignDTD
1002
1003 flag: bool = True
1004 /
1005
1006Allows the application to provide an artificial external subset if one is not specified as part of the document instance.
1007
1008This readily allows the use of a 'default' document type controlled by the
1009application, while still getting the advantage of providing document type
1010information to the parser. 'flag' defaults to True if not provided.
1011[clinic start generated code]*/
1012
Brett Cannond0aeda82014-08-22 14:23:20 -04001013static PyObject *
1014pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, int flag)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001015/*[clinic end generated code: output=cfaa9aa50bb0f65c input=78144c519d116a6e]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001016{
Martin v. Löwis069dde22003-01-21 10:58:18 +00001017 enum XML_Error rc;
Brett Cannond0aeda82014-08-22 14:23:20 -04001018
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001019 rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
Martin v. Löwis069dde22003-01-21 10:58:18 +00001020 if (rc != XML_ERROR_NONE) {
1021 return set_error(self, rc);
1022 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001023 Py_RETURN_NONE;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001024}
Martin v. Löwisc847f402003-01-21 11:09:21 +00001025#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001026
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001027static struct PyMethodDef xmlparse_methods[] = {
Brett Cannond0aeda82014-08-22 14:23:20 -04001028 PYEXPAT_XMLPARSER_PARSE_METHODDEF
1029 PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
1030 PYEXPAT_XMLPARSER_SETBASE_METHODDEF
1031 PYEXPAT_XMLPARSER_GETBASE_METHODDEF
1032 PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
1033 PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
1034 PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
Martin v. Löwisc847f402003-01-21 11:09:21 +00001035#if XML_COMBINED_VERSION >= 19505
Brett Cannond0aeda82014-08-22 14:23:20 -04001036 PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
Martin v. Löwisc847f402003-01-21 11:09:21 +00001037#endif
Brett Cannond0aeda82014-08-22 14:23:20 -04001038 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001039};
1040
1041/* ---------- */
1042
1043
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001044
Fred Drake71b63ff2002-06-28 22:29:01 +00001045/* pyexpat international encoding support.
1046 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001047*/
1048
Fred Drake71b63ff2002-06-28 22:29:01 +00001049static int
1050PyUnknownEncodingHandler(void *encodingHandlerData,
1051 const XML_Char *name,
1052 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001053{
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001054 static unsigned char template_buffer[256] = {0};
1055 PyObject* u;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001056 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001057 void *data;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001058 unsigned int kind;
Fred Drake71b63ff2002-06-28 22:29:01 +00001059
Victor Stinner9e09c262013-07-18 23:17:01 +02001060 if (PyErr_Occurred())
1061 return XML_STATUS_ERROR;
1062
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001063 if (template_buffer[1] == 0) {
1064 for (i = 0; i < 256; i++)
1065 template_buffer[i] = i;
Tim Peters63cb99e2001-02-17 18:12:50 +00001066 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001067
1068 u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
Christian Heimesb5821552013-06-29 20:43:13 +02001069 if (u == NULL || PyUnicode_READY(u)) {
Christian Heimes72172422013-06-29 21:49:27 +02001070 Py_XDECREF(u);
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001071 return XML_STATUS_ERROR;
Christian Heimesb5821552013-06-29 20:43:13 +02001072 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001073
1074 if (PyUnicode_GET_LENGTH(u) != 256) {
1075 Py_DECREF(u);
1076 PyErr_SetString(PyExc_ValueError,
1077 "multi-byte encodings are not supported");
1078 return XML_STATUS_ERROR;
1079 }
1080
1081 kind = PyUnicode_KIND(u);
1082 data = PyUnicode_DATA(u);
1083 for (i = 0; i < 256; i++) {
1084 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1085 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1086 info->map[i] = ch;
1087 else
1088 info->map[i] = -1;
1089 }
1090
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001091 info->data = NULL;
1092 info->convert = NULL;
1093 info->release = NULL;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001094 Py_DECREF(u);
1095
1096 return XML_STATUS_OK;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001097}
1098
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001099
1100static PyObject *
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001101newxmlparseobject(const char *encoding, const char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001102{
1103 int i;
1104 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001105
Martin v. Löwis894258c2001-09-23 10:20:10 +00001106 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake0582df92000-07-12 04:49:00 +00001107 if (self == NULL)
1108 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001109
Fred Drake2a3d7db2002-06-28 22:56:48 +00001110 self->buffer = NULL;
1111 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1112 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001113 self->ordered_attributes = 0;
1114 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001115 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001116 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001117 self->handlers = NULL;
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001118 self->intern = intern;
1119 Py_XINCREF(self->intern);
1120 PyObject_GC_Track(self);
1121
Christian Heimesfa535f52013-07-07 17:35:11 +02001122 /* namespace_separator is either NULL or contains one char + \0 */
1123 self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
1124 namespace_separator);
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001125 if (self->itself == NULL) {
1126 PyErr_SetString(PyExc_RuntimeError,
1127 "XML_ParserCreate failed");
1128 Py_DECREF(self);
1129 return NULL;
1130 }
Victor Stinner23ec4b52017-06-15 00:54:36 +02001131#if XML_COMBINED_VERSION >= 20100
1132 /* This feature was added upstream in libexpat 2.1.0. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001133 XML_SetHashSalt(self->itself,
Christian Heimes985ecdc2013-11-20 11:46:18 +01001134 (unsigned long)_Py_HashSecret.expat.hashsalt);
Gregory P. Smith25227712012-03-14 18:10:37 -07001135#endif
Fred Drake0582df92000-07-12 04:49:00 +00001136 XML_SetUserData(self->itself, (void *)self);
Fred Drake7c75bf22002-07-01 14:02:31 +00001137 XML_SetUnknownEncodingHandler(self->itself,
1138 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001139
Fred Drake2a3d7db2002-06-28 22:56:48 +00001140 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001141 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001142
Serhiy Storchaka1a1ff292015-02-16 13:28:22 +02001143 self->handlers = PyMem_New(PyObject *, i);
Fred Drake7c75bf22002-07-01 14:02:31 +00001144 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001145 Py_DECREF(self);
1146 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001147 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001148 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001149
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001150 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001151}
1152
1153
1154static void
Fred Drake0582df92000-07-12 04:49:00 +00001155xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001156{
Fred Drake0582df92000-07-12 04:49:00 +00001157 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001158 PyObject_GC_UnTrack(self);
Fred Drake85d835f2001-02-08 15:39:08 +00001159 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001160 XML_ParserFree(self->itself);
1161 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001162
Fred Drake85d835f2001-02-08 15:39:08 +00001163 if (self->handlers != NULL) {
Serhiy Storchaka1ed017a2015-12-27 15:51:32 +02001164 for (i = 0; handler_info[i].name != NULL; i++)
1165 Py_CLEAR(self->handlers[i]);
Victor Stinnerb6404912013-07-07 16:21:41 +02001166 PyMem_Free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001167 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001168 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001169 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001170 PyMem_Free(self->buffer);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001171 self->buffer = NULL;
1172 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001173 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001174 PyObject_GC_Del(self);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001175}
1176
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001177
1178static PyObject *
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001179xmlparse_handler_getter(xmlparseobject *self, struct HandlerInfo *hi)
Fred Drake71b63ff2002-06-28 22:29:01 +00001180{
Victor Stinner28f468c2018-11-22 13:21:43 +01001181 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1182 int handlernum = (int)(hi - handler_info);
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001183 PyObject *result = self->handlers[handlernum];
1184 if (result == NULL)
1185 result = Py_None;
Fred Drake71b63ff2002-06-28 22:29:01 +00001186 Py_INCREF(result);
1187 return result;
1188}
1189
Fred Drake6f987622000-08-25 18:03:30 +00001190static int
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001191xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *hi)
Fred Drake0582df92000-07-12 04:49:00 +00001192{
Victor Stinner28f468c2018-11-22 13:21:43 +01001193 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1194 int handlernum = (int)(hi - handler_info);
Fred Drake85d835f2001-02-08 15:39:08 +00001195 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001196 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1197 return -1;
1198 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001199 if (handlernum == CharacterData) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001200 /* If we're changing the character data handler, flush all
1201 * cached data with the old handler. Not sure there's a
1202 * "right" thing to do, though, but this probably won't
1203 * happen.
1204 */
1205 if (flush_character_buffer(self) < 0)
1206 return -1;
1207 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001208
1209 xmlhandler c_handler = NULL;
1210 if (v == Py_None) {
1211 /* If this is the character data handler, and a character
1212 data handler is already active, we need to be more
1213 careful. What we can safely do is replace the existing
1214 character data handler callback function with a no-op
1215 function that will refuse to call Python. The downside
1216 is that this doesn't completely remove the character
1217 data handler from the C layer if there's any callback
1218 active, so Expat does a little more work than it
1219 otherwise would, but that's really an odd case. A more
1220 elaborate system of handlers and state could remove the
1221 C handler more effectively. */
1222 if (handlernum == CharacterData && self->in_callback)
1223 c_handler = noop_character_data_handler;
1224 v = NULL;
1225 }
1226 else if (v != NULL) {
1227 Py_INCREF(v);
1228 c_handler = handler_info[handlernum].handler;
1229 }
1230 Py_XSETREF(self->handlers[handlernum], v);
1231 handler_info[handlernum].setter(self->itself, c_handler);
1232 return 0;
1233}
1234
1235#define INT_GETTER(name) \
1236 static PyObject * \
1237 xmlparse_##name##_getter(xmlparseobject *self, void *closure) \
1238 { \
1239 return PyLong_FromLong((long) XML_Get##name(self->itself)); \
1240 }
1241INT_GETTER(ErrorCode)
1242INT_GETTER(ErrorLineNumber)
1243INT_GETTER(ErrorColumnNumber)
1244INT_GETTER(ErrorByteIndex)
1245INT_GETTER(CurrentLineNumber)
1246INT_GETTER(CurrentColumnNumber)
1247INT_GETTER(CurrentByteIndex)
1248
1249#undef INT_GETTER
1250
1251static PyObject *
1252xmlparse_buffer_text_getter(xmlparseobject *self, void *closure)
1253{
1254 return PyBool_FromLong(self->buffer != NULL);
1255}
1256
1257static int
1258xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
1259{
1260 if (v == NULL) {
1261 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1262 return -1;
1263 }
1264 int b = PyObject_IsTrue(v);
1265 if (b < 0)
1266 return -1;
1267 if (b) {
1268 if (self->buffer == NULL) {
1269 self->buffer = PyMem_Malloc(self->buffer_size);
1270 if (self->buffer == NULL) {
1271 PyErr_NoMemory();
1272 return -1;
1273 }
1274 self->buffer_used = 0;
1275 }
1276 }
1277 else if (self->buffer != NULL) {
1278 if (flush_character_buffer(self) < 0)
1279 return -1;
1280 PyMem_Free(self->buffer);
1281 self->buffer = NULL;
1282 }
1283 return 0;
1284}
1285
1286static PyObject *
1287xmlparse_buffer_size_getter(xmlparseobject *self, void *closure)
1288{
1289 return PyLong_FromLong((long) self->buffer_size);
1290}
1291
1292static int
1293xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
1294{
1295 if (v == NULL) {
1296 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1297 return -1;
1298 }
1299 long new_buffer_size;
1300 if (!PyLong_Check(v)) {
1301 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1302 return -1;
1303 }
1304
1305 new_buffer_size = PyLong_AsLong(v);
1306 if (new_buffer_size <= 0) {
1307 if (!PyErr_Occurred())
1308 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1309 return -1;
1310 }
1311
1312 /* trivial case -- no change */
1313 if (new_buffer_size == self->buffer_size) {
Fred Drake6f987622000-08-25 18:03:30 +00001314 return 0;
1315 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001316
1317 /* check maximum */
1318 if (new_buffer_size > INT_MAX) {
1319 char errmsg[100];
1320 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1321 PyErr_SetString(PyExc_ValueError, errmsg);
1322 return -1;
1323 }
1324
1325 if (self->buffer != NULL) {
1326 /* there is already a buffer */
1327 if (self->buffer_used != 0) {
1328 if (flush_character_buffer(self) < 0) {
1329 return -1;
1330 }
1331 }
1332 /* free existing buffer */
1333 PyMem_Free(self->buffer);
1334 }
1335 self->buffer = PyMem_Malloc(new_buffer_size);
1336 if (self->buffer == NULL) {
1337 PyErr_NoMemory();
1338 return -1;
1339 }
1340 self->buffer_size = new_buffer_size;
1341 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001342}
1343
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001344static PyObject *
1345xmlparse_buffer_used_getter(xmlparseobject *self, void *closure)
1346{
1347 return PyLong_FromLong((long) self->buffer_used);
1348}
1349
1350static PyObject *
1351xmlparse_namespace_prefixes_getter(xmlparseobject *self, void *closure)
1352{
1353 return PyBool_FromLong(self->ns_prefixes);
1354}
1355
1356static int
1357xmlparse_namespace_prefixes_setter(xmlparseobject *self, PyObject *v, void *closure)
1358{
1359 if (v == NULL) {
1360 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1361 return -1;
1362 }
1363 int b = PyObject_IsTrue(v);
1364 if (b < 0)
1365 return -1;
1366 self->ns_prefixes = b;
1367 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1368 return 0;
1369}
1370
1371static PyObject *
1372xmlparse_ordered_attributes_getter(xmlparseobject *self, void *closure)
1373{
1374 return PyBool_FromLong(self->ordered_attributes);
1375}
1376
1377static int
1378xmlparse_ordered_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1379{
1380 if (v == NULL) {
1381 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1382 return -1;
1383 }
1384 int b = PyObject_IsTrue(v);
1385 if (b < 0)
1386 return -1;
1387 self->ordered_attributes = b;
1388 return 0;
1389}
1390
1391static PyObject *
1392xmlparse_specified_attributes_getter(xmlparseobject *self, void *closure)
1393{
1394 return PyBool_FromLong((long) self->specified_attributes);
1395}
1396
1397static int
1398xmlparse_specified_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1399{
1400 if (v == NULL) {
1401 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1402 return -1;
1403 }
1404 int b = PyObject_IsTrue(v);
1405 if (b < 0)
1406 return -1;
1407 self->specified_attributes = b;
1408 return 0;
1409}
1410
1411static PyMemberDef xmlparse_members[] = {
1412 {"intern", T_OBJECT, offsetof(xmlparseobject, intern), READONLY, NULL},
1413 {NULL}
1414};
1415
1416#define XMLPARSE_GETTER_DEF(name) \
1417 {#name, (getter)xmlparse_##name##_getter, NULL, NULL},
1418#define XMLPARSE_GETTER_SETTER_DEF(name) \
1419 {#name, (getter)xmlparse_##name##_getter, \
1420 (setter)xmlparse_##name##_setter, NULL},
1421
1422static PyGetSetDef xmlparse_getsetlist[] = {
1423 XMLPARSE_GETTER_DEF(ErrorCode)
1424 XMLPARSE_GETTER_DEF(ErrorLineNumber)
1425 XMLPARSE_GETTER_DEF(ErrorColumnNumber)
1426 XMLPARSE_GETTER_DEF(ErrorByteIndex)
1427 XMLPARSE_GETTER_DEF(CurrentLineNumber)
1428 XMLPARSE_GETTER_DEF(CurrentColumnNumber)
1429 XMLPARSE_GETTER_DEF(CurrentByteIndex)
1430 XMLPARSE_GETTER_SETTER_DEF(buffer_size)
1431 XMLPARSE_GETTER_SETTER_DEF(buffer_text)
1432 XMLPARSE_GETTER_DEF(buffer_used)
1433 XMLPARSE_GETTER_SETTER_DEF(namespace_prefixes)
1434 XMLPARSE_GETTER_SETTER_DEF(ordered_attributes)
1435 XMLPARSE_GETTER_SETTER_DEF(specified_attributes)
1436 {NULL},
1437};
1438
1439#undef XMLPARSE_GETTER_DEF
1440#undef XMLPARSE_GETTER_SETTER_DEF
1441
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001442static int
1443xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1444{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001445 int i;
1446 for (i = 0; handler_info[i].name != NULL; i++)
1447 Py_VISIT(op->handlers[i]);
Fred Drakecde79132001-04-25 16:01:30 +00001448 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001449}
1450
/* tp_clear slot for parser objects: drops the references the parser holds
   so that reference cycles can be broken.  Always returns 0. */
static int
xmlparse_clear(xmlparseobject *op)
{
    /* initial == 0: release each stored handler and unregister it from
       the underlying parser (see clear_handlers()). */
    clear_handlers(op, 0);
    Py_CLEAR(op->intern);
    return 0;
}

Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001458
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001459PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001460
1461static PyTypeObject Xmlparsetype = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001462 PyVarObject_HEAD_INIT(NULL, 0)
1463 "pyexpat.xmlparser", /*tp_name*/
Antoine Pitrou23683ef2011-01-04 00:00:31 +00001464 sizeof(xmlparseobject), /*tp_basicsize*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001465 0, /*tp_itemsize*/
1466 /* methods */
1467 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1468 (printfunc)0, /*tp_print*/
1469 0, /*tp_getattr*/
Alexander Belopolskye239d232010-12-08 23:31:48 +00001470 0, /*tp_setattr*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001471 0, /*tp_reserved*/
1472 (reprfunc)0, /*tp_repr*/
1473 0, /*tp_as_number*/
1474 0, /*tp_as_sequence*/
1475 0, /*tp_as_mapping*/
1476 (hashfunc)0, /*tp_hash*/
1477 (ternaryfunc)0, /*tp_call*/
1478 (reprfunc)0, /*tp_str*/
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001479 (getattrofunc)0, /* tp_getattro */
1480 (setattrofunc)0, /* tp_setattro */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001481 0, /* tp_as_buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001482 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001483 Xmlparsetype__doc__, /* tp_doc - Documentation string */
1484 (traverseproc)xmlparse_traverse, /* tp_traverse */
1485 (inquiry)xmlparse_clear, /* tp_clear */
1486 0, /* tp_richcompare */
1487 0, /* tp_weaklistoffset */
1488 0, /* tp_iter */
1489 0, /* tp_iternext */
1490 xmlparse_methods, /* tp_methods */
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001491 xmlparse_members, /* tp_members */
1492 xmlparse_getsetlist, /* tp_getset */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001493};
1494
1495/* End of code for xmlparser objects */
1496/* -------------------------------------------------------- */
1497
Brett Cannond0aeda82014-08-22 14:23:20 -04001498/*[clinic input]
1499pyexpat.ParserCreate
1500
Larry Hastingsdbfdc382015-05-04 06:59:46 -07001501 encoding: str(accept={str, NoneType}) = NULL
1502 namespace_separator: str(accept={str, NoneType}) = NULL
Brett Cannond0aeda82014-08-22 14:23:20 -04001503 intern: object = NULL
1504
1505Return a new XML parser object.
1506[clinic start generated code]*/
1507
Brett Cannond0aeda82014-08-22 14:23:20 -04001508static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001509pyexpat_ParserCreate_impl(PyObject *module, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04001510 const char *namespace_separator, PyObject *intern)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001511/*[clinic end generated code: output=295c0cf01ab1146c input=23d29704acad385d]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001512{
Fred Drakeb91a36b2002-06-27 19:40:48 +00001513 PyObject *result;
1514 int intern_decref = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001515
Fred Drakecde79132001-04-25 16:01:30 +00001516 if (namespace_separator != NULL
1517 && strlen(namespace_separator) > 1) {
1518 PyErr_SetString(PyExc_ValueError,
1519 "namespace_separator must be at most one"
1520 " character, omitted, or None");
1521 return NULL;
1522 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001523 /* Explicitly passing None means no interning is desired.
1524 Not passing anything means that a new dictionary is used. */
1525 if (intern == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001526 intern = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001527 else if (intern == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001528 intern = PyDict_New();
1529 if (!intern)
1530 return NULL;
1531 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001532 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001533 else if (!PyDict_Check(intern)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001534 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1535 return NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001536 }
1537
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001538 result = newxmlparseobject(encoding, namespace_separator, intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001539 if (intern_decref) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001540 Py_DECREF(intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001541 }
1542 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001543}
1544
Brett Cannond0aeda82014-08-22 14:23:20 -04001545/*[clinic input]
1546pyexpat.ErrorString
1547
1548 code: long
1549 /
1550
1551Returns string error for given number.
1552[clinic start generated code]*/
1553
Brett Cannond0aeda82014-08-22 14:23:20 -04001554static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001555pyexpat_ErrorString_impl(PyObject *module, long code)
1556/*[clinic end generated code: output=2feae50d166f2174 input=cc67de010d9e62b3]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001557{
Fred Drake0582df92000-07-12 04:49:00 +00001558 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001559}
1560
1561/* List of methods defined in the module */
1562
/* Module-level functions.  The entries are macros; NOTE(review): they are
   presumably emitted by Argument Clinic for ParserCreate() and
   ErrorString() and carry their own trailing commas -- confirm against
   the generated clinic header. */
static struct PyMethodDef pyexpat_methods[] = {
    PYEXPAT_PARSERCREATE_METHODDEF
    PYEXPAT_ERRORSTRING_METHODDEF
    {NULL, NULL} /* sentinel */
};
1568
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001569/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001570
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001571PyDoc_STRVAR(pyexpat_module_documentation,
1572"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001573
Fred Drakecde79132001-04-25 16:01:30 +00001574/* Initialization function for the module */
1575
1576#ifndef MODULE_NAME
1577#define MODULE_NAME "pyexpat"
1578#endif
1579
1580#ifndef MODULE_INITFUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001581#define MODULE_INITFUNC PyInit_pyexpat
Fred Drakecde79132001-04-25 16:01:30 +00001582#endif
1583
Martin v. Löwis1a214512008-06-11 05:26:20 +00001584static struct PyModuleDef pyexpatmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001585 PyModuleDef_HEAD_INIT,
1586 MODULE_NAME,
1587 pyexpat_module_documentation,
1588 -1,
1589 pyexpat_methods,
1590 NULL,
1591 NULL,
1592 NULL,
1593 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001594};
1595
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001596static int init_handler_descrs(void)
1597{
1598 int i;
1599 assert(!PyType_HasFeature(&Xmlparsetype, Py_TPFLAGS_VALID_VERSION_TAG));
1600 for (i = 0; handler_info[i].name != NULL; i++) {
1601 struct HandlerInfo *hi = &handler_info[i];
1602 hi->getset.name = hi->name;
1603 hi->getset.get = (getter)xmlparse_handler_getter;
1604 hi->getset.set = (setter)xmlparse_handler_setter;
1605 hi->getset.closure = &handler_info[i];
1606
1607 PyObject *descr;
1608 if (PyDict_GetItemString(Xmlparsetype.tp_dict, hi->name))
1609 continue;
1610 descr = PyDescr_NewGetSet(&Xmlparsetype, &hi->getset);
1611
1612 if (descr == NULL)
1613 return -1;
1614 if (PyDict_SetItem(Xmlparsetype.tp_dict, PyDescr_NAME(descr), descr) < 0) {
1615 Py_DECREF(descr);
1616 return -1;
1617 }
1618 Py_DECREF(descr);
1619 }
1620 return 0;
1621}
1622
Martin v. Löwis069dde22003-01-21 10:58:18 +00001623PyMODINIT_FUNC
1624MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001625{
1626 PyObject *m, *d;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001627 PyObject *errmod_name = PyUnicode_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001628 PyObject *errors_module;
1629 PyObject *modelmod_name;
1630 PyObject *model_module;
Georg Brandlb4dac712010-10-15 14:46:48 +00001631 PyObject *tmpnum, *tmpstr;
1632 PyObject *codes_dict;
1633 PyObject *rev_codes_dict;
1634 int res;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001635 static struct PyExpat_CAPI capi;
Georg Brandlb4dac712010-10-15 14:46:48 +00001636 PyObject *capi_object;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001637
Fred Drake6f987622000-08-25 18:03:30 +00001638 if (errmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001639 return NULL;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001640 modelmod_name = PyUnicode_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001641 if (modelmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001642 return NULL;
Fred Drake6f987622000-08-25 18:03:30 +00001643
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001644 if (PyType_Ready(&Xmlparsetype) < 0 || init_handler_descrs() < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001645 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001646
Fred Drake0582df92000-07-12 04:49:00 +00001647 /* Create the module and add the functions */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001648 m = PyModule_Create(&pyexpatmodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001649 if (m == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001650 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001651
Fred Drake0582df92000-07-12 04:49:00 +00001652 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001653 if (ErrorObject == NULL) {
1654 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001655 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001656 if (ErrorObject == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001657 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001658 }
1659 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001660 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001661 Py_INCREF(ErrorObject);
1662 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001663 Py_INCREF(&Xmlparsetype);
1664 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001665
Fred Drake738293d2000-12-21 17:25:07 +00001666 PyModule_AddStringConstant(m, "EXPAT_VERSION",
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001667 XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001668 {
1669 XML_Expat_Version info = XML_ExpatVersionInfo();
1670 PyModule_AddObject(m, "version_info",
1671 Py_BuildValue("(iii)", info.major,
1672 info.minor, info.micro));
1673 }
Fred Drake0582df92000-07-12 04:49:00 +00001674 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001675 compiled, this should check and set native_encoding
1676 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001677 */
Fred Drake93adb692000-09-23 04:55:48 +00001678 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001679
Fred Drake93adb692000-09-23 04:55:48 +00001680 d = PyModule_GetDict(m);
Christian Heimes7a5457b2016-09-09 00:13:35 +02001681 if (d == NULL) {
1682 Py_DECREF(m);
1683 return NULL;
1684 }
Fred Drake6f987622000-08-25 18:03:30 +00001685 errors_module = PyDict_GetItem(d, errmod_name);
1686 if (errors_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001687 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001688 if (errors_module != NULL) {
Eric Snow3f9eee62017-09-15 16:35:20 -06001689 _PyImport_SetModule(errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001690 /* gives away the reference to errors_module */
1691 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001692 }
1693 }
Fred Drake6f987622000-08-25 18:03:30 +00001694 Py_DECREF(errmod_name);
Fred Drake85d835f2001-02-08 15:39:08 +00001695 model_module = PyDict_GetItem(d, modelmod_name);
1696 if (model_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001697 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001698 if (model_module != NULL) {
Eric Snow3f9eee62017-09-15 16:35:20 -06001699 _PyImport_SetModule(modelmod_name, model_module);
Fred Drake85d835f2001-02-08 15:39:08 +00001700 /* gives away the reference to model_module */
1701 PyModule_AddObject(m, "model", model_module);
1702 }
1703 }
1704 Py_DECREF(modelmod_name);
Christian Heimes7a5457b2016-09-09 00:13:35 +02001705 if (errors_module == NULL || model_module == NULL) {
Fred Drake85d835f2001-02-08 15:39:08 +00001706 /* Don't core dump later! */
Christian Heimes7a5457b2016-09-09 00:13:35 +02001707 Py_DECREF(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001708 return NULL;
Christian Heimes7a5457b2016-09-09 00:13:35 +02001709 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001710
Martin v. Löwisc847f402003-01-21 11:09:21 +00001711#if XML_COMBINED_VERSION > 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001712 {
1713 const XML_Feature *features = XML_GetFeatureList();
1714 PyObject *list = PyList_New(0);
1715 if (list == NULL)
1716 /* just ignore it */
1717 PyErr_Clear();
1718 else {
1719 int i = 0;
1720 for (; features[i].feature != XML_FEATURE_END; ++i) {
1721 int ok;
1722 PyObject *item = Py_BuildValue("si", features[i].name,
1723 features[i].value);
1724 if (item == NULL) {
1725 Py_DECREF(list);
1726 list = NULL;
1727 break;
1728 }
1729 ok = PyList_Append(list, item);
1730 Py_DECREF(item);
1731 if (ok < 0) {
1732 PyErr_Clear();
1733 break;
1734 }
1735 }
1736 if (list != NULL)
1737 PyModule_AddObject(m, "features", list);
1738 }
1739 }
Martin v. Löwisc847f402003-01-21 11:09:21 +00001740#endif
Fred Drake6f987622000-08-25 18:03:30 +00001741
Georg Brandlb4dac712010-10-15 14:46:48 +00001742 codes_dict = PyDict_New();
1743 rev_codes_dict = PyDict_New();
1744 if (codes_dict == NULL || rev_codes_dict == NULL) {
1745 Py_XDECREF(codes_dict);
1746 Py_XDECREF(rev_codes_dict);
1747 return NULL;
1748 }
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001749
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001750#define MYCONST(name) \
Georg Brandlb4dac712010-10-15 14:46:48 +00001751 if (PyModule_AddStringConstant(errors_module, #name, \
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001752 XML_ErrorString(name)) < 0) \
Georg Brandlb4dac712010-10-15 14:46:48 +00001753 return NULL; \
1754 tmpnum = PyLong_FromLong(name); \
1755 if (tmpnum == NULL) return NULL; \
1756 res = PyDict_SetItemString(codes_dict, \
1757 XML_ErrorString(name), tmpnum); \
1758 if (res < 0) return NULL; \
1759 tmpstr = PyUnicode_FromString(XML_ErrorString(name)); \
1760 if (tmpstr == NULL) return NULL; \
1761 res = PyDict_SetItem(rev_codes_dict, tmpnum, tmpstr); \
1762 Py_DECREF(tmpstr); \
1763 Py_DECREF(tmpnum); \
1764 if (res < 0) return NULL; \
Fred Drake7bd9f412000-07-04 23:51:31 +00001765
Fred Drake0582df92000-07-12 04:49:00 +00001766 MYCONST(XML_ERROR_NO_MEMORY);
1767 MYCONST(XML_ERROR_SYNTAX);
1768 MYCONST(XML_ERROR_NO_ELEMENTS);
1769 MYCONST(XML_ERROR_INVALID_TOKEN);
1770 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1771 MYCONST(XML_ERROR_PARTIAL_CHAR);
1772 MYCONST(XML_ERROR_TAG_MISMATCH);
1773 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1774 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1775 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1776 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1777 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1778 MYCONST(XML_ERROR_ASYNC_ENTITY);
1779 MYCONST(XML_ERROR_BAD_CHAR_REF);
1780 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1781 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1782 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1783 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1784 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001785 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1786 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1787 MYCONST(XML_ERROR_NOT_STANDALONE);
Fred Drake283b6702004-08-04 22:28:16 +00001788 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1789 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1790 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1791 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1792 /* Added in Expat 1.95.7. */
1793 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1794 /* Added in Expat 1.95.8. */
1795 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1796 MYCONST(XML_ERROR_INCOMPLETE_PE);
1797 MYCONST(XML_ERROR_XML_DECL);
1798 MYCONST(XML_ERROR_TEXT_DECL);
1799 MYCONST(XML_ERROR_PUBLICID);
1800 MYCONST(XML_ERROR_SUSPENDED);
1801 MYCONST(XML_ERROR_NOT_SUSPENDED);
1802 MYCONST(XML_ERROR_ABORTED);
1803 MYCONST(XML_ERROR_FINISHED);
1804 MYCONST(XML_ERROR_SUSPEND_PE);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001805
Georg Brandlb4dac712010-10-15 14:46:48 +00001806 if (PyModule_AddStringConstant(errors_module, "__doc__",
1807 "Constants used to describe "
1808 "error conditions.") < 0)
1809 return NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001810
Georg Brandlb4dac712010-10-15 14:46:48 +00001811 if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0)
1812 return NULL;
1813 if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0)
1814 return NULL;
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001815
Fred Drake93adb692000-09-23 04:55:48 +00001816#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001817
Fred Drake85d835f2001-02-08 15:39:08 +00001818#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001819 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1820 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1821 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00001822#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001823
Fred Drake85d835f2001-02-08 15:39:08 +00001824#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1825 PyModule_AddStringConstant(model_module, "__doc__",
1826 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001827
Fred Drake85d835f2001-02-08 15:39:08 +00001828 MYCONST(XML_CTYPE_EMPTY);
1829 MYCONST(XML_CTYPE_ANY);
1830 MYCONST(XML_CTYPE_MIXED);
1831 MYCONST(XML_CTYPE_NAME);
1832 MYCONST(XML_CTYPE_CHOICE);
1833 MYCONST(XML_CTYPE_SEQ);
1834
1835 MYCONST(XML_CQUANT_NONE);
1836 MYCONST(XML_CQUANT_OPT);
1837 MYCONST(XML_CQUANT_REP);
1838 MYCONST(XML_CQUANT_PLUS);
1839#undef MYCONST
Fredrik Lundhc3345042005-12-13 19:49:55 +00001840
1841 /* initialize pyexpat dispatch table */
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001842 capi.size = sizeof(capi);
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001843 capi.magic = PyExpat_CAPI_MAGIC;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001844 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
1845 capi.MINOR_VERSION = XML_MINOR_VERSION;
1846 capi.MICRO_VERSION = XML_MICRO_VERSION;
1847 capi.ErrorString = XML_ErrorString;
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001848 capi.GetErrorCode = XML_GetErrorCode;
1849 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
1850 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001851 capi.Parse = XML_Parse;
1852 capi.ParserCreate_MM = XML_ParserCreate_MM;
1853 capi.ParserFree = XML_ParserFree;
1854 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
1855 capi.SetCommentHandler = XML_SetCommentHandler;
1856 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1857 capi.SetElementHandler = XML_SetElementHandler;
1858 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1859 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1860 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1861 capi.SetUserData = XML_SetUserData;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03001862 capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03001863 capi.SetEncoding = XML_SetEncoding;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001864 capi.DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
Christian Heimescb5778f2018-09-18 14:38:58 +02001865#if XML_COMBINED_VERSION >= 20100
1866 capi.SetHashSalt = XML_SetHashSalt;
1867#else
1868 capi.SetHashSalt = NULL;
1869#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001870
Benjamin Petersonb173f782009-05-05 22:31:58 +00001871 /* export using capsule */
1872 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001873 if (capi_object)
1874 PyModule_AddObject(m, "expat_CAPI", capi_object);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001875 return m;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001876}
1877
Fred Drake6f987622000-08-25 18:03:30 +00001878static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001879clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00001880{
Fred Drakecde79132001-04-25 16:01:30 +00001881 int i = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001882
Fred Drake71b63ff2002-06-28 22:29:01 +00001883 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001884 if (initial)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001885 self->handlers[i] = NULL;
1886 else {
Serhiy Storchaka1ed017a2015-12-27 15:51:32 +02001887 Py_CLEAR(self->handlers[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001888 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00001889 }
Fred Drakecde79132001-04-25 16:01:30 +00001890 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001891}
1892
/* Table of the supported handlers, terminated by an entry whose name is
   NULL.  The index of an entry is also the index used for
   xmlparseobject.handlers (see clear_handlers() and xmlparse_traverse()).
   NOTE(review): the order presumably has to match enum HandlerTypes at the
   top of the file -- confirm before reordering. */
static struct HandlerInfo handler_info[] = {

/* Expands to one entry: the handler's name as a string, the XML_Set<name>
   function that installs it, and the my_<name> callback. */
#define HANDLER_INFO(name) \
    {#name, (xmlhandlersetter)XML_Set##name, (xmlhandler)my_##name},

    HANDLER_INFO(StartElementHandler)
    HANDLER_INFO(EndElementHandler)
    HANDLER_INFO(ProcessingInstructionHandler)
    HANDLER_INFO(CharacterDataHandler)
    HANDLER_INFO(UnparsedEntityDeclHandler)
    HANDLER_INFO(NotationDeclHandler)
    HANDLER_INFO(StartNamespaceDeclHandler)
    HANDLER_INFO(EndNamespaceDeclHandler)
    HANDLER_INFO(CommentHandler)
    HANDLER_INFO(StartCdataSectionHandler)
    HANDLER_INFO(EndCdataSectionHandler)
    HANDLER_INFO(DefaultHandler)
    HANDLER_INFO(DefaultHandlerExpand)
    HANDLER_INFO(NotStandaloneHandler)
    HANDLER_INFO(ExternalEntityRefHandler)
    HANDLER_INFO(StartDoctypeDeclHandler)
    HANDLER_INFO(EndDoctypeDeclHandler)
    HANDLER_INFO(EntityDeclHandler)
    HANDLER_INFO(XmlDeclHandler)
    HANDLER_INFO(ElementDeclHandler)
    HANDLER_INFO(AttlistDeclHandler)
    /* Only compiled in for Expat 1.95.4 and later. */
#if XML_COMBINED_VERSION >= 19504
    HANDLER_INFO(SkippedEntityHandler)
#endif

#undef HANDLER_INFO

    {NULL, NULL, NULL} /* sentinel */
};