blob: 12ae66d945bda8cb6b85944fd3de9e02f35245ac [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Victor Stinner4a21e572020-04-15 02:35:41 +02004#include "structmember.h" // PyMemberDef
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00005#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00006#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00007
Fredrik Lundhc3345042005-12-13 19:49:55 +00008#include "pyexpat.h"
9
Brett Cannond0aeda82014-08-22 14:23:20 -040010/* Do not emit Clinic output to a file as that wreaks havoc with conditionally
11 included methods. */
12/*[clinic input]
13module pyexpat
14[clinic start generated code]*/
15/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/
16
Martin v. Löwisc847f402003-01-21 11:09:21 +000017#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
18
Christian Heimesfa535f52013-07-07 17:35:11 +020019static XML_Memory_Handling_Suite ExpatMemoryHandler = {
20 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
21
/* Slot numbers for the per-parser `handlers` array: one enumerator per
 * Expat callback that can be forwarded to a Python handler.  The values
 * are consecutive and start at zero.
 */
enum HandlerTypes {
    StartElement = 0,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only present when the Expat headers are version 1.95.4 or newer. */
    SkippedEntity,
#endif
    /* Trailing placeholder: lets every real entry above end with a comma
     * whether or not the conditional entry is compiled in. */
    _DummyDecl
};
49
50static PyObject *ErrorObject;
51
52/* ----------------------------------------------------- */
53
54/* Declarations for objects of type xmlparser */
55
56typedef struct {
Fred Drake0582df92000-07-12 04:49:00 +000057 PyObject_HEAD
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000058
Fred Drake0582df92000-07-12 04:49:00 +000059 XML_Parser itself;
Fred Drake85d835f2001-02-08 15:39:08 +000060 int ordered_attributes; /* Return attributes as a list. */
61 int specified_attributes; /* Report only specified attributes. */
Fred Drakebd6101c2001-02-14 18:29:45 +000062 int in_callback; /* Is a callback active? */
Martin v. Löwis069dde22003-01-21 10:58:18 +000063 int ns_prefixes; /* Namespace-triplets mode? */
Fred Drake2a3d7db2002-06-28 22:56:48 +000064 XML_Char *buffer; /* Buffer used when accumulating characters */
65 /* NULL if not enabled */
66 int buffer_size; /* Size of buffer, in XML_Char units */
67 int buffer_used; /* Buffer units in use */
Fred Drakeb91a36b2002-06-27 19:40:48 +000068 PyObject *intern; /* Dictionary to intern strings */
Fred Drake0582df92000-07-12 04:49:00 +000069 PyObject **handlers;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000070} xmlparseobject;
71
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030072#include "clinic/pyexpat.c.h"
73
Fred Drake2a3d7db2002-06-28 22:56:48 +000074#define CHARACTER_DATA_BUFFER_SIZE 8192
75
Jeremy Hylton938ace62002-07-17 16:30:39 +000076static PyTypeObject Xmlparsetype;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000077
Fred Drake117ac852002-09-24 16:24:54 +000078typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000079typedef void* xmlhandler;
80
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +000081struct HandlerInfo {
Fred Drake0582df92000-07-12 04:49:00 +000082 const char *name;
83 xmlhandlersetter setter;
84 xmlhandler handler;
Serhiy Storchaka55f82492018-10-19 18:00:51 +030085 PyGetSetDef getset;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000086};
87
Jeremy Hylton938ace62002-07-17 16:30:39 +000088static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000089
Fred Drakebd6101c2001-02-14 18:29:45 +000090/* Set an integer attribute on the error object; return true on success,
91 * false on an exception.
92 */
93static int
Serhiy Storchakaef1585e2015-12-25 20:01:53 +020094set_error_attr(PyObject *err, const char *name, int value)
Fred Drakebd6101c2001-02-14 18:29:45 +000095{
Christian Heimes217cfd12007-12-02 14:31:20 +000096 PyObject *v = PyLong_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +000097
Neal Norwitz2f5e9902006-03-08 06:36:45 +000098 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
99 Py_XDECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +0000100 return 0;
101 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000102 Py_DECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +0000103 return 1;
104}
105
106/* Build and set an Expat exception, including positioning
107 * information. Always returns NULL.
108 */
Fred Drake85d835f2001-02-08 15:39:08 +0000109static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000110set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000111{
112 PyObject *err;
Victor Stinner499dfcf2011-03-21 13:26:24 +0100113 PyObject *buffer;
Fred Drake85d835f2001-02-08 15:39:08 +0000114 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000115 int lineno = XML_GetErrorLineNumber(parser);
116 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000117
Victor Stinner499dfcf2011-03-21 13:26:24 +0100118 buffer = PyUnicode_FromFormat("%s: line %i, column %i",
119 XML_ErrorString(code), lineno, column);
120 if (buffer == NULL)
121 return NULL;
Petr Viktorinffd97532020-02-11 17:46:57 +0100122 err = PyObject_CallOneArg(ErrorObject, buffer);
Victor Stinner499dfcf2011-03-21 13:26:24 +0100123 Py_DECREF(buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000124 if ( err != NULL
125 && set_error_attr(err, "code", code)
126 && set_error_attr(err, "offset", column)
127 && set_error_attr(err, "lineno", lineno)) {
128 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000129 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000130 Py_XDECREF(err);
Fred Drake85d835f2001-02-08 15:39:08 +0000131 return NULL;
132}
133
Fred Drake71b63ff2002-06-28 22:29:01 +0000134static int
135have_handler(xmlparseobject *self, int type)
136{
137 PyObject *handler = self->handlers[type];
138 return handler != NULL;
139}
140
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000141/* Convert a string of XML_Chars into a Unicode string.
142 Returns None if str is a null pointer. */
143
Fred Drake0582df92000-07-12 04:49:00 +0000144static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000145conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000146{
Fred Drake71b63ff2002-06-28 22:29:01 +0000147 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000148 and hence in UTF-8. */
149 /* UTF-8 from Expat, Unicode desired */
150 if (str == NULL) {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200151 Py_RETURN_NONE;
Fred Drake0582df92000-07-12 04:49:00 +0000152 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000153 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000154}
155
Fred Drake0582df92000-07-12 04:49:00 +0000156static PyObject *
157conv_string_len_to_unicode(const XML_Char *str, int len)
158{
Fred Drake71b63ff2002-06-28 22:29:01 +0000159 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000160 and hence in UTF-8. */
161 /* UTF-8 from Expat, Unicode desired */
162 if (str == NULL) {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200163 Py_RETURN_NONE;
Fred Drake0582df92000-07-12 04:49:00 +0000164 }
Fred Drake6f987622000-08-25 18:03:30 +0000165 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000166}
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000167
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000168/* Callback routines */
169
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000170static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000171
Martin v. Löwis069dde22003-01-21 10:58:18 +0000172/* This handler is used when an error has been detected, in the hope
173 that actual parsing can be terminated early. This will only help
174 if an external entity reference is encountered. */
175static int
176error_external_entity_ref_handler(XML_Parser parser,
177 const XML_Char *context,
178 const XML_Char *base,
179 const XML_Char *systemId,
180 const XML_Char *publicId)
181{
182 return 0;
183}
184
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000185/* Dummy character data handler used when an error (exception) has
186 been detected, and the actual parsing can be terminated early.
187 This is needed since character data handler can't be safely removed
188 from within the character data handler, but can be replaced. It is
189 used only from the character data handler trampoline, and must be
190 used right after `flag_error()` is called. */
191static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000192noop_character_data_handler(void *userData, const XML_Char *data, int len)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000193{
194 /* Do nothing. */
195}
196
Fred Drake6f987622000-08-25 18:03:30 +0000197static void
198flag_error(xmlparseobject *self)
199{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000200 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000201 XML_SetExternalEntityRefHandler(self->itself,
202 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000203}
204
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000205static PyObject*
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200206call_with_frame(const char *funcname, int lineno, PyObject* func, PyObject* args,
Fred Drake39689c52004-08-13 03:12:57 +0000207 xmlparseobject *self)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000208{
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200209 PyObject *res;
Fred Drakebd6101c2001-02-14 18:29:45 +0000210
Jeroen Demeyer1dbd0842019-07-11 17:57:32 +0200211 res = PyObject_Call(func, args, NULL);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000212 if (res == NULL) {
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200213 _PyTraceback_Add(funcname, __FILE__, lineno);
Fred Drake39689c52004-08-13 03:12:57 +0000214 XML_StopParser(self->itself, XML_FALSE);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000215 }
Fred Drakebd6101c2001-02-14 18:29:45 +0000216 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000217}
218
Fred Drakeb91a36b2002-06-27 19:40:48 +0000219static PyObject*
220string_intern(xmlparseobject *self, const char* str)
221{
Guido van Rossum4ca94712007-07-23 17:42:32 +0000222 PyObject *result = conv_string_to_unicode(str);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000223 PyObject *value;
Neal Norwitz484d9a42005-09-30 04:46:49 +0000224 /* result can be NULL if the unicode conversion failed. */
225 if (!result)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000226 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000227 if (!self->intern)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 return result;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200229 value = PyDict_GetItemWithError(self->intern, result);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000230 if (!value) {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200231 if (!PyErr_Occurred() &&
232 PyDict_SetItem(self->intern, result, result) == 0)
233 {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000234 return result;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200235 }
Zackery Spytz68def052018-10-19 00:57:38 -0600236 else {
237 Py_DECREF(result);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000238 return NULL;
Zackery Spytz68def052018-10-19 00:57:38 -0600239 }
Fred Drakeb91a36b2002-06-27 19:40:48 +0000240 }
241 Py_INCREF(value);
242 Py_DECREF(result);
243 return value;
244}
245
Fred Drake2a3d7db2002-06-28 22:56:48 +0000246/* Return 0 on success, -1 on exception.
247 * flag_error() will be called before return if needed.
248 */
249static int
250call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
251{
252 PyObject *args;
253 PyObject *temp;
254
Georg Brandlc01537f2010-10-15 16:26:08 +0000255 if (!have_handler(self, CharacterData))
256 return -1;
257
Fred Drake2a3d7db2002-06-28 22:56:48 +0000258 args = PyTuple_New(1);
259 if (args == NULL)
260 return -1;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000261 temp = (conv_string_len_to_unicode(buffer, len));
Fred Drake2a3d7db2002-06-28 22:56:48 +0000262 if (temp == NULL) {
263 Py_DECREF(args);
264 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000265 XML_SetCharacterDataHandler(self->itself,
266 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000267 return -1;
268 }
269 PyTuple_SET_ITEM(args, 0, temp);
270 /* temp is now a borrowed reference; consider it unused. */
271 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200272 temp = call_with_frame("CharacterData", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000273 self->handlers[CharacterData], args, self);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000274 /* temp is an owned reference again, or NULL */
275 self->in_callback = 0;
276 Py_DECREF(args);
277 if (temp == NULL) {
278 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000279 XML_SetCharacterDataHandler(self->itself,
280 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000281 return -1;
282 }
283 Py_DECREF(temp);
284 return 0;
285}
286
287static int
288flush_character_buffer(xmlparseobject *self)
289{
290 int rc;
291 if (self->buffer == NULL || self->buffer_used == 0)
292 return 0;
293 rc = call_character_handler(self, self->buffer, self->buffer_used);
294 self->buffer_used = 0;
295 return rc;
296}
297
298static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000299my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000300{
301 xmlparseobject *self = (xmlparseobject *) userData;
Victor Stinner9e09c262013-07-18 23:17:01 +0200302
303 if (PyErr_Occurred())
304 return;
305
Fred Drake2a3d7db2002-06-28 22:56:48 +0000306 if (self->buffer == NULL)
307 call_character_handler(self, data, len);
308 else {
309 if ((self->buffer_used + len) > self->buffer_size) {
310 if (flush_character_buffer(self) < 0)
311 return;
312 /* handler might have changed; drop the rest on the floor
313 * if there isn't a handler anymore
314 */
315 if (!have_handler(self, CharacterData))
316 return;
317 }
318 if (len > self->buffer_size) {
319 call_character_handler(self, data, len);
320 self->buffer_used = 0;
321 }
322 else {
323 memcpy(self->buffer + self->buffer_used,
324 data, len * sizeof(XML_Char));
325 self->buffer_used += len;
326 }
327 }
328}
329
Fred Drake85d835f2001-02-08 15:39:08 +0000330static void
331my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000332 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000333{
334 xmlparseobject *self = (xmlparseobject *)userData;
335
Fred Drake71b63ff2002-06-28 22:29:01 +0000336 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000337 PyObject *container, *rv, *args;
338 int i, max;
339
Victor Stinner9e09c262013-07-18 23:17:01 +0200340 if (PyErr_Occurred())
341 return;
342
Fred Drake2a3d7db2002-06-28 22:56:48 +0000343 if (flush_character_buffer(self) < 0)
344 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000345 /* Set max to the number of slots filled in atts[]; max/2 is
346 * the number of attributes we need to process.
347 */
348 if (self->specified_attributes) {
349 max = XML_GetSpecifiedAttributeCount(self->itself);
350 }
351 else {
352 max = 0;
353 while (atts[max] != NULL)
354 max += 2;
355 }
356 /* Build the container. */
357 if (self->ordered_attributes)
358 container = PyList_New(max);
359 else
360 container = PyDict_New();
361 if (container == NULL) {
362 flag_error(self);
363 return;
364 }
365 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000366 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000367 PyObject *v;
368 if (n == NULL) {
369 flag_error(self);
370 Py_DECREF(container);
371 return;
372 }
Guido van Rossum4ca94712007-07-23 17:42:32 +0000373 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
Fred Drake85d835f2001-02-08 15:39:08 +0000374 if (v == NULL) {
375 flag_error(self);
376 Py_DECREF(container);
377 Py_DECREF(n);
378 return;
379 }
380 if (self->ordered_attributes) {
381 PyList_SET_ITEM(container, i, n);
382 PyList_SET_ITEM(container, i+1, v);
383 }
384 else if (PyDict_SetItem(container, n, v)) {
385 flag_error(self);
386 Py_DECREF(n);
387 Py_DECREF(v);
Zackery Spytz68def052018-10-19 00:57:38 -0600388 Py_DECREF(container);
Fred Drake85d835f2001-02-08 15:39:08 +0000389 return;
390 }
391 else {
392 Py_DECREF(n);
393 Py_DECREF(v);
394 }
395 }
Neal Norwitz484d9a42005-09-30 04:46:49 +0000396 args = string_intern(self, name);
Fred Drake85d835f2001-02-08 15:39:08 +0000397 if (args == NULL) {
398 Py_DECREF(container);
399 return;
400 }
Zackery Spytz68def052018-10-19 00:57:38 -0600401 args = Py_BuildValue("(NN)", args, container);
402 if (args == NULL) {
403 return;
404 }
Fred Drake85d835f2001-02-08 15:39:08 +0000405 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000406 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200407 rv = call_with_frame("StartElement", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000408 self->handlers[StartElement], args, self);
Fred Drakebd6101c2001-02-14 18:29:45 +0000409 self->in_callback = 0;
410 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000411 if (rv == NULL) {
412 flag_error(self);
413 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000414 }
Fred Drake85d835f2001-02-08 15:39:08 +0000415 Py_DECREF(rv);
416 }
417}
418
/* RC_HANDLER generates the C trampoline `my_<NAME>Handler` that forwards
 * one kind of Expat event to the Python handler stored in
 * self->handlers[NAME].
 *
 *   RC            return type of the generated function
 *   NAME          enumerator of enum HandlerTypes (also used for the
 *                 function name and the traceback entry)
 *   PARAMS        parenthesised C parameter list of the callback
 *   INIT          extra declarations/initialisation placed at the top
 *   PARAM_FORMAT  parenthesised argument list for Py_BuildValue that
 *                 builds the handler's argument tuple
 *   CONVERSION    statement(s) run on success while `rv` is still owned
 *   RETURN        expression returned on every path (may be empty)
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing when no handler is installed or
 * while a Python exception is pending.  It flushes buffered character
 * data first; because that flush can run Python code during which the
 * handler might have changed, the slot is checked again afterwards so
 * that an emptied slot is never called.  Failures to build the arguments
 * or to call the handler go through flag_error().
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
        RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (PyErr_Occurred()) \
            return RETURN; \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        if (!have_handler(self, NAME)) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { flag_error(self); return RETURN;} \
        self->in_callback = 1; \
        rv = call_with_frame(#NAME,__LINE__, \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}

/* Trampoline for callbacks that return nothing and receive the parser
 * object as `userData`. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
        (xmlparseobject *)userData)

/* Trampoline for callbacks that return an int: the Python handler's
 * result is converted with PyLong_AsLong(); 0 is returned when the
 * handler is not called or fails. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
                        rc = PyLong_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
Fred Drake71b63ff2002-06-28 22:29:01 +0000459VOID_HANDLER(EndElement,
460 (void *userData, const XML_Char *name),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000461 ("(N)", string_intern(self, name)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000462
Fred Drake6f987622000-08-25 18:03:30 +0000463VOID_HANDLER(ProcessingInstruction,
Fred Drake71b63ff2002-06-28 22:29:01 +0000464 (void *userData,
465 const XML_Char *target,
Fred Drake85d835f2001-02-08 15:39:08 +0000466 const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000467 ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000468
Fred Drake6f987622000-08-25 18:03:30 +0000469VOID_HANDLER(UnparsedEntityDecl,
Fred Drake71b63ff2002-06-28 22:29:01 +0000470 (void *userData,
Fred Drake85d835f2001-02-08 15:39:08 +0000471 const XML_Char *entityName,
472 const XML_Char *base,
473 const XML_Char *systemId,
474 const XML_Char *publicId,
475 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000476 ("(NNNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000477 string_intern(self, entityName), string_intern(self, base),
478 string_intern(self, systemId), string_intern(self, publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000479 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000480
Fred Drake85d835f2001-02-08 15:39:08 +0000481VOID_HANDLER(EntityDecl,
482 (void *userData,
483 const XML_Char *entityName,
484 int is_parameter_entity,
485 const XML_Char *value,
486 int value_length,
487 const XML_Char *base,
488 const XML_Char *systemId,
489 const XML_Char *publicId,
490 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000491 ("NiNNNNN",
492 string_intern(self, entityName), is_parameter_entity,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000493 (conv_string_len_to_unicode(value, value_length)),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000494 string_intern(self, base), string_intern(self, systemId),
495 string_intern(self, publicId),
496 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000497
498VOID_HANDLER(XmlDecl,
499 (void *userData,
500 const XML_Char *version,
501 const XML_Char *encoding,
502 int standalone),
503 ("(O&O&i)",
Guido van Rossum4ca94712007-07-23 17:42:32 +0000504 conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
Fred Drake85d835f2001-02-08 15:39:08 +0000505 standalone))
506
507static PyObject *
508conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000509 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000510{
511 PyObject *result = NULL;
512 PyObject *children = PyTuple_New(model->numchildren);
513 int i;
514
515 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000516 assert(model->numchildren < INT_MAX);
517 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000518 PyObject *child = conv_content_model(&model->children[i],
519 conv_string);
520 if (child == NULL) {
521 Py_XDECREF(children);
522 return NULL;
523 }
524 PyTuple_SET_ITEM(children, i, child);
525 }
526 result = Py_BuildValue("(iiO&N)",
527 model->type, model->quant,
528 conv_string,model->name, children);
529 }
530 return result;
531}
532
Fred Drake06dd8cf2003-02-02 03:54:17 +0000533static void
534my_ElementDeclHandler(void *userData,
535 const XML_Char *name,
536 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000537{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000538 xmlparseobject *self = (xmlparseobject *)userData;
539 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000540
Fred Drake06dd8cf2003-02-02 03:54:17 +0000541 if (have_handler(self, ElementDecl)) {
542 PyObject *rv = NULL;
543 PyObject *modelobj, *nameobj;
544
Victor Stinner9e09c262013-07-18 23:17:01 +0200545 if (PyErr_Occurred())
546 return;
547
Fred Drake06dd8cf2003-02-02 03:54:17 +0000548 if (flush_character_buffer(self) < 0)
549 goto finally;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000550 modelobj = conv_content_model(model, (conv_string_to_unicode));
Fred Drake06dd8cf2003-02-02 03:54:17 +0000551 if (modelobj == NULL) {
552 flag_error(self);
553 goto finally;
554 }
555 nameobj = string_intern(self, name);
556 if (nameobj == NULL) {
557 Py_DECREF(modelobj);
558 flag_error(self);
559 goto finally;
560 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000561 args = Py_BuildValue("NN", nameobj, modelobj);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000562 if (args == NULL) {
Fred Drake06dd8cf2003-02-02 03:54:17 +0000563 flag_error(self);
564 goto finally;
565 }
566 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200567 rv = call_with_frame("ElementDecl", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000568 self->handlers[ElementDecl], args, self);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000569 self->in_callback = 0;
570 if (rv == NULL) {
571 flag_error(self);
572 goto finally;
573 }
574 Py_DECREF(rv);
575 }
576 finally:
577 Py_XDECREF(args);
578 XML_FreeContentModel(self->itself, model);
579 return;
580}
Fred Drake85d835f2001-02-08 15:39:08 +0000581
582VOID_HANDLER(AttlistDecl,
583 (void *userData,
584 const XML_Char *elname,
585 const XML_Char *attname,
586 const XML_Char *att_type,
587 const XML_Char *dflt,
588 int isrequired),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000589 ("(NNO&O&i)",
590 string_intern(self, elname), string_intern(self, attname),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000591 conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
Fred Drake85d835f2001-02-08 15:39:08 +0000592 isrequired))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000593
Martin v. Löwisc847f402003-01-21 11:09:21 +0000594#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +0000595VOID_HANDLER(SkippedEntity,
596 (void *userData,
597 const XML_Char *entityName,
598 int is_parameter_entity),
599 ("Ni",
600 string_intern(self, entityName), is_parameter_entity))
Martin v. Löwisc847f402003-01-21 11:09:21 +0000601#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +0000602
Fred Drake71b63ff2002-06-28 22:29:01 +0000603VOID_HANDLER(NotationDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000604 (void *userData,
605 const XML_Char *notationName,
606 const XML_Char *base,
607 const XML_Char *systemId,
608 const XML_Char *publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000609 ("(NNNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000610 string_intern(self, notationName), string_intern(self, base),
611 string_intern(self, systemId), string_intern(self, publicId)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000612
Fred Drake6f987622000-08-25 18:03:30 +0000613VOID_HANDLER(StartNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000614 (void *userData,
615 const XML_Char *prefix,
616 const XML_Char *uri),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000617 ("(NN)",
618 string_intern(self, prefix), string_intern(self, uri)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000619
Fred Drake6f987622000-08-25 18:03:30 +0000620VOID_HANDLER(EndNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000621 (void *userData,
622 const XML_Char *prefix),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000623 ("(N)", string_intern(self, prefix)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000624
Fred Drake6f987622000-08-25 18:03:30 +0000625VOID_HANDLER(Comment,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000626 (void *userData, const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000627 ("(O&)", conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000628
Fred Drake6f987622000-08-25 18:03:30 +0000629VOID_HANDLER(StartCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000630 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000631 ("()"))
Fred Drake71b63ff2002-06-28 22:29:01 +0000632
Fred Drake6f987622000-08-25 18:03:30 +0000633VOID_HANDLER(EndCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000634 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000635 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000636
Fred Drake6f987622000-08-25 18:03:30 +0000637VOID_HANDLER(Default,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000638 (void *userData, const XML_Char *s, int len),
639 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000640
Fred Drake6f987622000-08-25 18:03:30 +0000641VOID_HANDLER(DefaultHandlerExpand,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000642 (void *userData, const XML_Char *s, int len),
643 ("(N)", (conv_string_len_to_unicode(s,len))))
Serhiy Storchaka55f82492018-10-19 18:00:51 +0300644#define my_DefaultHandlerExpand my_DefaultHandlerExpandHandler
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000645
Fred Drake71b63ff2002-06-28 22:29:01 +0000646INT_HANDLER(NotStandalone,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000647 (void *userData),
648 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000649
Fred Drake6f987622000-08-25 18:03:30 +0000650RC_HANDLER(int, ExternalEntityRef,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000651 (XML_Parser parser,
652 const XML_Char *context,
653 const XML_Char *base,
654 const XML_Char *systemId,
655 const XML_Char *publicId),
656 int rc=0;,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000657 ("(O&NNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000658 conv_string_to_unicode ,context, string_intern(self, base),
659 string_intern(self, systemId), string_intern(self, publicId)),
660 rc = PyLong_AsLong(rv);, rc,
661 XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000662
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000663/* XXX UnknownEncodingHandler */
664
Fred Drake85d835f2001-02-08 15:39:08 +0000665VOID_HANDLER(StartDoctypeDecl,
666 (void *userData, const XML_Char *doctypeName,
667 const XML_Char *sysid, const XML_Char *pubid,
668 int has_internal_subset),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000669 ("(NNNi)", string_intern(self, doctypeName),
670 string_intern(self, sysid), string_intern(self, pubid),
Fred Drake85d835f2001-02-08 15:39:08 +0000671 has_internal_subset))
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000672
673VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000674
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000675/* ---------------------------------------------------------------- */
Brett Cannond0aeda82014-08-22 14:23:20 -0400676/*[clinic input]
677class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype"
678[clinic start generated code]*/
679/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2393162385232e1c]*/
680
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000681
Fred Drake71b63ff2002-06-28 22:29:01 +0000682static PyObject *
683get_parse_result(xmlparseobject *self, int rv)
684{
685 if (PyErr_Occurred()) {
686 return NULL;
687 }
688 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000689 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000690 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000691 if (flush_character_buffer(self) < 0) {
692 return NULL;
693 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000694 return PyLong_FromLong(rv);
Fred Drake71b63ff2002-06-28 22:29:01 +0000695}
696
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200697#define MAX_CHUNK_SIZE (1 << 20)
698
Brett Cannond0aeda82014-08-22 14:23:20 -0400699/*[clinic input]
700pyexpat.xmlparser.Parse
701
702 data: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200703 isfinal: bool(accept={int}) = False
Brett Cannond0aeda82014-08-22 14:23:20 -0400704 /
705
706Parse XML data.
707
708`isfinal' should be true at end of input.
709[clinic start generated code]*/
710
Brett Cannond0aeda82014-08-22 14:23:20 -0400711static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400712pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyObject *data,
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +0300713 int isfinal)
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200714/*[clinic end generated code: output=f4db843dd1f4ed4b input=eb616027bfa9847f]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400715{
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200716 const char *s;
717 Py_ssize_t slen;
718 Py_buffer view;
719 int rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000720
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200721 if (PyUnicode_Check(data)) {
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200722 view.buf = NULL;
Serhiy Storchaka36b365c2013-02-04 18:28:01 +0200723 s = PyUnicode_AsUTF8AndSize(data, &slen);
724 if (s == NULL)
725 return NULL;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200726 /* Explicitly set UTF-8 encoding. Return code ignored. */
727 (void)XML_SetEncoding(self->itself, "utf-8");
728 }
729 else {
730 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
731 return NULL;
732 s = view.buf;
733 slen = view.len;
734 }
735
736 while (slen > MAX_CHUNK_SIZE) {
737 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
738 if (!rc)
739 goto done;
740 s += MAX_CHUNK_SIZE;
741 slen -= MAX_CHUNK_SIZE;
742 }
Serhiy Storchakafad85aa2015-11-07 15:42:38 +0200743 Py_BUILD_ASSERT(MAX_CHUNK_SIZE <= INT_MAX);
744 assert(slen <= INT_MAX);
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +0300745 rc = XML_Parse(self->itself, s, (int)slen, isfinal);
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200746
747done:
748 if (view.buf != NULL)
749 PyBuffer_Release(&view);
750 return get_parse_result(self, rc);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000751}
752
Fred Drakeca1f4262000-09-21 20:10:23 +0000753/* File reading copied from cPickle */
754
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000755#define BUF_SIZE 2048
756
Fred Drake0582df92000-07-12 04:49:00 +0000757static int
758readinst(char *buf, int buf_size, PyObject *meth)
759{
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000760 PyObject *str;
761 Py_ssize_t len;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200762 const char *ptr;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000763
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000764 str = PyObject_CallFunction(meth, "n", buf_size);
Martin v. Löwis9171f022004-10-13 19:50:11 +0000765 if (str == NULL)
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000766 goto error;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000767
Christian Heimes72b710a2008-05-26 13:28:38 +0000768 if (PyBytes_Check(str))
769 ptr = PyBytes_AS_STRING(str);
Christian Heimes9c4756e2008-05-26 13:22:05 +0000770 else if (PyByteArray_Check(str))
771 ptr = PyByteArray_AS_STRING(str);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000772 else {
Fred Drake71b63ff2002-06-28 22:29:01 +0000773 PyErr_Format(PyExc_TypeError,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000774 "read() did not return a bytes object (type=%.400s)",
Christian Heimes90aa7642007-12-19 02:45:37 +0000775 Py_TYPE(str)->tp_name);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000776 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000777 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000778 len = Py_SIZE(str);
Fred Drake0582df92000-07-12 04:49:00 +0000779 if (len > buf_size) {
780 PyErr_Format(PyExc_ValueError,
781 "read() returned too much data: "
Victor Stinner9d6f9362011-01-04 22:00:04 +0000782 "%i bytes requested, %zd returned",
Fred Drake0582df92000-07-12 04:49:00 +0000783 buf_size, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000784 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000785 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000786 memcpy(buf, ptr, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000787 Py_DECREF(str);
788 /* len <= buf_size <= INT_MAX */
Victor Stinner0fcab4a2011-01-04 12:59:15 +0000789 return (int)len;
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000790
791error:
792 Py_XDECREF(str);
793 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000794}
795
Brett Cannond0aeda82014-08-22 14:23:20 -0400796/*[clinic input]
797pyexpat.xmlparser.ParseFile
798
799 file: object
800 /
801
802Parse XML data from file-like object.
803[clinic start generated code]*/
804
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000805static PyObject *
Brett Cannond0aeda82014-08-22 14:23:20 -0400806pyexpat_xmlparser_ParseFile(xmlparseobject *self, PyObject *file)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300807/*[clinic end generated code: output=2adc6a13100cc42b input=fbb5a12b6038d735]*/
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000808{
Fred Drake0582df92000-07-12 04:49:00 +0000809 int rv = 1;
Fred Drake0582df92000-07-12 04:49:00 +0000810 PyObject *readmethod = NULL;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200811 _Py_IDENTIFIER(read);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000812
Serhiy Storchaka41c57b32019-09-01 12:03:39 +0300813 if (_PyObject_LookupAttrId(file, &PyId_read, &readmethod) < 0) {
814 return NULL;
815 }
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000816 if (readmethod == NULL) {
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000817 PyErr_SetString(PyExc_TypeError,
818 "argument must have 'read' attribute");
819 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000820 }
821 for (;;) {
822 int bytes_read;
823 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
Fred Drake7b6caff2003-07-21 17:05:56 +0000824 if (buf == NULL) {
Fred Drakef239c6d2003-07-21 17:22:43 +0000825 Py_XDECREF(readmethod);
Ned Deilye7d532f2014-03-27 16:39:58 -0700826 return get_parse_result(self, 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000827 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000828
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000829 bytes_read = readinst(buf, BUF_SIZE, readmethod);
830 if (bytes_read < 0) {
831 Py_DECREF(readmethod);
832 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000833 }
834 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000835 if (PyErr_Occurred()) {
836 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000837 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000838 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000839
Fred Drake0582df92000-07-12 04:49:00 +0000840 if (!rv || bytes_read == 0)
841 break;
842 }
Fred Drake7b6caff2003-07-21 17:05:56 +0000843 Py_XDECREF(readmethod);
Fred Drake71b63ff2002-06-28 22:29:01 +0000844 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000845}
846
Brett Cannond0aeda82014-08-22 14:23:20 -0400847/*[clinic input]
848pyexpat.xmlparser.SetBase
849
850 base: str
851 /
852
853Set the base URL for the parser.
854[clinic start generated code]*/
855
Brett Cannond0aeda82014-08-22 14:23:20 -0400856static PyObject *
857pyexpat_xmlparser_SetBase_impl(xmlparseobject *self, const char *base)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300858/*[clinic end generated code: output=c212ddceb607b539 input=c684e5de895ee1a8]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400859{
Fred Drake0582df92000-07-12 04:49:00 +0000860 if (!XML_SetBase(self->itself, base)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000861 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000862 }
Brett Cannond0aeda82014-08-22 14:23:20 -0400863 Py_RETURN_NONE;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000864}
865
Brett Cannond0aeda82014-08-22 14:23:20 -0400866/*[clinic input]
867pyexpat.xmlparser.GetBase
868
869Return base URL string for the parser.
870[clinic start generated code]*/
871
Brett Cannond0aeda82014-08-22 14:23:20 -0400872static PyObject *
873pyexpat_xmlparser_GetBase_impl(xmlparseobject *self)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300874/*[clinic end generated code: output=2886cb21f9a8739a input=918d71c38009620e]*/
Fred Drake0582df92000-07-12 04:49:00 +0000875{
Fred Drake0582df92000-07-12 04:49:00 +0000876 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000877}
878
Brett Cannond0aeda82014-08-22 14:23:20 -0400879/*[clinic input]
880pyexpat.xmlparser.GetInputContext
881
882Return the untranslated text of the input that caused the current event.
883
884If the event was generated by a large amount of text (such as a start tag
885for an element with many attributes), not all of the text may be available.
886[clinic start generated code]*/
887
Brett Cannond0aeda82014-08-22 14:23:20 -0400888static PyObject *
889pyexpat_xmlparser_GetInputContext_impl(xmlparseobject *self)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300890/*[clinic end generated code: output=a88026d683fc22cc input=034df8712db68379]*/
Fred Drakebd6101c2001-02-14 18:29:45 +0000891{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000892 if (self->in_callback) {
893 int offset, size;
894 const char *buffer
895 = XML_GetInputContext(self->itself, &offset, &size);
Fred Drakebd6101c2001-02-14 18:29:45 +0000896
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000897 if (buffer != NULL)
Christian Heimes72b710a2008-05-26 13:28:38 +0000898 return PyBytes_FromStringAndSize(buffer + offset,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000899 size - offset);
900 else
901 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000902 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000903 else
904 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000905}
Fred Drakebd6101c2001-02-14 18:29:45 +0000906
Brett Cannond0aeda82014-08-22 14:23:20 -0400907/*[clinic input]
908pyexpat.xmlparser.ExternalEntityParserCreate
909
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700910 context: str(accept={str, NoneType})
Brett Cannond0aeda82014-08-22 14:23:20 -0400911 encoding: str = NULL
912 /
913
914Create a parser for parsing an external entity based on the information passed to the ExternalEntityRefHandler.
915[clinic start generated code]*/
916
Brett Cannond0aeda82014-08-22 14:23:20 -0400917static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400918pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
919 const char *context,
920 const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700921/*[clinic end generated code: output=535cda9d7a0fbcd6 input=b906714cc122c322]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400922{
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000923 xmlparseobject *new_parser;
924 int i;
925
Martin v. Löwis894258c2001-09-23 10:20:10 +0000926 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake85d835f2001-02-08 15:39:08 +0000927 if (new_parser == NULL)
928 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +0000929 new_parser->buffer_size = self->buffer_size;
930 new_parser->buffer_used = 0;
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000931 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000932 new_parser->ordered_attributes = self->ordered_attributes;
933 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +0000934 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +0000935 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000936 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000937 encoding);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000938 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000939 new_parser->intern = self->intern;
940 Py_XINCREF(new_parser->intern);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000941
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000942 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +0200943 new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000944 if (new_parser->buffer == NULL) {
945 Py_DECREF(new_parser);
946 return PyErr_NoMemory();
947 }
948 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000949 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +0000950 Py_DECREF(new_parser);
951 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000952 }
953
954 XML_SetUserData(new_parser->itself, (void *)new_parser);
955
956 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +0000957 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +0000958 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000959
Serhiy Storchaka1a1ff292015-02-16 13:28:22 +0200960 new_parser->handlers = PyMem_New(PyObject *, i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000961 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +0000962 Py_DECREF(new_parser);
963 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000964 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000965 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000966
967 /* then copy handlers from self */
968 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +0000969 PyObject *handler = self->handlers[i];
970 if (handler != NULL) {
971 Py_INCREF(handler);
972 new_parser->handlers[i] = handler;
973 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +0000974 handler_info[i].handler);
975 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000976 }
Victor Stinner1b184552019-10-08 00:09:31 +0200977
978 PyObject_GC_Track(new_parser);
Fred Drake71b63ff2002-06-28 22:29:01 +0000979 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000980}
981
Brett Cannond0aeda82014-08-22 14:23:20 -0400982/*[clinic input]
983pyexpat.xmlparser.SetParamEntityParsing
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000984
Brett Cannond0aeda82014-08-22 14:23:20 -0400985 flag: int
986 /
987
988Controls parsing of parameter entities (including the external DTD subset).
989
990Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,
991XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and
992XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag
993was successful.
994[clinic start generated code]*/
995
Brett Cannond0aeda82014-08-22 14:23:20 -0400996static PyObject *
997pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject *self, int flag)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300998/*[clinic end generated code: output=18668ee8e760d64c input=8aea19b4b15e9af1]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400999{
1000 flag = XML_SetParamEntityParsing(self->itself, flag);
Christian Heimes217cfd12007-12-02 14:31:20 +00001001 return PyLong_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001002}
1003
Martin v. Löwisc847f402003-01-21 11:09:21 +00001004
1005#if XML_COMBINED_VERSION >= 19505
Brett Cannond0aeda82014-08-22 14:23:20 -04001006/*[clinic input]
1007pyexpat.xmlparser.UseForeignDTD
1008
1009 flag: bool = True
1010 /
1011
1012Allows the application to provide an artificial external subset if one is not specified as part of the document instance.
1013
1014This readily allows the use of a 'default' document type controlled by the
1015application, while still getting the advantage of providing document type
1016information to the parser. 'flag' defaults to True if not provided.
1017[clinic start generated code]*/
1018
Brett Cannond0aeda82014-08-22 14:23:20 -04001019static PyObject *
1020pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, int flag)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001021/*[clinic end generated code: output=cfaa9aa50bb0f65c input=78144c519d116a6e]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001022{
Martin v. Löwis069dde22003-01-21 10:58:18 +00001023 enum XML_Error rc;
Brett Cannond0aeda82014-08-22 14:23:20 -04001024
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001025 rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
Martin v. Löwis069dde22003-01-21 10:58:18 +00001026 if (rc != XML_ERROR_NONE) {
1027 return set_error(self, rc);
1028 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001029 Py_RETURN_NONE;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001030}
Martin v. Löwisc847f402003-01-21 11:09:21 +00001031#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001032
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001033static struct PyMethodDef xmlparse_methods[] = {
Brett Cannond0aeda82014-08-22 14:23:20 -04001034 PYEXPAT_XMLPARSER_PARSE_METHODDEF
1035 PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
1036 PYEXPAT_XMLPARSER_SETBASE_METHODDEF
1037 PYEXPAT_XMLPARSER_GETBASE_METHODDEF
1038 PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
1039 PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
1040 PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
Martin v. Löwisc847f402003-01-21 11:09:21 +00001041#if XML_COMBINED_VERSION >= 19505
Brett Cannond0aeda82014-08-22 14:23:20 -04001042 PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
Martin v. Löwisc847f402003-01-21 11:09:21 +00001043#endif
Brett Cannond0aeda82014-08-22 14:23:20 -04001044 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001045};
1046
1047/* ---------- */
1048
1049
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001050
Fred Drake71b63ff2002-06-28 22:29:01 +00001051/* pyexpat international encoding support.
1052 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001053*/
1054
Fred Drake71b63ff2002-06-28 22:29:01 +00001055static int
1056PyUnknownEncodingHandler(void *encodingHandlerData,
1057 const XML_Char *name,
1058 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001059{
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001060 static unsigned char template_buffer[256] = {0};
1061 PyObject* u;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001062 int i;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001063 const void *data;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001064 unsigned int kind;
Fred Drake71b63ff2002-06-28 22:29:01 +00001065
Victor Stinner9e09c262013-07-18 23:17:01 +02001066 if (PyErr_Occurred())
1067 return XML_STATUS_ERROR;
1068
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001069 if (template_buffer[1] == 0) {
1070 for (i = 0; i < 256; i++)
1071 template_buffer[i] = i;
Tim Peters63cb99e2001-02-17 18:12:50 +00001072 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001073
1074 u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
Christian Heimesb5821552013-06-29 20:43:13 +02001075 if (u == NULL || PyUnicode_READY(u)) {
Christian Heimes72172422013-06-29 21:49:27 +02001076 Py_XDECREF(u);
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001077 return XML_STATUS_ERROR;
Christian Heimesb5821552013-06-29 20:43:13 +02001078 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001079
1080 if (PyUnicode_GET_LENGTH(u) != 256) {
1081 Py_DECREF(u);
1082 PyErr_SetString(PyExc_ValueError,
1083 "multi-byte encodings are not supported");
1084 return XML_STATUS_ERROR;
1085 }
1086
1087 kind = PyUnicode_KIND(u);
1088 data = PyUnicode_DATA(u);
1089 for (i = 0; i < 256; i++) {
1090 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1091 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1092 info->map[i] = ch;
1093 else
1094 info->map[i] = -1;
1095 }
1096
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001097 info->data = NULL;
1098 info->convert = NULL;
1099 info->release = NULL;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001100 Py_DECREF(u);
1101
1102 return XML_STATUS_OK;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001103}
1104
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001105
1106static PyObject *
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001107newxmlparseobject(const char *encoding, const char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001108{
1109 int i;
1110 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001111
Martin v. Löwis894258c2001-09-23 10:20:10 +00001112 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake0582df92000-07-12 04:49:00 +00001113 if (self == NULL)
1114 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001115
Fred Drake2a3d7db2002-06-28 22:56:48 +00001116 self->buffer = NULL;
1117 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1118 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001119 self->ordered_attributes = 0;
1120 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001121 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001122 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001123 self->handlers = NULL;
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001124 self->intern = intern;
1125 Py_XINCREF(self->intern);
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001126
Christian Heimesfa535f52013-07-07 17:35:11 +02001127 /* namespace_separator is either NULL or contains one char + \0 */
1128 self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
1129 namespace_separator);
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001130 if (self->itself == NULL) {
1131 PyErr_SetString(PyExc_RuntimeError,
1132 "XML_ParserCreate failed");
1133 Py_DECREF(self);
1134 return NULL;
1135 }
Victor Stinner23ec4b52017-06-15 00:54:36 +02001136#if XML_COMBINED_VERSION >= 20100
1137 /* This feature was added upstream in libexpat 2.1.0. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001138 XML_SetHashSalt(self->itself,
Christian Heimes985ecdc2013-11-20 11:46:18 +01001139 (unsigned long)_Py_HashSecret.expat.hashsalt);
Gregory P. Smith25227712012-03-14 18:10:37 -07001140#endif
Fred Drake0582df92000-07-12 04:49:00 +00001141 XML_SetUserData(self->itself, (void *)self);
Fred Drake7c75bf22002-07-01 14:02:31 +00001142 XML_SetUnknownEncodingHandler(self->itself,
1143 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001144
Fred Drake2a3d7db2002-06-28 22:56:48 +00001145 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001146 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001147
Serhiy Storchaka1a1ff292015-02-16 13:28:22 +02001148 self->handlers = PyMem_New(PyObject *, i);
Fred Drake7c75bf22002-07-01 14:02:31 +00001149 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001150 Py_DECREF(self);
1151 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001152 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001153 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001154
Victor Stinner1b184552019-10-08 00:09:31 +02001155 PyObject_GC_Track(self);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001156 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001157}
1158
1159
1160static void
Fred Drake0582df92000-07-12 04:49:00 +00001161xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001162{
Fred Drake0582df92000-07-12 04:49:00 +00001163 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001164 PyObject_GC_UnTrack(self);
Fred Drake85d835f2001-02-08 15:39:08 +00001165 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001166 XML_ParserFree(self->itself);
1167 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001168
Fred Drake85d835f2001-02-08 15:39:08 +00001169 if (self->handlers != NULL) {
Serhiy Storchaka1ed017a2015-12-27 15:51:32 +02001170 for (i = 0; handler_info[i].name != NULL; i++)
1171 Py_CLEAR(self->handlers[i]);
Victor Stinnerb6404912013-07-07 16:21:41 +02001172 PyMem_Free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001173 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001174 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001175 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001176 PyMem_Free(self->buffer);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001177 self->buffer = NULL;
1178 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001179 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001180 PyObject_GC_Del(self);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001181}
1182
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001183
1184static PyObject *
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001185xmlparse_handler_getter(xmlparseobject *self, struct HandlerInfo *hi)
Fred Drake71b63ff2002-06-28 22:29:01 +00001186{
Victor Stinner28f468c2018-11-22 13:21:43 +01001187 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1188 int handlernum = (int)(hi - handler_info);
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001189 PyObject *result = self->handlers[handlernum];
1190 if (result == NULL)
1191 result = Py_None;
Fred Drake71b63ff2002-06-28 22:29:01 +00001192 Py_INCREF(result);
1193 return result;
1194}
1195
Fred Drake6f987622000-08-25 18:03:30 +00001196static int
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001197xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *hi)
Fred Drake0582df92000-07-12 04:49:00 +00001198{
Victor Stinner28f468c2018-11-22 13:21:43 +01001199 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1200 int handlernum = (int)(hi - handler_info);
Fred Drake85d835f2001-02-08 15:39:08 +00001201 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001202 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1203 return -1;
1204 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001205 if (handlernum == CharacterData) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001206 /* If we're changing the character data handler, flush all
1207 * cached data with the old handler. Not sure there's a
1208 * "right" thing to do, though, but this probably won't
1209 * happen.
1210 */
1211 if (flush_character_buffer(self) < 0)
1212 return -1;
1213 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001214
1215 xmlhandler c_handler = NULL;
1216 if (v == Py_None) {
1217 /* If this is the character data handler, and a character
1218 data handler is already active, we need to be more
1219 careful. What we can safely do is replace the existing
1220 character data handler callback function with a no-op
1221 function that will refuse to call Python. The downside
1222 is that this doesn't completely remove the character
1223 data handler from the C layer if there's any callback
1224 active, so Expat does a little more work than it
1225 otherwise would, but that's really an odd case. A more
1226 elaborate system of handlers and state could remove the
1227 C handler more effectively. */
1228 if (handlernum == CharacterData && self->in_callback)
1229 c_handler = noop_character_data_handler;
1230 v = NULL;
1231 }
1232 else if (v != NULL) {
1233 Py_INCREF(v);
1234 c_handler = handler_info[handlernum].handler;
1235 }
1236 Py_XSETREF(self->handlers[handlernum], v);
1237 handler_info[handlernum].setter(self->itself, c_handler);
1238 return 0;
1239}
1240
1241#define INT_GETTER(name) \
1242 static PyObject * \
1243 xmlparse_##name##_getter(xmlparseobject *self, void *closure) \
1244 { \
1245 return PyLong_FromLong((long) XML_Get##name(self->itself)); \
1246 }
1247INT_GETTER(ErrorCode)
1248INT_GETTER(ErrorLineNumber)
1249INT_GETTER(ErrorColumnNumber)
1250INT_GETTER(ErrorByteIndex)
1251INT_GETTER(CurrentLineNumber)
1252INT_GETTER(CurrentColumnNumber)
1253INT_GETTER(CurrentByteIndex)
1254
1255#undef INT_GETTER
1256
1257static PyObject *
1258xmlparse_buffer_text_getter(xmlparseobject *self, void *closure)
1259{
1260 return PyBool_FromLong(self->buffer != NULL);
1261}
1262
1263static int
1264xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
1265{
1266 if (v == NULL) {
1267 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1268 return -1;
1269 }
1270 int b = PyObject_IsTrue(v);
1271 if (b < 0)
1272 return -1;
1273 if (b) {
1274 if (self->buffer == NULL) {
1275 self->buffer = PyMem_Malloc(self->buffer_size);
1276 if (self->buffer == NULL) {
1277 PyErr_NoMemory();
1278 return -1;
1279 }
1280 self->buffer_used = 0;
1281 }
1282 }
1283 else if (self->buffer != NULL) {
1284 if (flush_character_buffer(self) < 0)
1285 return -1;
1286 PyMem_Free(self->buffer);
1287 self->buffer = NULL;
1288 }
1289 return 0;
1290}
1291
1292static PyObject *
1293xmlparse_buffer_size_getter(xmlparseobject *self, void *closure)
1294{
1295 return PyLong_FromLong((long) self->buffer_size);
1296}
1297
1298static int
1299xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
1300{
1301 if (v == NULL) {
1302 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1303 return -1;
1304 }
1305 long new_buffer_size;
1306 if (!PyLong_Check(v)) {
1307 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1308 return -1;
1309 }
1310
1311 new_buffer_size = PyLong_AsLong(v);
1312 if (new_buffer_size <= 0) {
1313 if (!PyErr_Occurred())
1314 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1315 return -1;
1316 }
1317
1318 /* trivial case -- no change */
1319 if (new_buffer_size == self->buffer_size) {
Fred Drake6f987622000-08-25 18:03:30 +00001320 return 0;
1321 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001322
1323 /* check maximum */
1324 if (new_buffer_size > INT_MAX) {
1325 char errmsg[100];
1326 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1327 PyErr_SetString(PyExc_ValueError, errmsg);
1328 return -1;
1329 }
1330
1331 if (self->buffer != NULL) {
1332 /* there is already a buffer */
1333 if (self->buffer_used != 0) {
1334 if (flush_character_buffer(self) < 0) {
1335 return -1;
1336 }
1337 }
1338 /* free existing buffer */
1339 PyMem_Free(self->buffer);
1340 }
1341 self->buffer = PyMem_Malloc(new_buffer_size);
1342 if (self->buffer == NULL) {
1343 PyErr_NoMemory();
1344 return -1;
1345 }
1346 self->buffer_size = new_buffer_size;
1347 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001348}
1349
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001350static PyObject *
1351xmlparse_buffer_used_getter(xmlparseobject *self, void *closure)
1352{
1353 return PyLong_FromLong((long) self->buffer_used);
1354}
1355
1356static PyObject *
1357xmlparse_namespace_prefixes_getter(xmlparseobject *self, void *closure)
1358{
1359 return PyBool_FromLong(self->ns_prefixes);
1360}
1361
1362static int
1363xmlparse_namespace_prefixes_setter(xmlparseobject *self, PyObject *v, void *closure)
1364{
1365 if (v == NULL) {
1366 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1367 return -1;
1368 }
1369 int b = PyObject_IsTrue(v);
1370 if (b < 0)
1371 return -1;
1372 self->ns_prefixes = b;
1373 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1374 return 0;
1375}
1376
1377static PyObject *
1378xmlparse_ordered_attributes_getter(xmlparseobject *self, void *closure)
1379{
1380 return PyBool_FromLong(self->ordered_attributes);
1381}
1382
1383static int
1384xmlparse_ordered_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1385{
1386 if (v == NULL) {
1387 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1388 return -1;
1389 }
1390 int b = PyObject_IsTrue(v);
1391 if (b < 0)
1392 return -1;
1393 self->ordered_attributes = b;
1394 return 0;
1395}
1396
1397static PyObject *
1398xmlparse_specified_attributes_getter(xmlparseobject *self, void *closure)
1399{
1400 return PyBool_FromLong((long) self->specified_attributes);
1401}
1402
1403static int
1404xmlparse_specified_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1405{
1406 if (v == NULL) {
1407 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1408 return -1;
1409 }
1410 int b = PyObject_IsTrue(v);
1411 if (b < 0)
1412 return -1;
1413 self->specified_attributes = b;
1414 return 0;
1415}
1416
1417static PyMemberDef xmlparse_members[] = {
1418 {"intern", T_OBJECT, offsetof(xmlparseobject, intern), READONLY, NULL},
1419 {NULL}
1420};
1421
1422#define XMLPARSE_GETTER_DEF(name) \
1423 {#name, (getter)xmlparse_##name##_getter, NULL, NULL},
1424#define XMLPARSE_GETTER_SETTER_DEF(name) \
1425 {#name, (getter)xmlparse_##name##_getter, \
1426 (setter)xmlparse_##name##_setter, NULL},
1427
1428static PyGetSetDef xmlparse_getsetlist[] = {
1429 XMLPARSE_GETTER_DEF(ErrorCode)
1430 XMLPARSE_GETTER_DEF(ErrorLineNumber)
1431 XMLPARSE_GETTER_DEF(ErrorColumnNumber)
1432 XMLPARSE_GETTER_DEF(ErrorByteIndex)
1433 XMLPARSE_GETTER_DEF(CurrentLineNumber)
1434 XMLPARSE_GETTER_DEF(CurrentColumnNumber)
1435 XMLPARSE_GETTER_DEF(CurrentByteIndex)
1436 XMLPARSE_GETTER_SETTER_DEF(buffer_size)
1437 XMLPARSE_GETTER_SETTER_DEF(buffer_text)
1438 XMLPARSE_GETTER_DEF(buffer_used)
1439 XMLPARSE_GETTER_SETTER_DEF(namespace_prefixes)
1440 XMLPARSE_GETTER_SETTER_DEF(ordered_attributes)
1441 XMLPARSE_GETTER_SETTER_DEF(specified_attributes)
1442 {NULL},
1443};
1444
1445#undef XMLPARSE_GETTER_DEF
1446#undef XMLPARSE_GETTER_SETTER_DEF
1447
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001448static int
1449xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1450{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001451 int i;
1452 for (i = 0; handler_info[i].name != NULL; i++)
1453 Py_VISIT(op->handlers[i]);
Fred Drakecde79132001-04-25 16:01:30 +00001454 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001455}
1456
1457static int
1458xmlparse_clear(xmlparseobject *op)
1459{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001460 clear_handlers(op, 0);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001461 Py_CLEAR(op->intern);
Fred Drakecde79132001-04-25 16:01:30 +00001462 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001463}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001464
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001465PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001466
1467static PyTypeObject Xmlparsetype = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001468 PyVarObject_HEAD_INIT(NULL, 0)
1469 "pyexpat.xmlparser", /*tp_name*/
Antoine Pitrou23683ef2011-01-04 00:00:31 +00001470 sizeof(xmlparseobject), /*tp_basicsize*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001471 0, /*tp_itemsize*/
1472 /* methods */
1473 (destructor)xmlparse_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001474 0, /*tp_vectorcall_offset*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001475 0, /*tp_getattr*/
Alexander Belopolskye239d232010-12-08 23:31:48 +00001476 0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001477 0, /*tp_as_async*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001478 (reprfunc)0, /*tp_repr*/
1479 0, /*tp_as_number*/
1480 0, /*tp_as_sequence*/
1481 0, /*tp_as_mapping*/
1482 (hashfunc)0, /*tp_hash*/
1483 (ternaryfunc)0, /*tp_call*/
1484 (reprfunc)0, /*tp_str*/
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001485 (getattrofunc)0, /* tp_getattro */
1486 (setattrofunc)0, /* tp_setattro */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001487 0, /* tp_as_buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001488 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001489 Xmlparsetype__doc__, /* tp_doc - Documentation string */
1490 (traverseproc)xmlparse_traverse, /* tp_traverse */
1491 (inquiry)xmlparse_clear, /* tp_clear */
1492 0, /* tp_richcompare */
1493 0, /* tp_weaklistoffset */
1494 0, /* tp_iter */
1495 0, /* tp_iternext */
1496 xmlparse_methods, /* tp_methods */
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001497 xmlparse_members, /* tp_members */
1498 xmlparse_getsetlist, /* tp_getset */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001499};
1500
1501/* End of code for xmlparser objects */
1502/* -------------------------------------------------------- */
1503
Brett Cannond0aeda82014-08-22 14:23:20 -04001504/*[clinic input]
1505pyexpat.ParserCreate
1506
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001507 encoding: str(accept={str, NoneType}) = None
1508 namespace_separator: str(accept={str, NoneType}) = None
Brett Cannond0aeda82014-08-22 14:23:20 -04001509 intern: object = NULL
1510
1511Return a new XML parser object.
1512[clinic start generated code]*/
1513
Brett Cannond0aeda82014-08-22 14:23:20 -04001514static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001515pyexpat_ParserCreate_impl(PyObject *module, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04001516 const char *namespace_separator, PyObject *intern)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001517/*[clinic end generated code: output=295c0cf01ab1146c input=e8da8e8d7122cb5d]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001518{
Fred Drakeb91a36b2002-06-27 19:40:48 +00001519 PyObject *result;
1520 int intern_decref = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001521
Fred Drakecde79132001-04-25 16:01:30 +00001522 if (namespace_separator != NULL
1523 && strlen(namespace_separator) > 1) {
1524 PyErr_SetString(PyExc_ValueError,
1525 "namespace_separator must be at most one"
1526 " character, omitted, or None");
1527 return NULL;
1528 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001529 /* Explicitly passing None means no interning is desired.
1530 Not passing anything means that a new dictionary is used. */
1531 if (intern == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001532 intern = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001533 else if (intern == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001534 intern = PyDict_New();
1535 if (!intern)
1536 return NULL;
1537 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001538 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001539 else if (!PyDict_Check(intern)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001540 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1541 return NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001542 }
1543
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001544 result = newxmlparseobject(encoding, namespace_separator, intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001545 if (intern_decref) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001546 Py_DECREF(intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001547 }
1548 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001549}
1550
Brett Cannond0aeda82014-08-22 14:23:20 -04001551/*[clinic input]
1552pyexpat.ErrorString
1553
1554 code: long
1555 /
1556
1557Returns string error for given number.
1558[clinic start generated code]*/
1559
Brett Cannond0aeda82014-08-22 14:23:20 -04001560static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001561pyexpat_ErrorString_impl(PyObject *module, long code)
1562/*[clinic end generated code: output=2feae50d166f2174 input=cc67de010d9e62b3]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001563{
Fred Drake0582df92000-07-12 04:49:00 +00001564 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001565}
1566
1567/* List of methods defined in the module */
1568
1569static struct PyMethodDef pyexpat_methods[] = {
Brett Cannond0aeda82014-08-22 14:23:20 -04001570 PYEXPAT_PARSERCREATE_METHODDEF
1571 PYEXPAT_ERRORSTRING_METHODDEF
1572 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001573};
1574
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001575/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001576
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001577PyDoc_STRVAR(pyexpat_module_documentation,
1578"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001579
Fred Drakecde79132001-04-25 16:01:30 +00001580/* Initialization function for the module */
1581
1582#ifndef MODULE_NAME
1583#define MODULE_NAME "pyexpat"
1584#endif
1585
1586#ifndef MODULE_INITFUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001587#define MODULE_INITFUNC PyInit_pyexpat
Fred Drakecde79132001-04-25 16:01:30 +00001588#endif
1589
Martin v. Löwis1a214512008-06-11 05:26:20 +00001590static struct PyModuleDef pyexpatmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001591 PyModuleDef_HEAD_INIT,
1592 MODULE_NAME,
1593 pyexpat_module_documentation,
1594 -1,
1595 pyexpat_methods,
1596 NULL,
1597 NULL,
1598 NULL,
1599 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001600};
1601
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001602static int init_handler_descrs(void)
1603{
1604 int i;
1605 assert(!PyType_HasFeature(&Xmlparsetype, Py_TPFLAGS_VALID_VERSION_TAG));
1606 for (i = 0; handler_info[i].name != NULL; i++) {
1607 struct HandlerInfo *hi = &handler_info[i];
1608 hi->getset.name = hi->name;
1609 hi->getset.get = (getter)xmlparse_handler_getter;
1610 hi->getset.set = (setter)xmlparse_handler_setter;
1611 hi->getset.closure = &handler_info[i];
1612
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001613 PyObject *descr = PyDescr_NewGetSet(&Xmlparsetype, &hi->getset);
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001614 if (descr == NULL)
1615 return -1;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001616
1617 if (PyDict_GetItemWithError(Xmlparsetype.tp_dict, PyDescr_NAME(descr))) {
1618 Py_DECREF(descr);
1619 continue;
1620 }
1621 else if (PyErr_Occurred()) {
1622 Py_DECREF(descr);
1623 return -1;
1624 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001625 if (PyDict_SetItem(Xmlparsetype.tp_dict, PyDescr_NAME(descr), descr) < 0) {
1626 Py_DECREF(descr);
1627 return -1;
1628 }
1629 Py_DECREF(descr);
1630 }
1631 return 0;
1632}
1633
Martin v. Löwis069dde22003-01-21 10:58:18 +00001634PyMODINIT_FUNC
1635MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001636{
1637 PyObject *m, *d;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001638 PyObject *errmod_name = PyUnicode_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001639 PyObject *errors_module;
1640 PyObject *modelmod_name;
1641 PyObject *model_module;
Georg Brandlb4dac712010-10-15 14:46:48 +00001642 PyObject *tmpnum, *tmpstr;
1643 PyObject *codes_dict;
1644 PyObject *rev_codes_dict;
1645 int res;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001646 static struct PyExpat_CAPI capi;
Georg Brandlb4dac712010-10-15 14:46:48 +00001647 PyObject *capi_object;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001648
Fred Drake6f987622000-08-25 18:03:30 +00001649 if (errmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001650 return NULL;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001651 modelmod_name = PyUnicode_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001652 if (modelmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001653 return NULL;
Fred Drake6f987622000-08-25 18:03:30 +00001654
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001655 if (PyType_Ready(&Xmlparsetype) < 0 || init_handler_descrs() < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001656 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001657
Fred Drake0582df92000-07-12 04:49:00 +00001658 /* Create the module and add the functions */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001659 m = PyModule_Create(&pyexpatmodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001660 if (m == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001661 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001662
Fred Drake0582df92000-07-12 04:49:00 +00001663 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001664 if (ErrorObject == NULL) {
1665 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001666 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001667 if (ErrorObject == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001668 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001669 }
1670 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001671 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001672 Py_INCREF(ErrorObject);
1673 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001674 Py_INCREF(&Xmlparsetype);
1675 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001676
Fred Drake738293d2000-12-21 17:25:07 +00001677 PyModule_AddStringConstant(m, "EXPAT_VERSION",
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001678 XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001679 {
1680 XML_Expat_Version info = XML_ExpatVersionInfo();
1681 PyModule_AddObject(m, "version_info",
1682 Py_BuildValue("(iii)", info.major,
1683 info.minor, info.micro));
1684 }
Fred Drake0582df92000-07-12 04:49:00 +00001685 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001686 compiled, this should check and set native_encoding
1687 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001688 */
Fred Drake93adb692000-09-23 04:55:48 +00001689 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001690
Fred Drake93adb692000-09-23 04:55:48 +00001691 d = PyModule_GetDict(m);
Christian Heimes7a5457b2016-09-09 00:13:35 +02001692 if (d == NULL) {
1693 Py_DECREF(m);
1694 return NULL;
1695 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001696 errors_module = PyDict_GetItemWithError(d, errmod_name);
1697 if (errors_module == NULL && !PyErr_Occurred()) {
Fred Drakecde79132001-04-25 16:01:30 +00001698 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001699 if (errors_module != NULL) {
Eric Snow3f9eee62017-09-15 16:35:20 -06001700 _PyImport_SetModule(errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001701 /* gives away the reference to errors_module */
1702 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001703 }
1704 }
Fred Drake6f987622000-08-25 18:03:30 +00001705 Py_DECREF(errmod_name);
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001706 model_module = PyDict_GetItemWithError(d, modelmod_name);
1707 if (model_module == NULL && !PyErr_Occurred()) {
Fred Drakecde79132001-04-25 16:01:30 +00001708 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001709 if (model_module != NULL) {
Eric Snow3f9eee62017-09-15 16:35:20 -06001710 _PyImport_SetModule(modelmod_name, model_module);
Fred Drake85d835f2001-02-08 15:39:08 +00001711 /* gives away the reference to model_module */
1712 PyModule_AddObject(m, "model", model_module);
1713 }
1714 }
1715 Py_DECREF(modelmod_name);
Christian Heimes7a5457b2016-09-09 00:13:35 +02001716 if (errors_module == NULL || model_module == NULL) {
Fred Drake85d835f2001-02-08 15:39:08 +00001717 /* Don't core dump later! */
Christian Heimes7a5457b2016-09-09 00:13:35 +02001718 Py_DECREF(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001719 return NULL;
Christian Heimes7a5457b2016-09-09 00:13:35 +02001720 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001721
Martin v. Löwisc847f402003-01-21 11:09:21 +00001722#if XML_COMBINED_VERSION > 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001723 {
1724 const XML_Feature *features = XML_GetFeatureList();
1725 PyObject *list = PyList_New(0);
1726 if (list == NULL)
1727 /* just ignore it */
1728 PyErr_Clear();
1729 else {
1730 int i = 0;
1731 for (; features[i].feature != XML_FEATURE_END; ++i) {
1732 int ok;
1733 PyObject *item = Py_BuildValue("si", features[i].name,
1734 features[i].value);
1735 if (item == NULL) {
1736 Py_DECREF(list);
1737 list = NULL;
1738 break;
1739 }
1740 ok = PyList_Append(list, item);
1741 Py_DECREF(item);
1742 if (ok < 0) {
1743 PyErr_Clear();
1744 break;
1745 }
1746 }
1747 if (list != NULL)
1748 PyModule_AddObject(m, "features", list);
1749 }
1750 }
Martin v. Löwisc847f402003-01-21 11:09:21 +00001751#endif
Fred Drake6f987622000-08-25 18:03:30 +00001752
Georg Brandlb4dac712010-10-15 14:46:48 +00001753 codes_dict = PyDict_New();
1754 rev_codes_dict = PyDict_New();
1755 if (codes_dict == NULL || rev_codes_dict == NULL) {
1756 Py_XDECREF(codes_dict);
1757 Py_XDECREF(rev_codes_dict);
1758 return NULL;
1759 }
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001760
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001761#define MYCONST(name) \
Georg Brandlb4dac712010-10-15 14:46:48 +00001762 if (PyModule_AddStringConstant(errors_module, #name, \
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001763 XML_ErrorString(name)) < 0) \
Georg Brandlb4dac712010-10-15 14:46:48 +00001764 return NULL; \
1765 tmpnum = PyLong_FromLong(name); \
1766 if (tmpnum == NULL) return NULL; \
1767 res = PyDict_SetItemString(codes_dict, \
1768 XML_ErrorString(name), tmpnum); \
1769 if (res < 0) return NULL; \
1770 tmpstr = PyUnicode_FromString(XML_ErrorString(name)); \
1771 if (tmpstr == NULL) return NULL; \
1772 res = PyDict_SetItem(rev_codes_dict, tmpnum, tmpstr); \
1773 Py_DECREF(tmpstr); \
1774 Py_DECREF(tmpnum); \
1775 if (res < 0) return NULL; \
Fred Drake7bd9f412000-07-04 23:51:31 +00001776
Fred Drake0582df92000-07-12 04:49:00 +00001777 MYCONST(XML_ERROR_NO_MEMORY);
1778 MYCONST(XML_ERROR_SYNTAX);
1779 MYCONST(XML_ERROR_NO_ELEMENTS);
1780 MYCONST(XML_ERROR_INVALID_TOKEN);
1781 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1782 MYCONST(XML_ERROR_PARTIAL_CHAR);
1783 MYCONST(XML_ERROR_TAG_MISMATCH);
1784 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1785 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1786 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1787 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1788 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1789 MYCONST(XML_ERROR_ASYNC_ENTITY);
1790 MYCONST(XML_ERROR_BAD_CHAR_REF);
1791 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1792 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1793 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1794 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1795 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001796 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1797 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1798 MYCONST(XML_ERROR_NOT_STANDALONE);
Fred Drake283b6702004-08-04 22:28:16 +00001799 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1800 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1801 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1802 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1803 /* Added in Expat 1.95.7. */
1804 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1805 /* Added in Expat 1.95.8. */
1806 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1807 MYCONST(XML_ERROR_INCOMPLETE_PE);
1808 MYCONST(XML_ERROR_XML_DECL);
1809 MYCONST(XML_ERROR_TEXT_DECL);
1810 MYCONST(XML_ERROR_PUBLICID);
1811 MYCONST(XML_ERROR_SUSPENDED);
1812 MYCONST(XML_ERROR_NOT_SUSPENDED);
1813 MYCONST(XML_ERROR_ABORTED);
1814 MYCONST(XML_ERROR_FINISHED);
1815 MYCONST(XML_ERROR_SUSPEND_PE);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001816
Georg Brandlb4dac712010-10-15 14:46:48 +00001817 if (PyModule_AddStringConstant(errors_module, "__doc__",
1818 "Constants used to describe "
1819 "error conditions.") < 0)
1820 return NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001821
Georg Brandlb4dac712010-10-15 14:46:48 +00001822 if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0)
1823 return NULL;
1824 if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0)
1825 return NULL;
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001826
Fred Drake93adb692000-09-23 04:55:48 +00001827#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001828
Fred Drake85d835f2001-02-08 15:39:08 +00001829#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001830 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1831 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1832 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00001833#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001834
Fred Drake85d835f2001-02-08 15:39:08 +00001835#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1836 PyModule_AddStringConstant(model_module, "__doc__",
1837 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001838
Fred Drake85d835f2001-02-08 15:39:08 +00001839 MYCONST(XML_CTYPE_EMPTY);
1840 MYCONST(XML_CTYPE_ANY);
1841 MYCONST(XML_CTYPE_MIXED);
1842 MYCONST(XML_CTYPE_NAME);
1843 MYCONST(XML_CTYPE_CHOICE);
1844 MYCONST(XML_CTYPE_SEQ);
1845
1846 MYCONST(XML_CQUANT_NONE);
1847 MYCONST(XML_CQUANT_OPT);
1848 MYCONST(XML_CQUANT_REP);
1849 MYCONST(XML_CQUANT_PLUS);
1850#undef MYCONST
Fredrik Lundhc3345042005-12-13 19:49:55 +00001851
1852 /* initialize pyexpat dispatch table */
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001853 capi.size = sizeof(capi);
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001854 capi.magic = PyExpat_CAPI_MAGIC;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001855 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
1856 capi.MINOR_VERSION = XML_MINOR_VERSION;
1857 capi.MICRO_VERSION = XML_MICRO_VERSION;
1858 capi.ErrorString = XML_ErrorString;
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001859 capi.GetErrorCode = XML_GetErrorCode;
1860 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
1861 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001862 capi.Parse = XML_Parse;
1863 capi.ParserCreate_MM = XML_ParserCreate_MM;
1864 capi.ParserFree = XML_ParserFree;
1865 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
1866 capi.SetCommentHandler = XML_SetCommentHandler;
1867 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1868 capi.SetElementHandler = XML_SetElementHandler;
1869 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1870 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1871 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1872 capi.SetUserData = XML_SetUserData;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03001873 capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03001874 capi.SetEncoding = XML_SetEncoding;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001875 capi.DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
Christian Heimescb5778f2018-09-18 14:38:58 +02001876#if XML_COMBINED_VERSION >= 20100
1877 capi.SetHashSalt = XML_SetHashSalt;
1878#else
1879 capi.SetHashSalt = NULL;
1880#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001881
Benjamin Petersonb173f782009-05-05 22:31:58 +00001882 /* export using capsule */
1883 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001884 if (capi_object)
1885 PyModule_AddObject(m, "expat_CAPI", capi_object);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001886 return m;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001887}
1888
Fred Drake6f987622000-08-25 18:03:30 +00001889static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001890clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00001891{
Fred Drakecde79132001-04-25 16:01:30 +00001892 int i = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001893
Fred Drake71b63ff2002-06-28 22:29:01 +00001894 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001895 if (initial)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001896 self->handlers[i] = NULL;
1897 else {
Serhiy Storchaka1ed017a2015-12-27 15:51:32 +02001898 Py_CLEAR(self->handlers[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001899 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00001900 }
Fred Drakecde79132001-04-25 16:01:30 +00001901 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001902}
1903
Tim Peters0c322792002-07-17 16:49:03 +00001904static struct HandlerInfo handler_info[] = {
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001905
1906#define HANDLER_INFO(name) \
1907 {#name, (xmlhandlersetter)XML_Set##name, (xmlhandler)my_##name},
1908
1909 HANDLER_INFO(StartElementHandler)
1910 HANDLER_INFO(EndElementHandler)
1911 HANDLER_INFO(ProcessingInstructionHandler)
1912 HANDLER_INFO(CharacterDataHandler)
1913 HANDLER_INFO(UnparsedEntityDeclHandler)
1914 HANDLER_INFO(NotationDeclHandler)
1915 HANDLER_INFO(StartNamespaceDeclHandler)
1916 HANDLER_INFO(EndNamespaceDeclHandler)
1917 HANDLER_INFO(CommentHandler)
1918 HANDLER_INFO(StartCdataSectionHandler)
1919 HANDLER_INFO(EndCdataSectionHandler)
1920 HANDLER_INFO(DefaultHandler)
1921 HANDLER_INFO(DefaultHandlerExpand)
1922 HANDLER_INFO(NotStandaloneHandler)
1923 HANDLER_INFO(ExternalEntityRefHandler)
1924 HANDLER_INFO(StartDoctypeDeclHandler)
1925 HANDLER_INFO(EndDoctypeDeclHandler)
1926 HANDLER_INFO(EntityDeclHandler)
1927 HANDLER_INFO(XmlDeclHandler)
1928 HANDLER_INFO(ElementDeclHandler)
1929 HANDLER_INFO(AttlistDeclHandler)
Martin v. Löwisc847f402003-01-21 11:09:21 +00001930#if XML_COMBINED_VERSION >= 19504
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001931 HANDLER_INFO(SkippedEntityHandler)
Martin v. Löwisc847f402003-01-21 11:09:21 +00001932#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001933
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001934#undef HANDLER_INFO
1935
Fred Drake0582df92000-07-12 04:49:00 +00001936 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001937};