blob: d9123354bbed2b4a7b7f6c160b12adc463ff5728 [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Serhiy Storchaka55f82492018-10-19 18:00:51 +03004#include "structmember.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00005#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00006#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00007
Fredrik Lundhc3345042005-12-13 19:49:55 +00008#include "pyexpat.h"
9
Brett Cannond0aeda82014-08-22 14:23:20 -040010/* Do not emit Clinic output to a file as that wreaks havoc with conditionally
11 included methods. */
12/*[clinic input]
13module pyexpat
14[clinic start generated code]*/
15/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/
16
Martin v. Löwisc847f402003-01-21 11:09:21 +000017#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
18
Christian Heimesfa535f52013-07-07 17:35:11 +020019static XML_Memory_Handling_Suite ExpatMemoryHandler = {
20 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
21
/* Slot numbers for the per-parser callback table.  The handler
 * trampolines in this file use these constants to index
 * xmlparseobject.handlers.
 */
enum HandlerTypes {
    StartElement = 0,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only present when building against Expat 1.95.4 or newer. */
    SkippedEntity,
#endif
    _DummyDecl
};
49
50static PyObject *ErrorObject;
51
52/* ----------------------------------------------------- */
53
54/* Declarations for objects of type xmlparser */
55
56typedef struct {
Fred Drake0582df92000-07-12 04:49:00 +000057 PyObject_HEAD
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000058
Fred Drake0582df92000-07-12 04:49:00 +000059 XML_Parser itself;
Fred Drake85d835f2001-02-08 15:39:08 +000060 int ordered_attributes; /* Return attributes as a list. */
61 int specified_attributes; /* Report only specified attributes. */
Fred Drakebd6101c2001-02-14 18:29:45 +000062 int in_callback; /* Is a callback active? */
Martin v. Löwis069dde22003-01-21 10:58:18 +000063 int ns_prefixes; /* Namespace-triplets mode? */
Fred Drake2a3d7db2002-06-28 22:56:48 +000064 XML_Char *buffer; /* Buffer used when accumulating characters */
65 /* NULL if not enabled */
66 int buffer_size; /* Size of buffer, in XML_Char units */
67 int buffer_used; /* Buffer units in use */
Fred Drakeb91a36b2002-06-27 19:40:48 +000068 PyObject *intern; /* Dictionary to intern strings */
Fred Drake0582df92000-07-12 04:49:00 +000069 PyObject **handlers;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000070} xmlparseobject;
71
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030072#include "clinic/pyexpat.c.h"
73
Fred Drake2a3d7db2002-06-28 22:56:48 +000074#define CHARACTER_DATA_BUFFER_SIZE 8192
75
Jeremy Hylton938ace62002-07-17 16:30:39 +000076static PyTypeObject Xmlparsetype;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000077
Fred Drake117ac852002-09-24 16:24:54 +000078typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000079typedef void* xmlhandler;
80
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +000081struct HandlerInfo {
Fred Drake0582df92000-07-12 04:49:00 +000082 const char *name;
83 xmlhandlersetter setter;
84 xmlhandler handler;
Serhiy Storchaka55f82492018-10-19 18:00:51 +030085 PyGetSetDef getset;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000086};
87
Jeremy Hylton938ace62002-07-17 16:30:39 +000088static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000089
Fred Drakebd6101c2001-02-14 18:29:45 +000090/* Set an integer attribute on the error object; return true on success,
91 * false on an exception.
92 */
93static int
Serhiy Storchakaef1585e2015-12-25 20:01:53 +020094set_error_attr(PyObject *err, const char *name, int value)
Fred Drakebd6101c2001-02-14 18:29:45 +000095{
Christian Heimes217cfd12007-12-02 14:31:20 +000096 PyObject *v = PyLong_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +000097
Neal Norwitz2f5e9902006-03-08 06:36:45 +000098 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
99 Py_XDECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +0000100 return 0;
101 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000102 Py_DECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +0000103 return 1;
104}
105
106/* Build and set an Expat exception, including positioning
107 * information. Always returns NULL.
108 */
Fred Drake85d835f2001-02-08 15:39:08 +0000109static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000110set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000111{
112 PyObject *err;
Victor Stinner499dfcf2011-03-21 13:26:24 +0100113 PyObject *buffer;
Fred Drake85d835f2001-02-08 15:39:08 +0000114 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000115 int lineno = XML_GetErrorLineNumber(parser);
116 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000117
Victor Stinner499dfcf2011-03-21 13:26:24 +0100118 buffer = PyUnicode_FromFormat("%s: line %i, column %i",
119 XML_ErrorString(code), lineno, column);
120 if (buffer == NULL)
121 return NULL;
Jeroen Demeyer196a5302019-07-04 12:31:34 +0200122 err = _PyObject_CallOneArg(ErrorObject, buffer);
Victor Stinner499dfcf2011-03-21 13:26:24 +0100123 Py_DECREF(buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000124 if ( err != NULL
125 && set_error_attr(err, "code", code)
126 && set_error_attr(err, "offset", column)
127 && set_error_attr(err, "lineno", lineno)) {
128 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000129 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000130 Py_XDECREF(err);
Fred Drake85d835f2001-02-08 15:39:08 +0000131 return NULL;
132}
133
Fred Drake71b63ff2002-06-28 22:29:01 +0000134static int
135have_handler(xmlparseobject *self, int type)
136{
137 PyObject *handler = self->handlers[type];
138 return handler != NULL;
139}
140
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000141/* Convert a string of XML_Chars into a Unicode string.
142 Returns None if str is a null pointer. */
143
Fred Drake0582df92000-07-12 04:49:00 +0000144static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000145conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000146{
Fred Drake71b63ff2002-06-28 22:29:01 +0000147 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000148 and hence in UTF-8. */
149 /* UTF-8 from Expat, Unicode desired */
150 if (str == NULL) {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200151 Py_RETURN_NONE;
Fred Drake0582df92000-07-12 04:49:00 +0000152 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000153 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000154}
155
Fred Drake0582df92000-07-12 04:49:00 +0000156static PyObject *
157conv_string_len_to_unicode(const XML_Char *str, int len)
158{
Fred Drake71b63ff2002-06-28 22:29:01 +0000159 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000160 and hence in UTF-8. */
161 /* UTF-8 from Expat, Unicode desired */
162 if (str == NULL) {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200163 Py_RETURN_NONE;
Fred Drake0582df92000-07-12 04:49:00 +0000164 }
Fred Drake6f987622000-08-25 18:03:30 +0000165 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000166}
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000167
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000168/* Callback routines */
169
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000170static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000171
Martin v. Löwis069dde22003-01-21 10:58:18 +0000172/* This handler is used when an error has been detected, in the hope
173 that actual parsing can be terminated early. This will only help
174 if an external entity reference is encountered. */
175static int
176error_external_entity_ref_handler(XML_Parser parser,
177 const XML_Char *context,
178 const XML_Char *base,
179 const XML_Char *systemId,
180 const XML_Char *publicId)
181{
182 return 0;
183}
184
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000185/* Dummy character data handler used when an error (exception) has
186 been detected, and the actual parsing can be terminated early.
187 This is needed since character data handler can't be safely removed
188 from within the character data handler, but can be replaced. It is
189 used only from the character data handler trampoline, and must be
190 used right after `flag_error()` is called. */
191static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000192noop_character_data_handler(void *userData, const XML_Char *data, int len)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000193{
194 /* Do nothing. */
195}
196
Fred Drake6f987622000-08-25 18:03:30 +0000197static void
198flag_error(xmlparseobject *self)
199{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000200 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000201 XML_SetExternalEntityRefHandler(self->itself,
202 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000203}
204
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000205static PyObject*
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200206call_with_frame(const char *funcname, int lineno, PyObject* func, PyObject* args,
Fred Drake39689c52004-08-13 03:12:57 +0000207 xmlparseobject *self)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000208{
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200209 PyObject *res;
Fred Drakebd6101c2001-02-14 18:29:45 +0000210
Jeroen Demeyer1dbd0842019-07-11 17:57:32 +0200211 res = PyObject_Call(func, args, NULL);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000212 if (res == NULL) {
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200213 _PyTraceback_Add(funcname, __FILE__, lineno);
Fred Drake39689c52004-08-13 03:12:57 +0000214 XML_StopParser(self->itself, XML_FALSE);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000215 }
Fred Drakebd6101c2001-02-14 18:29:45 +0000216 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000217}
218
Fred Drakeb91a36b2002-06-27 19:40:48 +0000219static PyObject*
220string_intern(xmlparseobject *self, const char* str)
221{
Guido van Rossum4ca94712007-07-23 17:42:32 +0000222 PyObject *result = conv_string_to_unicode(str);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000223 PyObject *value;
Neal Norwitz484d9a42005-09-30 04:46:49 +0000224 /* result can be NULL if the unicode conversion failed. */
225 if (!result)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000226 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000227 if (!self->intern)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 return result;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200229 value = PyDict_GetItemWithError(self->intern, result);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000230 if (!value) {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200231 if (!PyErr_Occurred() &&
232 PyDict_SetItem(self->intern, result, result) == 0)
233 {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000234 return result;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200235 }
Zackery Spytz68def052018-10-19 00:57:38 -0600236 else {
237 Py_DECREF(result);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000238 return NULL;
Zackery Spytz68def052018-10-19 00:57:38 -0600239 }
Fred Drakeb91a36b2002-06-27 19:40:48 +0000240 }
241 Py_INCREF(value);
242 Py_DECREF(result);
243 return value;
244}
245
Fred Drake2a3d7db2002-06-28 22:56:48 +0000246/* Return 0 on success, -1 on exception.
247 * flag_error() will be called before return if needed.
248 */
249static int
250call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
251{
252 PyObject *args;
253 PyObject *temp;
254
Georg Brandlc01537f2010-10-15 16:26:08 +0000255 if (!have_handler(self, CharacterData))
256 return -1;
257
Fred Drake2a3d7db2002-06-28 22:56:48 +0000258 args = PyTuple_New(1);
259 if (args == NULL)
260 return -1;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000261 temp = (conv_string_len_to_unicode(buffer, len));
Fred Drake2a3d7db2002-06-28 22:56:48 +0000262 if (temp == NULL) {
263 Py_DECREF(args);
264 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000265 XML_SetCharacterDataHandler(self->itself,
266 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000267 return -1;
268 }
269 PyTuple_SET_ITEM(args, 0, temp);
270 /* temp is now a borrowed reference; consider it unused. */
271 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200272 temp = call_with_frame("CharacterData", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000273 self->handlers[CharacterData], args, self);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000274 /* temp is an owned reference again, or NULL */
275 self->in_callback = 0;
276 Py_DECREF(args);
277 if (temp == NULL) {
278 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000279 XML_SetCharacterDataHandler(self->itself,
280 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000281 return -1;
282 }
283 Py_DECREF(temp);
284 return 0;
285}
286
287static int
288flush_character_buffer(xmlparseobject *self)
289{
290 int rc;
291 if (self->buffer == NULL || self->buffer_used == 0)
292 return 0;
293 rc = call_character_handler(self, self->buffer, self->buffer_used);
294 self->buffer_used = 0;
295 return rc;
296}
297
298static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000299my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000300{
301 xmlparseobject *self = (xmlparseobject *) userData;
Victor Stinner9e09c262013-07-18 23:17:01 +0200302
303 if (PyErr_Occurred())
304 return;
305
Fred Drake2a3d7db2002-06-28 22:56:48 +0000306 if (self->buffer == NULL)
307 call_character_handler(self, data, len);
308 else {
309 if ((self->buffer_used + len) > self->buffer_size) {
310 if (flush_character_buffer(self) < 0)
311 return;
312 /* handler might have changed; drop the rest on the floor
313 * if there isn't a handler anymore
314 */
315 if (!have_handler(self, CharacterData))
316 return;
317 }
318 if (len > self->buffer_size) {
319 call_character_handler(self, data, len);
320 self->buffer_used = 0;
321 }
322 else {
323 memcpy(self->buffer + self->buffer_used,
324 data, len * sizeof(XML_Char));
325 self->buffer_used += len;
326 }
327 }
328}
329
Fred Drake85d835f2001-02-08 15:39:08 +0000330static void
331my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000332 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000333{
334 xmlparseobject *self = (xmlparseobject *)userData;
335
Fred Drake71b63ff2002-06-28 22:29:01 +0000336 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000337 PyObject *container, *rv, *args;
338 int i, max;
339
Victor Stinner9e09c262013-07-18 23:17:01 +0200340 if (PyErr_Occurred())
341 return;
342
Fred Drake2a3d7db2002-06-28 22:56:48 +0000343 if (flush_character_buffer(self) < 0)
344 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000345 /* Set max to the number of slots filled in atts[]; max/2 is
346 * the number of attributes we need to process.
347 */
348 if (self->specified_attributes) {
349 max = XML_GetSpecifiedAttributeCount(self->itself);
350 }
351 else {
352 max = 0;
353 while (atts[max] != NULL)
354 max += 2;
355 }
356 /* Build the container. */
357 if (self->ordered_attributes)
358 container = PyList_New(max);
359 else
360 container = PyDict_New();
361 if (container == NULL) {
362 flag_error(self);
363 return;
364 }
365 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000366 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000367 PyObject *v;
368 if (n == NULL) {
369 flag_error(self);
370 Py_DECREF(container);
371 return;
372 }
Guido van Rossum4ca94712007-07-23 17:42:32 +0000373 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
Fred Drake85d835f2001-02-08 15:39:08 +0000374 if (v == NULL) {
375 flag_error(self);
376 Py_DECREF(container);
377 Py_DECREF(n);
378 return;
379 }
380 if (self->ordered_attributes) {
381 PyList_SET_ITEM(container, i, n);
382 PyList_SET_ITEM(container, i+1, v);
383 }
384 else if (PyDict_SetItem(container, n, v)) {
385 flag_error(self);
386 Py_DECREF(n);
387 Py_DECREF(v);
Zackery Spytz68def052018-10-19 00:57:38 -0600388 Py_DECREF(container);
Fred Drake85d835f2001-02-08 15:39:08 +0000389 return;
390 }
391 else {
392 Py_DECREF(n);
393 Py_DECREF(v);
394 }
395 }
Neal Norwitz484d9a42005-09-30 04:46:49 +0000396 args = string_intern(self, name);
Fred Drake85d835f2001-02-08 15:39:08 +0000397 if (args == NULL) {
398 Py_DECREF(container);
399 return;
400 }
Zackery Spytz68def052018-10-19 00:57:38 -0600401 args = Py_BuildValue("(NN)", args, container);
402 if (args == NULL) {
403 return;
404 }
Fred Drake85d835f2001-02-08 15:39:08 +0000405 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000406 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200407 rv = call_with_frame("StartElement", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000408 self->handlers[StartElement], args, self);
Fred Drakebd6101c2001-02-14 18:29:45 +0000409 self->in_callback = 0;
410 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000411 if (rv == NULL) {
412 flag_error(self);
413 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000414 }
Fred Drake85d835f2001-02-08 15:39:08 +0000415 Py_DECREF(rv);
416 }
417}
418
/* Generate the Expat callback trampoline my_<NAME>Handler.
 *
 *   RC            return type of the generated function
 *   NAME          enum HandlerTypes constant selecting the Python handler
 *   PARAMS        parenthesized C parameter list
 *   INIT          extra declaration(s) placed after the locals
 *   PARAM_FORMAT  parenthesized argument list for Py_BuildValue
 *   CONVERSION    statement(s) run on the handler result `rv`
 *   RETURN        expression returned on every path
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing when no handler is installed,
 * when an exception is already pending, or when flushing the character
 * buffer fails.  A failure to build the arguments or a failing handler
 * call triggers flag_error().
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                   RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (!have_handler(self, NAME)) { \
        return RETURN; \
    } \
    if (PyErr_Occurred()) { \
        return RETURN; \
    } \
    if (flush_character_buffer(self) < 0) { \
        return RETURN; \
    } \
    args = Py_BuildValue PARAM_FORMAT ;\
    if (args == NULL) { \
        flag_error(self); \
        return RETURN; \
    } \
    self->in_callback = 1; \
    rv = call_with_frame(#NAME,__LINE__, \
                         self->handlers[NAME], args, self); \
    self->in_callback = 0; \
    Py_DECREF(args); \
    if (rv == NULL) { \
        flag_error(self); \
        return RETURN; \
    } \
    CONVERSION \
    Py_DECREF(rv); \
    return RETURN; \
}
449
/* Trampoline for callbacks returning void; the parser object is taken
   from the `userData` parameter. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;, \
               (xmlparseobject *)userData)

/* Trampoline for callbacks returning int: the result starts at 0 and is
   replaced by the handler's return value converted with
   PyLong_AsLong(). */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
               rc = PyLong_AsLong(rv);, rc, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000458
Fred Drake71b63ff2002-06-28 22:29:01 +0000459VOID_HANDLER(EndElement,
460 (void *userData, const XML_Char *name),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000461 ("(N)", string_intern(self, name)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000462
Fred Drake6f987622000-08-25 18:03:30 +0000463VOID_HANDLER(ProcessingInstruction,
Fred Drake71b63ff2002-06-28 22:29:01 +0000464 (void *userData,
465 const XML_Char *target,
Fred Drake85d835f2001-02-08 15:39:08 +0000466 const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000467 ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000468
Fred Drake6f987622000-08-25 18:03:30 +0000469VOID_HANDLER(UnparsedEntityDecl,
Fred Drake71b63ff2002-06-28 22:29:01 +0000470 (void *userData,
Fred Drake85d835f2001-02-08 15:39:08 +0000471 const XML_Char *entityName,
472 const XML_Char *base,
473 const XML_Char *systemId,
474 const XML_Char *publicId,
475 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000476 ("(NNNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000477 string_intern(self, entityName), string_intern(self, base),
478 string_intern(self, systemId), string_intern(self, publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000479 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000480
Fred Drake85d835f2001-02-08 15:39:08 +0000481VOID_HANDLER(EntityDecl,
482 (void *userData,
483 const XML_Char *entityName,
484 int is_parameter_entity,
485 const XML_Char *value,
486 int value_length,
487 const XML_Char *base,
488 const XML_Char *systemId,
489 const XML_Char *publicId,
490 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000491 ("NiNNNNN",
492 string_intern(self, entityName), is_parameter_entity,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000493 (conv_string_len_to_unicode(value, value_length)),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000494 string_intern(self, base), string_intern(self, systemId),
495 string_intern(self, publicId),
496 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000497
498VOID_HANDLER(XmlDecl,
499 (void *userData,
500 const XML_Char *version,
501 const XML_Char *encoding,
502 int standalone),
503 ("(O&O&i)",
Guido van Rossum4ca94712007-07-23 17:42:32 +0000504 conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
Fred Drake85d835f2001-02-08 15:39:08 +0000505 standalone))
506
507static PyObject *
508conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000509 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000510{
511 PyObject *result = NULL;
512 PyObject *children = PyTuple_New(model->numchildren);
513 int i;
514
515 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000516 assert(model->numchildren < INT_MAX);
517 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000518 PyObject *child = conv_content_model(&model->children[i],
519 conv_string);
520 if (child == NULL) {
521 Py_XDECREF(children);
522 return NULL;
523 }
524 PyTuple_SET_ITEM(children, i, child);
525 }
526 result = Py_BuildValue("(iiO&N)",
527 model->type, model->quant,
528 conv_string,model->name, children);
529 }
530 return result;
531}
532
Fred Drake06dd8cf2003-02-02 03:54:17 +0000533static void
534my_ElementDeclHandler(void *userData,
535 const XML_Char *name,
536 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000537{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000538 xmlparseobject *self = (xmlparseobject *)userData;
539 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000540
Fred Drake06dd8cf2003-02-02 03:54:17 +0000541 if (have_handler(self, ElementDecl)) {
542 PyObject *rv = NULL;
543 PyObject *modelobj, *nameobj;
544
Victor Stinner9e09c262013-07-18 23:17:01 +0200545 if (PyErr_Occurred())
546 return;
547
Fred Drake06dd8cf2003-02-02 03:54:17 +0000548 if (flush_character_buffer(self) < 0)
549 goto finally;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000550 modelobj = conv_content_model(model, (conv_string_to_unicode));
Fred Drake06dd8cf2003-02-02 03:54:17 +0000551 if (modelobj == NULL) {
552 flag_error(self);
553 goto finally;
554 }
555 nameobj = string_intern(self, name);
556 if (nameobj == NULL) {
557 Py_DECREF(modelobj);
558 flag_error(self);
559 goto finally;
560 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000561 args = Py_BuildValue("NN", nameobj, modelobj);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000562 if (args == NULL) {
Fred Drake06dd8cf2003-02-02 03:54:17 +0000563 flag_error(self);
564 goto finally;
565 }
566 self->in_callback = 1;
Antoine Pitrou0ddbf472014-10-08 20:00:09 +0200567 rv = call_with_frame("ElementDecl", __LINE__,
Fred Drake39689c52004-08-13 03:12:57 +0000568 self->handlers[ElementDecl], args, self);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000569 self->in_callback = 0;
570 if (rv == NULL) {
571 flag_error(self);
572 goto finally;
573 }
574 Py_DECREF(rv);
575 }
576 finally:
577 Py_XDECREF(args);
578 XML_FreeContentModel(self->itself, model);
579 return;
580}
Fred Drake85d835f2001-02-08 15:39:08 +0000581
582VOID_HANDLER(AttlistDecl,
583 (void *userData,
584 const XML_Char *elname,
585 const XML_Char *attname,
586 const XML_Char *att_type,
587 const XML_Char *dflt,
588 int isrequired),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000589 ("(NNO&O&i)",
590 string_intern(self, elname), string_intern(self, attname),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000591 conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
Fred Drake85d835f2001-02-08 15:39:08 +0000592 isrequired))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000593
Martin v. Löwisc847f402003-01-21 11:09:21 +0000594#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +0000595VOID_HANDLER(SkippedEntity,
596 (void *userData,
597 const XML_Char *entityName,
598 int is_parameter_entity),
599 ("Ni",
600 string_intern(self, entityName), is_parameter_entity))
Martin v. Löwisc847f402003-01-21 11:09:21 +0000601#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +0000602
Fred Drake71b63ff2002-06-28 22:29:01 +0000603VOID_HANDLER(NotationDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000604 (void *userData,
605 const XML_Char *notationName,
606 const XML_Char *base,
607 const XML_Char *systemId,
608 const XML_Char *publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000609 ("(NNNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000610 string_intern(self, notationName), string_intern(self, base),
611 string_intern(self, systemId), string_intern(self, publicId)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000612
Fred Drake6f987622000-08-25 18:03:30 +0000613VOID_HANDLER(StartNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000614 (void *userData,
615 const XML_Char *prefix,
616 const XML_Char *uri),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000617 ("(NN)",
618 string_intern(self, prefix), string_intern(self, uri)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000619
Fred Drake6f987622000-08-25 18:03:30 +0000620VOID_HANDLER(EndNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000621 (void *userData,
622 const XML_Char *prefix),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000623 ("(N)", string_intern(self, prefix)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000624
Fred Drake6f987622000-08-25 18:03:30 +0000625VOID_HANDLER(Comment,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000626 (void *userData, const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000627 ("(O&)", conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000628
Fred Drake6f987622000-08-25 18:03:30 +0000629VOID_HANDLER(StartCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000630 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000631 ("()"))
Fred Drake71b63ff2002-06-28 22:29:01 +0000632
Fred Drake6f987622000-08-25 18:03:30 +0000633VOID_HANDLER(EndCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000634 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000635 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000636
Fred Drake6f987622000-08-25 18:03:30 +0000637VOID_HANDLER(Default,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000638 (void *userData, const XML_Char *s, int len),
639 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000640
Fred Drake6f987622000-08-25 18:03:30 +0000641VOID_HANDLER(DefaultHandlerExpand,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000642 (void *userData, const XML_Char *s, int len),
643 ("(N)", (conv_string_len_to_unicode(s,len))))
Serhiy Storchaka55f82492018-10-19 18:00:51 +0300644#define my_DefaultHandlerExpand my_DefaultHandlerExpandHandler
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000645
Fred Drake71b63ff2002-06-28 22:29:01 +0000646INT_HANDLER(NotStandalone,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000647 (void *userData),
648 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000649
Fred Drake6f987622000-08-25 18:03:30 +0000650RC_HANDLER(int, ExternalEntityRef,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000651 (XML_Parser parser,
652 const XML_Char *context,
653 const XML_Char *base,
654 const XML_Char *systemId,
655 const XML_Char *publicId),
656 int rc=0;,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000657 ("(O&NNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000658 conv_string_to_unicode ,context, string_intern(self, base),
659 string_intern(self, systemId), string_intern(self, publicId)),
660 rc = PyLong_AsLong(rv);, rc,
661 XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000662
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000663/* XXX UnknownEncodingHandler */
664
Fred Drake85d835f2001-02-08 15:39:08 +0000665VOID_HANDLER(StartDoctypeDecl,
666 (void *userData, const XML_Char *doctypeName,
667 const XML_Char *sysid, const XML_Char *pubid,
668 int has_internal_subset),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000669 ("(NNNi)", string_intern(self, doctypeName),
670 string_intern(self, sysid), string_intern(self, pubid),
Fred Drake85d835f2001-02-08 15:39:08 +0000671 has_internal_subset))
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000672
673VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000674
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000675/* ---------------------------------------------------------------- */
Brett Cannond0aeda82014-08-22 14:23:20 -0400676/*[clinic input]
677class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype"
678[clinic start generated code]*/
679/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2393162385232e1c]*/
680
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000681
Fred Drake71b63ff2002-06-28 22:29:01 +0000682static PyObject *
683get_parse_result(xmlparseobject *self, int rv)
684{
685 if (PyErr_Occurred()) {
686 return NULL;
687 }
688 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000689 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000690 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000691 if (flush_character_buffer(self) < 0) {
692 return NULL;
693 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000694 return PyLong_FromLong(rv);
Fred Drake71b63ff2002-06-28 22:29:01 +0000695}
696
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200697#define MAX_CHUNK_SIZE (1 << 20)
698
Brett Cannond0aeda82014-08-22 14:23:20 -0400699/*[clinic input]
700pyexpat.xmlparser.Parse
701
702 data: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200703 isfinal: bool(accept={int}) = False
Brett Cannond0aeda82014-08-22 14:23:20 -0400704 /
705
706Parse XML data.
707
708`isfinal' should be true at end of input.
709[clinic start generated code]*/
710
Brett Cannond0aeda82014-08-22 14:23:20 -0400711static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400712pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyObject *data,
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +0300713 int isfinal)
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200714/*[clinic end generated code: output=f4db843dd1f4ed4b input=eb616027bfa9847f]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400715{
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200716 const char *s;
717 Py_ssize_t slen;
718 Py_buffer view;
719 int rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000720
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200721 if (PyUnicode_Check(data)) {
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200722 view.buf = NULL;
Serhiy Storchaka36b365c2013-02-04 18:28:01 +0200723 s = PyUnicode_AsUTF8AndSize(data, &slen);
724 if (s == NULL)
725 return NULL;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200726 /* Explicitly set UTF-8 encoding. Return code ignored. */
727 (void)XML_SetEncoding(self->itself, "utf-8");
728 }
729 else {
730 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
731 return NULL;
732 s = view.buf;
733 slen = view.len;
734 }
735
736 while (slen > MAX_CHUNK_SIZE) {
737 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
738 if (!rc)
739 goto done;
740 s += MAX_CHUNK_SIZE;
741 slen -= MAX_CHUNK_SIZE;
742 }
Serhiy Storchakafad85aa2015-11-07 15:42:38 +0200743 Py_BUILD_ASSERT(MAX_CHUNK_SIZE <= INT_MAX);
744 assert(slen <= INT_MAX);
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +0300745 rc = XML_Parse(self->itself, s, (int)slen, isfinal);
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200746
747done:
748 if (view.buf != NULL)
749 PyBuffer_Release(&view);
750 return get_parse_result(self, rc);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000751}
752
Fred Drakeca1f4262000-09-21 20:10:23 +0000753/* File reading copied from cPickle */
754
/* Number of bytes ParseFile() requests from Expat (XML_GetBuffer) and from
   the file object's read() on each iteration. */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000755#define BUF_SIZE 2048
756
Fred Drake0582df92000-07-12 04:49:00 +0000757static int
758readinst(char *buf, int buf_size, PyObject *meth)
759{
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000760 PyObject *str;
761 Py_ssize_t len;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200762 const char *ptr;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000763
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000764 str = PyObject_CallFunction(meth, "n", buf_size);
Martin v. Löwis9171f022004-10-13 19:50:11 +0000765 if (str == NULL)
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000766 goto error;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000767
Christian Heimes72b710a2008-05-26 13:28:38 +0000768 if (PyBytes_Check(str))
769 ptr = PyBytes_AS_STRING(str);
Christian Heimes9c4756e2008-05-26 13:22:05 +0000770 else if (PyByteArray_Check(str))
771 ptr = PyByteArray_AS_STRING(str);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000772 else {
Fred Drake71b63ff2002-06-28 22:29:01 +0000773 PyErr_Format(PyExc_TypeError,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000774 "read() did not return a bytes object (type=%.400s)",
Christian Heimes90aa7642007-12-19 02:45:37 +0000775 Py_TYPE(str)->tp_name);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000776 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000777 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000778 len = Py_SIZE(str);
Fred Drake0582df92000-07-12 04:49:00 +0000779 if (len > buf_size) {
780 PyErr_Format(PyExc_ValueError,
781 "read() returned too much data: "
Victor Stinner9d6f9362011-01-04 22:00:04 +0000782 "%i bytes requested, %zd returned",
Fred Drake0582df92000-07-12 04:49:00 +0000783 buf_size, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000784 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000785 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000786 memcpy(buf, ptr, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000787 Py_DECREF(str);
788 /* len <= buf_size <= INT_MAX */
Victor Stinner0fcab4a2011-01-04 12:59:15 +0000789 return (int)len;
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000790
791error:
792 Py_XDECREF(str);
793 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000794}
795
Brett Cannond0aeda82014-08-22 14:23:20 -0400796/*[clinic input]
797pyexpat.xmlparser.ParseFile
798
799 file: object
800 /
801
802Parse XML data from file-like object.
803[clinic start generated code]*/
804
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000805static PyObject *
Brett Cannond0aeda82014-08-22 14:23:20 -0400806pyexpat_xmlparser_ParseFile(xmlparseobject *self, PyObject *file)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300807/*[clinic end generated code: output=2adc6a13100cc42b input=fbb5a12b6038d735]*/
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000808{
Fred Drake0582df92000-07-12 04:49:00 +0000809 int rv = 1;
Fred Drake0582df92000-07-12 04:49:00 +0000810 PyObject *readmethod = NULL;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200811 _Py_IDENTIFIER(read);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000812
Serhiy Storchaka41c57b32019-09-01 12:03:39 +0300813 if (_PyObject_LookupAttrId(file, &PyId_read, &readmethod) < 0) {
814 return NULL;
815 }
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000816 if (readmethod == NULL) {
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000817 PyErr_SetString(PyExc_TypeError,
818 "argument must have 'read' attribute");
819 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000820 }
821 for (;;) {
822 int bytes_read;
823 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
Fred Drake7b6caff2003-07-21 17:05:56 +0000824 if (buf == NULL) {
Fred Drakef239c6d2003-07-21 17:22:43 +0000825 Py_XDECREF(readmethod);
Ned Deilye7d532f2014-03-27 16:39:58 -0700826 return get_parse_result(self, 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000827 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000828
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000829 bytes_read = readinst(buf, BUF_SIZE, readmethod);
830 if (bytes_read < 0) {
831 Py_DECREF(readmethod);
832 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000833 }
834 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000835 if (PyErr_Occurred()) {
836 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000837 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000838 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000839
Fred Drake0582df92000-07-12 04:49:00 +0000840 if (!rv || bytes_read == 0)
841 break;
842 }
Fred Drake7b6caff2003-07-21 17:05:56 +0000843 Py_XDECREF(readmethod);
Fred Drake71b63ff2002-06-28 22:29:01 +0000844 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000845}
846
Brett Cannond0aeda82014-08-22 14:23:20 -0400847/*[clinic input]
848pyexpat.xmlparser.SetBase
849
850 base: str
851 /
852
853Set the base URL for the parser.
854[clinic start generated code]*/
855
Brett Cannond0aeda82014-08-22 14:23:20 -0400856static PyObject *
857pyexpat_xmlparser_SetBase_impl(xmlparseobject *self, const char *base)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300858/*[clinic end generated code: output=c212ddceb607b539 input=c684e5de895ee1a8]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400859{
Fred Drake0582df92000-07-12 04:49:00 +0000860 if (!XML_SetBase(self->itself, base)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000861 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000862 }
Brett Cannond0aeda82014-08-22 14:23:20 -0400863 Py_RETURN_NONE;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000864}
865
Brett Cannond0aeda82014-08-22 14:23:20 -0400866/*[clinic input]
867pyexpat.xmlparser.GetBase
868
869Return base URL string for the parser.
870[clinic start generated code]*/
871
Brett Cannond0aeda82014-08-22 14:23:20 -0400872static PyObject *
873pyexpat_xmlparser_GetBase_impl(xmlparseobject *self)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300874/*[clinic end generated code: output=2886cb21f9a8739a input=918d71c38009620e]*/
Fred Drake0582df92000-07-12 04:49:00 +0000875{
Fred Drake0582df92000-07-12 04:49:00 +0000876 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000877}
878
Brett Cannond0aeda82014-08-22 14:23:20 -0400879/*[clinic input]
880pyexpat.xmlparser.GetInputContext
881
882Return the untranslated text of the input that caused the current event.
883
884If the event was generated by a large amount of text (such as a start tag
885for an element with many attributes), not all of the text may be available.
886[clinic start generated code]*/
887
Brett Cannond0aeda82014-08-22 14:23:20 -0400888static PyObject *
889pyexpat_xmlparser_GetInputContext_impl(xmlparseobject *self)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300890/*[clinic end generated code: output=a88026d683fc22cc input=034df8712db68379]*/
Fred Drakebd6101c2001-02-14 18:29:45 +0000891{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000892 if (self->in_callback) {
893 int offset, size;
894 const char *buffer
895 = XML_GetInputContext(self->itself, &offset, &size);
Fred Drakebd6101c2001-02-14 18:29:45 +0000896
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000897 if (buffer != NULL)
Christian Heimes72b710a2008-05-26 13:28:38 +0000898 return PyBytes_FromStringAndSize(buffer + offset,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000899 size - offset);
900 else
901 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000902 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000903 else
904 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000905}
Fred Drakebd6101c2001-02-14 18:29:45 +0000906
Brett Cannond0aeda82014-08-22 14:23:20 -0400907/*[clinic input]
908pyexpat.xmlparser.ExternalEntityParserCreate
909
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700910 context: str(accept={str, NoneType})
Brett Cannond0aeda82014-08-22 14:23:20 -0400911 encoding: str = NULL
912 /
913
914Create a parser for parsing an external entity based on the information passed to the ExternalEntityRefHandler.
915[clinic start generated code]*/
916
Brett Cannond0aeda82014-08-22 14:23:20 -0400917static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400918pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
919 const char *context,
920 const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700921/*[clinic end generated code: output=535cda9d7a0fbcd6 input=b906714cc122c322]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400922{
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000923 xmlparseobject *new_parser;
924 int i;
925
Martin v. Löwis894258c2001-09-23 10:20:10 +0000926 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake85d835f2001-02-08 15:39:08 +0000927 if (new_parser == NULL)
928 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +0000929 new_parser->buffer_size = self->buffer_size;
930 new_parser->buffer_used = 0;
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000931 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000932 new_parser->ordered_attributes = self->ordered_attributes;
933 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +0000934 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +0000935 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000936 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000937 encoding);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000938 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000939 new_parser->intern = self->intern;
940 Py_XINCREF(new_parser->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +0000941 PyObject_GC_Track(new_parser);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000942
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000943 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +0200944 new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000945 if (new_parser->buffer == NULL) {
946 Py_DECREF(new_parser);
947 return PyErr_NoMemory();
948 }
949 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000950 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +0000951 Py_DECREF(new_parser);
952 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000953 }
954
955 XML_SetUserData(new_parser->itself, (void *)new_parser);
956
957 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +0000958 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +0000959 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000960
Serhiy Storchaka1a1ff292015-02-16 13:28:22 +0200961 new_parser->handlers = PyMem_New(PyObject *, i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000962 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +0000963 Py_DECREF(new_parser);
964 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000965 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000966 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000967
968 /* then copy handlers from self */
969 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +0000970 PyObject *handler = self->handlers[i];
971 if (handler != NULL) {
972 Py_INCREF(handler);
973 new_parser->handlers[i] = handler;
974 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +0000975 handler_info[i].handler);
976 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000977 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000978 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000979}
980
Brett Cannond0aeda82014-08-22 14:23:20 -0400981/*[clinic input]
982pyexpat.xmlparser.SetParamEntityParsing
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000983
Brett Cannond0aeda82014-08-22 14:23:20 -0400984 flag: int
985 /
986
987Controls parsing of parameter entities (including the external DTD subset).
988
989Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,
990XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and
991XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag
992was successful.
993[clinic start generated code]*/
994
Brett Cannond0aeda82014-08-22 14:23:20 -0400995static PyObject *
996pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject *self, int flag)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +0300997/*[clinic end generated code: output=18668ee8e760d64c input=8aea19b4b15e9af1]*/
Brett Cannond0aeda82014-08-22 14:23:20 -0400998{
999 flag = XML_SetParamEntityParsing(self->itself, flag);
Christian Heimes217cfd12007-12-02 14:31:20 +00001000 return PyLong_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001001}
1002
Martin v. Löwisc847f402003-01-21 11:09:21 +00001003
1004#if XML_COMBINED_VERSION >= 19505
Brett Cannond0aeda82014-08-22 14:23:20 -04001005/*[clinic input]
1006pyexpat.xmlparser.UseForeignDTD
1007
1008 flag: bool = True
1009 /
1010
1011Allows the application to provide an artificial external subset if one is not specified as part of the document instance.
1012
1013This readily allows the use of a 'default' document type controlled by the
1014application, while still getting the advantage of providing document type
1015information to the parser. 'flag' defaults to True if not provided.
1016[clinic start generated code]*/
1017
Brett Cannond0aeda82014-08-22 14:23:20 -04001018static PyObject *
1019pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, int flag)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001020/*[clinic end generated code: output=cfaa9aa50bb0f65c input=78144c519d116a6e]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001021{
Martin v. Löwis069dde22003-01-21 10:58:18 +00001022 enum XML_Error rc;
Brett Cannond0aeda82014-08-22 14:23:20 -04001023
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001024 rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
Martin v. Löwis069dde22003-01-21 10:58:18 +00001025 if (rc != XML_ERROR_NONE) {
1026 return set_error(self, rc);
1027 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001028 Py_RETURN_NONE;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001029}
Martin v. Löwisc847f402003-01-21 11:09:21 +00001030#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001031
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001032static struct PyMethodDef xmlparse_methods[] = {
Brett Cannond0aeda82014-08-22 14:23:20 -04001033 PYEXPAT_XMLPARSER_PARSE_METHODDEF
1034 PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
1035 PYEXPAT_XMLPARSER_SETBASE_METHODDEF
1036 PYEXPAT_XMLPARSER_GETBASE_METHODDEF
1037 PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
1038 PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
1039 PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
Martin v. Löwisc847f402003-01-21 11:09:21 +00001040#if XML_COMBINED_VERSION >= 19505
Brett Cannond0aeda82014-08-22 14:23:20 -04001041 PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
Martin v. Löwisc847f402003-01-21 11:09:21 +00001042#endif
Brett Cannond0aeda82014-08-22 14:23:20 -04001043 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001044};
1045
1046/* ---------- */
1047
1048
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001049
Fred Drake71b63ff2002-06-28 22:29:01 +00001050/* pyexpat international encoding support.
1051 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001052*/
1053
Fred Drake71b63ff2002-06-28 22:29:01 +00001054static int
1055PyUnknownEncodingHandler(void *encodingHandlerData,
1056 const XML_Char *name,
1057 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001058{
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001059 static unsigned char template_buffer[256] = {0};
1060 PyObject* u;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001061 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001062 void *data;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001063 unsigned int kind;
Fred Drake71b63ff2002-06-28 22:29:01 +00001064
Victor Stinner9e09c262013-07-18 23:17:01 +02001065 if (PyErr_Occurred())
1066 return XML_STATUS_ERROR;
1067
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001068 if (template_buffer[1] == 0) {
1069 for (i = 0; i < 256; i++)
1070 template_buffer[i] = i;
Tim Peters63cb99e2001-02-17 18:12:50 +00001071 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001072
1073 u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
Christian Heimesb5821552013-06-29 20:43:13 +02001074 if (u == NULL || PyUnicode_READY(u)) {
Christian Heimes72172422013-06-29 21:49:27 +02001075 Py_XDECREF(u);
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001076 return XML_STATUS_ERROR;
Christian Heimesb5821552013-06-29 20:43:13 +02001077 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001078
1079 if (PyUnicode_GET_LENGTH(u) != 256) {
1080 Py_DECREF(u);
1081 PyErr_SetString(PyExc_ValueError,
1082 "multi-byte encodings are not supported");
1083 return XML_STATUS_ERROR;
1084 }
1085
1086 kind = PyUnicode_KIND(u);
1087 data = PyUnicode_DATA(u);
1088 for (i = 0; i < 256; i++) {
1089 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1090 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1091 info->map[i] = ch;
1092 else
1093 info->map[i] = -1;
1094 }
1095
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001096 info->data = NULL;
1097 info->convert = NULL;
1098 info->release = NULL;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001099 Py_DECREF(u);
1100
1101 return XML_STATUS_OK;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001102}
1103
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001104
1105static PyObject *
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001106newxmlparseobject(const char *encoding, const char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001107{
1108 int i;
1109 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001110
Martin v. Löwis894258c2001-09-23 10:20:10 +00001111 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake0582df92000-07-12 04:49:00 +00001112 if (self == NULL)
1113 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001114
Fred Drake2a3d7db2002-06-28 22:56:48 +00001115 self->buffer = NULL;
1116 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1117 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001118 self->ordered_attributes = 0;
1119 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001120 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001121 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001122 self->handlers = NULL;
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001123 self->intern = intern;
1124 Py_XINCREF(self->intern);
1125 PyObject_GC_Track(self);
1126
Christian Heimesfa535f52013-07-07 17:35:11 +02001127 /* namespace_separator is either NULL or contains one char + \0 */
1128 self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
1129 namespace_separator);
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001130 if (self->itself == NULL) {
1131 PyErr_SetString(PyExc_RuntimeError,
1132 "XML_ParserCreate failed");
1133 Py_DECREF(self);
1134 return NULL;
1135 }
Victor Stinner23ec4b52017-06-15 00:54:36 +02001136#if XML_COMBINED_VERSION >= 20100
1137 /* This feature was added upstream in libexpat 2.1.0. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001138 XML_SetHashSalt(self->itself,
Christian Heimes985ecdc2013-11-20 11:46:18 +01001139 (unsigned long)_Py_HashSecret.expat.hashsalt);
Gregory P. Smith25227712012-03-14 18:10:37 -07001140#endif
Fred Drake0582df92000-07-12 04:49:00 +00001141 XML_SetUserData(self->itself, (void *)self);
Fred Drake7c75bf22002-07-01 14:02:31 +00001142 XML_SetUnknownEncodingHandler(self->itself,
1143 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001144
Fred Drake2a3d7db2002-06-28 22:56:48 +00001145 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001146 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001147
Serhiy Storchaka1a1ff292015-02-16 13:28:22 +02001148 self->handlers = PyMem_New(PyObject *, i);
Fred Drake7c75bf22002-07-01 14:02:31 +00001149 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001150 Py_DECREF(self);
1151 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001152 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001153 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001154
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001155 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001156}
1157
1158
1159static void
Fred Drake0582df92000-07-12 04:49:00 +00001160xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001161{
Fred Drake0582df92000-07-12 04:49:00 +00001162 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001163 PyObject_GC_UnTrack(self);
Fred Drake85d835f2001-02-08 15:39:08 +00001164 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001165 XML_ParserFree(self->itself);
1166 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001167
Fred Drake85d835f2001-02-08 15:39:08 +00001168 if (self->handlers != NULL) {
Serhiy Storchaka1ed017a2015-12-27 15:51:32 +02001169 for (i = 0; handler_info[i].name != NULL; i++)
1170 Py_CLEAR(self->handlers[i]);
Victor Stinnerb6404912013-07-07 16:21:41 +02001171 PyMem_Free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001172 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001173 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001174 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001175 PyMem_Free(self->buffer);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001176 self->buffer = NULL;
1177 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001178 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001179 PyObject_GC_Del(self);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001180}
1181
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001182
1183static PyObject *
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001184xmlparse_handler_getter(xmlparseobject *self, struct HandlerInfo *hi)
Fred Drake71b63ff2002-06-28 22:29:01 +00001185{
Victor Stinner28f468c2018-11-22 13:21:43 +01001186 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1187 int handlernum = (int)(hi - handler_info);
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001188 PyObject *result = self->handlers[handlernum];
1189 if (result == NULL)
1190 result = Py_None;
Fred Drake71b63ff2002-06-28 22:29:01 +00001191 Py_INCREF(result);
1192 return result;
1193}
1194
Fred Drake6f987622000-08-25 18:03:30 +00001195static int
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001196xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *hi)
Fred Drake0582df92000-07-12 04:49:00 +00001197{
Victor Stinner28f468c2018-11-22 13:21:43 +01001198 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1199 int handlernum = (int)(hi - handler_info);
Fred Drake85d835f2001-02-08 15:39:08 +00001200 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001201 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1202 return -1;
1203 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001204 if (handlernum == CharacterData) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001205 /* If we're changing the character data handler, flush all
1206 * cached data with the old handler. Not sure there's a
1207 * "right" thing to do, though, but this probably won't
1208 * happen.
1209 */
1210 if (flush_character_buffer(self) < 0)
1211 return -1;
1212 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001213
1214 xmlhandler c_handler = NULL;
1215 if (v == Py_None) {
1216 /* If this is the character data handler, and a character
1217 data handler is already active, we need to be more
1218 careful. What we can safely do is replace the existing
1219 character data handler callback function with a no-op
1220 function that will refuse to call Python. The downside
1221 is that this doesn't completely remove the character
1222 data handler from the C layer if there's any callback
1223 active, so Expat does a little more work than it
1224 otherwise would, but that's really an odd case. A more
1225 elaborate system of handlers and state could remove the
1226 C handler more effectively. */
1227 if (handlernum == CharacterData && self->in_callback)
1228 c_handler = noop_character_data_handler;
1229 v = NULL;
1230 }
1231 else if (v != NULL) {
1232 Py_INCREF(v);
1233 c_handler = handler_info[handlernum].handler;
1234 }
1235 Py_XSETREF(self->handlers[handlernum], v);
1236 handler_info[handlernum].setter(self->itself, c_handler);
1237 return 0;
1238}
1239
1240#define INT_GETTER(name) \
1241 static PyObject * \
1242 xmlparse_##name##_getter(xmlparseobject *self, void *closure) \
1243 { \
1244 return PyLong_FromLong((long) XML_Get##name(self->itself)); \
1245 }
1246INT_GETTER(ErrorCode)
1247INT_GETTER(ErrorLineNumber)
1248INT_GETTER(ErrorColumnNumber)
1249INT_GETTER(ErrorByteIndex)
1250INT_GETTER(CurrentLineNumber)
1251INT_GETTER(CurrentColumnNumber)
1252INT_GETTER(CurrentByteIndex)
1253
1254#undef INT_GETTER
1255
1256static PyObject *
1257xmlparse_buffer_text_getter(xmlparseobject *self, void *closure)
1258{
1259 return PyBool_FromLong(self->buffer != NULL);
1260}
1261
1262static int
1263xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
1264{
1265 if (v == NULL) {
1266 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1267 return -1;
1268 }
1269 int b = PyObject_IsTrue(v);
1270 if (b < 0)
1271 return -1;
1272 if (b) {
1273 if (self->buffer == NULL) {
1274 self->buffer = PyMem_Malloc(self->buffer_size);
1275 if (self->buffer == NULL) {
1276 PyErr_NoMemory();
1277 return -1;
1278 }
1279 self->buffer_used = 0;
1280 }
1281 }
1282 else if (self->buffer != NULL) {
1283 if (flush_character_buffer(self) < 0)
1284 return -1;
1285 PyMem_Free(self->buffer);
1286 self->buffer = NULL;
1287 }
1288 return 0;
1289}
1290
1291static PyObject *
1292xmlparse_buffer_size_getter(xmlparseobject *self, void *closure)
1293{
1294 return PyLong_FromLong((long) self->buffer_size);
1295}
1296
1297static int
1298xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
1299{
1300 if (v == NULL) {
1301 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1302 return -1;
1303 }
1304 long new_buffer_size;
1305 if (!PyLong_Check(v)) {
1306 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1307 return -1;
1308 }
1309
1310 new_buffer_size = PyLong_AsLong(v);
1311 if (new_buffer_size <= 0) {
1312 if (!PyErr_Occurred())
1313 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1314 return -1;
1315 }
1316
1317 /* trivial case -- no change */
1318 if (new_buffer_size == self->buffer_size) {
Fred Drake6f987622000-08-25 18:03:30 +00001319 return 0;
1320 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001321
1322 /* check maximum */
1323 if (new_buffer_size > INT_MAX) {
1324 char errmsg[100];
1325 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1326 PyErr_SetString(PyExc_ValueError, errmsg);
1327 return -1;
1328 }
1329
1330 if (self->buffer != NULL) {
1331 /* there is already a buffer */
1332 if (self->buffer_used != 0) {
1333 if (flush_character_buffer(self) < 0) {
1334 return -1;
1335 }
1336 }
1337 /* free existing buffer */
1338 PyMem_Free(self->buffer);
1339 }
1340 self->buffer = PyMem_Malloc(new_buffer_size);
1341 if (self->buffer == NULL) {
1342 PyErr_NoMemory();
1343 return -1;
1344 }
1345 self->buffer_size = new_buffer_size;
1346 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001347}
1348
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001349static PyObject *
1350xmlparse_buffer_used_getter(xmlparseobject *self, void *closure)
1351{
1352 return PyLong_FromLong((long) self->buffer_used);
1353}
1354
1355static PyObject *
1356xmlparse_namespace_prefixes_getter(xmlparseobject *self, void *closure)
1357{
1358 return PyBool_FromLong(self->ns_prefixes);
1359}
1360
1361static int
1362xmlparse_namespace_prefixes_setter(xmlparseobject *self, PyObject *v, void *closure)
1363{
1364 if (v == NULL) {
1365 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1366 return -1;
1367 }
1368 int b = PyObject_IsTrue(v);
1369 if (b < 0)
1370 return -1;
1371 self->ns_prefixes = b;
1372 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1373 return 0;
1374}
1375
1376static PyObject *
1377xmlparse_ordered_attributes_getter(xmlparseobject *self, void *closure)
1378{
1379 return PyBool_FromLong(self->ordered_attributes);
1380}
1381
1382static int
1383xmlparse_ordered_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1384{
1385 if (v == NULL) {
1386 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1387 return -1;
1388 }
1389 int b = PyObject_IsTrue(v);
1390 if (b < 0)
1391 return -1;
1392 self->ordered_attributes = b;
1393 return 0;
1394}
1395
1396static PyObject *
1397xmlparse_specified_attributes_getter(xmlparseobject *self, void *closure)
1398{
1399 return PyBool_FromLong((long) self->specified_attributes);
1400}
1401
1402static int
1403xmlparse_specified_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1404{
1405 if (v == NULL) {
1406 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1407 return -1;
1408 }
1409 int b = PyObject_IsTrue(v);
1410 if (b < 0)
1411 return -1;
1412 self->specified_attributes = b;
1413 return 0;
1414}
1415
1416static PyMemberDef xmlparse_members[] = {
1417 {"intern", T_OBJECT, offsetof(xmlparseobject, intern), READONLY, NULL},
1418 {NULL}
1419};
1420
1421#define XMLPARSE_GETTER_DEF(name) \
1422 {#name, (getter)xmlparse_##name##_getter, NULL, NULL},
1423#define XMLPARSE_GETTER_SETTER_DEF(name) \
1424 {#name, (getter)xmlparse_##name##_getter, \
1425 (setter)xmlparse_##name##_setter, NULL},
1426
1427static PyGetSetDef xmlparse_getsetlist[] = {
1428 XMLPARSE_GETTER_DEF(ErrorCode)
1429 XMLPARSE_GETTER_DEF(ErrorLineNumber)
1430 XMLPARSE_GETTER_DEF(ErrorColumnNumber)
1431 XMLPARSE_GETTER_DEF(ErrorByteIndex)
1432 XMLPARSE_GETTER_DEF(CurrentLineNumber)
1433 XMLPARSE_GETTER_DEF(CurrentColumnNumber)
1434 XMLPARSE_GETTER_DEF(CurrentByteIndex)
1435 XMLPARSE_GETTER_SETTER_DEF(buffer_size)
1436 XMLPARSE_GETTER_SETTER_DEF(buffer_text)
1437 XMLPARSE_GETTER_DEF(buffer_used)
1438 XMLPARSE_GETTER_SETTER_DEF(namespace_prefixes)
1439 XMLPARSE_GETTER_SETTER_DEF(ordered_attributes)
1440 XMLPARSE_GETTER_SETTER_DEF(specified_attributes)
1441 {NULL},
1442};
1443
1444#undef XMLPARSE_GETTER_DEF
1445#undef XMLPARSE_GETTER_SETTER_DEF
1446
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001447static int
1448xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1449{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001450 int i;
1451 for (i = 0; handler_info[i].name != NULL; i++)
1452 Py_VISIT(op->handlers[i]);
Fred Drakecde79132001-04-25 16:01:30 +00001453 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001454}
1455
1456static int
1457xmlparse_clear(xmlparseobject *op)
1458{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001459 clear_handlers(op, 0);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001460 Py_CLEAR(op->intern);
Fred Drakecde79132001-04-25 16:01:30 +00001461 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001462}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001463
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001464PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001465
1466static PyTypeObject Xmlparsetype = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001467 PyVarObject_HEAD_INIT(NULL, 0)
1468 "pyexpat.xmlparser", /*tp_name*/
Antoine Pitrou23683ef2011-01-04 00:00:31 +00001469 sizeof(xmlparseobject), /*tp_basicsize*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001470 0, /*tp_itemsize*/
1471 /* methods */
1472 (destructor)xmlparse_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001473 0, /*tp_vectorcall_offset*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001474 0, /*tp_getattr*/
Alexander Belopolskye239d232010-12-08 23:31:48 +00001475 0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001476 0, /*tp_as_async*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001477 (reprfunc)0, /*tp_repr*/
1478 0, /*tp_as_number*/
1479 0, /*tp_as_sequence*/
1480 0, /*tp_as_mapping*/
1481 (hashfunc)0, /*tp_hash*/
1482 (ternaryfunc)0, /*tp_call*/
1483 (reprfunc)0, /*tp_str*/
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001484 (getattrofunc)0, /* tp_getattro */
1485 (setattrofunc)0, /* tp_setattro */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001486 0, /* tp_as_buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001487 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001488 Xmlparsetype__doc__, /* tp_doc - Documentation string */
1489 (traverseproc)xmlparse_traverse, /* tp_traverse */
1490 (inquiry)xmlparse_clear, /* tp_clear */
1491 0, /* tp_richcompare */
1492 0, /* tp_weaklistoffset */
1493 0, /* tp_iter */
1494 0, /* tp_iternext */
1495 xmlparse_methods, /* tp_methods */
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001496 xmlparse_members, /* tp_members */
1497 xmlparse_getsetlist, /* tp_getset */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001498};
1499
1500/* End of code for xmlparser objects */
1501/* -------------------------------------------------------- */
1502
Brett Cannond0aeda82014-08-22 14:23:20 -04001503/*[clinic input]
1504pyexpat.ParserCreate
1505
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001506 encoding: str(accept={str, NoneType}) = None
1507 namespace_separator: str(accept={str, NoneType}) = None
Brett Cannond0aeda82014-08-22 14:23:20 -04001508 intern: object = NULL
1509
1510Return a new XML parser object.
1511[clinic start generated code]*/
1512
Brett Cannond0aeda82014-08-22 14:23:20 -04001513static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001514pyexpat_ParserCreate_impl(PyObject *module, const char *encoding,
Larry Hastings89964c42015-04-14 18:07:59 -04001515 const char *namespace_separator, PyObject *intern)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001516/*[clinic end generated code: output=295c0cf01ab1146c input=e8da8e8d7122cb5d]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001517{
Fred Drakeb91a36b2002-06-27 19:40:48 +00001518 PyObject *result;
1519 int intern_decref = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001520
Fred Drakecde79132001-04-25 16:01:30 +00001521 if (namespace_separator != NULL
1522 && strlen(namespace_separator) > 1) {
1523 PyErr_SetString(PyExc_ValueError,
1524 "namespace_separator must be at most one"
1525 " character, omitted, or None");
1526 return NULL;
1527 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001528 /* Explicitly passing None means no interning is desired.
1529 Not passing anything means that a new dictionary is used. */
1530 if (intern == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001531 intern = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001532 else if (intern == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001533 intern = PyDict_New();
1534 if (!intern)
1535 return NULL;
1536 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001537 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001538 else if (!PyDict_Check(intern)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001539 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1540 return NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001541 }
1542
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001543 result = newxmlparseobject(encoding, namespace_separator, intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001544 if (intern_decref) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001545 Py_DECREF(intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001546 }
1547 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001548}
1549
Brett Cannond0aeda82014-08-22 14:23:20 -04001550/*[clinic input]
1551pyexpat.ErrorString
1552
1553 code: long
1554 /
1555
1556Returns string error for given number.
1557[clinic start generated code]*/
1558
Brett Cannond0aeda82014-08-22 14:23:20 -04001559static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001560pyexpat_ErrorString_impl(PyObject *module, long code)
1561/*[clinic end generated code: output=2feae50d166f2174 input=cc67de010d9e62b3]*/
Brett Cannond0aeda82014-08-22 14:23:20 -04001562{
Fred Drake0582df92000-07-12 04:49:00 +00001563 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001564}
1565
1566/* List of methods defined in the module */
1567
1568static struct PyMethodDef pyexpat_methods[] = {
Brett Cannond0aeda82014-08-22 14:23:20 -04001569 PYEXPAT_PARSERCREATE_METHODDEF
1570 PYEXPAT_ERRORSTRING_METHODDEF
1571 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001572};
1573
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001574/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001575
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001576PyDoc_STRVAR(pyexpat_module_documentation,
1577"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001578
Fred Drakecde79132001-04-25 16:01:30 +00001579/* Initialization function for the module */
1580
1581#ifndef MODULE_NAME
1582#define MODULE_NAME "pyexpat"
1583#endif
1584
1585#ifndef MODULE_INITFUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001586#define MODULE_INITFUNC PyInit_pyexpat
Fred Drakecde79132001-04-25 16:01:30 +00001587#endif
1588
Martin v. Löwis1a214512008-06-11 05:26:20 +00001589static struct PyModuleDef pyexpatmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001590 PyModuleDef_HEAD_INIT,
1591 MODULE_NAME,
1592 pyexpat_module_documentation,
1593 -1,
1594 pyexpat_methods,
1595 NULL,
1596 NULL,
1597 NULL,
1598 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001599};
1600
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001601static int init_handler_descrs(void)
1602{
1603 int i;
1604 assert(!PyType_HasFeature(&Xmlparsetype, Py_TPFLAGS_VALID_VERSION_TAG));
1605 for (i = 0; handler_info[i].name != NULL; i++) {
1606 struct HandlerInfo *hi = &handler_info[i];
1607 hi->getset.name = hi->name;
1608 hi->getset.get = (getter)xmlparse_handler_getter;
1609 hi->getset.set = (setter)xmlparse_handler_setter;
1610 hi->getset.closure = &handler_info[i];
1611
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001612 PyObject *descr = PyDescr_NewGetSet(&Xmlparsetype, &hi->getset);
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001613 if (descr == NULL)
1614 return -1;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001615
1616 if (PyDict_GetItemWithError(Xmlparsetype.tp_dict, PyDescr_NAME(descr))) {
1617 Py_DECREF(descr);
1618 continue;
1619 }
1620 else if (PyErr_Occurred()) {
1621 Py_DECREF(descr);
1622 return -1;
1623 }
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001624 if (PyDict_SetItem(Xmlparsetype.tp_dict, PyDescr_NAME(descr), descr) < 0) {
1625 Py_DECREF(descr);
1626 return -1;
1627 }
1628 Py_DECREF(descr);
1629 }
1630 return 0;
1631}
1632
Martin v. Löwis069dde22003-01-21 10:58:18 +00001633PyMODINIT_FUNC
1634MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001635{
1636 PyObject *m, *d;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001637 PyObject *errmod_name = PyUnicode_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001638 PyObject *errors_module;
1639 PyObject *modelmod_name;
1640 PyObject *model_module;
Georg Brandlb4dac712010-10-15 14:46:48 +00001641 PyObject *tmpnum, *tmpstr;
1642 PyObject *codes_dict;
1643 PyObject *rev_codes_dict;
1644 int res;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001645 static struct PyExpat_CAPI capi;
Georg Brandlb4dac712010-10-15 14:46:48 +00001646 PyObject *capi_object;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001647
Fred Drake6f987622000-08-25 18:03:30 +00001648 if (errmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001649 return NULL;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001650 modelmod_name = PyUnicode_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001651 if (modelmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001652 return NULL;
Fred Drake6f987622000-08-25 18:03:30 +00001653
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001654 if (PyType_Ready(&Xmlparsetype) < 0 || init_handler_descrs() < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001655 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001656
Fred Drake0582df92000-07-12 04:49:00 +00001657 /* Create the module and add the functions */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001658 m = PyModule_Create(&pyexpatmodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001659 if (m == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001660 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001661
Fred Drake0582df92000-07-12 04:49:00 +00001662 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001663 if (ErrorObject == NULL) {
1664 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001665 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001666 if (ErrorObject == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001667 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001668 }
1669 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001670 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001671 Py_INCREF(ErrorObject);
1672 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001673 Py_INCREF(&Xmlparsetype);
1674 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001675
Fred Drake738293d2000-12-21 17:25:07 +00001676 PyModule_AddStringConstant(m, "EXPAT_VERSION",
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001677 XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001678 {
1679 XML_Expat_Version info = XML_ExpatVersionInfo();
1680 PyModule_AddObject(m, "version_info",
1681 Py_BuildValue("(iii)", info.major,
1682 info.minor, info.micro));
1683 }
Fred Drake0582df92000-07-12 04:49:00 +00001684 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001685 compiled, this should check and set native_encoding
1686 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001687 */
Fred Drake93adb692000-09-23 04:55:48 +00001688 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001689
Fred Drake93adb692000-09-23 04:55:48 +00001690 d = PyModule_GetDict(m);
Christian Heimes7a5457b2016-09-09 00:13:35 +02001691 if (d == NULL) {
1692 Py_DECREF(m);
1693 return NULL;
1694 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001695 errors_module = PyDict_GetItemWithError(d, errmod_name);
1696 if (errors_module == NULL && !PyErr_Occurred()) {
Fred Drakecde79132001-04-25 16:01:30 +00001697 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001698 if (errors_module != NULL) {
Eric Snow3f9eee62017-09-15 16:35:20 -06001699 _PyImport_SetModule(errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001700 /* gives away the reference to errors_module */
1701 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001702 }
1703 }
Fred Drake6f987622000-08-25 18:03:30 +00001704 Py_DECREF(errmod_name);
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001705 model_module = PyDict_GetItemWithError(d, modelmod_name);
1706 if (model_module == NULL && !PyErr_Occurred()) {
Fred Drakecde79132001-04-25 16:01:30 +00001707 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001708 if (model_module != NULL) {
Eric Snow3f9eee62017-09-15 16:35:20 -06001709 _PyImport_SetModule(modelmod_name, model_module);
Fred Drake85d835f2001-02-08 15:39:08 +00001710 /* gives away the reference to model_module */
1711 PyModule_AddObject(m, "model", model_module);
1712 }
1713 }
1714 Py_DECREF(modelmod_name);
Christian Heimes7a5457b2016-09-09 00:13:35 +02001715 if (errors_module == NULL || model_module == NULL) {
Fred Drake85d835f2001-02-08 15:39:08 +00001716 /* Don't core dump later! */
Christian Heimes7a5457b2016-09-09 00:13:35 +02001717 Py_DECREF(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001718 return NULL;
Christian Heimes7a5457b2016-09-09 00:13:35 +02001719 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001720
Martin v. Löwisc847f402003-01-21 11:09:21 +00001721#if XML_COMBINED_VERSION > 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001722 {
1723 const XML_Feature *features = XML_GetFeatureList();
1724 PyObject *list = PyList_New(0);
1725 if (list == NULL)
1726 /* just ignore it */
1727 PyErr_Clear();
1728 else {
1729 int i = 0;
1730 for (; features[i].feature != XML_FEATURE_END; ++i) {
1731 int ok;
1732 PyObject *item = Py_BuildValue("si", features[i].name,
1733 features[i].value);
1734 if (item == NULL) {
1735 Py_DECREF(list);
1736 list = NULL;
1737 break;
1738 }
1739 ok = PyList_Append(list, item);
1740 Py_DECREF(item);
1741 if (ok < 0) {
1742 PyErr_Clear();
1743 break;
1744 }
1745 }
1746 if (list != NULL)
1747 PyModule_AddObject(m, "features", list);
1748 }
1749 }
Martin v. Löwisc847f402003-01-21 11:09:21 +00001750#endif
Fred Drake6f987622000-08-25 18:03:30 +00001751
Georg Brandlb4dac712010-10-15 14:46:48 +00001752 codes_dict = PyDict_New();
1753 rev_codes_dict = PyDict_New();
1754 if (codes_dict == NULL || rev_codes_dict == NULL) {
1755 Py_XDECREF(codes_dict);
1756 Py_XDECREF(rev_codes_dict);
1757 return NULL;
1758 }
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001759
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001760#define MYCONST(name) \
Georg Brandlb4dac712010-10-15 14:46:48 +00001761 if (PyModule_AddStringConstant(errors_module, #name, \
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001762 XML_ErrorString(name)) < 0) \
Georg Brandlb4dac712010-10-15 14:46:48 +00001763 return NULL; \
1764 tmpnum = PyLong_FromLong(name); \
1765 if (tmpnum == NULL) return NULL; \
1766 res = PyDict_SetItemString(codes_dict, \
1767 XML_ErrorString(name), tmpnum); \
1768 if (res < 0) return NULL; \
1769 tmpstr = PyUnicode_FromString(XML_ErrorString(name)); \
1770 if (tmpstr == NULL) return NULL; \
1771 res = PyDict_SetItem(rev_codes_dict, tmpnum, tmpstr); \
1772 Py_DECREF(tmpstr); \
1773 Py_DECREF(tmpnum); \
1774 if (res < 0) return NULL; \
Fred Drake7bd9f412000-07-04 23:51:31 +00001775
Fred Drake0582df92000-07-12 04:49:00 +00001776 MYCONST(XML_ERROR_NO_MEMORY);
1777 MYCONST(XML_ERROR_SYNTAX);
1778 MYCONST(XML_ERROR_NO_ELEMENTS);
1779 MYCONST(XML_ERROR_INVALID_TOKEN);
1780 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1781 MYCONST(XML_ERROR_PARTIAL_CHAR);
1782 MYCONST(XML_ERROR_TAG_MISMATCH);
1783 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1784 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1785 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1786 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1787 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1788 MYCONST(XML_ERROR_ASYNC_ENTITY);
1789 MYCONST(XML_ERROR_BAD_CHAR_REF);
1790 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1791 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1792 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1793 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1794 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001795 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1796 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1797 MYCONST(XML_ERROR_NOT_STANDALONE);
Fred Drake283b6702004-08-04 22:28:16 +00001798 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1799 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1800 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1801 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1802 /* Added in Expat 1.95.7. */
1803 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1804 /* Added in Expat 1.95.8. */
1805 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1806 MYCONST(XML_ERROR_INCOMPLETE_PE);
1807 MYCONST(XML_ERROR_XML_DECL);
1808 MYCONST(XML_ERROR_TEXT_DECL);
1809 MYCONST(XML_ERROR_PUBLICID);
1810 MYCONST(XML_ERROR_SUSPENDED);
1811 MYCONST(XML_ERROR_NOT_SUSPENDED);
1812 MYCONST(XML_ERROR_ABORTED);
1813 MYCONST(XML_ERROR_FINISHED);
1814 MYCONST(XML_ERROR_SUSPEND_PE);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001815
Georg Brandlb4dac712010-10-15 14:46:48 +00001816 if (PyModule_AddStringConstant(errors_module, "__doc__",
1817 "Constants used to describe "
1818 "error conditions.") < 0)
1819 return NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001820
Georg Brandlb4dac712010-10-15 14:46:48 +00001821 if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0)
1822 return NULL;
1823 if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0)
1824 return NULL;
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001825
Fred Drake93adb692000-09-23 04:55:48 +00001826#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001827
Fred Drake85d835f2001-02-08 15:39:08 +00001828#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001829 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1830 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1831 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00001832#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001833
Fred Drake85d835f2001-02-08 15:39:08 +00001834#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1835 PyModule_AddStringConstant(model_module, "__doc__",
1836 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001837
Fred Drake85d835f2001-02-08 15:39:08 +00001838 MYCONST(XML_CTYPE_EMPTY);
1839 MYCONST(XML_CTYPE_ANY);
1840 MYCONST(XML_CTYPE_MIXED);
1841 MYCONST(XML_CTYPE_NAME);
1842 MYCONST(XML_CTYPE_CHOICE);
1843 MYCONST(XML_CTYPE_SEQ);
1844
1845 MYCONST(XML_CQUANT_NONE);
1846 MYCONST(XML_CQUANT_OPT);
1847 MYCONST(XML_CQUANT_REP);
1848 MYCONST(XML_CQUANT_PLUS);
1849#undef MYCONST
Fredrik Lundhc3345042005-12-13 19:49:55 +00001850
1851 /* initialize pyexpat dispatch table */
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001852 capi.size = sizeof(capi);
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001853 capi.magic = PyExpat_CAPI_MAGIC;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001854 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
1855 capi.MINOR_VERSION = XML_MINOR_VERSION;
1856 capi.MICRO_VERSION = XML_MICRO_VERSION;
1857 capi.ErrorString = XML_ErrorString;
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001858 capi.GetErrorCode = XML_GetErrorCode;
1859 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
1860 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001861 capi.Parse = XML_Parse;
1862 capi.ParserCreate_MM = XML_ParserCreate_MM;
1863 capi.ParserFree = XML_ParserFree;
1864 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
1865 capi.SetCommentHandler = XML_SetCommentHandler;
1866 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1867 capi.SetElementHandler = XML_SetElementHandler;
1868 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1869 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1870 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1871 capi.SetUserData = XML_SetUserData;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03001872 capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03001873 capi.SetEncoding = XML_SetEncoding;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001874 capi.DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
Christian Heimescb5778f2018-09-18 14:38:58 +02001875#if XML_COMBINED_VERSION >= 20100
1876 capi.SetHashSalt = XML_SetHashSalt;
1877#else
1878 capi.SetHashSalt = NULL;
1879#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001880
Benjamin Petersonb173f782009-05-05 22:31:58 +00001881 /* export using capsule */
1882 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001883 if (capi_object)
1884 PyModule_AddObject(m, "expat_CAPI", capi_object);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001885 return m;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001886}
1887
Fred Drake6f987622000-08-25 18:03:30 +00001888static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001889clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00001890{
Fred Drakecde79132001-04-25 16:01:30 +00001891 int i = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001892
Fred Drake71b63ff2002-06-28 22:29:01 +00001893 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001894 if (initial)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001895 self->handlers[i] = NULL;
1896 else {
Serhiy Storchaka1ed017a2015-12-27 15:51:32 +02001897 Py_CLEAR(self->handlers[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001898 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00001899 }
Fred Drakecde79132001-04-25 16:01:30 +00001900 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001901}
1902
Tim Peters0c322792002-07-17 16:49:03 +00001903static struct HandlerInfo handler_info[] = {
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001904
1905#define HANDLER_INFO(name) \
1906 {#name, (xmlhandlersetter)XML_Set##name, (xmlhandler)my_##name},
1907
1908 HANDLER_INFO(StartElementHandler)
1909 HANDLER_INFO(EndElementHandler)
1910 HANDLER_INFO(ProcessingInstructionHandler)
1911 HANDLER_INFO(CharacterDataHandler)
1912 HANDLER_INFO(UnparsedEntityDeclHandler)
1913 HANDLER_INFO(NotationDeclHandler)
1914 HANDLER_INFO(StartNamespaceDeclHandler)
1915 HANDLER_INFO(EndNamespaceDeclHandler)
1916 HANDLER_INFO(CommentHandler)
1917 HANDLER_INFO(StartCdataSectionHandler)
1918 HANDLER_INFO(EndCdataSectionHandler)
1919 HANDLER_INFO(DefaultHandler)
1920 HANDLER_INFO(DefaultHandlerExpand)
1921 HANDLER_INFO(NotStandaloneHandler)
1922 HANDLER_INFO(ExternalEntityRefHandler)
1923 HANDLER_INFO(StartDoctypeDeclHandler)
1924 HANDLER_INFO(EndDoctypeDeclHandler)
1925 HANDLER_INFO(EntityDeclHandler)
1926 HANDLER_INFO(XmlDeclHandler)
1927 HANDLER_INFO(ElementDeclHandler)
1928 HANDLER_INFO(AttlistDeclHandler)
Martin v. Löwisc847f402003-01-21 11:09:21 +00001929#if XML_COMBINED_VERSION >= 19504
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001930 HANDLER_INFO(SkippedEntityHandler)
Martin v. Löwisc847f402003-01-21 11:09:21 +00001931#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001932
Serhiy Storchaka55f82492018-10-19 18:00:51 +03001933#undef HANDLER_INFO
1934
Fred Drake0582df92000-07-12 04:49:00 +00001935 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001936};