/*--------------------------------------------------------------------
 * Licensed to PSF under a Contributor Agreement.
 * See http://www.python.org/psf/license for licensing details.
 *
 * _elementtree - C accelerator for xml.etree.ElementTree
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 *--------------------------------------------------------------------
 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
/* -------------------------------------------------------------------- */
/* configuration */

/* Number of child pointers stored inline in an element's extra block,
   i.e. how many children an element can hold without extra memory
   allocations.

   For best performance, choose a value so that 80-90% of all nodes have
   no more than this many children.  Set it to zero to minimize the size
   of the element structure itself (this only helps if you have lots of
   leaf nodes with attributes).

   Also note that pymalloc always allocates blocks in multiples of eight
   bytes.  For the current C version of ElementTree, this means that the
   number of children should be an even number, at least on 32-bit
   platforms. */
#define STATIC_CHILDREN 4

/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  With the "#if 0" branch enabled, ALLOC and
   RELEASE keep a running byte count in `memory` and print it together with
   the given comment; by default both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks */
/* LOCAL(type) introduces a file-private (static) definition of the given
   type; under MSVC it additionally asks for inlining and __fastcall. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
55
/* Macros used to store a 'join' flag in the lowest bit of a string object
   pointer.  Note that every use of text and tail as an object pointer must
   be wrapped in JOIN_OBJ; see the comments in the ElementObject definition
   for more info.

   JOIN_OBJ(p)       - p with the flag bit masked off, as a PyObject*
   JOIN_GET(p)       - the flag bit stored in p (0 or 1)
   JOIN_SET(p, flag) - the object pointer of p with `flag` or-ed in */
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
Ronald Oussoren138d0802013-07-19 11:11:25 +020083/* Types defined by this extension */
84static PyTypeObject Element_Type;
85static PyTypeObject ElementIter_Type;
86static PyTypeObject TreeBuilder_Type;
87static PyTypeObject XMLParser_Type;
88
89
Eli Bendersky532d03e2013-08-10 08:00:39 -070090/* Per-module state; PEP 3121 */
91typedef struct {
92 PyObject *parseerror_obj;
93 PyObject *deepcopy_obj;
94 PyObject *elementpath_obj;
Stefan Behnel43851a22019-05-01 21:20:38 +020095 PyObject *comment_factory;
96 PyObject *pi_factory;
Eli Bendersky532d03e2013-08-10 08:00:39 -070097} elementtreestate;
98
99static struct PyModuleDef elementtreemodule;
100
101/* Given a module object (assumed to be _elementtree), get its per-module
102 * state.
103 */
104#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
105
106/* Find the module instance imported in the currently running sub-interpreter
107 * and get its state.
108 */
109#define ET_STATE_GLOBAL \
110 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
111
112static int
113elementtree_clear(PyObject *m)
114{
115 elementtreestate *st = ET_STATE(m);
116 Py_CLEAR(st->parseerror_obj);
117 Py_CLEAR(st->deepcopy_obj);
118 Py_CLEAR(st->elementpath_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +0200119 Py_CLEAR(st->comment_factory);
120 Py_CLEAR(st->pi_factory);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700121 return 0;
122}
123
124static int
125elementtree_traverse(PyObject *m, visitproc visit, void *arg)
126{
127 elementtreestate *st = ET_STATE(m);
128 Py_VISIT(st->parseerror_obj);
129 Py_VISIT(st->deepcopy_obj);
130 Py_VISIT(st->elementpath_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +0200131 Py_VISIT(st->comment_factory);
132 Py_VISIT(st->pi_factory);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700133 return 0;
134}
135
136static void
137elementtree_free(void *m)
138{
139 elementtree_clear((PyObject *)m);
140}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000141
142/* helpers */
143
144LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145list_join(PyObject* list)
146{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300147 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000148 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 PyObject* result;
150
Antoine Pitrouc1948842012-10-01 23:40:37 +0200151 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000152 if (!joiner)
153 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200154 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000155 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000156 return result;
157}
158
Eli Bendersky48d358b2012-05-30 17:57:50 +0300159/* Is the given object an empty dictionary?
160*/
161static int
162is_empty_dict(PyObject *obj)
163{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200164 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300165}
166
167
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000168/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200169/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000170
171typedef struct {
172
173 /* attributes (a dictionary object), or None if no attributes */
174 PyObject* attrib;
175
176 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200177 Py_ssize_t length; /* actual number of items */
178 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179
180 /* this either points to _children or to a malloced buffer */
181 PyObject* *children;
182
183 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100184
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000185} ElementObjectExtra;
186
187typedef struct {
188 PyObject_HEAD
189
190 /* element tag (a string). */
191 PyObject* tag;
192
193 /* text before first child. note that this is a tagged pointer;
194 use JOIN_OBJ to get the object pointer. the join flag is used
195 to distinguish lists created by the tree builder from lists
196 assigned to the attribute by application code; the former
197 should be joined before being returned to the user, the latter
198 should be left intact. */
199 PyObject* text;
200
201 /* text after this element, in parent. note that this is a tagged
202 pointer; use JOIN_OBJ to get the object pointer. */
203 PyObject* tail;
204
205 ElementObjectExtra* extra;
206
Eli Benderskyebf37a22012-04-03 22:02:37 +0300207 PyObject *weakreflist; /* For tp_weaklistoffset */
208
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209} ElementObject;
210
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000211
/* Element_Check accepts whatever PyObject_TypeCheck accepts for
   Element_Type; Element_CheckExact requires the type to be exactly
   Element_Type. */
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
214
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215
216/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200217/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000218
219LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200220create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000221{
222 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200223 if (!self->extra) {
224 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000225 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200226 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000227
228 if (!attrib)
229 attrib = Py_None;
230
231 Py_INCREF(attrib);
232 self->extra->attrib = attrib;
233
234 self->extra->length = 0;
235 self->extra->allocated = STATIC_CHILDREN;
236 self->extra->children = self->extra->_children;
237
238 return 0;
239}
240
241LOCAL(void)
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300242dealloc_extra(ElementObjectExtra *extra)
243{
244 Py_ssize_t i;
245
246 if (!extra)
247 return;
248
249 Py_DECREF(extra->attrib);
250
251 for (i = 0; i < extra->length; i++)
252 Py_DECREF(extra->children[i]);
253
254 if (extra->children != extra->_children)
255 PyObject_Free(extra->children);
256
257 PyObject_Free(extra);
258}
259
260LOCAL(void)
261clear_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000262{
Eli Bendersky08b85292012-04-04 15:55:07 +0300263 ElementObjectExtra *myextra;
Eli Bendersky08b85292012-04-04 15:55:07 +0300264
Eli Benderskyebf37a22012-04-03 22:02:37 +0300265 if (!self->extra)
266 return;
267
268 /* Avoid DECREFs calling into this code again (cycles, etc.)
269 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300270 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300271 self->extra = NULL;
272
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300273 dealloc_extra(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000274}
275
Eli Bendersky092af1f2012-03-04 07:14:03 +0200276/* Convenience internal function to create new Element objects with the given
277 * tag and attributes.
278*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000279LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200280create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000281{
282 ElementObject* self;
283
Eli Bendersky0192ba32012-03-30 16:38:33 +0300284 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000285 if (self == NULL)
286 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000287 self->extra = NULL;
288
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000289 Py_INCREF(tag);
290 self->tag = tag;
291
292 Py_INCREF(Py_None);
293 self->text = Py_None;
294
295 Py_INCREF(Py_None);
296 self->tail = Py_None;
297
Eli Benderskyebf37a22012-04-03 22:02:37 +0300298 self->weakreflist = NULL;
299
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200300 ALLOC(sizeof(ElementObject), "create element");
301 PyObject_GC_Track(self);
302
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200303 if (attrib != Py_None && !is_empty_dict(attrib)) {
304 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200305 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200306 return NULL;
307 }
308 }
309
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000310 return (PyObject*) self;
311}
312
Eli Bendersky092af1f2012-03-04 07:14:03 +0200313static PyObject *
314element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
315{
316 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
317 if (e != NULL) {
318 Py_INCREF(Py_None);
319 e->tag = Py_None;
320
321 Py_INCREF(Py_None);
322 e->text = Py_None;
323
324 Py_INCREF(Py_None);
325 e->tail = Py_None;
326
327 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300328 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200329 }
330 return (PyObject *)e;
331}
332
Eli Bendersky737b1732012-05-29 06:02:56 +0300333/* Helper function for extracting the attrib dictionary from a keywords dict.
334 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800335 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300336 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700337 *
338 * Return a dictionary with the content of kwds merged into the content of
339 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300340 */
341static PyObject*
342get_attrib_from_keywords(PyObject *kwds)
343{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700344 PyObject *attrib_str = PyUnicode_FromString("attrib");
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600345 if (attrib_str == NULL) {
346 return NULL;
347 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200348 PyObject *attrib = PyDict_GetItemWithError(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300349
350 if (attrib) {
351 /* If attrib was found in kwds, copy its value and remove it from
352 * kwds
353 */
354 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700355 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300356 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
357 Py_TYPE(attrib)->tp_name);
358 return NULL;
359 }
360 attrib = PyDict_Copy(attrib);
Serhiy Storchaka8905fcc2018-12-11 08:38:03 +0200361 if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
362 Py_DECREF(attrib);
363 attrib = NULL;
364 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200365 }
366 else if (!PyErr_Occurred()) {
Eli Bendersky737b1732012-05-29 06:02:56 +0300367 attrib = PyDict_New();
368 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700369
370 Py_DECREF(attrib_str);
371
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600372 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
373 Py_DECREF(attrib);
374 return NULL;
375 }
Eli Bendersky737b1732012-05-29 06:02:56 +0300376 return attrib;
377}
378
Serhiy Storchakacb985562015-05-04 15:32:48 +0300379/*[clinic input]
380module _elementtree
381class _elementtree.Element "ElementObject *" "&Element_Type"
382class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
383class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
384[clinic start generated code]*/
385/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
386
Eli Bendersky092af1f2012-03-04 07:14:03 +0200387static int
388element_init(PyObject *self, PyObject *args, PyObject *kwds)
389{
390 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 PyObject *attrib = NULL;
392 ElementObject *self_elem;
393
394 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
395 return -1;
396
Eli Bendersky737b1732012-05-29 06:02:56 +0300397 if (attrib) {
398 /* attrib passed as positional arg */
399 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200400 if (!attrib)
401 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300402 if (kwds) {
403 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300405 return -1;
406 }
407 }
408 } else if (kwds) {
409 /* have keywords args */
410 attrib = get_attrib_from_keywords(kwds);
411 if (!attrib)
412 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413 }
414
415 self_elem = (ElementObject *)self;
416
Antoine Pitrouc1948842012-10-01 23:40:37 +0200417 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200418 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200419 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200420 return -1;
421 }
422 }
423
Eli Bendersky48d358b2012-05-30 17:57:50 +0300424 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200425 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426
427 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200428 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300429 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200430
Eli Bendersky092af1f2012-03-04 07:14:03 +0200431 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300432 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200433
Eli Bendersky092af1f2012-03-04 07:14:03 +0200434 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300435 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200436
437 return 0;
438}
439
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000440LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200441element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000442{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200443 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000444 PyObject* *children;
445
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300446 assert(extra >= 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447 /* make sure self->children can hold the given number of extra
448 elements. set an exception and return -1 if allocation failed */
449
Victor Stinner5f0af232013-07-11 23:01:36 +0200450 if (!self->extra) {
451 if (create_extra(self, NULL) < 0)
452 return -1;
453 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000454
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200455 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000456
457 if (size > self->extra->allocated) {
458 /* use Python 2.4's list growth strategy */
459 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000460 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100461 * which needs at least 4 bytes.
462 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000463 * be safe.
464 */
465 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200466 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
467 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000468 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000469 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100470 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000471 * false alarm always assume at least one child to be safe.
472 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000473 children = PyObject_Realloc(self->extra->children,
474 size * sizeof(PyObject*));
475 if (!children)
476 goto nomemory;
477 } else {
478 children = PyObject_Malloc(size * sizeof(PyObject*));
479 if (!children)
480 goto nomemory;
481 /* copy existing children from static area to malloc buffer */
482 memcpy(children, self->extra->children,
483 self->extra->length * sizeof(PyObject*));
484 }
485 self->extra->children = children;
486 self->extra->allocated = size;
487 }
488
489 return 0;
490
491 nomemory:
492 PyErr_NoMemory();
493 return -1;
494}
495
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300496LOCAL(void)
497raise_type_error(PyObject *element)
498{
499 PyErr_Format(PyExc_TypeError,
500 "expected an Element, not \"%.200s\"",
501 Py_TYPE(element)->tp_name);
502}
503
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000504LOCAL(int)
505element_add_subelement(ElementObject* self, PyObject* element)
506{
507 /* add a child element to a parent */
508
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300509 if (!Element_Check(element)) {
510 raise_type_error(element);
511 return -1;
512 }
513
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000514 if (element_resize(self, 1) < 0)
515 return -1;
516
517 Py_INCREF(element);
518 self->extra->children[self->extra->length] = element;
519
520 self->extra->length++;
521
522 return 0;
523}
524
525LOCAL(PyObject*)
526element_get_attrib(ElementObject* self)
527{
528 /* return borrowed reference to attrib dictionary */
529 /* note: this function assumes that the extra section exists */
530
531 PyObject* res = self->extra->attrib;
532
533 if (res == Py_None) {
534 /* create missing dictionary */
535 res = PyDict_New();
536 if (!res)
537 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200538 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000539 self->extra->attrib = res;
540 }
541
542 return res;
543}
544
545LOCAL(PyObject*)
546element_get_text(ElementObject* self)
547{
548 /* return borrowed reference to text attribute */
549
Serhiy Storchaka576def02017-03-30 09:47:31 +0300550 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000551
552 if (JOIN_GET(res)) {
553 res = JOIN_OBJ(res);
554 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300555 PyObject *tmp = list_join(res);
556 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000557 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300558 self->text = tmp;
559 Py_DECREF(res);
560 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561 }
562 }
563
564 return res;
565}
566
567LOCAL(PyObject*)
568element_get_tail(ElementObject* self)
569{
570 /* return borrowed reference to text attribute */
571
Serhiy Storchaka576def02017-03-30 09:47:31 +0300572 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573
574 if (JOIN_GET(res)) {
575 res = JOIN_OBJ(res);
576 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300577 PyObject *tmp = list_join(res);
578 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000579 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300580 self->tail = tmp;
581 Py_DECREF(res);
582 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000583 }
584 }
585
586 return res;
587}
588
589static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300590subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000591{
592 PyObject* elem;
593
594 ElementObject* parent;
595 PyObject* tag;
596 PyObject* attrib = NULL;
597 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
598 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800599 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000600 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800601 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000602
Eli Bendersky737b1732012-05-29 06:02:56 +0300603 if (attrib) {
604 /* attrib passed as positional arg */
605 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000606 if (!attrib)
607 return NULL;
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600608 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
609 Py_DECREF(attrib);
610 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300611 }
612 } else if (kwds) {
613 /* have keyword args */
614 attrib = get_attrib_from_keywords(kwds);
615 if (!attrib)
616 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000617 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300618 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000619 Py_INCREF(Py_None);
620 attrib = Py_None;
621 }
622
Eli Bendersky092af1f2012-03-04 07:14:03 +0200623 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000624 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200625 if (elem == NULL)
626 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000627
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000628 if (element_add_subelement(parent, elem) < 0) {
629 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000630 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000631 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000632
633 return elem;
634}
635
Eli Bendersky0192ba32012-03-30 16:38:33 +0300636static int
637element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
638{
639 Py_VISIT(self->tag);
640 Py_VISIT(JOIN_OBJ(self->text));
641 Py_VISIT(JOIN_OBJ(self->tail));
642
643 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200644 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300645 Py_VISIT(self->extra->attrib);
646
647 for (i = 0; i < self->extra->length; ++i)
648 Py_VISIT(self->extra->children[i]);
649 }
650 return 0;
651}
652
653static int
654element_gc_clear(ElementObject *self)
655{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300656 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700657 _clear_joined_ptr(&self->text);
658 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300659
660 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300661 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300662 */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300663 clear_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300664 return 0;
665}
666
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000667static void
668element_dealloc(ElementObject* self)
669{
INADA Naokia6296d32017-08-24 14:55:17 +0900670 /* bpo-31095: UnTrack is needed before calling any callbacks */
Eli Bendersky0192ba32012-03-30 16:38:33 +0300671 PyObject_GC_UnTrack(self);
Jeroen Demeyer351c6742019-05-10 19:21:11 +0200672 Py_TRASHCAN_BEGIN(self, element_dealloc)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300673
674 if (self->weakreflist != NULL)
675 PyObject_ClearWeakRefs((PyObject *) self);
676
Eli Bendersky0192ba32012-03-30 16:38:33 +0300677 /* element_gc_clear clears all references and deallocates extra
678 */
679 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000680
681 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200682 Py_TYPE(self)->tp_free((PyObject *)self);
Jeroen Demeyer351c6742019-05-10 19:21:11 +0200683 Py_TRASHCAN_END
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000684}
685
686/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000687
Serhiy Storchakacb985562015-05-04 15:32:48 +0300688/*[clinic input]
689_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000690
Serhiy Storchakacb985562015-05-04 15:32:48 +0300691 subelement: object(subclass_of='&Element_Type')
692 /
693
694[clinic start generated code]*/
695
696static PyObject *
697_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
698/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
699{
700 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000701 return NULL;
702
703 Py_RETURN_NONE;
704}
705
Serhiy Storchakacb985562015-05-04 15:32:48 +0300706/*[clinic input]
707_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000708
Serhiy Storchakacb985562015-05-04 15:32:48 +0300709[clinic start generated code]*/
710
711static PyObject *
712_elementtree_Element_clear_impl(ElementObject *self)
713/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
714{
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300715 clear_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716
717 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300718 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719
720 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300721 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000722
723 Py_RETURN_NONE;
724}
725
Serhiy Storchakacb985562015-05-04 15:32:48 +0300726/*[clinic input]
727_elementtree.Element.__copy__
728
729[clinic start generated code]*/
730
731static PyObject *
732_elementtree_Element___copy___impl(ElementObject *self)
733/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000734{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200735 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000736 ElementObject* element;
737
Eli Bendersky092af1f2012-03-04 07:14:03 +0200738 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800739 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000740 if (!element)
741 return NULL;
742
Oren Milman39ecb9c2017-10-10 23:26:24 +0300743 Py_INCREF(JOIN_OBJ(self->text));
744 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000745
Oren Milman39ecb9c2017-10-10 23:26:24 +0300746 Py_INCREF(JOIN_OBJ(self->tail));
747 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300749 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000750 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000751 if (element_resize(element, self->extra->length) < 0) {
752 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000753 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000754 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000755
756 for (i = 0; i < self->extra->length; i++) {
757 Py_INCREF(self->extra->children[i]);
758 element->extra->children[i] = self->extra->children[i];
759 }
760
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300761 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000763 }
764
765 return (PyObject*) element;
766}
767
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200768/* Helper for a deep copy. */
769LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
770
Serhiy Storchakacb985562015-05-04 15:32:48 +0300771/*[clinic input]
772_elementtree.Element.__deepcopy__
773
Oren Milmand0568182017-09-12 17:39:15 +0300774 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300775 /
776
777[clinic start generated code]*/
778
779static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300780_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
781/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000782{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200783 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000784 ElementObject* element;
785 PyObject* tag;
786 PyObject* attrib;
787 PyObject* text;
788 PyObject* tail;
789 PyObject* id;
790
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000791 tag = deepcopy(self->tag, memo);
792 if (!tag)
793 return NULL;
794
795 if (self->extra) {
796 attrib = deepcopy(self->extra->attrib, memo);
797 if (!attrib) {
798 Py_DECREF(tag);
799 return NULL;
800 }
801 } else {
802 Py_INCREF(Py_None);
803 attrib = Py_None;
804 }
805
Eli Bendersky092af1f2012-03-04 07:14:03 +0200806 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807
808 Py_DECREF(tag);
809 Py_DECREF(attrib);
810
811 if (!element)
812 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100813
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000814 text = deepcopy(JOIN_OBJ(self->text), memo);
815 if (!text)
816 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300817 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000818
819 tail = deepcopy(JOIN_OBJ(self->tail), memo);
820 if (!tail)
821 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300822 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000823
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300824 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000825 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000826 if (element_resize(element, self->extra->length) < 0)
827 goto error;
828
829 for (i = 0; i < self->extra->length; i++) {
830 PyObject* child = deepcopy(self->extra->children[i], memo);
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300831 if (!child || !Element_Check(child)) {
832 if (child) {
833 raise_type_error(child);
834 Py_DECREF(child);
835 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000836 element->extra->length = i;
837 goto error;
838 }
839 element->extra->children[i] = child;
840 }
841
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300842 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000843 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000844 }
845
846 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700847 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000848 if (!id)
849 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000850
851 i = PyDict_SetItem(memo, id, (PyObject*) element);
852
853 Py_DECREF(id);
854
855 if (i < 0)
856 goto error;
857
858 return (PyObject*) element;
859
860 error:
861 Py_DECREF(element);
862 return NULL;
863}
864
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200865LOCAL(PyObject *)
866deepcopy(PyObject *object, PyObject *memo)
867{
868 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200869 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200870 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200871
872 /* Fast paths */
873 if (object == Py_None || PyUnicode_CheckExact(object)) {
874 Py_INCREF(object);
875 return object;
876 }
877
878 if (Py_REFCNT(object) == 1) {
879 if (PyDict_CheckExact(object)) {
880 PyObject *key, *value;
881 Py_ssize_t pos = 0;
882 int simple = 1;
883 while (PyDict_Next(object, &pos, &key, &value)) {
884 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
885 simple = 0;
886 break;
887 }
888 }
889 if (simple)
890 return PyDict_Copy(object);
891 /* Fall through to general case */
892 }
893 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300894 return _elementtree_Element___deepcopy___impl(
895 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200896 }
897 }
898
899 /* General case */
900 st = ET_STATE_GLOBAL;
901 if (!st->deepcopy_obj) {
902 PyErr_SetString(PyExc_RuntimeError,
903 "deepcopy helper not found");
904 return NULL;
905 }
906
Victor Stinner7fbac452016-08-20 01:34:44 +0200907 stack[0] = object;
908 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200909 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200910}
911
912
Serhiy Storchakacb985562015-05-04 15:32:48 +0300913/*[clinic input]
914_elementtree.Element.__sizeof__ -> Py_ssize_t
915
916[clinic start generated code]*/
917
918static Py_ssize_t
919_elementtree_Element___sizeof___impl(ElementObject *self)
920/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200921{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200922 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200923 if (self->extra) {
924 result += sizeof(ElementObjectExtra);
925 if (self->extra->children != self->extra->_children)
926 result += sizeof(PyObject*) * self->extra->allocated;
927 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300928 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200929}
930
Eli Bendersky698bdb22013-01-10 06:01:06 -0800931/* dict keys for getstate/setstate. */
932#define PICKLED_TAG "tag"
933#define PICKLED_CHILDREN "_children"
934#define PICKLED_ATTRIB "attrib"
935#define PICKLED_TAIL "tail"
936#define PICKLED_TEXT "text"
937
938/* __getstate__ returns a fabricated instance dict as in the pure-Python
939 * Element implementation, for interoperability/interchangeability. This
940 * makes the pure-Python implementation details an API, but (a) there aren't
941 * any unnecessary structures there; and (b) it buys compatibility with 3.2
942 * pickles. See issue #16076.
943 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300944/*[clinic input]
945_elementtree.Element.__getstate__
946
947[clinic start generated code]*/
948
Eli Bendersky698bdb22013-01-10 06:01:06 -0800949static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300950_elementtree_Element___getstate___impl(ElementObject *self)
951/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800952{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200953 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800954 PyObject *instancedict = NULL, *children;
955
956 /* Build a list of children. */
957 children = PyList_New(self->extra ? self->extra->length : 0);
958 if (!children)
959 return NULL;
960 for (i = 0; i < PyList_GET_SIZE(children); i++) {
961 PyObject *child = self->extra->children[i];
962 Py_INCREF(child);
963 PyList_SET_ITEM(children, i, child);
964 }
965
966 /* Construct the state object. */
967 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
968 if (noattrib)
969 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
970 PICKLED_TAG, self->tag,
971 PICKLED_CHILDREN, children,
972 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700973 PICKLED_TEXT, JOIN_OBJ(self->text),
974 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800975 else
976 instancedict = Py_BuildValue("{sOsOsOsOsO}",
977 PICKLED_TAG, self->tag,
978 PICKLED_CHILDREN, children,
979 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700980 PICKLED_TEXT, JOIN_OBJ(self->text),
981 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800982 if (instancedict) {
983 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800984 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800985 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800986 else {
987 for (i = 0; i < PyList_GET_SIZE(children); i++)
988 Py_DECREF(PyList_GET_ITEM(children, i));
989 Py_DECREF(children);
990
991 return NULL;
992 }
993}
994
995static PyObject *
996element_setstate_from_attributes(ElementObject *self,
997 PyObject *tag,
998 PyObject *attrib,
999 PyObject *text,
1000 PyObject *tail,
1001 PyObject *children)
1002{
1003 Py_ssize_t i, nchildren;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001004 ElementObjectExtra *oldextra = NULL;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001005
1006 if (!tag) {
1007 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
1008 return NULL;
1009 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001010
Serhiy Storchaka191321d2015-12-27 15:41:34 +02001011 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001012 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001013
Oren Milman39ecb9c2017-10-10 23:26:24 +03001014 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
1015 Py_INCREF(JOIN_OBJ(text));
1016 _set_joined_ptr(&self->text, text);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001017
Oren Milman39ecb9c2017-10-10 23:26:24 +03001018 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
1019 Py_INCREF(JOIN_OBJ(tail));
1020 _set_joined_ptr(&self->tail, tail);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001021
1022 /* Handle ATTRIB and CHILDREN. */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001023 if (!children && !attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001024 Py_RETURN_NONE;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001025 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001026
1027 /* Compute 'nchildren'. */
1028 if (children) {
1029 if (!PyList_Check(children)) {
1030 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1031 return NULL;
1032 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001033 nchildren = PyList_GET_SIZE(children);
1034
1035 /* (Re-)allocate 'extra'.
1036 Avoid DECREFs calling into this code again (cycles, etc.)
1037 */
1038 oldextra = self->extra;
1039 self->extra = NULL;
1040 if (element_resize(self, nchildren)) {
1041 assert(!self->extra || !self->extra->length);
1042 clear_extra(self);
1043 self->extra = oldextra;
1044 return NULL;
1045 }
1046 assert(self->extra);
1047 assert(self->extra->allocated >= nchildren);
1048 if (oldextra) {
1049 assert(self->extra->attrib == Py_None);
1050 self->extra->attrib = oldextra->attrib;
1051 oldextra->attrib = Py_None;
1052 }
1053
1054 /* Copy children */
1055 for (i = 0; i < nchildren; i++) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001056 PyObject *child = PyList_GET_ITEM(children, i);
1057 if (!Element_Check(child)) {
1058 raise_type_error(child);
1059 self->extra->length = i;
1060 dealloc_extra(oldextra);
1061 return NULL;
1062 }
1063 Py_INCREF(child);
1064 self->extra->children[i] = child;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001065 }
1066
1067 assert(!self->extra->length);
1068 self->extra->length = nchildren;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001069 }
1070 else {
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001071 if (element_resize(self, 0)) {
1072 return NULL;
1073 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001074 }
1075
Eli Bendersky698bdb22013-01-10 06:01:06 -08001076 /* Stash attrib. */
1077 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001078 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001079 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001080 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001081 dealloc_extra(oldextra);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001082
1083 Py_RETURN_NONE;
1084}
1085
1086/* __setstate__ for Element instance from the Python implementation.
1087 * 'state' should be the instance dict.
1088 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001089
Eli Bendersky698bdb22013-01-10 06:01:06 -08001090static PyObject *
1091element_setstate_from_Python(ElementObject *self, PyObject *state)
1092{
1093 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1094 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1095 PyObject *args;
1096 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001097 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001098
Eli Bendersky698bdb22013-01-10 06:01:06 -08001099 tag = attrib = text = tail = children = NULL;
1100 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001101 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001102 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001103
1104 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1105 &attrib, &text, &tail, &children))
1106 retval = element_setstate_from_attributes(self, tag, attrib, text,
1107 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001108 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001109 retval = NULL;
1110
1111 Py_DECREF(args);
1112 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001113}
1114
Serhiy Storchakacb985562015-05-04 15:32:48 +03001115/*[clinic input]
1116_elementtree.Element.__setstate__
1117
1118 state: object
1119 /
1120
1121[clinic start generated code]*/
1122
Eli Bendersky698bdb22013-01-10 06:01:06 -08001123static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001124_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1125/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001126{
1127 if (!PyDict_CheckExact(state)) {
1128 PyErr_Format(PyExc_TypeError,
1129 "Don't know how to unpickle \"%.200R\" as an Element",
1130 state);
1131 return NULL;
1132 }
1133 else
1134 return element_setstate_from_Python(self, state);
1135}
1136
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001137LOCAL(int)
1138checkpath(PyObject* tag)
1139{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001140 Py_ssize_t i;
1141 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001142
1143 /* check if a tag contains an xpath character */
1144
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001145#define PATHCHAR(ch) \
1146 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001147
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001148 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001149 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1150 void *data = PyUnicode_DATA(tag);
1151 unsigned int kind = PyUnicode_KIND(tag);
Stefan Behnel47541682019-05-03 20:58:16 +02001152 if (len >= 3 && PyUnicode_READ(kind, data, 0) == '{' && (
1153 PyUnicode_READ(kind, data, 1) == '}' || (
1154 PyUnicode_READ(kind, data, 1) == '*' &&
1155 PyUnicode_READ(kind, data, 2) == '}'))) {
1156 /* wildcard: '{}tag' or '{*}tag' */
1157 return 1;
1158 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001159 for (i = 0; i < len; i++) {
1160 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1161 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001162 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001163 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001164 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001165 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001166 return 1;
1167 }
1168 return 0;
1169 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001170 if (PyBytes_Check(tag)) {
1171 char *p = PyBytes_AS_STRING(tag);
Stefan Behnel47541682019-05-03 20:58:16 +02001172 const Py_ssize_t len = PyBytes_GET_SIZE(tag);
1173 if (len >= 3 && p[0] == '{' && (
Stefan Behnel6b951492019-05-06 17:36:35 +02001174 p[1] == '}' || (p[1] == '*' && p[2] == '}'))) {
Stefan Behnel47541682019-05-03 20:58:16 +02001175 /* wildcard: '{}tag' or '{*}tag' */
1176 return 1;
1177 }
1178 for (i = 0; i < len; i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001179 if (p[i] == '{')
1180 check = 0;
1181 else if (p[i] == '}')
1182 check = 1;
1183 else if (check && PATHCHAR(p[i]))
1184 return 1;
1185 }
1186 return 0;
1187 }
1188
1189 return 1; /* unknown type; might be path expression */
1190}
1191
Serhiy Storchakacb985562015-05-04 15:32:48 +03001192/*[clinic input]
1193_elementtree.Element.extend
1194
1195 elements: object
1196 /
1197
1198[clinic start generated code]*/
1199
1200static PyObject *
1201_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1202/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001203{
1204 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001205 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001206
Serhiy Storchakacb985562015-05-04 15:32:48 +03001207 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001208 if (!seq) {
1209 PyErr_Format(
1210 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001211 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001212 );
1213 return NULL;
1214 }
1215
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001216 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001217 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001218 Py_INCREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001219 if (element_add_subelement(self, element) < 0) {
1220 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001221 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001222 return NULL;
1223 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001224 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001225 }
1226
1227 Py_DECREF(seq);
1228
1229 Py_RETURN_NONE;
1230}
1231
Serhiy Storchakacb985562015-05-04 15:32:48 +03001232/*[clinic input]
1233_elementtree.Element.find
1234
1235 path: object
1236 namespaces: object = None
1237
1238[clinic start generated code]*/
1239
1240static PyObject *
1241_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1242 PyObject *namespaces)
1243/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001244{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001245 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001246 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001247
Serhiy Storchakacb985562015-05-04 15:32:48 +03001248 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001249 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001250 return _PyObject_CallMethodIdObjArgs(
1251 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001252 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001253 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001254
1255 if (!self->extra)
1256 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001257
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001258 for (i = 0; i < self->extra->length; i++) {
1259 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001260 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001261 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001262 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001263 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001264 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001265 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001266 Py_DECREF(item);
1267 if (rc < 0)
1268 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001269 }
1270
1271 Py_RETURN_NONE;
1272}
1273
Serhiy Storchakacb985562015-05-04 15:32:48 +03001274/*[clinic input]
1275_elementtree.Element.findtext
1276
1277 path: object
1278 default: object = None
1279 namespaces: object = None
1280
1281[clinic start generated code]*/
1282
1283static PyObject *
1284_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1285 PyObject *default_value,
1286 PyObject *namespaces)
1287/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001288{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001289 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001290 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001291 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001292
Serhiy Storchakacb985562015-05-04 15:32:48 +03001293 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001294 return _PyObject_CallMethodIdObjArgs(
1295 st->elementpath_obj, &PyId_findtext,
1296 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001297 );
1298
1299 if (!self->extra) {
1300 Py_INCREF(default_value);
1301 return default_value;
1302 }
1303
1304 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001305 PyObject *item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001306 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001307 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001308 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001309 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001310 if (rc > 0) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001311 PyObject* text = element_get_text((ElementObject*)item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001312 if (text == Py_None) {
1313 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001314 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001315 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001316 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001317 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001318 return text;
1319 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001320 Py_DECREF(item);
1321 if (rc < 0)
1322 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001323 }
1324
1325 Py_INCREF(default_value);
1326 return default_value;
1327}
1328
Serhiy Storchakacb985562015-05-04 15:32:48 +03001329/*[clinic input]
1330_elementtree.Element.findall
1331
1332 path: object
1333 namespaces: object = None
1334
1335[clinic start generated code]*/
1336
1337static PyObject *
1338_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1339 PyObject *namespaces)
1340/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001341{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001342 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001343 PyObject* out;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001344 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001345
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001346 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001347 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001348 return _PyObject_CallMethodIdObjArgs(
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001349 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001350 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001351 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001352
1353 out = PyList_New(0);
1354 if (!out)
1355 return NULL;
1356
1357 if (!self->extra)
1358 return out;
1359
1360 for (i = 0; i < self->extra->length; i++) {
1361 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001362 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001363 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001364 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001365 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001366 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1367 Py_DECREF(item);
1368 Py_DECREF(out);
1369 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001370 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001371 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001372 }
1373
1374 return out;
1375}
1376
Serhiy Storchakacb985562015-05-04 15:32:48 +03001377/*[clinic input]
1378_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001379
Serhiy Storchakacb985562015-05-04 15:32:48 +03001380 path: object
1381 namespaces: object = None
1382
1383[clinic start generated code]*/
1384
1385static PyObject *
1386_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1387 PyObject *namespaces)
1388/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1389{
1390 PyObject* tag = path;
1391 _Py_IDENTIFIER(iterfind);
1392 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001393
Victor Stinnerf5616342016-12-09 15:26:00 +01001394 return _PyObject_CallMethodIdObjArgs(
1395 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001396}
1397
Serhiy Storchakacb985562015-05-04 15:32:48 +03001398/*[clinic input]
1399_elementtree.Element.get
1400
1401 key: object
1402 default: object = None
1403
1404[clinic start generated code]*/
1405
1406static PyObject *
1407_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1408 PyObject *default_value)
1409/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001410{
1411 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001412
1413 if (!self->extra || self->extra->attrib == Py_None)
1414 value = default_value;
1415 else {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001416 value = PyDict_GetItemWithError(self->extra->attrib, key);
1417 if (!value) {
1418 if (PyErr_Occurred()) {
1419 return NULL;
1420 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001421 value = default_value;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001422 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001423 }
1424
1425 Py_INCREF(value);
1426 return value;
1427}
1428
Eli Bendersky64d11e62012-06-15 07:42:50 +03001429static PyObject *
1430create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1431
1432
Serhiy Storchakacb985562015-05-04 15:32:48 +03001433/*[clinic input]
1434_elementtree.Element.iter
1435
1436 tag: object = None
1437
1438[clinic start generated code]*/
1439
Eli Bendersky64d11e62012-06-15 07:42:50 +03001440static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001441_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1442/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001443{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001444 if (PyUnicode_Check(tag)) {
1445 if (PyUnicode_READY(tag) < 0)
1446 return NULL;
1447 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1448 tag = Py_None;
1449 }
1450 else if (PyBytes_Check(tag)) {
1451 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1452 tag = Py_None;
1453 }
1454
Eli Bendersky64d11e62012-06-15 07:42:50 +03001455 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001456}
1457
1458
Serhiy Storchakacb985562015-05-04 15:32:48 +03001459/*[clinic input]
1460_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001461
Serhiy Storchakacb985562015-05-04 15:32:48 +03001462[clinic start generated code]*/
1463
1464static PyObject *
1465_elementtree_Element_itertext_impl(ElementObject *self)
1466/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1467{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001468 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001469}
1470
Eli Bendersky64d11e62012-06-15 07:42:50 +03001471
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001472static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001473element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001474{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001475 ElementObject* self = (ElementObject*) self_;
1476
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001477 if (!self->extra || index < 0 || index >= self->extra->length) {
1478 PyErr_SetString(
1479 PyExc_IndexError,
1480 "child index out of range"
1481 );
1482 return NULL;
1483 }
1484
1485 Py_INCREF(self->extra->children[index]);
1486 return self->extra->children[index];
1487}
1488
Serhiy Storchakacb985562015-05-04 15:32:48 +03001489/*[clinic input]
1490_elementtree.Element.insert
1491
1492 index: Py_ssize_t
1493 subelement: object(subclass_of='&Element_Type')
1494 /
1495
1496[clinic start generated code]*/
1497
1498static PyObject *
1499_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1500 PyObject *subelement)
1501/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001502{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001503 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001504
Victor Stinner5f0af232013-07-11 23:01:36 +02001505 if (!self->extra) {
1506 if (create_extra(self, NULL) < 0)
1507 return NULL;
1508 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001509
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001510 if (index < 0) {
1511 index += self->extra->length;
1512 if (index < 0)
1513 index = 0;
1514 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001515 if (index > self->extra->length)
1516 index = self->extra->length;
1517
1518 if (element_resize(self, 1) < 0)
1519 return NULL;
1520
1521 for (i = self->extra->length; i > index; i--)
1522 self->extra->children[i] = self->extra->children[i-1];
1523
Serhiy Storchakacb985562015-05-04 15:32:48 +03001524 Py_INCREF(subelement);
1525 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001526
1527 self->extra->length++;
1528
1529 Py_RETURN_NONE;
1530}
1531
Serhiy Storchakacb985562015-05-04 15:32:48 +03001532/*[clinic input]
1533_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001534
Serhiy Storchakacb985562015-05-04 15:32:48 +03001535[clinic start generated code]*/
1536
1537static PyObject *
1538_elementtree_Element_items_impl(ElementObject *self)
1539/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1540{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001541 if (!self->extra || self->extra->attrib == Py_None)
1542 return PyList_New(0);
1543
1544 return PyDict_Items(self->extra->attrib);
1545}
1546
Serhiy Storchakacb985562015-05-04 15:32:48 +03001547/*[clinic input]
1548_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001549
Serhiy Storchakacb985562015-05-04 15:32:48 +03001550[clinic start generated code]*/
1551
1552static PyObject *
1553_elementtree_Element_keys_impl(ElementObject *self)
1554/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1555{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001556 if (!self->extra || self->extra->attrib == Py_None)
1557 return PyList_New(0);
1558
1559 return PyDict_Keys(self->extra->attrib);
1560}
1561
Martin v. Löwis18e16552006-02-15 17:27:45 +00001562static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001563element_length(ElementObject* self)
1564{
1565 if (!self->extra)
1566 return 0;
1567
1568 return self->extra->length;
1569}
1570
Serhiy Storchakacb985562015-05-04 15:32:48 +03001571/*[clinic input]
1572_elementtree.Element.makeelement
1573
1574 tag: object
1575 attrib: object
1576 /
1577
1578[clinic start generated code]*/
1579
1580static PyObject *
1581_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1582 PyObject *attrib)
1583/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001584{
1585 PyObject* elem;
1586
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001587 attrib = PyDict_Copy(attrib);
1588 if (!attrib)
1589 return NULL;
1590
Eli Bendersky092af1f2012-03-04 07:14:03 +02001591 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001592
1593 Py_DECREF(attrib);
1594
1595 return elem;
1596}
1597
Serhiy Storchakacb985562015-05-04 15:32:48 +03001598/*[clinic input]
1599_elementtree.Element.remove
1600
1601 subelement: object(subclass_of='&Element_Type')
1602 /
1603
1604[clinic start generated code]*/
1605
1606static PyObject *
1607_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1608/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001609{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001610 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001611 int rc;
1612 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001613
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001614 if (!self->extra) {
1615 /* element has no children, so raise exception */
1616 PyErr_SetString(
1617 PyExc_ValueError,
1618 "list.remove(x): x not in list"
1619 );
1620 return NULL;
1621 }
1622
1623 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001624 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001625 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001626 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001627 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001628 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001629 if (rc < 0)
1630 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001631 }
1632
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001633 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001634 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001635 PyErr_SetString(
1636 PyExc_ValueError,
1637 "list.remove(x): x not in list"
1638 );
1639 return NULL;
1640 }
1641
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001642 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001643
1644 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001645 for (; i < self->extra->length; i++)
1646 self->extra->children[i] = self->extra->children[i+1];
1647
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001648 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001649 Py_RETURN_NONE;
1650}
1651
1652static PyObject*
1653element_repr(ElementObject* self)
1654{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001655 int status;
1656
1657 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001658 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001659
1660 status = Py_ReprEnter((PyObject *)self);
1661 if (status == 0) {
1662 PyObject *res;
1663 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1664 Py_ReprLeave((PyObject *)self);
1665 return res;
1666 }
1667 if (status > 0)
1668 PyErr_Format(PyExc_RuntimeError,
1669 "reentrant call inside %s.__repr__",
1670 Py_TYPE(self)->tp_name);
1671 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001672}
1673
Serhiy Storchakacb985562015-05-04 15:32:48 +03001674/*[clinic input]
1675_elementtree.Element.set
1676
1677 key: object
1678 value: object
1679 /
1680
1681[clinic start generated code]*/
1682
1683static PyObject *
1684_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1685 PyObject *value)
1686/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001687{
1688 PyObject* attrib;
1689
Victor Stinner5f0af232013-07-11 23:01:36 +02001690 if (!self->extra) {
1691 if (create_extra(self, NULL) < 0)
1692 return NULL;
1693 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001694
1695 attrib = element_get_attrib(self);
1696 if (!attrib)
1697 return NULL;
1698
1699 if (PyDict_SetItem(attrib, key, value) < 0)
1700 return NULL;
1701
1702 Py_RETURN_NONE;
1703}
1704
1705static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001706element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001707{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001708 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001709 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001710 PyObject* old;
1711
1712 if (!self->extra || index < 0 || index >= self->extra->length) {
1713 PyErr_SetString(
1714 PyExc_IndexError,
1715 "child assignment index out of range");
1716 return -1;
1717 }
1718
1719 old = self->extra->children[index];
1720
1721 if (item) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001722 if (!Element_Check(item)) {
1723 raise_type_error(item);
1724 return -1;
1725 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001726 Py_INCREF(item);
1727 self->extra->children[index] = item;
1728 } else {
1729 self->extra->length--;
1730 for (i = index; i < self->extra->length; i++)
1731 self->extra->children[i] = self->extra->children[i+1];
1732 }
1733
1734 Py_DECREF(old);
1735
1736 return 0;
1737}
1738
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001739static PyObject*
1740element_subscr(PyObject* self_, PyObject* item)
1741{
1742 ElementObject* self = (ElementObject*) self_;
1743
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001744 if (PyIndex_Check(item)) {
1745 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001746
1747 if (i == -1 && PyErr_Occurred()) {
1748 return NULL;
1749 }
1750 if (i < 0 && self->extra)
1751 i += self->extra->length;
1752 return element_getitem(self_, i);
1753 }
1754 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001755 Py_ssize_t start, stop, step, slicelen, i;
1756 size_t cur;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001757 PyObject* list;
1758
1759 if (!self->extra)
1760 return PyList_New(0);
1761
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001762 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001763 return NULL;
1764 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001765 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1766 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001767
1768 if (slicelen <= 0)
1769 return PyList_New(0);
1770 else {
1771 list = PyList_New(slicelen);
1772 if (!list)
1773 return NULL;
1774
1775 for (cur = start, i = 0; i < slicelen;
1776 cur += step, i++) {
1777 PyObject* item = self->extra->children[cur];
1778 Py_INCREF(item);
1779 PyList_SET_ITEM(list, i, item);
1780 }
1781
1782 return list;
1783 }
1784 }
1785 else {
1786 PyErr_SetString(PyExc_TypeError,
1787 "element indices must be integers");
1788 return NULL;
1789 }
1790}
1791
1792static int
1793element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1794{
1795 ElementObject* self = (ElementObject*) self_;
1796
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001797 if (PyIndex_Check(item)) {
1798 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001799
1800 if (i == -1 && PyErr_Occurred()) {
1801 return -1;
1802 }
1803 if (i < 0 && self->extra)
1804 i += self->extra->length;
1805 return element_setitem(self_, i, value);
1806 }
1807 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001808 Py_ssize_t start, stop, step, slicelen, newlen, i;
1809 size_t cur;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001810
1811 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001812 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001813
Victor Stinner5f0af232013-07-11 23:01:36 +02001814 if (!self->extra) {
1815 if (create_extra(self, NULL) < 0)
1816 return -1;
1817 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001818
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001819 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001820 return -1;
1821 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001822 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1823 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001824
Eli Bendersky865756a2012-03-09 13:38:15 +02001825 if (value == NULL) {
1826 /* Delete slice */
1827 size_t cur;
1828 Py_ssize_t i;
1829
1830 if (slicelen <= 0)
1831 return 0;
1832
1833 /* Since we're deleting, the direction of the range doesn't matter,
1834 * so for simplicity make it always ascending.
1835 */
1836 if (step < 0) {
1837 stop = start + 1;
1838 start = stop + step * (slicelen - 1) - 1;
1839 step = -step;
1840 }
1841
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001842 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001843
1844 /* recycle is a list that will contain all the children
1845 * scheduled for removal.
1846 */
1847 if (!(recycle = PyList_New(slicelen))) {
Eli Bendersky865756a2012-03-09 13:38:15 +02001848 return -1;
1849 }
1850
1851 /* This loop walks over all the children that have to be deleted,
1852 * with cur pointing at them. num_moved is the amount of children
1853 * until the next deleted child that have to be "shifted down" to
1854 * occupy the deleted's places.
1855 * Note that in the ith iteration, shifting is done i+i places down
1856 * because i children were already removed.
1857 */
1858 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1859 /* Compute how many children have to be moved, clipping at the
1860 * list end.
1861 */
1862 Py_ssize_t num_moved = step - 1;
1863 if (cur + step >= (size_t)self->extra->length) {
1864 num_moved = self->extra->length - cur - 1;
1865 }
1866
1867 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1868
1869 memmove(
1870 self->extra->children + cur - i,
1871 self->extra->children + cur + 1,
1872 num_moved * sizeof(PyObject *));
1873 }
1874
1875 /* Leftover "tail" after the last removed child */
1876 cur = start + (size_t)slicelen * step;
1877 if (cur < (size_t)self->extra->length) {
1878 memmove(
1879 self->extra->children + cur - slicelen,
1880 self->extra->children + cur,
1881 (self->extra->length - cur) * sizeof(PyObject *));
1882 }
1883
1884 self->extra->length -= slicelen;
1885
1886 /* Discard the recycle list with all the deleted sub-elements */
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -06001887 Py_DECREF(recycle);
Eli Bendersky865756a2012-03-09 13:38:15 +02001888 return 0;
1889 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001890
1891 /* A new slice is actually being assigned */
1892 seq = PySequence_Fast(value, "");
1893 if (!seq) {
1894 PyErr_Format(
1895 PyExc_TypeError,
1896 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1897 );
1898 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001899 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001900 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001901
1902 if (step != 1 && newlen != slicelen)
1903 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001904 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001905 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001906 "attempt to assign sequence of size %zd "
1907 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001908 newlen, slicelen
1909 );
1910 return -1;
1911 }
1912
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001913 /* Resize before creating the recycle bin, to prevent refleaks. */
1914 if (newlen > slicelen) {
1915 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001916 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001917 return -1;
1918 }
1919 }
1920
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001921 for (i = 0; i < newlen; i++) {
1922 PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1923 if (!Element_Check(element)) {
1924 raise_type_error(element);
1925 Py_DECREF(seq);
1926 return -1;
1927 }
1928 }
1929
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001930 if (slicelen > 0) {
1931 /* to avoid recursive calls to this method (via decref), move
1932 old items to the recycle bin here, and get rid of them when
1933 we're done modifying the element */
1934 recycle = PyList_New(slicelen);
1935 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001936 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001937 return -1;
1938 }
1939 for (cur = start, i = 0; i < slicelen;
1940 cur += step, i++)
1941 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1942 }
1943
1944 if (newlen < slicelen) {
1945 /* delete slice */
1946 for (i = stop; i < self->extra->length; i++)
1947 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1948 } else if (newlen > slicelen) {
1949 /* insert slice */
1950 for (i = self->extra->length-1; i >= stop; i--)
1951 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1952 }
1953
1954 /* replace the slice */
1955 for (cur = start, i = 0; i < newlen;
1956 cur += step, i++) {
1957 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1958 Py_INCREF(element);
1959 self->extra->children[cur] = element;
1960 }
1961
1962 self->extra->length += newlen - slicelen;
1963
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001964 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001965
1966 /* discard the recycle bin, and everything in it */
1967 Py_XDECREF(recycle);
1968
1969 return 0;
1970 }
1971 else {
1972 PyErr_SetString(PyExc_TypeError,
1973 "element indices must be integers");
1974 return -1;
1975 }
1976}
1977
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001978static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001979element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001980{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001981 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001982 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001983 return res;
1984}
1985
Serhiy Storchakadde08152015-11-25 15:28:13 +02001986static PyObject*
1987element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001988{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001989 PyObject *res = element_get_text(self);
1990 Py_XINCREF(res);
1991 return res;
1992}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001993
Serhiy Storchakadde08152015-11-25 15:28:13 +02001994static PyObject*
1995element_tail_getter(ElementObject *self, void *closure)
1996{
1997 PyObject *res = element_get_tail(self);
1998 Py_XINCREF(res);
1999 return res;
2000}
2001
2002static PyObject*
2003element_attrib_getter(ElementObject *self, void *closure)
2004{
2005 PyObject *res;
2006 if (!self->extra) {
2007 if (create_extra(self, NULL) < 0)
2008 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002009 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02002010 res = element_get_attrib(self);
2011 Py_XINCREF(res);
2012 return res;
2013}
Victor Stinner4d463432013-07-11 23:05:03 +02002014
/* Macro for setter validation.
 *
 * A setter receives V == NULL when the attribute is being deleted; element
 * attributes cannot be deleted, so raise AttributeError and make the
 * *enclosing function* return -1.
 *
 * Wrapped in do { } while (0) so that the macro behaves as one statement
 * and is safe inside unbraced if/else bodies.  Invoke it with a trailing
 * semicolon.
 */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2023
Serhiy Storchakadde08152015-11-25 15:28:13 +02002024static int
2025element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2026{
2027 _VALIDATE_ATTR_VALUE(value);
2028 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002029 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002030 return 0;
2031}
2032
2033static int
2034element_text_setter(ElementObject *self, PyObject *value, void *closure)
2035{
2036 _VALIDATE_ATTR_VALUE(value);
2037 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002038 _set_joined_ptr(&self->text, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002039 return 0;
2040}
2041
2042static int
2043element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2044{
2045 _VALIDATE_ATTR_VALUE(value);
2046 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002047 _set_joined_ptr(&self->tail, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002048 return 0;
2049}
2050
2051static int
2052element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2053{
2054 _VALIDATE_ATTR_VALUE(value);
2055 if (!self->extra) {
2056 if (create_extra(self, NULL) < 0)
2057 return -1;
2058 }
2059 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002060 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002061 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002062}
2063
2064static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002065 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002066 0, /* sq_concat */
2067 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002068 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002069 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002070 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002071 0,
2072};
2073
Eli Bendersky64d11e62012-06-15 07:42:50 +03002074/******************************* Element iterator ****************************/
2075
2076/* ElementIterObject represents the iteration state over an XML element in
2077 * pre-order traversal. To keep track of which sub-element should be returned
2078 * next, a stack of parents is maintained. This is a standard stack-based
2079 * iterative pre-order traversal of a tree.
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002080 * The stack is managed using a continuous array.
2081 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002082 * the current one is exhausted, and the next child to examine in that parent.
2083 */
2084typedef struct ParentLocator_t {
2085 ElementObject *parent;
2086 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002087} ParentLocator;
2088
2089typedef struct {
2090 PyObject_HEAD
2091 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002092 Py_ssize_t parent_stack_used;
2093 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002094 ElementObject *root_element;
2095 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002096 int gettext;
2097} ElementIterObject;
2098
2099
2100static void
2101elementiter_dealloc(ElementIterObject *it)
2102{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002103 Py_ssize_t i = it->parent_stack_used;
2104 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002105 /* bpo-31095: UnTrack is needed before calling any callbacks */
2106 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002107 while (i--)
2108 Py_XDECREF(it->parent_stack[i].parent);
2109 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002110
2111 Py_XDECREF(it->sought_tag);
2112 Py_XDECREF(it->root_element);
2113
Eli Bendersky64d11e62012-06-15 07:42:50 +03002114 PyObject_GC_Del(it);
2115}
2116
2117static int
2118elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2119{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002120 Py_ssize_t i = it->parent_stack_used;
2121 while (i--)
2122 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002123
2124 Py_VISIT(it->root_element);
2125 Py_VISIT(it->sought_tag);
2126 return 0;
2127}
2128
2129/* Helper function for elementiter_next. Add a new parent to the parent stack.
2130 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002131static int
2132parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002133{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002134 ParentLocator *item;
2135
2136 if (it->parent_stack_used >= it->parent_stack_size) {
2137 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2138 ParentLocator *parent_stack = it->parent_stack;
2139 PyMem_Resize(parent_stack, ParentLocator, new_size);
2140 if (parent_stack == NULL)
2141 return -1;
2142 it->parent_stack = parent_stack;
2143 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002144 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002145 item = it->parent_stack + it->parent_stack_used++;
2146 Py_INCREF(parent);
2147 item->parent = parent;
2148 item->child_index = 0;
2149 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002150}
2151
2152static PyObject *
2153elementiter_next(ElementIterObject *it)
2154{
2155 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002156 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002157 * A short note on gettext: this function serves both the iter() and
2158 * itertext() methods to avoid code duplication. However, there are a few
2159 * small differences in the way these iterations work. Namely:
2160 * - itertext() only yields text from nodes that have it, and continues
2161 * iterating when a node doesn't have text (so it doesn't return any
2162 * node like iter())
2163 * - itertext() also has to handle tail, after finishing with all the
2164 * children of a node.
2165 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002166 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002167 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002168 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002169
2170 while (1) {
2171 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002172 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002173 * iterator is exhausted.
2174 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002175 if (!it->parent_stack_used) {
2176 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002177 PyErr_SetNone(PyExc_StopIteration);
2178 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002179 }
2180
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002181 elem = it->root_element; /* steals a reference */
2182 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002183 }
2184 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002185 /* See if there are children left to traverse in the current parent. If
2186 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002187 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002188 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2189 Py_ssize_t child_index = item->child_index;
2190 ElementObjectExtra *extra;
2191 elem = item->parent;
2192 extra = elem->extra;
2193 if (!extra || child_index >= extra->length) {
2194 it->parent_stack_used--;
2195 /* Note that extra condition on it->parent_stack_used here;
2196 * this is because itertext() is supposed to only return *inner*
2197 * text, not text following the element it began iteration with.
2198 */
2199 if (it->gettext && it->parent_stack_used) {
2200 text = element_get_tail(elem);
2201 goto gettext;
2202 }
2203 Py_DECREF(elem);
2204 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002205 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002206
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03002207 assert(Element_Check(extra->children[child_index]));
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002208 elem = (ElementObject *)extra->children[child_index];
2209 item->child_index++;
2210 Py_INCREF(elem);
2211 }
2212
2213 if (parent_stack_push_new(it, elem) < 0) {
2214 Py_DECREF(elem);
2215 PyErr_NoMemory();
2216 return NULL;
2217 }
2218 if (it->gettext) {
2219 text = element_get_text(elem);
2220 goto gettext;
2221 }
2222
2223 if (it->sought_tag == Py_None)
2224 return (PyObject *)elem;
2225
2226 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2227 if (rc > 0)
2228 return (PyObject *)elem;
2229
2230 Py_DECREF(elem);
2231 if (rc < 0)
2232 return NULL;
2233 continue;
2234
2235gettext:
2236 if (!text) {
2237 Py_DECREF(elem);
2238 return NULL;
2239 }
2240 if (text == Py_None) {
2241 Py_DECREF(elem);
2242 }
2243 else {
2244 Py_INCREF(text);
2245 Py_DECREF(elem);
2246 rc = PyObject_IsTrue(text);
2247 if (rc > 0)
2248 return text;
2249 Py_DECREF(text);
2250 if (rc < 0)
2251 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002252 }
2253 }
2254
2255 return NULL;
2256}
2257
2258
2259static PyTypeObject ElementIter_Type = {
2260 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002261 /* Using the module's name since the pure-Python implementation does not
2262 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002263 "_elementtree._element_iterator", /* tp_name */
2264 sizeof(ElementIterObject), /* tp_basicsize */
2265 0, /* tp_itemsize */
2266 /* methods */
2267 (destructor)elementiter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002268 0, /* tp_vectorcall_offset */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002269 0, /* tp_getattr */
2270 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002271 0, /* tp_as_async */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002272 0, /* tp_repr */
2273 0, /* tp_as_number */
2274 0, /* tp_as_sequence */
2275 0, /* tp_as_mapping */
2276 0, /* tp_hash */
2277 0, /* tp_call */
2278 0, /* tp_str */
2279 0, /* tp_getattro */
2280 0, /* tp_setattro */
2281 0, /* tp_as_buffer */
2282 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2283 0, /* tp_doc */
2284 (traverseproc)elementiter_traverse, /* tp_traverse */
2285 0, /* tp_clear */
2286 0, /* tp_richcompare */
2287 0, /* tp_weaklistoffset */
2288 PyObject_SelfIter, /* tp_iter */
2289 (iternextfunc)elementiter_next, /* tp_iternext */
2290 0, /* tp_methods */
2291 0, /* tp_members */
2292 0, /* tp_getset */
2293 0, /* tp_base */
2294 0, /* tp_dict */
2295 0, /* tp_descr_get */
2296 0, /* tp_descr_set */
2297 0, /* tp_dictoffset */
2298 0, /* tp_init */
2299 0, /* tp_alloc */
2300 0, /* tp_new */
2301};
2302
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002303#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002304
2305static PyObject *
2306create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2307{
2308 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002309
2310 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2311 if (!it)
2312 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002313
Victor Stinner4d463432013-07-11 23:05:03 +02002314 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002315 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002316 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002317 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002318 it->root_element = self;
2319
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002320 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002321 if (it->parent_stack == NULL) {
2322 Py_DECREF(it);
2323 PyErr_NoMemory();
2324 return NULL;
2325 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002326 it->parent_stack_used = 0;
2327 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002328
Victor Stinner1b184552019-10-08 00:09:31 +02002329 PyObject_GC_Track(it);
2330
Eli Bendersky64d11e62012-06-15 07:42:50 +03002331 return (PyObject *)it;
2332}
2333
2334
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002335/* ==================================================================== */
2336/* the tree builder type */
2337
2338typedef struct {
2339 PyObject_HEAD
2340
Eli Bendersky58d548d2012-05-29 15:45:16 +03002341 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002342
Antoine Pitrouee329312012-10-04 19:53:29 +02002343 PyObject *this; /* current node */
2344 PyObject *last; /* most recently created node */
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002345 PyObject *last_for_tail; /* most recently created node that takes a tail */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002346
Eli Bendersky58d548d2012-05-29 15:45:16 +03002347 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002348
Eli Bendersky58d548d2012-05-29 15:45:16 +03002349 PyObject *stack; /* element stack */
2350 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002351
Eli Bendersky48d358b2012-05-30 17:57:50 +03002352 PyObject *element_factory;
Stefan Behnel43851a22019-05-01 21:20:38 +02002353 PyObject *comment_factory;
2354 PyObject *pi_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002355
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002356 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002357 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002358 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2359 PyObject *end_event_obj;
2360 PyObject *start_ns_event_obj;
2361 PyObject *end_ns_event_obj;
Stefan Behnel43851a22019-05-01 21:20:38 +02002362 PyObject *comment_event_obj;
2363 PyObject *pi_event_obj;
2364
2365 char insert_comments;
2366 char insert_pis;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002367} TreeBuilderObject;
2368
Christian Heimes90aa7642007-12-19 02:45:37 +00002369#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002370
2371/* -------------------------------------------------------------------- */
2372/* constructor and destructor */
2373
Eli Bendersky58d548d2012-05-29 15:45:16 +03002374static PyObject *
2375treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002376{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002377 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2378 if (t != NULL) {
2379 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002380
Eli Bendersky58d548d2012-05-29 15:45:16 +03002381 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002382 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002383 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002384 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002385
Eli Bendersky58d548d2012-05-29 15:45:16 +03002386 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002387 t->element_factory = NULL;
Stefan Behnel43851a22019-05-01 21:20:38 +02002388 t->comment_factory = NULL;
2389 t->pi_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002390 t->stack = PyList_New(20);
2391 if (!t->stack) {
2392 Py_DECREF(t->this);
2393 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002394 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002395 return NULL;
2396 }
2397 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002398
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002399 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002400 t->start_event_obj = t->end_event_obj = NULL;
2401 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
Stefan Behnel43851a22019-05-01 21:20:38 +02002402 t->comment_event_obj = t->pi_event_obj = NULL;
2403 t->insert_comments = t->insert_pis = 0;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002404 }
2405 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002406}
2407
Serhiy Storchakacb985562015-05-04 15:32:48 +03002408/*[clinic input]
2409_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002410
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002411 element_factory: object = None
Stefan Behnel43851a22019-05-01 21:20:38 +02002412 *
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002413 comment_factory: object = None
2414 pi_factory: object = None
Stefan Behnel43851a22019-05-01 21:20:38 +02002415 insert_comments: bool = False
2416 insert_pis: bool = False
Serhiy Storchakacb985562015-05-04 15:32:48 +03002417
2418[clinic start generated code]*/
2419
2420static int
2421_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
Stefan Behnel43851a22019-05-01 21:20:38 +02002422 PyObject *element_factory,
2423 PyObject *comment_factory,
2424 PyObject *pi_factory,
2425 int insert_comments, int insert_pis)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002426/*[clinic end generated code: output=8571d4dcadfdf952 input=ae98a94df20b5cc3]*/
Serhiy Storchakacb985562015-05-04 15:32:48 +03002427{
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002428 if (element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002429 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002430 Py_XSETREF(self->element_factory, element_factory);
Stefan Behnel43851a22019-05-01 21:20:38 +02002431 } else {
2432 Py_CLEAR(self->element_factory);
2433 }
2434
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002435 if (comment_factory == Py_None) {
Stefan Behnel43851a22019-05-01 21:20:38 +02002436 elementtreestate *st = ET_STATE_GLOBAL;
2437 comment_factory = st->comment_factory;
2438 }
2439 if (comment_factory) {
2440 Py_INCREF(comment_factory);
2441 Py_XSETREF(self->comment_factory, comment_factory);
2442 self->insert_comments = insert_comments;
2443 } else {
2444 Py_CLEAR(self->comment_factory);
2445 self->insert_comments = 0;
2446 }
2447
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002448 if (pi_factory == Py_None) {
Stefan Behnel43851a22019-05-01 21:20:38 +02002449 elementtreestate *st = ET_STATE_GLOBAL;
2450 pi_factory = st->pi_factory;
2451 }
2452 if (pi_factory) {
2453 Py_INCREF(pi_factory);
2454 Py_XSETREF(self->pi_factory, pi_factory);
2455 self->insert_pis = insert_pis;
2456 } else {
2457 Py_CLEAR(self->pi_factory);
2458 self->insert_pis = 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002459 }
2460
Eli Bendersky58d548d2012-05-29 15:45:16 +03002461 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002462}
2463
Eli Bendersky48d358b2012-05-30 17:57:50 +03002464static int
2465treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2466{
Stefan Behnel43851a22019-05-01 21:20:38 +02002467 Py_VISIT(self->pi_event_obj);
2468 Py_VISIT(self->comment_event_obj);
Serhiy Storchakad2a75c62018-12-18 22:29:14 +02002469 Py_VISIT(self->end_ns_event_obj);
2470 Py_VISIT(self->start_ns_event_obj);
2471 Py_VISIT(self->end_event_obj);
2472 Py_VISIT(self->start_event_obj);
2473 Py_VISIT(self->events_append);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002474 Py_VISIT(self->root);
2475 Py_VISIT(self->this);
2476 Py_VISIT(self->last);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002477 Py_VISIT(self->last_for_tail);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002478 Py_VISIT(self->data);
2479 Py_VISIT(self->stack);
Stefan Behnel43851a22019-05-01 21:20:38 +02002480 Py_VISIT(self->pi_factory);
2481 Py_VISIT(self->comment_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002482 Py_VISIT(self->element_factory);
2483 return 0;
2484}
2485
2486static int
2487treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002488{
Stefan Behnel43851a22019-05-01 21:20:38 +02002489 Py_CLEAR(self->pi_event_obj);
2490 Py_CLEAR(self->comment_event_obj);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002491 Py_CLEAR(self->end_ns_event_obj);
2492 Py_CLEAR(self->start_ns_event_obj);
2493 Py_CLEAR(self->end_event_obj);
2494 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002495 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002496 Py_CLEAR(self->stack);
2497 Py_CLEAR(self->data);
2498 Py_CLEAR(self->last);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002499 Py_CLEAR(self->last_for_tail);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002500 Py_CLEAR(self->this);
Stefan Behnel43851a22019-05-01 21:20:38 +02002501 Py_CLEAR(self->pi_factory);
2502 Py_CLEAR(self->comment_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002503 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002504 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002505 return 0;
2506}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002507
Eli Bendersky48d358b2012-05-30 17:57:50 +03002508static void
2509treebuilder_dealloc(TreeBuilderObject *self)
2510{
2511 PyObject_GC_UnTrack(self);
2512 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002513 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002514}
2515
2516/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002517/* helpers for handling of arbitrary element-like objects */
2518
Stefan Behnel43851a22019-05-01 21:20:38 +02002519/*[clinic input]
2520_elementtree._set_factories
2521
2522 comment_factory: object
2523 pi_factory: object
2524 /
2525
2526Change the factories used to create comments and processing instructions.
2527
2528For internal use only.
2529[clinic start generated code]*/
2530
2531static PyObject *
2532_elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
2533 PyObject *pi_factory)
2534/*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/
2535{
2536 elementtreestate *st = ET_STATE_GLOBAL;
2537 PyObject *old;
2538
2539 if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) {
2540 PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s",
2541 Py_TYPE(comment_factory)->tp_name);
2542 return NULL;
2543 }
2544 if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) {
2545 PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s",
2546 Py_TYPE(pi_factory)->tp_name);
2547 return NULL;
2548 }
2549
2550 old = PyTuple_Pack(2,
2551 st->comment_factory ? st->comment_factory : Py_None,
2552 st->pi_factory ? st->pi_factory : Py_None);
2553
2554 if (comment_factory == Py_None) {
2555 Py_CLEAR(st->comment_factory);
2556 } else {
2557 Py_INCREF(comment_factory);
2558 Py_XSETREF(st->comment_factory, comment_factory);
2559 }
2560 if (pi_factory == Py_None) {
2561 Py_CLEAR(st->pi_factory);
2562 } else {
2563 Py_INCREF(pi_factory);
2564 Py_XSETREF(st->pi_factory, pi_factory);
2565 }
2566
2567 return old;
2568}
2569
Antoine Pitrouee329312012-10-04 19:53:29 +02002570static int
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002571treebuilder_extend_element_text_or_tail(PyObject *element, PyObject **data,
2572 PyObject **dest, _Py_Identifier *name)
Antoine Pitrouee329312012-10-04 19:53:29 +02002573{
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002574 /* Fast paths for the "almost always" cases. */
Antoine Pitrouee329312012-10-04 19:53:29 +02002575 if (Element_CheckExact(element)) {
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002576 PyObject *dest_obj = JOIN_OBJ(*dest);
2577 if (dest_obj == Py_None) {
2578 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2579 *data = NULL;
2580 Py_DECREF(dest_obj);
2581 return 0;
2582 }
2583 else if (JOIN_GET(*dest)) {
2584 if (PyList_SetSlice(dest_obj, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, *data) < 0) {
2585 return -1;
2586 }
2587 Py_CLEAR(*data);
2588 return 0;
2589 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002590 }
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002591
2592 /* Fallback for the non-Element / non-trivial cases. */
2593 {
Antoine Pitrouee329312012-10-04 19:53:29 +02002594 int r;
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002595 PyObject* joined;
2596 PyObject* previous = _PyObject_GetAttrId(element, name);
2597 if (!previous)
Antoine Pitrouee329312012-10-04 19:53:29 +02002598 return -1;
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002599 joined = list_join(*data);
2600 if (!joined) {
2601 Py_DECREF(previous);
2602 return -1;
2603 }
2604 if (previous != Py_None) {
2605 PyObject *tmp = PyNumber_Add(previous, joined);
2606 Py_DECREF(joined);
2607 Py_DECREF(previous);
2608 if (!tmp)
2609 return -1;
2610 joined = tmp;
2611 } else {
2612 Py_DECREF(previous);
2613 }
2614
Antoine Pitrouee329312012-10-04 19:53:29 +02002615 r = _PyObject_SetAttrId(element, name, joined);
2616 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002617 if (r < 0)
2618 return -1;
2619 Py_CLEAR(*data);
2620 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002621 }
2622}
2623
Serhiy Storchaka576def02017-03-30 09:47:31 +03002624LOCAL(int)
2625treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002626{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002627 if (!self->data) {
2628 return 0;
2629 }
2630
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002631 if (!self->last_for_tail) {
2632 PyObject *element = self->last;
Serhiy Storchaka576def02017-03-30 09:47:31 +03002633 _Py_IDENTIFIER(text);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002634 return treebuilder_extend_element_text_or_tail(
Serhiy Storchaka576def02017-03-30 09:47:31 +03002635 element, &self->data,
2636 &((ElementObject *) element)->text, &PyId_text);
2637 }
2638 else {
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002639 PyObject *element = self->last_for_tail;
Serhiy Storchaka576def02017-03-30 09:47:31 +03002640 _Py_IDENTIFIER(tail);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002641 return treebuilder_extend_element_text_or_tail(
Serhiy Storchaka576def02017-03-30 09:47:31 +03002642 element, &self->data,
2643 &((ElementObject *) element)->tail, &PyId_tail);
2644 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002645}
2646
2647static int
2648treebuilder_add_subelement(PyObject *element, PyObject *child)
2649{
2650 _Py_IDENTIFIER(append);
2651 if (Element_CheckExact(element)) {
2652 ElementObject *elem = (ElementObject *) element;
2653 return element_add_subelement(elem, child);
2654 }
2655 else {
2656 PyObject *res;
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02002657 res = _PyObject_CallMethodIdOneArg(element, &PyId_append, child);
Antoine Pitrouee329312012-10-04 19:53:29 +02002658 if (res == NULL)
2659 return -1;
2660 Py_DECREF(res);
2661 return 0;
2662 }
2663}
2664
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002665LOCAL(int)
2666treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2667 PyObject *node)
2668{
2669 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002670 PyObject *res;
2671 PyObject *event = PyTuple_Pack(2, action, node);
2672 if (event == NULL)
2673 return -1;
Jeroen Demeyer196a5302019-07-04 12:31:34 +02002674 res = _PyObject_CallOneArg(self->events_append, event);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002675 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002676 if (res == NULL)
2677 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002678 Py_DECREF(res);
2679 }
2680 return 0;
2681}
2682
Antoine Pitrouee329312012-10-04 19:53:29 +02002683/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002684/* handlers */
2685
2686LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002687treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2688 PyObject* attrib)
2689{
2690 PyObject* node;
2691 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002692 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002693
Serhiy Storchaka576def02017-03-30 09:47:31 +03002694 if (treebuilder_flush_data(self) < 0) {
2695 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002696 }
2697
Stefan Behnel43851a22019-05-01 21:20:38 +02002698 if (!self->element_factory) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002699 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002700 } else if (attrib == Py_None) {
2701 attrib = PyDict_New();
2702 if (!attrib)
2703 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002704 node = PyObject_CallFunctionObjArgs(self->element_factory,
2705 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002706 Py_DECREF(attrib);
2707 }
2708 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002709 node = PyObject_CallFunctionObjArgs(self->element_factory,
2710 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002711 }
2712 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002713 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002714 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002715
Antoine Pitrouee329312012-10-04 19:53:29 +02002716 this = self->this;
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002717 Py_CLEAR(self->last_for_tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002718
2719 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002720 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002721 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002722 } else {
2723 if (self->root) {
2724 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002725 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002726 "multiple elements on top level"
2727 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002728 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002729 }
2730 Py_INCREF(node);
2731 self->root = node;
2732 }
2733
2734 if (self->index < PyList_GET_SIZE(self->stack)) {
2735 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002736 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002737 Py_INCREF(this);
2738 } else {
2739 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002740 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002741 }
2742 self->index++;
2743
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002744 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002745 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002746 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002747 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002748
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002749 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2750 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002751
2752 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002753
2754 error:
2755 Py_DECREF(node);
2756 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002757}
2758
2759LOCAL(PyObject*)
2760treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2761{
2762 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002763 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002764 /* ignore calls to data before the first call to start */
2765 Py_RETURN_NONE;
2766 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002767 /* store the first item as is */
2768 Py_INCREF(data); self->data = data;
2769 } else {
2770 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002771 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2772 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002773 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002774 /* expat often generates single character data sections; handle
2775 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002776 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2777 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002778 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002779 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002780 } else if (PyList_CheckExact(self->data)) {
2781 if (PyList_Append(self->data, data) < 0)
2782 return NULL;
2783 } else {
2784 PyObject* list = PyList_New(2);
2785 if (!list)
2786 return NULL;
2787 PyList_SET_ITEM(list, 0, self->data);
2788 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2789 self->data = list;
2790 }
2791 }
2792
2793 Py_RETURN_NONE;
2794}
2795
2796LOCAL(PyObject*)
2797treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2798{
2799 PyObject* item;
2800
Serhiy Storchaka576def02017-03-30 09:47:31 +03002801 if (treebuilder_flush_data(self) < 0) {
2802 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002803 }
2804
2805 if (self->index == 0) {
2806 PyErr_SetString(
2807 PyExc_IndexError,
2808 "pop from empty stack"
2809 );
2810 return NULL;
2811 }
2812
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002813 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002814 self->last = self->this;
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002815 Py_INCREF(self->last);
2816 Py_XSETREF(self->last_for_tail, self->last);
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002817 self->index--;
2818 self->this = PyList_GET_ITEM(self->stack, self->index);
2819 Py_INCREF(self->this);
2820 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002821
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002822 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2823 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002824
2825 Py_INCREF(self->last);
2826 return (PyObject*) self->last;
2827}
2828
Stefan Behnel43851a22019-05-01 21:20:38 +02002829LOCAL(PyObject*)
2830treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
2831{
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002832 PyObject* comment;
Stefan Behnel43851a22019-05-01 21:20:38 +02002833 PyObject* this;
2834
2835 if (treebuilder_flush_data(self) < 0) {
2836 return NULL;
2837 }
2838
2839 if (self->comment_factory) {
Jeroen Demeyer196a5302019-07-04 12:31:34 +02002840 comment = _PyObject_CallOneArg(self->comment_factory, text);
Stefan Behnel43851a22019-05-01 21:20:38 +02002841 if (!comment)
2842 return NULL;
2843
2844 this = self->this;
2845 if (self->insert_comments && this != Py_None) {
2846 if (treebuilder_add_subelement(this, comment) < 0)
2847 goto error;
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002848 Py_INCREF(comment);
2849 Py_XSETREF(self->last_for_tail, comment);
Stefan Behnel43851a22019-05-01 21:20:38 +02002850 }
2851 } else {
2852 Py_INCREF(text);
2853 comment = text;
2854 }
2855
2856 if (self->events_append && self->comment_event_obj) {
2857 if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0)
2858 goto error;
2859 }
2860
2861 return comment;
2862
2863 error:
2864 Py_DECREF(comment);
2865 return NULL;
2866}
2867
2868LOCAL(PyObject*)
2869treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
2870{
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002871 PyObject* pi;
Stefan Behnel43851a22019-05-01 21:20:38 +02002872 PyObject* this;
2873 PyObject* stack[2] = {target, text};
2874
2875 if (treebuilder_flush_data(self) < 0) {
2876 return NULL;
2877 }
2878
2879 if (self->pi_factory) {
2880 pi = _PyObject_FastCall(self->pi_factory, stack, 2);
2881 if (!pi) {
2882 return NULL;
2883 }
2884
2885 this = self->this;
2886 if (self->insert_pis && this != Py_None) {
2887 if (treebuilder_add_subelement(this, pi) < 0)
2888 goto error;
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002889 Py_INCREF(pi);
2890 Py_XSETREF(self->last_for_tail, pi);
Stefan Behnel43851a22019-05-01 21:20:38 +02002891 }
2892 } else {
2893 pi = PyTuple_Pack(2, target, text);
2894 if (!pi) {
2895 return NULL;
2896 }
2897 }
2898
2899 if (self->events_append && self->pi_event_obj) {
2900 if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0)
2901 goto error;
2902 }
2903
2904 return pi;
2905
2906 error:
2907 Py_DECREF(pi);
2908 return NULL;
2909}
2910
Stefan Behneldde3eeb2019-05-01 21:49:58 +02002911LOCAL(PyObject*)
2912treebuilder_handle_start_ns(TreeBuilderObject* self, PyObject* prefix, PyObject* uri)
2913{
2914 PyObject* parcel;
2915
2916 if (self->events_append && self->start_ns_event_obj) {
2917 parcel = PyTuple_Pack(2, prefix, uri);
2918 if (!parcel) {
2919 return NULL;
2920 }
2921
2922 if (treebuilder_append_event(self, self->start_ns_event_obj, parcel) < 0) {
2923 Py_DECREF(parcel);
2924 return NULL;
2925 }
2926 Py_DECREF(parcel);
2927 }
2928
2929 Py_RETURN_NONE;
2930}
2931
2932LOCAL(PyObject*)
2933treebuilder_handle_end_ns(TreeBuilderObject* self, PyObject* prefix)
2934{
2935 if (self->events_append && self->end_ns_event_obj) {
2936 if (treebuilder_append_event(self, self->end_ns_event_obj, prefix) < 0) {
2937 return NULL;
2938 }
2939 }
2940
2941 Py_RETURN_NONE;
2942}
2943
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002944/* -------------------------------------------------------------------- */
2945/* methods (in alphabetical order) */
2946
Serhiy Storchakacb985562015-05-04 15:32:48 +03002947/*[clinic input]
2948_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002949
Serhiy Storchakacb985562015-05-04 15:32:48 +03002950 data: object
2951 /
2952
2953[clinic start generated code]*/
2954
2955static PyObject *
2956_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2957/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2958{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002959 return treebuilder_handle_data(self, data);
2960}
2961
Serhiy Storchakacb985562015-05-04 15:32:48 +03002962/*[clinic input]
2963_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002964
Serhiy Storchakacb985562015-05-04 15:32:48 +03002965 tag: object
2966 /
2967
2968[clinic start generated code]*/
2969
2970static PyObject *
2971_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2972/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2973{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002974 return treebuilder_handle_end(self, tag);
2975}
2976
Stefan Behnel43851a22019-05-01 21:20:38 +02002977/*[clinic input]
2978_elementtree.TreeBuilder.comment
2979
2980 text: object
2981 /
2982
2983[clinic start generated code]*/
2984
2985static PyObject *
2986_elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text)
2987/*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/
2988{
2989 return treebuilder_handle_comment(self, text);
2990}
2991
2992/*[clinic input]
2993_elementtree.TreeBuilder.pi
2994
2995 target: object
2996 text: object = None
2997 /
2998
2999[clinic start generated code]*/
3000
3001static PyObject *
3002_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target,
3003 PyObject *text)
3004/*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/
3005{
3006 return treebuilder_handle_pi(self, target, text);
3007}
3008
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003009LOCAL(PyObject*)
3010treebuilder_done(TreeBuilderObject* self)
3011{
3012 PyObject* res;
3013
3014 /* FIXME: check stack size? */
3015
3016 if (self->root)
3017 res = self->root;
3018 else
3019 res = Py_None;
3020
3021 Py_INCREF(res);
3022 return res;
3023}
3024
Serhiy Storchakacb985562015-05-04 15:32:48 +03003025/*[clinic input]
3026_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003027
Serhiy Storchakacb985562015-05-04 15:32:48 +03003028[clinic start generated code]*/
3029
3030static PyObject *
3031_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
3032/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
3033{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003034 return treebuilder_done(self);
3035}
3036
Serhiy Storchakacb985562015-05-04 15:32:48 +03003037/*[clinic input]
3038_elementtree.TreeBuilder.start
3039
3040 tag: object
3041 attrs: object = None
3042 /
3043
3044[clinic start generated code]*/
3045
3046static PyObject *
3047_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
3048 PyObject *attrs)
3049/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003050{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003051 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003052}
3053
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003054/* ==================================================================== */
3055/* the expat interface */
3056
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003057#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003058#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07003059
3060/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
3061 * cached globally without being in per-module state.
3062 */
Eli Bendersky20d41742012-06-01 09:48:37 +03003063static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003064#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003065
Eli Bendersky52467b12012-06-01 07:13:08 +03003066static XML_Memory_Handling_Suite ExpatMemoryHandler = {
3067 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
3068
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003069typedef struct {
3070 PyObject_HEAD
3071
3072 XML_Parser parser;
3073
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003074 PyObject *target;
3075 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003076
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003077 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003078
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003079 PyObject *handle_start_ns;
3080 PyObject *handle_end_ns;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003081 PyObject *handle_start;
3082 PyObject *handle_data;
3083 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003084
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003085 PyObject *handle_comment;
3086 PyObject *handle_pi;
3087 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003088
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003089 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003090
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003091} XMLParserObject;
3092
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003093/* helpers */
3094
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003095LOCAL(PyObject*)
3096makeuniversal(XMLParserObject* self, const char* string)
3097{
3098 /* convert a UTF-8 tag/attribute name from the expat parser
3099 to a universal name string */
3100
Antoine Pitrouc1948842012-10-01 23:40:37 +02003101 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003102 PyObject* key;
3103 PyObject* value;
3104
3105 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00003106 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003107 if (!key)
3108 return NULL;
3109
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003110 value = PyDict_GetItemWithError(self->names, key);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003111
3112 if (value) {
3113 Py_INCREF(value);
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003114 }
3115 else if (!PyErr_Occurred()) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003116 /* new name. convert to universal name, and decode as
3117 necessary */
3118
3119 PyObject* tag;
3120 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02003121 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003122
3123 /* look for namespace separator */
3124 for (i = 0; i < size; i++)
3125 if (string[i] == '}')
3126 break;
3127 if (i != size) {
3128 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00003129 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02003130 if (tag == NULL) {
3131 Py_DECREF(key);
3132 return NULL;
3133 }
Christian Heimes72b710a2008-05-26 13:28:38 +00003134 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003135 p[0] = '{';
3136 memcpy(p+1, string, size);
3137 size++;
3138 } else {
3139 /* plain name; use key as tag */
3140 Py_INCREF(key);
3141 tag = key;
3142 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003143
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003144 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00003145 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00003146 value = PyUnicode_DecodeUTF8(p, size, "strict");
3147 Py_DECREF(tag);
3148 if (!value) {
3149 Py_DECREF(key);
3150 return NULL;
3151 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003152
3153 /* add to names dictionary */
3154 if (PyDict_SetItem(self->names, key, value) < 0) {
3155 Py_DECREF(key);
3156 Py_DECREF(value);
3157 return NULL;
3158 }
3159 }
3160
3161 Py_DECREF(key);
3162 return value;
3163}
3164
Eli Bendersky5b77d812012-03-16 08:20:05 +02003165/* Set the ParseError exception with the given parameters.
3166 * If message is not NULL, it's used as the error string. Otherwise, the
3167 * message string is the default for the given error_code.
3168*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003169static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003170expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
3171 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003172{
Eli Bendersky5b77d812012-03-16 08:20:05 +02003173 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003174 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003175
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003176 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02003177 message ? message : EXPAT(ErrorString)(error_code),
3178 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01003179 if (errmsg == NULL)
3180 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003181
Jeroen Demeyer196a5302019-07-04 12:31:34 +02003182 error = _PyObject_CallOneArg(st->parseerror_obj, errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01003183 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003184 if (!error)
3185 return;
3186
Eli Bendersky5b77d812012-03-16 08:20:05 +02003187 /* Add code and position attributes */
3188 code = PyLong_FromLong((long)error_code);
3189 if (!code) {
3190 Py_DECREF(error);
3191 return;
3192 }
3193 if (PyObject_SetAttrString(error, "code", code) == -1) {
3194 Py_DECREF(error);
3195 Py_DECREF(code);
3196 return;
3197 }
3198 Py_DECREF(code);
3199
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003200 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003201 if (!position) {
3202 Py_DECREF(error);
3203 return;
3204 }
3205 if (PyObject_SetAttrString(error, "position", position) == -1) {
3206 Py_DECREF(error);
3207 Py_DECREF(position);
3208 return;
3209 }
3210 Py_DECREF(position);
3211
Eli Bendersky532d03e2013-08-10 08:00:39 -07003212 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003213 Py_DECREF(error);
3214}
3215
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003216/* -------------------------------------------------------------------- */
3217/* handlers */
3218
3219static void
3220expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
3221 int data_len)
3222{
3223 PyObject* key;
3224 PyObject* value;
3225 PyObject* res;
3226
3227 if (data_len < 2 || data_in[0] != '&')
3228 return;
3229
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003230 if (PyErr_Occurred())
3231 return;
3232
Neal Norwitz0269b912007-08-08 06:56:02 +00003233 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003234 if (!key)
3235 return;
3236
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003237 value = PyDict_GetItemWithError(self->entity, key);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003238
3239 if (value) {
3240 if (TreeBuilder_CheckExact(self->target))
3241 res = treebuilder_handle_data(
3242 (TreeBuilderObject*) self->target, value
3243 );
3244 else if (self->handle_data)
Jeroen Demeyer196a5302019-07-04 12:31:34 +02003245 res = _PyObject_CallOneArg(self->handle_data, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003246 else
3247 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003248 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003249 } else if (!PyErr_Occurred()) {
3250 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00003251 char message[128] = "undefined entity ";
3252 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003253 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003254 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003255 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003256 EXPAT(GetErrorColumnNumber)(self->parser),
3257 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003258 );
3259 }
3260
3261 Py_DECREF(key);
3262}
3263
3264static void
3265expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
3266 const XML_Char **attrib_in)
3267{
3268 PyObject* res;
3269 PyObject* tag;
3270 PyObject* attrib;
3271 int ok;
3272
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003273 if (PyErr_Occurred())
3274 return;
3275
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003276 /* tag name */
3277 tag = makeuniversal(self, tag_in);
3278 if (!tag)
3279 return; /* parser will look for errors */
3280
3281 /* attributes */
3282 if (attrib_in[0]) {
3283 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003284 if (!attrib) {
3285 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003286 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003287 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003288 while (attrib_in[0] && attrib_in[1]) {
3289 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00003290 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003291 if (!key || !value) {
3292 Py_XDECREF(value);
3293 Py_XDECREF(key);
3294 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003295 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003296 return;
3297 }
3298 ok = PyDict_SetItem(attrib, key, value);
3299 Py_DECREF(value);
3300 Py_DECREF(key);
3301 if (ok < 0) {
3302 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003303 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003304 return;
3305 }
3306 attrib_in += 2;
3307 }
3308 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003309 Py_INCREF(Py_None);
3310 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03003311 }
3312
3313 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003314 /* shortcut */
3315 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3316 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003317 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003318 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003319 if (attrib == Py_None) {
3320 Py_DECREF(attrib);
3321 attrib = PyDict_New();
3322 if (!attrib) {
3323 Py_DECREF(tag);
3324 return;
3325 }
3326 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003327 res = PyObject_CallFunctionObjArgs(self->handle_start,
3328 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003329 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003330 res = NULL;
3331
3332 Py_DECREF(tag);
3333 Py_DECREF(attrib);
3334
3335 Py_XDECREF(res);
3336}
3337
3338static void
3339expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3340 int data_len)
3341{
3342 PyObject* data;
3343 PyObject* res;
3344
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003345 if (PyErr_Occurred())
3346 return;
3347
Neal Norwitz0269b912007-08-08 06:56:02 +00003348 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003349 if (!data)
3350 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003351
3352 if (TreeBuilder_CheckExact(self->target))
3353 /* shortcut */
3354 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3355 else if (self->handle_data)
Jeroen Demeyer196a5302019-07-04 12:31:34 +02003356 res = _PyObject_CallOneArg(self->handle_data, data);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003357 else
3358 res = NULL;
3359
3360 Py_DECREF(data);
3361
3362 Py_XDECREF(res);
3363}
3364
3365static void
3366expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3367{
3368 PyObject* tag;
3369 PyObject* res = NULL;
3370
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003371 if (PyErr_Occurred())
3372 return;
3373
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003374 if (TreeBuilder_CheckExact(self->target))
3375 /* shortcut */
3376 /* the standard tree builder doesn't look at the end tag */
3377 res = treebuilder_handle_end(
3378 (TreeBuilderObject*) self->target, Py_None
3379 );
3380 else if (self->handle_end) {
3381 tag = makeuniversal(self, tag_in);
3382 if (tag) {
Jeroen Demeyer196a5302019-07-04 12:31:34 +02003383 res = _PyObject_CallOneArg(self->handle_end, tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003384 Py_DECREF(tag);
3385 }
3386 }
3387
3388 Py_XDECREF(res);
3389}
3390
3391static void
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003392expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix_in,
3393 const XML_Char *uri_in)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003394{
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003395 PyObject* res = NULL;
3396 PyObject* uri;
3397 PyObject* prefix;
3398 PyObject* stack[2];
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003399
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003400 if (PyErr_Occurred())
3401 return;
3402
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003403 if (!uri_in)
3404 uri_in = "";
3405 if (!prefix_in)
3406 prefix_in = "";
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003407
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003408 if (TreeBuilder_CheckExact(self->target)) {
3409 /* shortcut - TreeBuilder does not actually implement .start_ns() */
3410 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003411
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003412 if (target->events_append && target->start_ns_event_obj) {
3413 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3414 if (!prefix)
3415 return;
3416 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3417 if (!uri) {
3418 Py_DECREF(prefix);
3419 return;
3420 }
3421
3422 res = treebuilder_handle_start_ns(target, prefix, uri);
3423 Py_DECREF(uri);
3424 Py_DECREF(prefix);
3425 }
3426 } else if (self->handle_start_ns) {
3427 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3428 if (!prefix)
3429 return;
3430 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3431 if (!uri) {
3432 Py_DECREF(prefix);
3433 return;
3434 }
3435
3436 stack[0] = prefix;
3437 stack[1] = uri;
3438 res = _PyObject_FastCall(self->handle_start_ns, stack, 2);
3439 Py_DECREF(uri);
3440 Py_DECREF(prefix);
3441 }
3442
3443 Py_XDECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003444}
3445
3446static void
3447expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3448{
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003449 PyObject *res = NULL;
3450 PyObject* prefix;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003451
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003452 if (PyErr_Occurred())
3453 return;
3454
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003455 if (!prefix_in)
3456 prefix_in = "";
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003457
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003458 if (TreeBuilder_CheckExact(self->target)) {
3459 /* shortcut - TreeBuilder does not actually implement .end_ns() */
3460 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3461
3462 if (target->events_append && target->end_ns_event_obj) {
3463 res = treebuilder_handle_end_ns(target, Py_None);
3464 }
3465 } else if (self->handle_end_ns) {
3466 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3467 if (!prefix)
3468 return;
3469
Jeroen Demeyer196a5302019-07-04 12:31:34 +02003470 res = _PyObject_CallOneArg(self->handle_end_ns, prefix);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003471 Py_DECREF(prefix);
3472 }
3473
3474 Py_XDECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003475}
3476
3477static void
3478expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3479{
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02003480 PyObject* comment;
3481 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003482
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003483 if (PyErr_Occurred())
3484 return;
3485
Stefan Behnel43851a22019-05-01 21:20:38 +02003486 if (TreeBuilder_CheckExact(self->target)) {
3487 /* shortcut */
3488 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3489
Neal Norwitz0269b912007-08-08 06:56:02 +00003490 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Stefan Behnel43851a22019-05-01 21:20:38 +02003491 if (!comment)
3492 return; /* parser will look for errors */
3493
3494 res = treebuilder_handle_comment(target, comment);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02003495 Py_XDECREF(res);
3496 Py_DECREF(comment);
Stefan Behnel43851a22019-05-01 21:20:38 +02003497 } else if (self->handle_comment) {
3498 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3499 if (!comment)
3500 return;
3501
Jeroen Demeyer196a5302019-07-04 12:31:34 +02003502 res = _PyObject_CallOneArg(self->handle_comment, comment);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02003503 Py_XDECREF(res);
3504 Py_DECREF(comment);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003505 }
3506}
3507
Eli Bendersky45839902013-01-13 05:14:47 -08003508static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003509expat_start_doctype_handler(XMLParserObject *self,
3510 const XML_Char *doctype_name,
3511 const XML_Char *sysid,
3512 const XML_Char *pubid,
3513 int has_internal_subset)
3514{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003515 _Py_IDENTIFIER(doctype);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003516 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003517 PyObject *res;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003518
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003519 if (PyErr_Occurred())
3520 return;
3521
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003522 doctype_name_obj = makeuniversal(self, doctype_name);
3523 if (!doctype_name_obj)
3524 return;
3525
3526 if (sysid) {
3527 sysid_obj = makeuniversal(self, sysid);
3528 if (!sysid_obj) {
3529 Py_DECREF(doctype_name_obj);
3530 return;
3531 }
3532 } else {
3533 Py_INCREF(Py_None);
3534 sysid_obj = Py_None;
3535 }
3536
3537 if (pubid) {
3538 pubid_obj = makeuniversal(self, pubid);
3539 if (!pubid_obj) {
3540 Py_DECREF(doctype_name_obj);
3541 Py_DECREF(sysid_obj);
3542 return;
3543 }
3544 } else {
3545 Py_INCREF(Py_None);
3546 pubid_obj = Py_None;
3547 }
3548
3549 /* If the target has a handler for doctype, call it. */
3550 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003551 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3552 doctype_name_obj, pubid_obj,
3553 sysid_obj, NULL);
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003554 Py_XDECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003555 }
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003556 else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3557 (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3558 "The doctype() method of XMLParser is ignored. "
3559 "Define doctype() method on the TreeBuilder target.",
3560 1);
Serhiy Storchakaee98e7b2018-07-25 14:52:45 +03003561 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003562 }
3563
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003564 Py_DECREF(doctype_name_obj);
3565 Py_DECREF(pubid_obj);
3566 Py_DECREF(sysid_obj);
3567}
3568
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003569static void
3570expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3571 const XML_Char* data_in)
3572{
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02003573 PyObject* pi_target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003574 PyObject* data;
3575 PyObject* res;
Stefan Behnel43851a22019-05-01 21:20:38 +02003576 PyObject* stack[2];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003577
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003578 if (PyErr_Occurred())
3579 return;
3580
Stefan Behnel43851a22019-05-01 21:20:38 +02003581 if (TreeBuilder_CheckExact(self->target)) {
3582 /* shortcut */
3583 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3584
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02003585 if ((target->events_append && target->pi_event_obj) || target->insert_pis) {
Stefan Behnel43851a22019-05-01 21:20:38 +02003586 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3587 if (!pi_target)
3588 goto error;
3589 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3590 if (!data)
3591 goto error;
3592 res = treebuilder_handle_pi(target, pi_target, data);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003593 Py_XDECREF(res);
3594 Py_DECREF(data);
Stefan Behnel43851a22019-05-01 21:20:38 +02003595 Py_DECREF(pi_target);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003596 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003597 } else if (self->handle_pi) {
3598 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3599 if (!pi_target)
3600 goto error;
3601 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3602 if (!data)
3603 goto error;
3604
3605 stack[0] = pi_target;
3606 stack[1] = data;
3607 res = _PyObject_FastCall(self->handle_pi, stack, 2);
3608 Py_XDECREF(res);
3609 Py_DECREF(data);
3610 Py_DECREF(pi_target);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003611 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003612
3613 return;
3614
3615 error:
3616 Py_XDECREF(pi_target);
3617 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003618}
3619
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003620/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003621
Eli Bendersky52467b12012-06-01 07:13:08 +03003622static PyObject *
3623xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003624{
Eli Bendersky52467b12012-06-01 07:13:08 +03003625 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3626 if (self) {
3627 self->parser = NULL;
3628 self->target = self->entity = self->names = NULL;
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003629 self->handle_start_ns = self->handle_end_ns = NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003630 self->handle_start = self->handle_data = self->handle_end = NULL;
3631 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003632 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003633 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003634 return (PyObject *)self;
3635}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003636
scoderc8d8e152017-09-14 22:00:03 +02003637static int
3638ignore_attribute_error(PyObject *value)
3639{
3640 if (value == NULL) {
3641 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3642 return -1;
3643 }
3644 PyErr_Clear();
3645 }
3646 return 0;
3647}
3648
Serhiy Storchakacb985562015-05-04 15:32:48 +03003649/*[clinic input]
3650_elementtree.XMLParser.__init__
3651
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003652 *
Serhiy Storchakacb985562015-05-04 15:32:48 +03003653 target: object = NULL
Serhiy Storchaka279f4462019-09-14 12:24:05 +03003654 encoding: str(accept={str, NoneType}) = None
Serhiy Storchakacb985562015-05-04 15:32:48 +03003655
3656[clinic start generated code]*/
3657
Eli Bendersky52467b12012-06-01 07:13:08 +03003658static int
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003659_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3660 const char *encoding)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03003661/*[clinic end generated code: output=3ae45ec6cdf344e4 input=53e35a829ae043e8]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003662{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003663 self->entity = PyDict_New();
3664 if (!self->entity)
3665 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003666
Serhiy Storchakacb985562015-05-04 15:32:48 +03003667 self->names = PyDict_New();
3668 if (!self->names) {
3669 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003670 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003671 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003672
Serhiy Storchakacb985562015-05-04 15:32:48 +03003673 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3674 if (!self->parser) {
3675 Py_CLEAR(self->entity);
3676 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003677 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003678 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003679 }
Christian Heimescb5778f2018-09-18 14:38:58 +02003680 /* expat < 2.1.0 has no XML_SetHashSalt() */
3681 if (EXPAT(SetHashSalt) != NULL) {
3682 EXPAT(SetHashSalt)(self->parser,
3683 (unsigned long)_Py_HashSecret.expat.hashsalt);
3684 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003685
Eli Bendersky52467b12012-06-01 07:13:08 +03003686 if (target) {
3687 Py_INCREF(target);
3688 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003689 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003690 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003691 Py_CLEAR(self->entity);
3692 Py_CLEAR(self->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003693 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003694 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003695 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003696 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003697
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003698 self->handle_start_ns = PyObject_GetAttrString(target, "start_ns");
3699 if (ignore_attribute_error(self->handle_start_ns)) {
3700 return -1;
3701 }
3702 self->handle_end_ns = PyObject_GetAttrString(target, "end_ns");
3703 if (ignore_attribute_error(self->handle_end_ns)) {
3704 return -1;
3705 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003706 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003707 if (ignore_attribute_error(self->handle_start)) {
3708 return -1;
3709 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003710 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003711 if (ignore_attribute_error(self->handle_data)) {
3712 return -1;
3713 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003714 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003715 if (ignore_attribute_error(self->handle_end)) {
3716 return -1;
3717 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003718 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003719 if (ignore_attribute_error(self->handle_comment)) {
3720 return -1;
3721 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003722 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003723 if (ignore_attribute_error(self->handle_pi)) {
3724 return -1;
3725 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003726 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003727 if (ignore_attribute_error(self->handle_close)) {
3728 return -1;
3729 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003730 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003731 if (ignore_attribute_error(self->handle_doctype)) {
3732 return -1;
3733 }
Eli Bendersky45839902013-01-13 05:14:47 -08003734
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003735 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003736 EXPAT(SetUserData)(self->parser, self);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003737 if (self->handle_start_ns || self->handle_end_ns)
3738 EXPAT(SetNamespaceDeclHandler)(
3739 self->parser,
3740 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3741 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3742 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003743 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003744 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003745 (XML_StartElementHandler) expat_start_handler,
3746 (XML_EndElementHandler) expat_end_handler
3747 );
3748 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003749 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003750 (XML_DefaultHandler) expat_default_handler
3751 );
3752 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003753 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003754 (XML_CharacterDataHandler) expat_data_handler
3755 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003756 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003757 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003758 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003759 (XML_CommentHandler) expat_comment_handler
3760 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003761 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003762 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003763 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003764 (XML_ProcessingInstructionHandler) expat_pi_handler
3765 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003766 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003767 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003768 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3769 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003770 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003771 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003772 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003773 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003774
Eli Bendersky52467b12012-06-01 07:13:08 +03003775 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003776}
3777
Eli Bendersky52467b12012-06-01 07:13:08 +03003778static int
3779xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3780{
3781 Py_VISIT(self->handle_close);
3782 Py_VISIT(self->handle_pi);
3783 Py_VISIT(self->handle_comment);
3784 Py_VISIT(self->handle_end);
3785 Py_VISIT(self->handle_data);
3786 Py_VISIT(self->handle_start);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003787 Py_VISIT(self->handle_start_ns);
3788 Py_VISIT(self->handle_end_ns);
3789 Py_VISIT(self->handle_doctype);
Eli Bendersky52467b12012-06-01 07:13:08 +03003790
3791 Py_VISIT(self->target);
3792 Py_VISIT(self->entity);
3793 Py_VISIT(self->names);
3794
3795 return 0;
3796}
3797
3798static int
3799xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003800{
Victor Stinnere727d412017-09-18 05:29:37 -07003801 if (self->parser != NULL) {
3802 XML_Parser parser = self->parser;
3803 self->parser = NULL;
3804 EXPAT(ParserFree)(parser);
3805 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003806
Antoine Pitrouc1948842012-10-01 23:40:37 +02003807 Py_CLEAR(self->handle_close);
3808 Py_CLEAR(self->handle_pi);
3809 Py_CLEAR(self->handle_comment);
3810 Py_CLEAR(self->handle_end);
3811 Py_CLEAR(self->handle_data);
3812 Py_CLEAR(self->handle_start);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003813 Py_CLEAR(self->handle_start_ns);
3814 Py_CLEAR(self->handle_end_ns);
Antoine Pitrouc1948842012-10-01 23:40:37 +02003815 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003816
Antoine Pitrouc1948842012-10-01 23:40:37 +02003817 Py_CLEAR(self->target);
3818 Py_CLEAR(self->entity);
3819 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003820
Eli Bendersky52467b12012-06-01 07:13:08 +03003821 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003822}
3823
Eli Bendersky52467b12012-06-01 07:13:08 +03003824static void
3825xmlparser_dealloc(XMLParserObject* self)
3826{
3827 PyObject_GC_UnTrack(self);
3828 xmlparser_gc_clear(self);
3829 Py_TYPE(self)->tp_free((PyObject *)self);
3830}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003831
3832LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003833expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003834{
3835 int ok;
3836
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003837 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003838 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3839
3840 if (PyErr_Occurred())
3841 return NULL;
3842
3843 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003844 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003845 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003846 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003847 EXPAT(GetErrorColumnNumber)(self->parser),
3848 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003849 );
3850 return NULL;
3851 }
3852
3853 Py_RETURN_NONE;
3854}
3855
Serhiy Storchakacb985562015-05-04 15:32:48 +03003856/*[clinic input]
3857_elementtree.XMLParser.close
3858
3859[clinic start generated code]*/
3860
3861static PyObject *
3862_elementtree_XMLParser_close_impl(XMLParserObject *self)
3863/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003864{
3865 /* end feeding data to parser */
3866
3867 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003868 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003869 if (!res)
3870 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003871
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003872 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003873 Py_DECREF(res);
3874 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003875 }
3876 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003877 Py_DECREF(res);
Victor Stinner2ff58a22019-06-17 14:27:23 +02003878 return PyObject_CallNoArgs(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003879 }
3880 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003881 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003882 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003883}
3884
Serhiy Storchakacb985562015-05-04 15:32:48 +03003885/*[clinic input]
3886_elementtree.XMLParser.feed
3887
3888 data: object
3889 /
3890
3891[clinic start generated code]*/
3892
3893static PyObject *
3894_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3895/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003896{
3897 /* feed data to parser */
3898
Serhiy Storchakacb985562015-05-04 15:32:48 +03003899 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003900 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003901 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3902 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003903 return NULL;
3904 if (data_len > INT_MAX) {
3905 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3906 return NULL;
3907 }
3908 /* Explicitly set UTF-8 encoding. Return code ignored. */
3909 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003910 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003911 }
3912 else {
3913 Py_buffer view;
3914 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003915 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003916 return NULL;
3917 if (view.len > INT_MAX) {
3918 PyBuffer_Release(&view);
3919 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3920 return NULL;
3921 }
3922 res = expat_parse(self, view.buf, (int)view.len, 0);
3923 PyBuffer_Release(&view);
3924 return res;
3925 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003926}
3927
Serhiy Storchakacb985562015-05-04 15:32:48 +03003928/*[clinic input]
3929_elementtree.XMLParser._parse_whole
3930
3931 file: object
3932 /
3933
3934[clinic start generated code]*/
3935
3936static PyObject *
3937_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3938/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003939{
Eli Benderskya3699232013-05-19 18:47:23 -07003940 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003941 PyObject* reader;
3942 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003943 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003944 PyObject* res;
3945
Serhiy Storchakacb985562015-05-04 15:32:48 +03003946 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003947 if (!reader)
3948 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003949
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003950 /* read from open file object */
3951 for (;;) {
3952
3953 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3954
3955 if (!buffer) {
3956 /* read failed (e.g. due to KeyboardInterrupt) */
3957 Py_DECREF(reader);
3958 return NULL;
3959 }
3960
Eli Benderskyf996e772012-03-16 05:53:30 +02003961 if (PyUnicode_CheckExact(buffer)) {
3962 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003963 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003964 Py_DECREF(buffer);
3965 break;
3966 }
3967 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003968 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003969 if (!temp) {
3970 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003971 Py_DECREF(reader);
3972 return NULL;
3973 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003974 buffer = temp;
3975 }
3976 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003977 Py_DECREF(buffer);
3978 break;
3979 }
3980
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003981 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3982 Py_DECREF(buffer);
3983 Py_DECREF(reader);
3984 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3985 return NULL;
3986 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003987 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003988 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003989 );
3990
3991 Py_DECREF(buffer);
3992
3993 if (!res) {
3994 Py_DECREF(reader);
3995 return NULL;
3996 }
3997 Py_DECREF(res);
3998
3999 }
4000
4001 Py_DECREF(reader);
4002
4003 res = expat_parse(self, "", 0, 1);
4004
4005 if (res && TreeBuilder_CheckExact(self->target)) {
4006 Py_DECREF(res);
4007 return treebuilder_done((TreeBuilderObject*) self->target);
4008 }
4009
4010 return res;
4011}
4012
Serhiy Storchakacb985562015-05-04 15:32:48 +03004013/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03004014_elementtree.XMLParser._setevents
4015
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004016 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03004017 events_to_report: object = None
4018 /
4019
4020[clinic start generated code]*/
4021
4022static PyObject *
4023_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
4024 PyObject *events_queue,
4025 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004026/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004027{
4028 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004029 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004030 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004031 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004032
4033 if (!TreeBuilder_CheckExact(self->target)) {
4034 PyErr_SetString(
4035 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004036 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004037 "targets"
4038 );
4039 return NULL;
4040 }
4041
4042 target = (TreeBuilderObject*) self->target;
4043
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004044 events_append = PyObject_GetAttrString(events_queue, "append");
4045 if (events_append == NULL)
4046 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03004047 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004048
4049 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02004050 Py_CLEAR(target->start_event_obj);
4051 Py_CLEAR(target->end_event_obj);
4052 Py_CLEAR(target->start_ns_event_obj);
4053 Py_CLEAR(target->end_ns_event_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +02004054 Py_CLEAR(target->comment_event_obj);
4055 Py_CLEAR(target->pi_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004056
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004057 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004058 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004059 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004060 Py_RETURN_NONE;
4061 }
4062
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004063 if (!(events_seq = PySequence_Fast(events_to_report,
4064 "events must be a sequence"))) {
4065 return NULL;
4066 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004067
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03004068 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004069 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02004070 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004071 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004072 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004073 } else if (PyBytes_Check(event_name_obj)) {
4074 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004075 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004076 if (event_name == NULL) {
4077 Py_DECREF(events_seq);
4078 PyErr_Format(PyExc_ValueError, "invalid events sequence");
4079 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004080 }
4081
4082 Py_INCREF(event_name_obj);
4083 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004084 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004085 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004086 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004087 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004088 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004089 EXPAT(SetNamespaceDeclHandler)(
4090 self->parser,
4091 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4092 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4093 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004094 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004095 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004096 EXPAT(SetNamespaceDeclHandler)(
4097 self->parser,
4098 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4099 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4100 );
Stefan Behnel43851a22019-05-01 21:20:38 +02004101 } else if (strcmp(event_name, "comment") == 0) {
4102 Py_XSETREF(target->comment_event_obj, event_name_obj);
4103 EXPAT(SetCommentHandler)(
4104 self->parser,
4105 (XML_CommentHandler) expat_comment_handler
4106 );
4107 } else if (strcmp(event_name, "pi") == 0) {
4108 Py_XSETREF(target->pi_event_obj, event_name_obj);
4109 EXPAT(SetProcessingInstructionHandler)(
4110 self->parser,
4111 (XML_ProcessingInstructionHandler) expat_pi_handler
4112 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004113 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004114 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004115 Py_DECREF(events_seq);
4116 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004117 return NULL;
4118 }
4119 }
4120
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004121 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004122 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004123}
4124
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004125static PyMemberDef xmlparser_members[] = {
4126 {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
4127 {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
4128 {NULL}
4129};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004130
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004131static PyObject*
4132xmlparser_version_getter(XMLParserObject *self, void *closure)
4133{
4134 return PyUnicode_FromFormat(
4135 "Expat %d.%d.%d", XML_MAJOR_VERSION,
4136 XML_MINOR_VERSION, XML_MICRO_VERSION);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004137}
4138
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004139static PyGetSetDef xmlparser_getsetlist[] = {
4140 {"version", (getter)xmlparser_version_getter, NULL, NULL},
4141 {NULL},
4142};
4143
Serhiy Storchakacb985562015-05-04 15:32:48 +03004144#include "clinic/_elementtree.c.h"
4145
4146static PyMethodDef element_methods[] = {
4147
4148 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
4149
4150 _ELEMENTTREE_ELEMENT_GET_METHODDEF
4151 _ELEMENTTREE_ELEMENT_SET_METHODDEF
4152
4153 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
4154 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
4155 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
4156
4157 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
4158 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
4159 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
4160 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
4161
4162 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
4163 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
4164 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
4165
Serhiy Storchakacb985562015-05-04 15:32:48 +03004166 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
4167 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
4168
4169 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
4170
4171 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
4172 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
4173 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
4174 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
4175 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
4176
4177 {NULL, NULL}
4178};
4179
4180static PyMappingMethods element_as_mapping = {
4181 (lenfunc) element_length,
4182 (binaryfunc) element_subscr,
4183 (objobjargproc) element_ass_subscr,
4184};
4185
Serhiy Storchakadde08152015-11-25 15:28:13 +02004186static PyGetSetDef element_getsetlist[] = {
4187 {"tag",
4188 (getter)element_tag_getter,
4189 (setter)element_tag_setter,
4190 "A string identifying what kind of data this element represents"},
4191 {"text",
4192 (getter)element_text_getter,
4193 (setter)element_text_setter,
4194 "A string of text directly after the start tag, or None"},
4195 {"tail",
4196 (getter)element_tail_getter,
4197 (setter)element_tail_setter,
4198 "A string of text directly after the end tag, or None"},
4199 {"attrib",
4200 (getter)element_attrib_getter,
4201 (setter)element_attrib_setter,
4202 "A dictionary containing the element's attributes"},
4203 {NULL},
4204};
4205
Serhiy Storchakacb985562015-05-04 15:32:48 +03004206static PyTypeObject Element_Type = {
4207 PyVarObject_HEAD_INIT(NULL, 0)
4208 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
4209 /* methods */
4210 (destructor)element_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004211 0, /* tp_vectorcall_offset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004212 0, /* tp_getattr */
4213 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004214 0, /* tp_as_async */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004215 (reprfunc)element_repr, /* tp_repr */
4216 0, /* tp_as_number */
4217 &element_as_sequence, /* tp_as_sequence */
4218 &element_as_mapping, /* tp_as_mapping */
4219 0, /* tp_hash */
4220 0, /* tp_call */
4221 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02004222 PyObject_GenericGetAttr, /* tp_getattro */
4223 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004224 0, /* tp_as_buffer */
4225 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4226 /* tp_flags */
4227 0, /* tp_doc */
4228 (traverseproc)element_gc_traverse, /* tp_traverse */
4229 (inquiry)element_gc_clear, /* tp_clear */
4230 0, /* tp_richcompare */
4231 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
4232 0, /* tp_iter */
4233 0, /* tp_iternext */
4234 element_methods, /* tp_methods */
4235 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02004236 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004237 0, /* tp_base */
4238 0, /* tp_dict */
4239 0, /* tp_descr_get */
4240 0, /* tp_descr_set */
4241 0, /* tp_dictoffset */
4242 (initproc)element_init, /* tp_init */
4243 PyType_GenericAlloc, /* tp_alloc */
4244 element_new, /* tp_new */
4245 0, /* tp_free */
4246};
4247
4248static PyMethodDef treebuilder_methods[] = {
4249 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
4250 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
4251 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
Stefan Behnel43851a22019-05-01 21:20:38 +02004252 _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF
4253 _ELEMENTTREE_TREEBUILDER_PI_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004254 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
4255 {NULL, NULL}
4256};
4257
4258static PyTypeObject TreeBuilder_Type = {
4259 PyVarObject_HEAD_INIT(NULL, 0)
4260 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
4261 /* methods */
4262 (destructor)treebuilder_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004263 0, /* tp_vectorcall_offset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004264 0, /* tp_getattr */
4265 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004266 0, /* tp_as_async */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004267 0, /* tp_repr */
4268 0, /* tp_as_number */
4269 0, /* tp_as_sequence */
4270 0, /* tp_as_mapping */
4271 0, /* tp_hash */
4272 0, /* tp_call */
4273 0, /* tp_str */
4274 0, /* tp_getattro */
4275 0, /* tp_setattro */
4276 0, /* tp_as_buffer */
4277 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4278 /* tp_flags */
4279 0, /* tp_doc */
4280 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
4281 (inquiry)treebuilder_gc_clear, /* tp_clear */
4282 0, /* tp_richcompare */
4283 0, /* tp_weaklistoffset */
4284 0, /* tp_iter */
4285 0, /* tp_iternext */
4286 treebuilder_methods, /* tp_methods */
4287 0, /* tp_members */
4288 0, /* tp_getset */
4289 0, /* tp_base */
4290 0, /* tp_dict */
4291 0, /* tp_descr_get */
4292 0, /* tp_descr_set */
4293 0, /* tp_dictoffset */
4294 _elementtree_TreeBuilder___init__, /* tp_init */
4295 PyType_GenericAlloc, /* tp_alloc */
4296 treebuilder_new, /* tp_new */
4297 0, /* tp_free */
4298};
4299
4300static PyMethodDef xmlparser_methods[] = {
4301 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
4302 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
4303 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
4304 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004305 {NULL, NULL}
4306};
4307
Neal Norwitz227b5332006-03-22 09:28:35 +00004308static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004309 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08004310 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004311 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03004312 (destructor)xmlparser_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004313 0, /* tp_vectorcall_offset */
Eli Bendersky52467b12012-06-01 07:13:08 +03004314 0, /* tp_getattr */
4315 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004316 0, /* tp_as_async */
Eli Bendersky52467b12012-06-01 07:13:08 +03004317 0, /* tp_repr */
4318 0, /* tp_as_number */
4319 0, /* tp_as_sequence */
4320 0, /* tp_as_mapping */
4321 0, /* tp_hash */
4322 0, /* tp_call */
4323 0, /* tp_str */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004324 0, /* tp_getattro */
Eli Bendersky52467b12012-06-01 07:13:08 +03004325 0, /* tp_setattro */
4326 0, /* tp_as_buffer */
4327 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4328 /* tp_flags */
4329 0, /* tp_doc */
4330 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
4331 (inquiry)xmlparser_gc_clear, /* tp_clear */
4332 0, /* tp_richcompare */
4333 0, /* tp_weaklistoffset */
4334 0, /* tp_iter */
4335 0, /* tp_iternext */
4336 xmlparser_methods, /* tp_methods */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004337 xmlparser_members, /* tp_members */
4338 xmlparser_getsetlist, /* tp_getset */
Eli Bendersky52467b12012-06-01 07:13:08 +03004339 0, /* tp_base */
4340 0, /* tp_dict */
4341 0, /* tp_descr_get */
4342 0, /* tp_descr_set */
4343 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004344 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03004345 PyType_GenericAlloc, /* tp_alloc */
4346 xmlparser_new, /* tp_new */
4347 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004348};
4349
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004350/* ==================================================================== */
4351/* python module interface */
4352
4353static PyMethodDef _functions[] = {
Serhiy Storchaka62be7422018-11-27 13:27:31 +02004354 {"SubElement", (PyCFunction)(void(*)(void)) subelement, METH_VARARGS | METH_KEYWORDS},
Stefan Behnel43851a22019-05-01 21:20:38 +02004355 _ELEMENTTREE__SET_FACTORIES_METHODDEF
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004356 {NULL, NULL}
4357};
4358
Martin v. Löwis1a214512008-06-11 05:26:20 +00004359
Eli Bendersky532d03e2013-08-10 08:00:39 -07004360static struct PyModuleDef elementtreemodule = {
4361 PyModuleDef_HEAD_INIT,
4362 "_elementtree",
4363 NULL,
4364 sizeof(elementtreestate),
4365 _functions,
4366 NULL,
4367 elementtree_traverse,
4368 elementtree_clear,
4369 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00004370};
4371
Neal Norwitzf6657e62006-12-28 04:47:50 +00004372PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00004373PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004374{
Eli Bendersky64d11e62012-06-15 07:42:50 +03004375 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004376 elementtreestate *st;
4377
4378 m = PyState_FindModule(&elementtreemodule);
4379 if (m) {
4380 Py_INCREF(m);
4381 return m;
4382 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004383
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004384 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02004385 if (PyType_Ready(&ElementIter_Type) < 0)
4386 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004387 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004388 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004389 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004390 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004391 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004392 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004393
Eli Bendersky532d03e2013-08-10 08:00:39 -07004394 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00004395 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00004396 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004397 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00004398
Eli Bendersky828efde2012-04-05 05:40:58 +03004399 if (!(temp = PyImport_ImportModule("copy")))
4400 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004401 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03004402 Py_XDECREF(temp);
4403
Victor Stinnerb136f112017-07-10 22:28:02 +02004404 if (st->deepcopy_obj == NULL) {
4405 return NULL;
4406 }
4407
4408 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004409 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004410 return NULL;
4411
Eli Bendersky20d41742012-06-01 09:48:37 +03004412 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004413 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4414 if (expat_capi) {
4415 /* check that it's usable */
4416 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004417 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004418 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4419 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004420 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004421 PyErr_SetString(PyExc_ImportError,
4422 "pyexpat version is incompatible");
4423 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004424 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004425 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004426 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004427 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004428
Eli Bendersky532d03e2013-08-10 08:00:39 -07004429 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004430 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004431 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004432 Py_INCREF(st->parseerror_obj);
4433 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004434
Eli Bendersky092af1f2012-03-04 07:14:03 +02004435 Py_INCREF((PyObject *)&Element_Type);
4436 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4437
Eli Bendersky58d548d2012-05-29 15:45:16 +03004438 Py_INCREF((PyObject *)&TreeBuilder_Type);
4439 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4440
Eli Bendersky52467b12012-06-01 07:13:08 +03004441 Py_INCREF((PyObject *)&XMLParser_Type);
4442 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004443
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004444 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004445}