blob: c3f30c9339cd0aef3a3e0fe7858793da1851fda4 [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* An element can hold this many children without extra memory
   allocations.  This is the length of the inline _children array in
   ElementObjectExtra. */
#define STATIC_CHILDREN 4

/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  Flip the "#if 0" to "#if 1" to have
   ALLOC/RELEASE maintain a running byte counter and print it together
   with the given comment; in the default configuration both macros
   expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) declares a file-local function returning
   `type`.  With MSVC the function is additionally marked __inline and
   __fastcall; everywhere else it is a plain static function. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55
/* macros used to store 'join' flags in string object pointers.  note
   that all use of text and tail as object pointers must be wrapped in
   JOIN_OBJ.  see comments in the ElementObject definition for more
   info.

   JOIN_GET(p)        - extract the flag (bit 0 of the pointer value)
   JOIN_SET(p, flag)  - strip any existing flag from p, then OR in `flag`
   JOIN_OBJ(p)        - strip the flag and return the real PyObject*  */
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
/* Types defined by this extension.  These are forward declarations only;
   the definitions are not in this part of the file. */
static PyTypeObject Element_Type;
static PyTypeObject ElementIter_Type;
static PyTypeObject TreeBuilder_Type;
static PyTypeObject XMLParser_Type;
88
89
/* Per-module state; PEP 3121.
 *
 * Every member is an object reference that elementtree_clear() releases
 * and elementtree_traverse() reports to the garbage collector.
 * NOTE(review): the member names suggest cached helpers (ParseError class,
 * copy.deepcopy, ElementPath module, Comment/PI factories); where they are
 * populated is not visible in this part of the file.
 */
typedef struct {
    PyObject *parseerror_obj;
    PyObject *deepcopy_obj;
    PyObject *elementpath_obj;
    PyObject *comment_factory;
    PyObject *pi_factory;
} elementtreestate;
98
/* Forward declaration of the module definition; ET_STATE_GLOBAL below
 * needs its address.
 */
static struct PyModuleDef elementtreemodule;

/* Given a module object (assumed to be _elementtree), get its per-module
 * state.
 */
#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))

/* Find the module instance imported in the currently running sub-interpreter
 * and get its state.
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
111
112static int
113elementtree_clear(PyObject *m)
114{
115 elementtreestate *st = ET_STATE(m);
116 Py_CLEAR(st->parseerror_obj);
117 Py_CLEAR(st->deepcopy_obj);
118 Py_CLEAR(st->elementpath_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +0200119 Py_CLEAR(st->comment_factory);
120 Py_CLEAR(st->pi_factory);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700121 return 0;
122}
123
124static int
125elementtree_traverse(PyObject *m, visitproc visit, void *arg)
126{
127 elementtreestate *st = ET_STATE(m);
128 Py_VISIT(st->parseerror_obj);
129 Py_VISIT(st->deepcopy_obj);
130 Py_VISIT(st->elementpath_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +0200131 Py_VISIT(st->comment_factory);
132 Py_VISIT(st->pi_factory);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700133 return 0;
134}
135
136static void
137elementtree_free(void *m)
138{
139 elementtree_clear((PyObject *)m);
140}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000141
142/* helpers */
143
144LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145list_join(PyObject* list)
146{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300147 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000148 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 PyObject* result;
150
Antoine Pitrouc1948842012-10-01 23:40:37 +0200151 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000152 if (!joiner)
153 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200154 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000155 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000156 return result;
157}
158
Eli Bendersky48d358b2012-05-30 17:57:50 +0300159/* Is the given object an empty dictionary?
160*/
161static int
162is_empty_dict(PyObject *obj)
163{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200164 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300165}
166
167
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000168/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200169/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000170
/* Optional per-element storage for attributes and children.  It is
   allocated on demand by create_extra() and released by dealloc_extra(). */
typedef struct {

    /* attributes (a dictionary object), or None if no attributes */
    PyObject* attrib;

    /* child elements */
    Py_ssize_t length; /* actual number of items */
    Py_ssize_t allocated; /* allocated items */

    /* this either points to _children or to a malloced buffer */
    PyObject* *children;

    /* inline storage used until more than STATIC_CHILDREN slots are needed */
    PyObject* _children[STATIC_CHILDREN];

} ElementObjectExtra;
186
/* Instance layout of an Element. */
typedef struct {
    PyObject_HEAD

    /* element tag (a string). */
    PyObject* tag;

    /* text before first child.  note that this is a tagged pointer;
       use JOIN_OBJ to get the object pointer.  the join flag is used
       to distinguish lists created by the tree builder from lists
       assigned to the attribute by application code; the former
       should be joined before being returned to the user, the latter
       should be left intact. */
    PyObject* text;

    /* text after this element, in parent.  note that this is a tagged
       pointer; use JOIN_OBJ to get the object pointer. */
    PyObject* tail;

    /* attributes and children; NULL until create_extra() has run */
    ElementObjectExtra* extra;

    PyObject *weakreflist; /* For tp_weaklistoffset */

} ElementObject;
210
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000211
/* Element_CheckExact: true only when the object's type is exactly
   Element_Type.  Element_Check: also true for subtypes. */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
214
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215
216/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200217/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000218
219LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200220create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000221{
222 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200223 if (!self->extra) {
224 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000225 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200226 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000227
228 if (!attrib)
229 attrib = Py_None;
230
231 Py_INCREF(attrib);
232 self->extra->attrib = attrib;
233
234 self->extra->length = 0;
235 self->extra->allocated = STATIC_CHILDREN;
236 self->extra->children = self->extra->_children;
237
238 return 0;
239}
240
241LOCAL(void)
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300242dealloc_extra(ElementObjectExtra *extra)
243{
244 Py_ssize_t i;
245
246 if (!extra)
247 return;
248
249 Py_DECREF(extra->attrib);
250
251 for (i = 0; i < extra->length; i++)
252 Py_DECREF(extra->children[i]);
253
254 if (extra->children != extra->_children)
255 PyObject_Free(extra->children);
256
257 PyObject_Free(extra);
258}
259
260LOCAL(void)
261clear_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000262{
Eli Bendersky08b85292012-04-04 15:55:07 +0300263 ElementObjectExtra *myextra;
Eli Bendersky08b85292012-04-04 15:55:07 +0300264
Eli Benderskyebf37a22012-04-03 22:02:37 +0300265 if (!self->extra)
266 return;
267
268 /* Avoid DECREFs calling into this code again (cycles, etc.)
269 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300270 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300271 self->extra = NULL;
272
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300273 dealloc_extra(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000274}
275
Eli Bendersky092af1f2012-03-04 07:14:03 +0200276/* Convenience internal function to create new Element objects with the given
277 * tag and attributes.
278*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000279LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200280create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000281{
282 ElementObject* self;
283
Eli Bendersky0192ba32012-03-30 16:38:33 +0300284 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000285 if (self == NULL)
286 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000287 self->extra = NULL;
288
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000289 Py_INCREF(tag);
290 self->tag = tag;
291
292 Py_INCREF(Py_None);
293 self->text = Py_None;
294
295 Py_INCREF(Py_None);
296 self->tail = Py_None;
297
Eli Benderskyebf37a22012-04-03 22:02:37 +0300298 self->weakreflist = NULL;
299
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200300 ALLOC(sizeof(ElementObject), "create element");
301 PyObject_GC_Track(self);
302
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200303 if (attrib != Py_None && !is_empty_dict(attrib)) {
304 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200305 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200306 return NULL;
307 }
308 }
309
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000310 return (PyObject*) self;
311}
312
Eli Bendersky092af1f2012-03-04 07:14:03 +0200313static PyObject *
314element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
315{
316 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
317 if (e != NULL) {
318 Py_INCREF(Py_None);
319 e->tag = Py_None;
320
321 Py_INCREF(Py_None);
322 e->text = Py_None;
323
324 Py_INCREF(Py_None);
325 e->tail = Py_None;
326
327 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300328 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200329 }
330 return (PyObject *)e;
331}
332
Eli Bendersky737b1732012-05-29 06:02:56 +0300333/* Helper function for extracting the attrib dictionary from a keywords dict.
334 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800335 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300336 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700337 *
338 * Return a dictionary with the content of kwds merged into the content of
339 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300340 */
341static PyObject*
342get_attrib_from_keywords(PyObject *kwds)
343{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700344 PyObject *attrib_str = PyUnicode_FromString("attrib");
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600345 if (attrib_str == NULL) {
346 return NULL;
347 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200348 PyObject *attrib = PyDict_GetItemWithError(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300349
350 if (attrib) {
351 /* If attrib was found in kwds, copy its value and remove it from
352 * kwds
353 */
354 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700355 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300356 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
357 Py_TYPE(attrib)->tp_name);
358 return NULL;
359 }
360 attrib = PyDict_Copy(attrib);
Serhiy Storchaka8905fcc2018-12-11 08:38:03 +0200361 if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
362 Py_DECREF(attrib);
363 attrib = NULL;
364 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200365 }
366 else if (!PyErr_Occurred()) {
Eli Bendersky737b1732012-05-29 06:02:56 +0300367 attrib = PyDict_New();
368 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700369
370 Py_DECREF(attrib_str);
371
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600372 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
373 Py_DECREF(attrib);
374 return NULL;
375 }
Eli Bendersky737b1732012-05-29 06:02:56 +0300376 return attrib;
377}
378
Serhiy Storchakacb985562015-05-04 15:32:48 +0300379/*[clinic input]
380module _elementtree
381class _elementtree.Element "ElementObject *" "&Element_Type"
382class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
383class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
384[clinic start generated code]*/
385/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
386
Eli Bendersky092af1f2012-03-04 07:14:03 +0200387static int
388element_init(PyObject *self, PyObject *args, PyObject *kwds)
389{
390 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 PyObject *attrib = NULL;
392 ElementObject *self_elem;
393
394 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
395 return -1;
396
Eli Bendersky737b1732012-05-29 06:02:56 +0300397 if (attrib) {
398 /* attrib passed as positional arg */
399 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200400 if (!attrib)
401 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300402 if (kwds) {
403 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300405 return -1;
406 }
407 }
408 } else if (kwds) {
409 /* have keywords args */
410 attrib = get_attrib_from_keywords(kwds);
411 if (!attrib)
412 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413 }
414
415 self_elem = (ElementObject *)self;
416
Antoine Pitrouc1948842012-10-01 23:40:37 +0200417 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200418 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200419 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200420 return -1;
421 }
422 }
423
Eli Bendersky48d358b2012-05-30 17:57:50 +0300424 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200425 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426
427 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200428 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300429 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200430
Eli Bendersky092af1f2012-03-04 07:14:03 +0200431 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300432 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200433
Eli Bendersky092af1f2012-03-04 07:14:03 +0200434 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300435 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200436
437 return 0;
438}
439
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000440LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200441element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000442{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200443 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000444 PyObject* *children;
445
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300446 assert(extra >= 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447 /* make sure self->children can hold the given number of extra
448 elements. set an exception and return -1 if allocation failed */
449
Victor Stinner5f0af232013-07-11 23:01:36 +0200450 if (!self->extra) {
451 if (create_extra(self, NULL) < 0)
452 return -1;
453 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000454
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200455 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000456
457 if (size > self->extra->allocated) {
458 /* use Python 2.4's list growth strategy */
459 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000460 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100461 * which needs at least 4 bytes.
462 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000463 * be safe.
464 */
465 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200466 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
467 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000468 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000469 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100470 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000471 * false alarm always assume at least one child to be safe.
472 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000473 children = PyObject_Realloc(self->extra->children,
474 size * sizeof(PyObject*));
475 if (!children)
476 goto nomemory;
477 } else {
478 children = PyObject_Malloc(size * sizeof(PyObject*));
479 if (!children)
480 goto nomemory;
481 /* copy existing children from static area to malloc buffer */
482 memcpy(children, self->extra->children,
483 self->extra->length * sizeof(PyObject*));
484 }
485 self->extra->children = children;
486 self->extra->allocated = size;
487 }
488
489 return 0;
490
491 nomemory:
492 PyErr_NoMemory();
493 return -1;
494}
495
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300496LOCAL(void)
497raise_type_error(PyObject *element)
498{
499 PyErr_Format(PyExc_TypeError,
500 "expected an Element, not \"%.200s\"",
501 Py_TYPE(element)->tp_name);
502}
503
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000504LOCAL(int)
505element_add_subelement(ElementObject* self, PyObject* element)
506{
507 /* add a child element to a parent */
508
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300509 if (!Element_Check(element)) {
510 raise_type_error(element);
511 return -1;
512 }
513
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000514 if (element_resize(self, 1) < 0)
515 return -1;
516
517 Py_INCREF(element);
518 self->extra->children[self->extra->length] = element;
519
520 self->extra->length++;
521
522 return 0;
523}
524
525LOCAL(PyObject*)
526element_get_attrib(ElementObject* self)
527{
528 /* return borrowed reference to attrib dictionary */
529 /* note: this function assumes that the extra section exists */
530
531 PyObject* res = self->extra->attrib;
532
533 if (res == Py_None) {
534 /* create missing dictionary */
535 res = PyDict_New();
536 if (!res)
537 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200538 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000539 self->extra->attrib = res;
540 }
541
542 return res;
543}
544
545LOCAL(PyObject*)
546element_get_text(ElementObject* self)
547{
548 /* return borrowed reference to text attribute */
549
Serhiy Storchaka576def02017-03-30 09:47:31 +0300550 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000551
552 if (JOIN_GET(res)) {
553 res = JOIN_OBJ(res);
554 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300555 PyObject *tmp = list_join(res);
556 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000557 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300558 self->text = tmp;
559 Py_DECREF(res);
560 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561 }
562 }
563
564 return res;
565}
566
567LOCAL(PyObject*)
568element_get_tail(ElementObject* self)
569{
570 /* return borrowed reference to text attribute */
571
Serhiy Storchaka576def02017-03-30 09:47:31 +0300572 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573
574 if (JOIN_GET(res)) {
575 res = JOIN_OBJ(res);
576 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300577 PyObject *tmp = list_join(res);
578 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000579 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300580 self->tail = tmp;
581 Py_DECREF(res);
582 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000583 }
584 }
585
586 return res;
587}
588
589static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300590subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000591{
592 PyObject* elem;
593
594 ElementObject* parent;
595 PyObject* tag;
596 PyObject* attrib = NULL;
597 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
598 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800599 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000600 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800601 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000602
Eli Bendersky737b1732012-05-29 06:02:56 +0300603 if (attrib) {
604 /* attrib passed as positional arg */
605 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000606 if (!attrib)
607 return NULL;
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600608 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
609 Py_DECREF(attrib);
610 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300611 }
612 } else if (kwds) {
613 /* have keyword args */
614 attrib = get_attrib_from_keywords(kwds);
615 if (!attrib)
616 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000617 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300618 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000619 Py_INCREF(Py_None);
620 attrib = Py_None;
621 }
622
Eli Bendersky092af1f2012-03-04 07:14:03 +0200623 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000624 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200625 if (elem == NULL)
626 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000627
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000628 if (element_add_subelement(parent, elem) < 0) {
629 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000630 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000631 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000632
633 return elem;
634}
635
Eli Bendersky0192ba32012-03-30 16:38:33 +0300636static int
637element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
638{
639 Py_VISIT(self->tag);
640 Py_VISIT(JOIN_OBJ(self->text));
641 Py_VISIT(JOIN_OBJ(self->tail));
642
643 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200644 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300645 Py_VISIT(self->extra->attrib);
646
647 for (i = 0; i < self->extra->length; ++i)
648 Py_VISIT(self->extra->children[i]);
649 }
650 return 0;
651}
652
653static int
654element_gc_clear(ElementObject *self)
655{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300656 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700657 _clear_joined_ptr(&self->text);
658 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300659
660 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300661 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300662 */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300663 clear_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300664 return 0;
665}
666
/* Destructor for Element objects (presumably installed as the type's
 * tp_dealloc; the type definition is outside this part of the file).
 *
 * Sequence: untrack from the GC, clear weak references if any exist,
 * drop all owned references via element_gc_clear(), then hand the memory
 * back through the type's tp_free.  Everything after the untrack sits
 * inside a Py_TRASHCAN_BEGIN/Py_TRASHCAN_END pair.
 */
static void
element_dealloc(ElementObject* self)
{
    /* bpo-31095: UnTrack is needed before calling any callbacks */
    PyObject_GC_UnTrack(self);
    Py_TRASHCAN_BEGIN(self, element_dealloc)

    /* weak references must be cleared while the object is still intact */
    if (self->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *) self);

    /* element_gc_clear clears all references and deallocates extra
    */
    element_gc_clear(self);

    /* RELEASE expands to nothing unless allocation tracing is enabled */
    RELEASE(sizeof(ElementObject), "destroy element");
    Py_TYPE(self)->tp_free((PyObject *)self);
    Py_TRASHCAN_END
}
685
686/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000687
Serhiy Storchakacb985562015-05-04 15:32:48 +0300688/*[clinic input]
689_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000690
Serhiy Storchakacb985562015-05-04 15:32:48 +0300691 subelement: object(subclass_of='&Element_Type')
692 /
693
694[clinic start generated code]*/
695
696static PyObject *
697_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
698/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
699{
700 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000701 return NULL;
702
703 Py_RETURN_NONE;
704}
705
Serhiy Storchakacb985562015-05-04 15:32:48 +0300706/*[clinic input]
707_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000708
Serhiy Storchakacb985562015-05-04 15:32:48 +0300709[clinic start generated code]*/
710
711static PyObject *
712_elementtree_Element_clear_impl(ElementObject *self)
713/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
714{
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300715 clear_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716
717 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300718 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719
720 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300721 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000722
723 Py_RETURN_NONE;
724}
725
Serhiy Storchakacb985562015-05-04 15:32:48 +0300726/*[clinic input]
727_elementtree.Element.__copy__
728
729[clinic start generated code]*/
730
731static PyObject *
732_elementtree_Element___copy___impl(ElementObject *self)
733/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000734{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200735 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000736 ElementObject* element;
737
Eli Bendersky092af1f2012-03-04 07:14:03 +0200738 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800739 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000740 if (!element)
741 return NULL;
742
Oren Milman39ecb9c2017-10-10 23:26:24 +0300743 Py_INCREF(JOIN_OBJ(self->text));
744 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000745
Oren Milman39ecb9c2017-10-10 23:26:24 +0300746 Py_INCREF(JOIN_OBJ(self->tail));
747 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300749 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000750 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000751 if (element_resize(element, self->extra->length) < 0) {
752 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000753 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000754 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000755
756 for (i = 0; i < self->extra->length; i++) {
757 Py_INCREF(self->extra->children[i]);
758 element->extra->children[i] = self->extra->children[i];
759 }
760
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300761 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000763 }
764
765 return (PyObject*) element;
766}
767
/* Helper for a deep copy.  Forward declaration only; the definition is
   not in this part of the file.  Callers here pass the object to copy and
   the memo object, and treat a NULL result as failure. */
LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
770
Serhiy Storchakacb985562015-05-04 15:32:48 +0300771/*[clinic input]
772_elementtree.Element.__deepcopy__
773
Oren Milmand0568182017-09-12 17:39:15 +0300774 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300775 /
776
777[clinic start generated code]*/
778
779static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300780_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
781/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000782{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200783 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000784 ElementObject* element;
785 PyObject* tag;
786 PyObject* attrib;
787 PyObject* text;
788 PyObject* tail;
789 PyObject* id;
790
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000791 tag = deepcopy(self->tag, memo);
792 if (!tag)
793 return NULL;
794
795 if (self->extra) {
796 attrib = deepcopy(self->extra->attrib, memo);
797 if (!attrib) {
798 Py_DECREF(tag);
799 return NULL;
800 }
801 } else {
802 Py_INCREF(Py_None);
803 attrib = Py_None;
804 }
805
Eli Bendersky092af1f2012-03-04 07:14:03 +0200806 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807
808 Py_DECREF(tag);
809 Py_DECREF(attrib);
810
811 if (!element)
812 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100813
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000814 text = deepcopy(JOIN_OBJ(self->text), memo);
815 if (!text)
816 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300817 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000818
819 tail = deepcopy(JOIN_OBJ(self->tail), memo);
820 if (!tail)
821 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300822 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000823
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300824 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000825 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000826 if (element_resize(element, self->extra->length) < 0)
827 goto error;
828
829 for (i = 0; i < self->extra->length; i++) {
830 PyObject* child = deepcopy(self->extra->children[i], memo);
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300831 if (!child || !Element_Check(child)) {
832 if (child) {
833 raise_type_error(child);
834 Py_DECREF(child);
835 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000836 element->extra->length = i;
837 goto error;
838 }
839 element->extra->children[i] = child;
840 }
841
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300842 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000843 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000844 }
845
846 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700847 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000848 if (!id)
849 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000850
851 i = PyDict_SetItem(memo, id, (PyObject*) element);
852
853 Py_DECREF(id);
854
855 if (i < 0)
856 goto error;
857
858 return (PyObject*) element;
859
860 error:
861 Py_DECREF(element);
862 return NULL;
863}
864
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200865LOCAL(PyObject *)
866deepcopy(PyObject *object, PyObject *memo)
867{
868 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200869 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200870 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200871
872 /* Fast paths */
873 if (object == Py_None || PyUnicode_CheckExact(object)) {
874 Py_INCREF(object);
875 return object;
876 }
877
878 if (Py_REFCNT(object) == 1) {
879 if (PyDict_CheckExact(object)) {
880 PyObject *key, *value;
881 Py_ssize_t pos = 0;
882 int simple = 1;
883 while (PyDict_Next(object, &pos, &key, &value)) {
884 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
885 simple = 0;
886 break;
887 }
888 }
889 if (simple)
890 return PyDict_Copy(object);
891 /* Fall through to general case */
892 }
893 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300894 return _elementtree_Element___deepcopy___impl(
895 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200896 }
897 }
898
899 /* General case */
900 st = ET_STATE_GLOBAL;
901 if (!st->deepcopy_obj) {
902 PyErr_SetString(PyExc_RuntimeError,
903 "deepcopy helper not found");
904 return NULL;
905 }
906
Victor Stinner7fbac452016-08-20 01:34:44 +0200907 stack[0] = object;
908 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200909 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200910}
911
912
Serhiy Storchakacb985562015-05-04 15:32:48 +0300913/*[clinic input]
914_elementtree.Element.__sizeof__ -> Py_ssize_t
915
916[clinic start generated code]*/
917
918static Py_ssize_t
919_elementtree_Element___sizeof___impl(ElementObject *self)
920/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200921{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200922 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200923 if (self->extra) {
924 result += sizeof(ElementObjectExtra);
925 if (self->extra->children != self->extra->_children)
926 result += sizeof(PyObject*) * self->extra->allocated;
927 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300928 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200929}
930
/* Dict keys for getstate/setstate: the names under which __getstate__
   stores the element's fields and which __setstate__ accepts as keywords. */
#define PICKLED_TAG "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB "attrib"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
937
938/* __getstate__ returns a fabricated instance dict as in the pure-Python
939 * Element implementation, for interoperability/interchangeability. This
940 * makes the pure-Python implementation details an API, but (a) there aren't
941 * any unnecessary structures there; and (b) it buys compatibility with 3.2
942 * pickles. See issue #16076.
943 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300944/*[clinic input]
945_elementtree.Element.__getstate__
946
947[clinic start generated code]*/
948
Eli Bendersky698bdb22013-01-10 06:01:06 -0800949static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300950_elementtree_Element___getstate___impl(ElementObject *self)
951/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800952{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200953 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800954 PyObject *instancedict = NULL, *children;
955
956 /* Build a list of children. */
957 children = PyList_New(self->extra ? self->extra->length : 0);
958 if (!children)
959 return NULL;
960 for (i = 0; i < PyList_GET_SIZE(children); i++) {
961 PyObject *child = self->extra->children[i];
962 Py_INCREF(child);
963 PyList_SET_ITEM(children, i, child);
964 }
965
966 /* Construct the state object. */
967 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
968 if (noattrib)
969 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
970 PICKLED_TAG, self->tag,
971 PICKLED_CHILDREN, children,
972 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700973 PICKLED_TEXT, JOIN_OBJ(self->text),
974 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800975 else
976 instancedict = Py_BuildValue("{sOsOsOsOsO}",
977 PICKLED_TAG, self->tag,
978 PICKLED_CHILDREN, children,
979 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700980 PICKLED_TEXT, JOIN_OBJ(self->text),
981 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800982 if (instancedict) {
983 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800984 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800985 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800986 else {
987 for (i = 0; i < PyList_GET_SIZE(children); i++)
988 Py_DECREF(PyList_GET_ITEM(children, i));
989 Py_DECREF(children);
990
991 return NULL;
992 }
993}
994
995static PyObject *
996element_setstate_from_attributes(ElementObject *self,
997 PyObject *tag,
998 PyObject *attrib,
999 PyObject *text,
1000 PyObject *tail,
1001 PyObject *children)
1002{
1003 Py_ssize_t i, nchildren;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001004 ElementObjectExtra *oldextra = NULL;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001005
1006 if (!tag) {
1007 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
1008 return NULL;
1009 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001010
Serhiy Storchaka191321d2015-12-27 15:41:34 +02001011 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001012 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001013
Oren Milman39ecb9c2017-10-10 23:26:24 +03001014 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
1015 Py_INCREF(JOIN_OBJ(text));
1016 _set_joined_ptr(&self->text, text);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001017
Oren Milman39ecb9c2017-10-10 23:26:24 +03001018 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
1019 Py_INCREF(JOIN_OBJ(tail));
1020 _set_joined_ptr(&self->tail, tail);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001021
1022 /* Handle ATTRIB and CHILDREN. */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001023 if (!children && !attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001024 Py_RETURN_NONE;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001025 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001026
1027 /* Compute 'nchildren'. */
1028 if (children) {
1029 if (!PyList_Check(children)) {
1030 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1031 return NULL;
1032 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001033 nchildren = PyList_GET_SIZE(children);
1034
1035 /* (Re-)allocate 'extra'.
1036 Avoid DECREFs calling into this code again (cycles, etc.)
1037 */
1038 oldextra = self->extra;
1039 self->extra = NULL;
1040 if (element_resize(self, nchildren)) {
1041 assert(!self->extra || !self->extra->length);
1042 clear_extra(self);
1043 self->extra = oldextra;
1044 return NULL;
1045 }
1046 assert(self->extra);
1047 assert(self->extra->allocated >= nchildren);
1048 if (oldextra) {
1049 assert(self->extra->attrib == Py_None);
1050 self->extra->attrib = oldextra->attrib;
1051 oldextra->attrib = Py_None;
1052 }
1053
1054 /* Copy children */
1055 for (i = 0; i < nchildren; i++) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001056 PyObject *child = PyList_GET_ITEM(children, i);
1057 if (!Element_Check(child)) {
1058 raise_type_error(child);
1059 self->extra->length = i;
1060 dealloc_extra(oldextra);
1061 return NULL;
1062 }
1063 Py_INCREF(child);
1064 self->extra->children[i] = child;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001065 }
1066
1067 assert(!self->extra->length);
1068 self->extra->length = nchildren;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001069 }
1070 else {
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001071 if (element_resize(self, 0)) {
1072 return NULL;
1073 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001074 }
1075
Eli Bendersky698bdb22013-01-10 06:01:06 -08001076 /* Stash attrib. */
1077 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001078 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001079 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001080 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001081 dealloc_extra(oldextra);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001082
1083 Py_RETURN_NONE;
1084}
1085
1086/* __setstate__ for Element instance from the Python implementation.
1087 * 'state' should be the instance dict.
1088 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001089
Eli Bendersky698bdb22013-01-10 06:01:06 -08001090static PyObject *
1091element_setstate_from_Python(ElementObject *self, PyObject *state)
1092{
1093 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1094 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1095 PyObject *args;
1096 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001097 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001098
Eli Bendersky698bdb22013-01-10 06:01:06 -08001099 tag = attrib = text = tail = children = NULL;
1100 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001101 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001102 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001103
1104 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1105 &attrib, &text, &tail, &children))
1106 retval = element_setstate_from_attributes(self, tag, attrib, text,
1107 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001108 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001109 retval = NULL;
1110
1111 Py_DECREF(args);
1112 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001113}
1114
Serhiy Storchakacb985562015-05-04 15:32:48 +03001115/*[clinic input]
1116_elementtree.Element.__setstate__
1117
1118 state: object
1119 /
1120
1121[clinic start generated code]*/
1122
Eli Bendersky698bdb22013-01-10 06:01:06 -08001123static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001124_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1125/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001126{
1127 if (!PyDict_CheckExact(state)) {
1128 PyErr_Format(PyExc_TypeError,
1129 "Don't know how to unpickle \"%.200R\" as an Element",
1130 state);
1131 return NULL;
1132 }
1133 else
1134 return element_setstate_from_Python(self, state);
1135}
1136
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001137LOCAL(int)
1138checkpath(PyObject* tag)
1139{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001140 Py_ssize_t i;
1141 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001142
1143 /* check if a tag contains an xpath character */
1144
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001145#define PATHCHAR(ch) \
1146 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001147
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001148 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001149 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1150 void *data = PyUnicode_DATA(tag);
1151 unsigned int kind = PyUnicode_KIND(tag);
Stefan Behnel47541682019-05-03 20:58:16 +02001152 if (len >= 3 && PyUnicode_READ(kind, data, 0) == '{' && (
1153 PyUnicode_READ(kind, data, 1) == '}' || (
1154 PyUnicode_READ(kind, data, 1) == '*' &&
1155 PyUnicode_READ(kind, data, 2) == '}'))) {
1156 /* wildcard: '{}tag' or '{*}tag' */
1157 return 1;
1158 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001159 for (i = 0; i < len; i++) {
1160 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1161 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001162 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001163 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001164 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001165 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001166 return 1;
1167 }
1168 return 0;
1169 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001170 if (PyBytes_Check(tag)) {
1171 char *p = PyBytes_AS_STRING(tag);
Stefan Behnel47541682019-05-03 20:58:16 +02001172 const Py_ssize_t len = PyBytes_GET_SIZE(tag);
1173 if (len >= 3 && p[0] == '{' && (
Stefan Behnel6b951492019-05-06 17:36:35 +02001174 p[1] == '}' || (p[1] == '*' && p[2] == '}'))) {
Stefan Behnel47541682019-05-03 20:58:16 +02001175 /* wildcard: '{}tag' or '{*}tag' */
1176 return 1;
1177 }
1178 for (i = 0; i < len; i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001179 if (p[i] == '{')
1180 check = 0;
1181 else if (p[i] == '}')
1182 check = 1;
1183 else if (check && PATHCHAR(p[i]))
1184 return 1;
1185 }
1186 return 0;
1187 }
1188
1189 return 1; /* unknown type; might be path expression */
1190}
1191
Serhiy Storchakacb985562015-05-04 15:32:48 +03001192/*[clinic input]
1193_elementtree.Element.extend
1194
1195 elements: object
1196 /
1197
1198[clinic start generated code]*/
1199
1200static PyObject *
1201_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1202/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001203{
1204 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001205 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001206
Serhiy Storchakacb985562015-05-04 15:32:48 +03001207 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001208 if (!seq) {
1209 PyErr_Format(
1210 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001211 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001212 );
1213 return NULL;
1214 }
1215
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001216 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001217 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001218 Py_INCREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001219 if (element_add_subelement(self, element) < 0) {
1220 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001221 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001222 return NULL;
1223 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001224 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001225 }
1226
1227 Py_DECREF(seq);
1228
1229 Py_RETURN_NONE;
1230}
1231
Serhiy Storchakacb985562015-05-04 15:32:48 +03001232/*[clinic input]
1233_elementtree.Element.find
1234
1235 path: object
1236 namespaces: object = None
1237
1238[clinic start generated code]*/
1239
1240static PyObject *
1241_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1242 PyObject *namespaces)
1243/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001244{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001245 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001246 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001247
Serhiy Storchakacb985562015-05-04 15:32:48 +03001248 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001249 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001250 return _PyObject_CallMethodIdObjArgs(
1251 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001252 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001253 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001254
1255 if (!self->extra)
1256 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001257
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001258 for (i = 0; i < self->extra->length; i++) {
1259 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001260 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001261 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001262 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001263 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001264 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001265 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001266 Py_DECREF(item);
1267 if (rc < 0)
1268 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001269 }
1270
1271 Py_RETURN_NONE;
1272}
1273
Serhiy Storchakacb985562015-05-04 15:32:48 +03001274/*[clinic input]
1275_elementtree.Element.findtext
1276
1277 path: object
1278 default: object = None
1279 namespaces: object = None
1280
1281[clinic start generated code]*/
1282
1283static PyObject *
1284_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1285 PyObject *default_value,
1286 PyObject *namespaces)
1287/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001288{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001289 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001290 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001291 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001292
Serhiy Storchakacb985562015-05-04 15:32:48 +03001293 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001294 return _PyObject_CallMethodIdObjArgs(
1295 st->elementpath_obj, &PyId_findtext,
1296 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001297 );
1298
1299 if (!self->extra) {
1300 Py_INCREF(default_value);
1301 return default_value;
1302 }
1303
1304 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001305 PyObject *item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001306 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001307 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001308 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001309 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001310 if (rc > 0) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001311 PyObject* text = element_get_text((ElementObject*)item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001312 if (text == Py_None) {
1313 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001314 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001315 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001316 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001317 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001318 return text;
1319 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001320 Py_DECREF(item);
1321 if (rc < 0)
1322 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001323 }
1324
1325 Py_INCREF(default_value);
1326 return default_value;
1327}
1328
Serhiy Storchakacb985562015-05-04 15:32:48 +03001329/*[clinic input]
1330_elementtree.Element.findall
1331
1332 path: object
1333 namespaces: object = None
1334
1335[clinic start generated code]*/
1336
1337static PyObject *
1338_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1339 PyObject *namespaces)
1340/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001341{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001342 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001343 PyObject* out;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001344 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001345
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001346 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001347 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001348 return _PyObject_CallMethodIdObjArgs(
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001349 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001350 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001351 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001352
1353 out = PyList_New(0);
1354 if (!out)
1355 return NULL;
1356
1357 if (!self->extra)
1358 return out;
1359
1360 for (i = 0; i < self->extra->length; i++) {
1361 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001362 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001363 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001364 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001365 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001366 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1367 Py_DECREF(item);
1368 Py_DECREF(out);
1369 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001370 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001371 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001372 }
1373
1374 return out;
1375}
1376
Serhiy Storchakacb985562015-05-04 15:32:48 +03001377/*[clinic input]
1378_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001379
Serhiy Storchakacb985562015-05-04 15:32:48 +03001380 path: object
1381 namespaces: object = None
1382
1383[clinic start generated code]*/
1384
1385static PyObject *
1386_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1387 PyObject *namespaces)
1388/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1389{
1390 PyObject* tag = path;
1391 _Py_IDENTIFIER(iterfind);
1392 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001393
Victor Stinnerf5616342016-12-09 15:26:00 +01001394 return _PyObject_CallMethodIdObjArgs(
1395 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001396}
1397
Serhiy Storchakacb985562015-05-04 15:32:48 +03001398/*[clinic input]
1399_elementtree.Element.get
1400
1401 key: object
1402 default: object = None
1403
1404[clinic start generated code]*/
1405
1406static PyObject *
1407_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1408 PyObject *default_value)
1409/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001410{
1411 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001412
1413 if (!self->extra || self->extra->attrib == Py_None)
1414 value = default_value;
1415 else {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001416 value = PyDict_GetItemWithError(self->extra->attrib, key);
1417 if (!value) {
1418 if (PyErr_Occurred()) {
1419 return NULL;
1420 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001421 value = default_value;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001422 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001423 }
1424
1425 Py_INCREF(value);
1426 return value;
1427}
1428
Serhiy Storchakacb985562015-05-04 15:32:48 +03001429/*[clinic input]
1430_elementtree.Element.getchildren
1431
1432[clinic start generated code]*/
1433
1434static PyObject *
1435_elementtree_Element_getchildren_impl(ElementObject *self)
1436/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001437{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001438 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001439 PyObject* list;
1440
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001441 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1442 "This method will be removed in future versions. "
1443 "Use 'list(elem)' or iteration over elem instead.",
1444 1) < 0) {
1445 return NULL;
1446 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001447
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001448 if (!self->extra)
1449 return PyList_New(0);
1450
1451 list = PyList_New(self->extra->length);
1452 if (!list)
1453 return NULL;
1454
1455 for (i = 0; i < self->extra->length; i++) {
1456 PyObject* item = self->extra->children[i];
1457 Py_INCREF(item);
1458 PyList_SET_ITEM(list, i, item);
1459 }
1460
1461 return list;
1462}
1463
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001464
/* Forward declaration; used by Element.iter() and Element.itertext()
   below, which pass gettext=0 and gettext=1 respectively. */
static PyObject *
create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1467
1468
Serhiy Storchakacb985562015-05-04 15:32:48 +03001469/*[clinic input]
1470_elementtree.Element.iter
1471
1472 tag: object = None
1473
1474[clinic start generated code]*/
1475
Eli Bendersky64d11e62012-06-15 07:42:50 +03001476static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001477_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1478/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001479{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001480 if (PyUnicode_Check(tag)) {
1481 if (PyUnicode_READY(tag) < 0)
1482 return NULL;
1483 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1484 tag = Py_None;
1485 }
1486 else if (PyBytes_Check(tag)) {
1487 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1488 tag = Py_None;
1489 }
1490
Eli Bendersky64d11e62012-06-15 07:42:50 +03001491 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001492}
1493
1494
Serhiy Storchakacb985562015-05-04 15:32:48 +03001495/*[clinic input]
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001496_elementtree.Element.getiterator
1497
1498 tag: object = None
1499
1500[clinic start generated code]*/
1501
1502static PyObject *
1503_elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1504/*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1505{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03001506 if (PyErr_WarnEx(PyExc_DeprecationWarning,
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001507 "This method will be removed in future versions. "
1508 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1509 1) < 0) {
1510 return NULL;
1511 }
1512 return _elementtree_Element_iter_impl(self, tag);
1513}
1514
1515
1516/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03001517_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001518
Serhiy Storchakacb985562015-05-04 15:32:48 +03001519[clinic start generated code]*/
1520
1521static PyObject *
1522_elementtree_Element_itertext_impl(ElementObject *self)
1523/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1524{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001525 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001526}
1527
Eli Bendersky64d11e62012-06-15 07:42:50 +03001528
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001529static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001530element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001531{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001532 ElementObject* self = (ElementObject*) self_;
1533
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001534 if (!self->extra || index < 0 || index >= self->extra->length) {
1535 PyErr_SetString(
1536 PyExc_IndexError,
1537 "child index out of range"
1538 );
1539 return NULL;
1540 }
1541
1542 Py_INCREF(self->extra->children[index]);
1543 return self->extra->children[index];
1544}
1545
Serhiy Storchakacb985562015-05-04 15:32:48 +03001546/*[clinic input]
1547_elementtree.Element.insert
1548
1549 index: Py_ssize_t
1550 subelement: object(subclass_of='&Element_Type')
1551 /
1552
1553[clinic start generated code]*/
1554
1555static PyObject *
1556_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1557 PyObject *subelement)
1558/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001559{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001560 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001561
Victor Stinner5f0af232013-07-11 23:01:36 +02001562 if (!self->extra) {
1563 if (create_extra(self, NULL) < 0)
1564 return NULL;
1565 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001566
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001567 if (index < 0) {
1568 index += self->extra->length;
1569 if (index < 0)
1570 index = 0;
1571 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001572 if (index > self->extra->length)
1573 index = self->extra->length;
1574
1575 if (element_resize(self, 1) < 0)
1576 return NULL;
1577
1578 for (i = self->extra->length; i > index; i--)
1579 self->extra->children[i] = self->extra->children[i-1];
1580
Serhiy Storchakacb985562015-05-04 15:32:48 +03001581 Py_INCREF(subelement);
1582 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001583
1584 self->extra->length++;
1585
1586 Py_RETURN_NONE;
1587}
1588
Serhiy Storchakacb985562015-05-04 15:32:48 +03001589/*[clinic input]
1590_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001591
Serhiy Storchakacb985562015-05-04 15:32:48 +03001592[clinic start generated code]*/
1593
1594static PyObject *
1595_elementtree_Element_items_impl(ElementObject *self)
1596/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1597{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001598 if (!self->extra || self->extra->attrib == Py_None)
1599 return PyList_New(0);
1600
1601 return PyDict_Items(self->extra->attrib);
1602}
1603
Serhiy Storchakacb985562015-05-04 15:32:48 +03001604/*[clinic input]
1605_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001606
Serhiy Storchakacb985562015-05-04 15:32:48 +03001607[clinic start generated code]*/
1608
1609static PyObject *
1610_elementtree_Element_keys_impl(ElementObject *self)
1611/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1612{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001613 if (!self->extra || self->extra->attrib == Py_None)
1614 return PyList_New(0);
1615
1616 return PyDict_Keys(self->extra->attrib);
1617}
1618
Martin v. Löwis18e16552006-02-15 17:27:45 +00001619static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001620element_length(ElementObject* self)
1621{
1622 if (!self->extra)
1623 return 0;
1624
1625 return self->extra->length;
1626}
1627
Serhiy Storchakacb985562015-05-04 15:32:48 +03001628/*[clinic input]
1629_elementtree.Element.makeelement
1630
1631 tag: object
1632 attrib: object
1633 /
1634
1635[clinic start generated code]*/
1636
1637static PyObject *
1638_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1639 PyObject *attrib)
1640/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001641{
1642 PyObject* elem;
1643
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001644 attrib = PyDict_Copy(attrib);
1645 if (!attrib)
1646 return NULL;
1647
Eli Bendersky092af1f2012-03-04 07:14:03 +02001648 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001649
1650 Py_DECREF(attrib);
1651
1652 return elem;
1653}
1654
Serhiy Storchakacb985562015-05-04 15:32:48 +03001655/*[clinic input]
1656_elementtree.Element.remove
1657
1658 subelement: object(subclass_of='&Element_Type')
1659 /
1660
1661[clinic start generated code]*/
1662
1663static PyObject *
1664_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1665/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001666{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001667 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001668 int rc;
1669 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001670
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001671 if (!self->extra) {
1672 /* element has no children, so raise exception */
1673 PyErr_SetString(
1674 PyExc_ValueError,
1675 "list.remove(x): x not in list"
1676 );
1677 return NULL;
1678 }
1679
1680 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001681 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001682 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001683 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001684 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001685 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001686 if (rc < 0)
1687 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001688 }
1689
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001690 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001691 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001692 PyErr_SetString(
1693 PyExc_ValueError,
1694 "list.remove(x): x not in list"
1695 );
1696 return NULL;
1697 }
1698
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001699 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001700
1701 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001702 for (; i < self->extra->length; i++)
1703 self->extra->children[i] = self->extra->children[i+1];
1704
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001705 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001706 Py_RETURN_NONE;
1707}
1708
1709static PyObject*
1710element_repr(ElementObject* self)
1711{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001712 int status;
1713
1714 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001715 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001716
1717 status = Py_ReprEnter((PyObject *)self);
1718 if (status == 0) {
1719 PyObject *res;
1720 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1721 Py_ReprLeave((PyObject *)self);
1722 return res;
1723 }
1724 if (status > 0)
1725 PyErr_Format(PyExc_RuntimeError,
1726 "reentrant call inside %s.__repr__",
1727 Py_TYPE(self)->tp_name);
1728 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001729}
1730
Serhiy Storchakacb985562015-05-04 15:32:48 +03001731/*[clinic input]
1732_elementtree.Element.set
1733
1734 key: object
1735 value: object
1736 /
1737
1738[clinic start generated code]*/
1739
1740static PyObject *
1741_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1742 PyObject *value)
1743/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001744{
1745 PyObject* attrib;
1746
Victor Stinner5f0af232013-07-11 23:01:36 +02001747 if (!self->extra) {
1748 if (create_extra(self, NULL) < 0)
1749 return NULL;
1750 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001751
1752 attrib = element_get_attrib(self);
1753 if (!attrib)
1754 return NULL;
1755
1756 if (PyDict_SetItem(attrib, key, value) < 0)
1757 return NULL;
1758
1759 Py_RETURN_NONE;
1760}
1761
1762static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001763element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001764{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001765 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001766 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001767 PyObject* old;
1768
1769 if (!self->extra || index < 0 || index >= self->extra->length) {
1770 PyErr_SetString(
1771 PyExc_IndexError,
1772 "child assignment index out of range");
1773 return -1;
1774 }
1775
1776 old = self->extra->children[index];
1777
1778 if (item) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001779 if (!Element_Check(item)) {
1780 raise_type_error(item);
1781 return -1;
1782 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001783 Py_INCREF(item);
1784 self->extra->children[index] = item;
1785 } else {
1786 self->extra->length--;
1787 for (i = index; i < self->extra->length; i++)
1788 self->extra->children[i] = self->extra->children[i+1];
1789 }
1790
1791 Py_DECREF(old);
1792
1793 return 0;
1794}
1795
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001796static PyObject*
1797element_subscr(PyObject* self_, PyObject* item)
1798{
1799 ElementObject* self = (ElementObject*) self_;
1800
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001801 if (PyIndex_Check(item)) {
1802 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001803
1804 if (i == -1 && PyErr_Occurred()) {
1805 return NULL;
1806 }
1807 if (i < 0 && self->extra)
1808 i += self->extra->length;
1809 return element_getitem(self_, i);
1810 }
1811 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001812 Py_ssize_t start, stop, step, slicelen, i;
1813 size_t cur;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001814 PyObject* list;
1815
1816 if (!self->extra)
1817 return PyList_New(0);
1818
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001819 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001820 return NULL;
1821 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001822 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1823 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001824
1825 if (slicelen <= 0)
1826 return PyList_New(0);
1827 else {
1828 list = PyList_New(slicelen);
1829 if (!list)
1830 return NULL;
1831
1832 for (cur = start, i = 0; i < slicelen;
1833 cur += step, i++) {
1834 PyObject* item = self->extra->children[cur];
1835 Py_INCREF(item);
1836 PyList_SET_ITEM(list, i, item);
1837 }
1838
1839 return list;
1840 }
1841 }
1842 else {
1843 PyErr_SetString(PyExc_TypeError,
1844 "element indices must be integers");
1845 return NULL;
1846 }
1847}
1848
1849static int
1850element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1851{
1852 ElementObject* self = (ElementObject*) self_;
1853
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001854 if (PyIndex_Check(item)) {
1855 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001856
1857 if (i == -1 && PyErr_Occurred()) {
1858 return -1;
1859 }
1860 if (i < 0 && self->extra)
1861 i += self->extra->length;
1862 return element_setitem(self_, i, value);
1863 }
1864 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001865 Py_ssize_t start, stop, step, slicelen, newlen, i;
1866 size_t cur;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001867
1868 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001869 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001870
Victor Stinner5f0af232013-07-11 23:01:36 +02001871 if (!self->extra) {
1872 if (create_extra(self, NULL) < 0)
1873 return -1;
1874 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001875
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001876 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001877 return -1;
1878 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001879 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1880 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001881
Eli Bendersky865756a2012-03-09 13:38:15 +02001882 if (value == NULL) {
1883 /* Delete slice */
1884 size_t cur;
1885 Py_ssize_t i;
1886
1887 if (slicelen <= 0)
1888 return 0;
1889
1890 /* Since we're deleting, the direction of the range doesn't matter,
1891 * so for simplicity make it always ascending.
1892 */
1893 if (step < 0) {
1894 stop = start + 1;
1895 start = stop + step * (slicelen - 1) - 1;
1896 step = -step;
1897 }
1898
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001899 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001900
1901 /* recycle is a list that will contain all the children
1902 * scheduled for removal.
1903 */
1904 if (!(recycle = PyList_New(slicelen))) {
Eli Bendersky865756a2012-03-09 13:38:15 +02001905 return -1;
1906 }
1907
1908 /* This loop walks over all the children that have to be deleted,
1909 * with cur pointing at them. num_moved is the amount of children
1910 * until the next deleted child that have to be "shifted down" to
1911 * occupy the deleted's places.
1912 * Note that in the ith iteration, shifting is done i+i places down
1913 * because i children were already removed.
1914 */
1915 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1916 /* Compute how many children have to be moved, clipping at the
1917 * list end.
1918 */
1919 Py_ssize_t num_moved = step - 1;
1920 if (cur + step >= (size_t)self->extra->length) {
1921 num_moved = self->extra->length - cur - 1;
1922 }
1923
1924 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1925
1926 memmove(
1927 self->extra->children + cur - i,
1928 self->extra->children + cur + 1,
1929 num_moved * sizeof(PyObject *));
1930 }
1931
1932 /* Leftover "tail" after the last removed child */
1933 cur = start + (size_t)slicelen * step;
1934 if (cur < (size_t)self->extra->length) {
1935 memmove(
1936 self->extra->children + cur - slicelen,
1937 self->extra->children + cur,
1938 (self->extra->length - cur) * sizeof(PyObject *));
1939 }
1940
1941 self->extra->length -= slicelen;
1942
1943 /* Discard the recycle list with all the deleted sub-elements */
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -06001944 Py_DECREF(recycle);
Eli Bendersky865756a2012-03-09 13:38:15 +02001945 return 0;
1946 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001947
1948 /* A new slice is actually being assigned */
1949 seq = PySequence_Fast(value, "");
1950 if (!seq) {
1951 PyErr_Format(
1952 PyExc_TypeError,
1953 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1954 );
1955 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001956 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001957 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001958
1959 if (step != 1 && newlen != slicelen)
1960 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001961 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001962 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001963 "attempt to assign sequence of size %zd "
1964 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001965 newlen, slicelen
1966 );
1967 return -1;
1968 }
1969
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001970 /* Resize before creating the recycle bin, to prevent refleaks. */
1971 if (newlen > slicelen) {
1972 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001973 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001974 return -1;
1975 }
1976 }
1977
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001978 for (i = 0; i < newlen; i++) {
1979 PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1980 if (!Element_Check(element)) {
1981 raise_type_error(element);
1982 Py_DECREF(seq);
1983 return -1;
1984 }
1985 }
1986
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001987 if (slicelen > 0) {
1988 /* to avoid recursive calls to this method (via decref), move
1989 old items to the recycle bin here, and get rid of them when
1990 we're done modifying the element */
1991 recycle = PyList_New(slicelen);
1992 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001993 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001994 return -1;
1995 }
1996 for (cur = start, i = 0; i < slicelen;
1997 cur += step, i++)
1998 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1999 }
2000
2001 if (newlen < slicelen) {
2002 /* delete slice */
2003 for (i = stop; i < self->extra->length; i++)
2004 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
2005 } else if (newlen > slicelen) {
2006 /* insert slice */
2007 for (i = self->extra->length-1; i >= stop; i--)
2008 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
2009 }
2010
2011 /* replace the slice */
2012 for (cur = start, i = 0; i < newlen;
2013 cur += step, i++) {
2014 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
2015 Py_INCREF(element);
2016 self->extra->children[cur] = element;
2017 }
2018
2019 self->extra->length += newlen - slicelen;
2020
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02002021 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002022
2023 /* discard the recycle bin, and everything in it */
2024 Py_XDECREF(recycle);
2025
2026 return 0;
2027 }
2028 else {
2029 PyErr_SetString(PyExc_TypeError,
2030 "element indices must be integers");
2031 return -1;
2032 }
2033}
2034
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002035static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02002036element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002037{
Serhiy Storchakadde08152015-11-25 15:28:13 +02002038 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002039 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002040 return res;
2041}
2042
Serhiy Storchakadde08152015-11-25 15:28:13 +02002043static PyObject*
2044element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002045{
Serhiy Storchakadde08152015-11-25 15:28:13 +02002046 PyObject *res = element_get_text(self);
2047 Py_XINCREF(res);
2048 return res;
2049}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002050
Serhiy Storchakadde08152015-11-25 15:28:13 +02002051static PyObject*
2052element_tail_getter(ElementObject *self, void *closure)
2053{
2054 PyObject *res = element_get_tail(self);
2055 Py_XINCREF(res);
2056 return res;
2057}
2058
2059static PyObject*
2060element_attrib_getter(ElementObject *self, void *closure)
2061{
2062 PyObject *res;
2063 if (!self->extra) {
2064 if (create_extra(self, NULL) < 0)
2065 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002066 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02002067 res = element_get_attrib(self);
2068 Py_XINCREF(res);
2069 return res;
2070}
Victor Stinner4d463432013-07-11 23:05:03 +02002071
/* Macro for setter validation: a NULL value means the attribute is being
 * deleted, which is rejected with AttributeError by returning -1 from the
 * *enclosing* function.  Wrapped in do { } while (0) so that it behaves as
 * a single statement (safe inside an unbraced if/else) and must be followed
 * by a semicolon.
 */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2080
Serhiy Storchakadde08152015-11-25 15:28:13 +02002081static int
2082element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2083{
2084 _VALIDATE_ATTR_VALUE(value);
2085 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002086 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002087 return 0;
2088}
2089
2090static int
2091element_text_setter(ElementObject *self, PyObject *value, void *closure)
2092{
2093 _VALIDATE_ATTR_VALUE(value);
2094 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002095 _set_joined_ptr(&self->text, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002096 return 0;
2097}
2098
2099static int
2100element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2101{
2102 _VALIDATE_ATTR_VALUE(value);
2103 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002104 _set_joined_ptr(&self->tail, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002105 return 0;
2106}
2107
2108static int
2109element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2110{
2111 _VALIDATE_ATTR_VALUE(value);
2112 if (!self->extra) {
2113 if (create_extra(self, NULL) < 0)
2114 return -1;
2115 }
2116 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002117 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002118 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002119}
2120
2121static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002122 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002123 0, /* sq_concat */
2124 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002125 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002126 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002127 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002128 0,
2129};
2130
Eli Bendersky64d11e62012-06-15 07:42:50 +03002131/******************************* Element iterator ****************************/
2132
2133/* ElementIterObject represents the iteration state over an XML element in
2134 * pre-order traversal. To keep track of which sub-element should be returned
2135 * next, a stack of parents is maintained. This is a standard stack-based
2136 * iterative pre-order traversal of a tree.
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002137 * The stack is managed using a continuous array.
2138 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002139 * the current one is exhausted, and the next child to examine in that parent.
2140 */
2141typedef struct ParentLocator_t {
2142 ElementObject *parent;
2143 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002144} ParentLocator;
2145
2146typedef struct {
2147 PyObject_HEAD
2148 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002149 Py_ssize_t parent_stack_used;
2150 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002151 ElementObject *root_element;
2152 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002153 int gettext;
2154} ElementIterObject;
2155
2156
2157static void
2158elementiter_dealloc(ElementIterObject *it)
2159{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002160 Py_ssize_t i = it->parent_stack_used;
2161 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002162 /* bpo-31095: UnTrack is needed before calling any callbacks */
2163 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002164 while (i--)
2165 Py_XDECREF(it->parent_stack[i].parent);
2166 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002167
2168 Py_XDECREF(it->sought_tag);
2169 Py_XDECREF(it->root_element);
2170
Eli Bendersky64d11e62012-06-15 07:42:50 +03002171 PyObject_GC_Del(it);
2172}
2173
2174static int
2175elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2176{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002177 Py_ssize_t i = it->parent_stack_used;
2178 while (i--)
2179 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002180
2181 Py_VISIT(it->root_element);
2182 Py_VISIT(it->sought_tag);
2183 return 0;
2184}
2185
2186/* Helper function for elementiter_next. Add a new parent to the parent stack.
2187 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002188static int
2189parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002190{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002191 ParentLocator *item;
2192
2193 if (it->parent_stack_used >= it->parent_stack_size) {
2194 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2195 ParentLocator *parent_stack = it->parent_stack;
2196 PyMem_Resize(parent_stack, ParentLocator, new_size);
2197 if (parent_stack == NULL)
2198 return -1;
2199 it->parent_stack = parent_stack;
2200 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002201 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002202 item = it->parent_stack + it->parent_stack_used++;
2203 Py_INCREF(parent);
2204 item->parent = parent;
2205 item->child_index = 0;
2206 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002207}
2208
2209static PyObject *
2210elementiter_next(ElementIterObject *it)
2211{
2212 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002213 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002214 * A short note on gettext: this function serves both the iter() and
2215 * itertext() methods to avoid code duplication. However, there are a few
2216 * small differences in the way these iterations work. Namely:
2217 * - itertext() only yields text from nodes that have it, and continues
2218 * iterating when a node doesn't have text (so it doesn't return any
2219 * node like iter())
2220 * - itertext() also has to handle tail, after finishing with all the
2221 * children of a node.
2222 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002223 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002224 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002225 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002226
2227 while (1) {
2228 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002229 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002230 * iterator is exhausted.
2231 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002232 if (!it->parent_stack_used) {
2233 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002234 PyErr_SetNone(PyExc_StopIteration);
2235 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002236 }
2237
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002238 elem = it->root_element; /* steals a reference */
2239 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002240 }
2241 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002242 /* See if there are children left to traverse in the current parent. If
2243 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002244 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002245 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2246 Py_ssize_t child_index = item->child_index;
2247 ElementObjectExtra *extra;
2248 elem = item->parent;
2249 extra = elem->extra;
2250 if (!extra || child_index >= extra->length) {
2251 it->parent_stack_used--;
2252 /* Note that extra condition on it->parent_stack_used here;
2253 * this is because itertext() is supposed to only return *inner*
2254 * text, not text following the element it began iteration with.
2255 */
2256 if (it->gettext && it->parent_stack_used) {
2257 text = element_get_tail(elem);
2258 goto gettext;
2259 }
2260 Py_DECREF(elem);
2261 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002262 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002263
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03002264 assert(Element_Check(extra->children[child_index]));
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002265 elem = (ElementObject *)extra->children[child_index];
2266 item->child_index++;
2267 Py_INCREF(elem);
2268 }
2269
2270 if (parent_stack_push_new(it, elem) < 0) {
2271 Py_DECREF(elem);
2272 PyErr_NoMemory();
2273 return NULL;
2274 }
2275 if (it->gettext) {
2276 text = element_get_text(elem);
2277 goto gettext;
2278 }
2279
2280 if (it->sought_tag == Py_None)
2281 return (PyObject *)elem;
2282
2283 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2284 if (rc > 0)
2285 return (PyObject *)elem;
2286
2287 Py_DECREF(elem);
2288 if (rc < 0)
2289 return NULL;
2290 continue;
2291
2292gettext:
2293 if (!text) {
2294 Py_DECREF(elem);
2295 return NULL;
2296 }
2297 if (text == Py_None) {
2298 Py_DECREF(elem);
2299 }
2300 else {
2301 Py_INCREF(text);
2302 Py_DECREF(elem);
2303 rc = PyObject_IsTrue(text);
2304 if (rc > 0)
2305 return text;
2306 Py_DECREF(text);
2307 if (rc < 0)
2308 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002309 }
2310 }
2311
2312 return NULL;
2313}
2314
2315
2316static PyTypeObject ElementIter_Type = {
2317 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002318 /* Using the module's name since the pure-Python implementation does not
2319 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002320 "_elementtree._element_iterator", /* tp_name */
2321 sizeof(ElementIterObject), /* tp_basicsize */
2322 0, /* tp_itemsize */
2323 /* methods */
2324 (destructor)elementiter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002325 0, /* tp_vectorcall_offset */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002326 0, /* tp_getattr */
2327 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002328 0, /* tp_as_async */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002329 0, /* tp_repr */
2330 0, /* tp_as_number */
2331 0, /* tp_as_sequence */
2332 0, /* tp_as_mapping */
2333 0, /* tp_hash */
2334 0, /* tp_call */
2335 0, /* tp_str */
2336 0, /* tp_getattro */
2337 0, /* tp_setattro */
2338 0, /* tp_as_buffer */
2339 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2340 0, /* tp_doc */
2341 (traverseproc)elementiter_traverse, /* tp_traverse */
2342 0, /* tp_clear */
2343 0, /* tp_richcompare */
2344 0, /* tp_weaklistoffset */
2345 PyObject_SelfIter, /* tp_iter */
2346 (iternextfunc)elementiter_next, /* tp_iternext */
2347 0, /* tp_methods */
2348 0, /* tp_members */
2349 0, /* tp_getset */
2350 0, /* tp_base */
2351 0, /* tp_dict */
2352 0, /* tp_descr_get */
2353 0, /* tp_descr_set */
2354 0, /* tp_dictoffset */
2355 0, /* tp_init */
2356 0, /* tp_alloc */
2357 0, /* tp_new */
2358};
2359
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002360#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002361
2362static PyObject *
2363create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2364{
2365 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002366
2367 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2368 if (!it)
2369 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002370
Victor Stinner4d463432013-07-11 23:05:03 +02002371 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002372 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002373 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002374 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002375 it->root_element = self;
2376
Eli Bendersky64d11e62012-06-15 07:42:50 +03002377 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002378
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002379 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002380 if (it->parent_stack == NULL) {
2381 Py_DECREF(it);
2382 PyErr_NoMemory();
2383 return NULL;
2384 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002385 it->parent_stack_used = 0;
2386 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002387
Eli Bendersky64d11e62012-06-15 07:42:50 +03002388 return (PyObject *)it;
2389}
2390
2391
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002392/* ==================================================================== */
2393/* the tree builder type */
2394
2395typedef struct {
2396 PyObject_HEAD
2397
Eli Bendersky58d548d2012-05-29 15:45:16 +03002398 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002399
Antoine Pitrouee329312012-10-04 19:53:29 +02002400 PyObject *this; /* current node */
2401 PyObject *last; /* most recently created node */
Stefan Behnelbb697892019-07-24 20:46:01 +02002402 PyObject *last_for_tail; /* most recently created node that takes a tail */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002403
Eli Bendersky58d548d2012-05-29 15:45:16 +03002404 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002405
Eli Bendersky58d548d2012-05-29 15:45:16 +03002406 PyObject *stack; /* element stack */
2407 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002408
Eli Bendersky48d358b2012-05-30 17:57:50 +03002409 PyObject *element_factory;
Stefan Behnel43851a22019-05-01 21:20:38 +02002410 PyObject *comment_factory;
2411 PyObject *pi_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002412
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002413 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002414 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002415 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2416 PyObject *end_event_obj;
2417 PyObject *start_ns_event_obj;
2418 PyObject *end_ns_event_obj;
Stefan Behnel43851a22019-05-01 21:20:38 +02002419 PyObject *comment_event_obj;
2420 PyObject *pi_event_obj;
2421
2422 char insert_comments;
2423 char insert_pis;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002424} TreeBuilderObject;
2425
Christian Heimes90aa7642007-12-19 02:45:37 +00002426#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002427
2428/* -------------------------------------------------------------------- */
2429/* constructor and destructor */
2430
Eli Bendersky58d548d2012-05-29 15:45:16 +03002431static PyObject *
2432treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002433{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002434 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2435 if (t != NULL) {
2436 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002437
Eli Bendersky58d548d2012-05-29 15:45:16 +03002438 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002439 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002440 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002441 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002442
Eli Bendersky58d548d2012-05-29 15:45:16 +03002443 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002444 t->element_factory = NULL;
Stefan Behnel43851a22019-05-01 21:20:38 +02002445 t->comment_factory = NULL;
2446 t->pi_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002447 t->stack = PyList_New(20);
2448 if (!t->stack) {
2449 Py_DECREF(t->this);
2450 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002451 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002452 return NULL;
2453 }
2454 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002455
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002456 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002457 t->start_event_obj = t->end_event_obj = NULL;
2458 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
Stefan Behnel43851a22019-05-01 21:20:38 +02002459 t->comment_event_obj = t->pi_event_obj = NULL;
2460 t->insert_comments = t->insert_pis = 0;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002461 }
2462 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002463}
2464
Serhiy Storchakacb985562015-05-04 15:32:48 +03002465/*[clinic input]
2466_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002467
Serhiy Storchakad322abb2019-09-14 13:31:50 +03002468 element_factory: object = None
Stefan Behnel43851a22019-05-01 21:20:38 +02002469 *
Serhiy Storchakad322abb2019-09-14 13:31:50 +03002470 comment_factory: object = None
2471 pi_factory: object = None
Stefan Behnel43851a22019-05-01 21:20:38 +02002472 insert_comments: bool = False
2473 insert_pis: bool = False
Serhiy Storchakacb985562015-05-04 15:32:48 +03002474
2475[clinic start generated code]*/
2476
2477static int
2478_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
Stefan Behnel43851a22019-05-01 21:20:38 +02002479 PyObject *element_factory,
2480 PyObject *comment_factory,
2481 PyObject *pi_factory,
2482 int insert_comments, int insert_pis)
Serhiy Storchakad322abb2019-09-14 13:31:50 +03002483/*[clinic end generated code: output=8571d4dcadfdf952 input=ae98a94df20b5cc3]*/
Serhiy Storchakacb985562015-05-04 15:32:48 +03002484{
Serhiy Storchakad322abb2019-09-14 13:31:50 +03002485 if (element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002486 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002487 Py_XSETREF(self->element_factory, element_factory);
Stefan Behnel43851a22019-05-01 21:20:38 +02002488 } else {
2489 Py_CLEAR(self->element_factory);
2490 }
2491
Serhiy Storchakad322abb2019-09-14 13:31:50 +03002492 if (comment_factory == Py_None) {
Stefan Behnel43851a22019-05-01 21:20:38 +02002493 elementtreestate *st = ET_STATE_GLOBAL;
2494 comment_factory = st->comment_factory;
2495 }
2496 if (comment_factory) {
2497 Py_INCREF(comment_factory);
2498 Py_XSETREF(self->comment_factory, comment_factory);
2499 self->insert_comments = insert_comments;
2500 } else {
2501 Py_CLEAR(self->comment_factory);
2502 self->insert_comments = 0;
2503 }
2504
Serhiy Storchakad322abb2019-09-14 13:31:50 +03002505 if (pi_factory == Py_None) {
Stefan Behnel43851a22019-05-01 21:20:38 +02002506 elementtreestate *st = ET_STATE_GLOBAL;
2507 pi_factory = st->pi_factory;
2508 }
2509 if (pi_factory) {
2510 Py_INCREF(pi_factory);
2511 Py_XSETREF(self->pi_factory, pi_factory);
2512 self->insert_pis = insert_pis;
2513 } else {
2514 Py_CLEAR(self->pi_factory);
2515 self->insert_pis = 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002516 }
2517
Eli Bendersky58d548d2012-05-29 15:45:16 +03002518 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002519}
2520
Eli Bendersky48d358b2012-05-30 17:57:50 +03002521static int
2522treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2523{
Stefan Behnel43851a22019-05-01 21:20:38 +02002524 Py_VISIT(self->pi_event_obj);
2525 Py_VISIT(self->comment_event_obj);
Serhiy Storchakad2a75c62018-12-18 22:29:14 +02002526 Py_VISIT(self->end_ns_event_obj);
2527 Py_VISIT(self->start_ns_event_obj);
2528 Py_VISIT(self->end_event_obj);
2529 Py_VISIT(self->start_event_obj);
2530 Py_VISIT(self->events_append);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002531 Py_VISIT(self->root);
2532 Py_VISIT(self->this);
2533 Py_VISIT(self->last);
Stefan Behnelbb697892019-07-24 20:46:01 +02002534 Py_VISIT(self->last_for_tail);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002535 Py_VISIT(self->data);
2536 Py_VISIT(self->stack);
Stefan Behnel43851a22019-05-01 21:20:38 +02002537 Py_VISIT(self->pi_factory);
2538 Py_VISIT(self->comment_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002539 Py_VISIT(self->element_factory);
2540 return 0;
2541}
2542
2543static int
2544treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002545{
Stefan Behnel43851a22019-05-01 21:20:38 +02002546 Py_CLEAR(self->pi_event_obj);
2547 Py_CLEAR(self->comment_event_obj);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002548 Py_CLEAR(self->end_ns_event_obj);
2549 Py_CLEAR(self->start_ns_event_obj);
2550 Py_CLEAR(self->end_event_obj);
2551 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002552 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002553 Py_CLEAR(self->stack);
2554 Py_CLEAR(self->data);
2555 Py_CLEAR(self->last);
Stefan Behnelbb697892019-07-24 20:46:01 +02002556 Py_CLEAR(self->last_for_tail);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002557 Py_CLEAR(self->this);
Stefan Behnel43851a22019-05-01 21:20:38 +02002558 Py_CLEAR(self->pi_factory);
2559 Py_CLEAR(self->comment_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002560 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002561 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002562 return 0;
2563}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002564
Eli Bendersky48d358b2012-05-30 17:57:50 +03002565static void
2566treebuilder_dealloc(TreeBuilderObject *self)
2567{
2568 PyObject_GC_UnTrack(self);
2569 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002570 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002571}
2572
2573/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002574/* helpers for handling of arbitrary element-like objects */
2575
Stefan Behnel43851a22019-05-01 21:20:38 +02002576/*[clinic input]
2577_elementtree._set_factories
2578
2579 comment_factory: object
2580 pi_factory: object
2581 /
2582
2583Change the factories used to create comments and processing instructions.
2584
2585For internal use only.
2586[clinic start generated code]*/
2587
2588static PyObject *
2589_elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
2590 PyObject *pi_factory)
2591/*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/
2592{
2593 elementtreestate *st = ET_STATE_GLOBAL;
2594 PyObject *old;
2595
2596 if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) {
2597 PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s",
2598 Py_TYPE(comment_factory)->tp_name);
2599 return NULL;
2600 }
2601 if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) {
2602 PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s",
2603 Py_TYPE(pi_factory)->tp_name);
2604 return NULL;
2605 }
2606
2607 old = PyTuple_Pack(2,
2608 st->comment_factory ? st->comment_factory : Py_None,
2609 st->pi_factory ? st->pi_factory : Py_None);
2610
2611 if (comment_factory == Py_None) {
2612 Py_CLEAR(st->comment_factory);
2613 } else {
2614 Py_INCREF(comment_factory);
2615 Py_XSETREF(st->comment_factory, comment_factory);
2616 }
2617 if (pi_factory == Py_None) {
2618 Py_CLEAR(st->pi_factory);
2619 } else {
2620 Py_INCREF(pi_factory);
2621 Py_XSETREF(st->pi_factory, pi_factory);
2622 }
2623
2624 return old;
2625}
2626
Antoine Pitrouee329312012-10-04 19:53:29 +02002627static int
Stefan Behnelbb697892019-07-24 20:46:01 +02002628treebuilder_extend_element_text_or_tail(PyObject *element, PyObject **data,
2629 PyObject **dest, _Py_Identifier *name)
Antoine Pitrouee329312012-10-04 19:53:29 +02002630{
Stefan Behnelbb697892019-07-24 20:46:01 +02002631 /* Fast paths for the "almost always" cases. */
Antoine Pitrouee329312012-10-04 19:53:29 +02002632 if (Element_CheckExact(element)) {
Stefan Behnelbb697892019-07-24 20:46:01 +02002633 PyObject *dest_obj = JOIN_OBJ(*dest);
2634 if (dest_obj == Py_None) {
2635 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2636 *data = NULL;
2637 Py_DECREF(dest_obj);
2638 return 0;
2639 }
2640 else if (JOIN_GET(*dest)) {
2641 if (PyList_SetSlice(dest_obj, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, *data) < 0) {
2642 return -1;
2643 }
2644 Py_CLEAR(*data);
2645 return 0;
2646 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002647 }
Stefan Behnelbb697892019-07-24 20:46:01 +02002648
2649 /* Fallback for the non-Element / non-trivial cases. */
2650 {
Antoine Pitrouee329312012-10-04 19:53:29 +02002651 int r;
Stefan Behnelbb697892019-07-24 20:46:01 +02002652 PyObject* joined;
2653 PyObject* previous = _PyObject_GetAttrId(element, name);
2654 if (!previous)
Antoine Pitrouee329312012-10-04 19:53:29 +02002655 return -1;
Stefan Behnelbb697892019-07-24 20:46:01 +02002656 joined = list_join(*data);
2657 if (!joined) {
2658 Py_DECREF(previous);
2659 return -1;
2660 }
2661 if (previous != Py_None) {
2662 PyObject *tmp = PyNumber_Add(previous, joined);
2663 Py_DECREF(joined);
2664 Py_DECREF(previous);
2665 if (!tmp)
2666 return -1;
2667 joined = tmp;
2668 } else {
2669 Py_DECREF(previous);
2670 }
2671
Antoine Pitrouee329312012-10-04 19:53:29 +02002672 r = _PyObject_SetAttrId(element, name, joined);
2673 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002674 if (r < 0)
2675 return -1;
2676 Py_CLEAR(*data);
2677 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002678 }
2679}
2680
Serhiy Storchaka576def02017-03-30 09:47:31 +03002681LOCAL(int)
2682treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002683{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002684 if (!self->data) {
2685 return 0;
2686 }
2687
Stefan Behnelbb697892019-07-24 20:46:01 +02002688 if (!self->last_for_tail) {
2689 PyObject *element = self->last;
Serhiy Storchaka576def02017-03-30 09:47:31 +03002690 _Py_IDENTIFIER(text);
Stefan Behnelbb697892019-07-24 20:46:01 +02002691 return treebuilder_extend_element_text_or_tail(
Serhiy Storchaka576def02017-03-30 09:47:31 +03002692 element, &self->data,
2693 &((ElementObject *) element)->text, &PyId_text);
2694 }
2695 else {
Stefan Behnelbb697892019-07-24 20:46:01 +02002696 PyObject *element = self->last_for_tail;
Serhiy Storchaka576def02017-03-30 09:47:31 +03002697 _Py_IDENTIFIER(tail);
Stefan Behnelbb697892019-07-24 20:46:01 +02002698 return treebuilder_extend_element_text_or_tail(
Serhiy Storchaka576def02017-03-30 09:47:31 +03002699 element, &self->data,
2700 &((ElementObject *) element)->tail, &PyId_tail);
2701 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002702}
2703
2704static int
2705treebuilder_add_subelement(PyObject *element, PyObject *child)
2706{
2707 _Py_IDENTIFIER(append);
2708 if (Element_CheckExact(element)) {
2709 ElementObject *elem = (ElementObject *) element;
2710 return element_add_subelement(elem, child);
2711 }
2712 else {
2713 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002714 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002715 if (res == NULL)
2716 return -1;
2717 Py_DECREF(res);
2718 return 0;
2719 }
2720}
2721
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002722LOCAL(int)
2723treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2724 PyObject *node)
2725{
2726 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002727 PyObject *res;
2728 PyObject *event = PyTuple_Pack(2, action, node);
2729 if (event == NULL)
2730 return -1;
Stefan Behnel43851a22019-05-01 21:20:38 +02002731 res = _PyObject_FastCall(self->events_append, &event, 1);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002732 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002733 if (res == NULL)
2734 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002735 Py_DECREF(res);
2736 }
2737 return 0;
2738}
2739
Antoine Pitrouee329312012-10-04 19:53:29 +02002740/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002741/* handlers */
2742
2743LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002744treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2745 PyObject* attrib)
2746{
2747 PyObject* node;
2748 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002749 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002750
Serhiy Storchaka576def02017-03-30 09:47:31 +03002751 if (treebuilder_flush_data(self) < 0) {
2752 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002753 }
2754
Stefan Behnel43851a22019-05-01 21:20:38 +02002755 if (!self->element_factory) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002756 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002757 } else if (attrib == Py_None) {
2758 attrib = PyDict_New();
2759 if (!attrib)
2760 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002761 node = PyObject_CallFunctionObjArgs(self->element_factory,
2762 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002763 Py_DECREF(attrib);
2764 }
2765 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002766 node = PyObject_CallFunctionObjArgs(self->element_factory,
2767 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002768 }
2769 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002770 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002771 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002772
Antoine Pitrouee329312012-10-04 19:53:29 +02002773 this = self->this;
Stefan Behnelbb697892019-07-24 20:46:01 +02002774 Py_CLEAR(self->last_for_tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002775
2776 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002777 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002778 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002779 } else {
2780 if (self->root) {
2781 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002782 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002783 "multiple elements on top level"
2784 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002785 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002786 }
2787 Py_INCREF(node);
2788 self->root = node;
2789 }
2790
2791 if (self->index < PyList_GET_SIZE(self->stack)) {
2792 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002793 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002794 Py_INCREF(this);
2795 } else {
2796 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002797 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002798 }
2799 self->index++;
2800
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002801 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002802 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002803 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002804 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002805
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002806 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2807 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002808
2809 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002810
2811 error:
2812 Py_DECREF(node);
2813 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002814}
2815
2816LOCAL(PyObject*)
2817treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2818{
2819 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002820 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002821 /* ignore calls to data before the first call to start */
2822 Py_RETURN_NONE;
2823 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002824 /* store the first item as is */
2825 Py_INCREF(data); self->data = data;
2826 } else {
2827 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002828 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2829 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002830 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002831 /* expat often generates single character data sections; handle
2832 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002833 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2834 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002835 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002836 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002837 } else if (PyList_CheckExact(self->data)) {
2838 if (PyList_Append(self->data, data) < 0)
2839 return NULL;
2840 } else {
2841 PyObject* list = PyList_New(2);
2842 if (!list)
2843 return NULL;
2844 PyList_SET_ITEM(list, 0, self->data);
2845 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2846 self->data = list;
2847 }
2848 }
2849
2850 Py_RETURN_NONE;
2851}
2852
2853LOCAL(PyObject*)
2854treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2855{
2856 PyObject* item;
2857
Serhiy Storchaka576def02017-03-30 09:47:31 +03002858 if (treebuilder_flush_data(self) < 0) {
2859 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002860 }
2861
2862 if (self->index == 0) {
2863 PyErr_SetString(
2864 PyExc_IndexError,
2865 "pop from empty stack"
2866 );
2867 return NULL;
2868 }
2869
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002870 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002871 self->last = self->this;
Stefan Behnelbb697892019-07-24 20:46:01 +02002872 Py_INCREF(self->last);
2873 Py_XSETREF(self->last_for_tail, self->last);
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002874 self->index--;
2875 self->this = PyList_GET_ITEM(self->stack, self->index);
2876 Py_INCREF(self->this);
2877 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002878
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002879 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2880 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002881
2882 Py_INCREF(self->last);
2883 return (PyObject*) self->last;
2884}
2885
Stefan Behnel43851a22019-05-01 21:20:38 +02002886LOCAL(PyObject*)
2887treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
2888{
Stefan Behnelbb697892019-07-24 20:46:01 +02002889 PyObject* comment;
Stefan Behnel43851a22019-05-01 21:20:38 +02002890 PyObject* this;
2891
2892 if (treebuilder_flush_data(self) < 0) {
2893 return NULL;
2894 }
2895
2896 if (self->comment_factory) {
2897 comment = _PyObject_FastCall(self->comment_factory, &text, 1);
2898 if (!comment)
2899 return NULL;
2900
2901 this = self->this;
2902 if (self->insert_comments && this != Py_None) {
2903 if (treebuilder_add_subelement(this, comment) < 0)
2904 goto error;
Stefan Behnelbb697892019-07-24 20:46:01 +02002905 Py_INCREF(comment);
2906 Py_XSETREF(self->last_for_tail, comment);
Stefan Behnel43851a22019-05-01 21:20:38 +02002907 }
2908 } else {
2909 Py_INCREF(text);
2910 comment = text;
2911 }
2912
2913 if (self->events_append && self->comment_event_obj) {
2914 if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0)
2915 goto error;
2916 }
2917
2918 return comment;
2919
2920 error:
2921 Py_DECREF(comment);
2922 return NULL;
2923}
2924
2925LOCAL(PyObject*)
2926treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
2927{
Stefan Behnelbb697892019-07-24 20:46:01 +02002928 PyObject* pi;
Stefan Behnel43851a22019-05-01 21:20:38 +02002929 PyObject* this;
2930 PyObject* stack[2] = {target, text};
2931
2932 if (treebuilder_flush_data(self) < 0) {
2933 return NULL;
2934 }
2935
2936 if (self->pi_factory) {
2937 pi = _PyObject_FastCall(self->pi_factory, stack, 2);
2938 if (!pi) {
2939 return NULL;
2940 }
2941
2942 this = self->this;
2943 if (self->insert_pis && this != Py_None) {
2944 if (treebuilder_add_subelement(this, pi) < 0)
2945 goto error;
Stefan Behnelbb697892019-07-24 20:46:01 +02002946 Py_INCREF(pi);
2947 Py_XSETREF(self->last_for_tail, pi);
Stefan Behnel43851a22019-05-01 21:20:38 +02002948 }
2949 } else {
2950 pi = PyTuple_Pack(2, target, text);
2951 if (!pi) {
2952 return NULL;
2953 }
2954 }
2955
2956 if (self->events_append && self->pi_event_obj) {
2957 if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0)
2958 goto error;
2959 }
2960
2961 return pi;
2962
2963 error:
2964 Py_DECREF(pi);
2965 return NULL;
2966}
2967
Stefan Behneldde3eeb2019-05-01 21:49:58 +02002968LOCAL(PyObject*)
2969treebuilder_handle_start_ns(TreeBuilderObject* self, PyObject* prefix, PyObject* uri)
2970{
2971 PyObject* parcel;
2972
2973 if (self->events_append && self->start_ns_event_obj) {
2974 parcel = PyTuple_Pack(2, prefix, uri);
2975 if (!parcel) {
2976 return NULL;
2977 }
2978
2979 if (treebuilder_append_event(self, self->start_ns_event_obj, parcel) < 0) {
2980 Py_DECREF(parcel);
2981 return NULL;
2982 }
2983 Py_DECREF(parcel);
2984 }
2985
2986 Py_RETURN_NONE;
2987}
2988
2989LOCAL(PyObject*)
2990treebuilder_handle_end_ns(TreeBuilderObject* self, PyObject* prefix)
2991{
2992 if (self->events_append && self->end_ns_event_obj) {
2993 if (treebuilder_append_event(self, self->end_ns_event_obj, prefix) < 0) {
2994 return NULL;
2995 }
2996 }
2997
2998 Py_RETURN_NONE;
2999}
3000
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003001/* -------------------------------------------------------------------- */
3002/* methods (in alphabetical order) */
3003
Serhiy Storchakacb985562015-05-04 15:32:48 +03003004/*[clinic input]
3005_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003006
Serhiy Storchakacb985562015-05-04 15:32:48 +03003007 data: object
3008 /
3009
3010[clinic start generated code]*/
3011
3012static PyObject *
3013_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
3014/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
3015{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003016 return treebuilder_handle_data(self, data);
3017}
3018
Serhiy Storchakacb985562015-05-04 15:32:48 +03003019/*[clinic input]
3020_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003021
Serhiy Storchakacb985562015-05-04 15:32:48 +03003022 tag: object
3023 /
3024
3025[clinic start generated code]*/
3026
3027static PyObject *
3028_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
3029/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
3030{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003031 return treebuilder_handle_end(self, tag);
3032}
3033
Stefan Behnel43851a22019-05-01 21:20:38 +02003034/*[clinic input]
3035_elementtree.TreeBuilder.comment
3036
3037 text: object
3038 /
3039
3040[clinic start generated code]*/
3041
3042static PyObject *
3043_elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text)
3044/*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/
3045{
3046 return treebuilder_handle_comment(self, text);
3047}
3048
3049/*[clinic input]
3050_elementtree.TreeBuilder.pi
3051
3052 target: object
3053 text: object = None
3054 /
3055
3056[clinic start generated code]*/
3057
3058static PyObject *
3059_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target,
3060 PyObject *text)
3061/*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/
3062{
3063 return treebuilder_handle_pi(self, target, text);
3064}
3065
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003066LOCAL(PyObject*)
3067treebuilder_done(TreeBuilderObject* self)
3068{
3069 PyObject* res;
3070
3071 /* FIXME: check stack size? */
3072
3073 if (self->root)
3074 res = self->root;
3075 else
3076 res = Py_None;
3077
3078 Py_INCREF(res);
3079 return res;
3080}
3081
Serhiy Storchakacb985562015-05-04 15:32:48 +03003082/*[clinic input]
3083_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003084
Serhiy Storchakacb985562015-05-04 15:32:48 +03003085[clinic start generated code]*/
3086
3087static PyObject *
3088_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
3089/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
3090{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003091 return treebuilder_done(self);
3092}
3093
Serhiy Storchakacb985562015-05-04 15:32:48 +03003094/*[clinic input]
3095_elementtree.TreeBuilder.start
3096
3097 tag: object
3098 attrs: object = None
3099 /
3100
3101[clinic start generated code]*/
3102
3103static PyObject *
3104_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
3105 PyObject *attrs)
3106/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003107{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003108 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003109}
3110
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003111/* ==================================================================== */
3112/* the expat interface */
3113
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003114#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003115#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07003116
3117/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
3118 * cached globally without being in per-module state.
3119 */
Eli Bendersky20d41742012-06-01 09:48:37 +03003120static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003121#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003122
Eli Bendersky52467b12012-06-01 07:13:08 +03003123static XML_Memory_Handling_Suite ExpatMemoryHandler = {
3124 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
3125
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003126typedef struct {
3127 PyObject_HEAD
3128
3129 XML_Parser parser;
3130
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003131 PyObject *target;
3132 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003133
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003134 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003135
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003136 PyObject *handle_start_ns;
3137 PyObject *handle_end_ns;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003138 PyObject *handle_start;
3139 PyObject *handle_data;
3140 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003141
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003142 PyObject *handle_comment;
3143 PyObject *handle_pi;
3144 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003145
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003146 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003147
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003148} XMLParserObject;
3149
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003150/* helpers */
3151
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003152LOCAL(PyObject*)
3153makeuniversal(XMLParserObject* self, const char* string)
3154{
3155 /* convert a UTF-8 tag/attribute name from the expat parser
3156 to a universal name string */
3157
Antoine Pitrouc1948842012-10-01 23:40:37 +02003158 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003159 PyObject* key;
3160 PyObject* value;
3161
3162 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00003163 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003164 if (!key)
3165 return NULL;
3166
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003167 value = PyDict_GetItemWithError(self->names, key);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003168
3169 if (value) {
3170 Py_INCREF(value);
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003171 }
3172 else if (!PyErr_Occurred()) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003173 /* new name. convert to universal name, and decode as
3174 necessary */
3175
3176 PyObject* tag;
3177 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02003178 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003179
3180 /* look for namespace separator */
3181 for (i = 0; i < size; i++)
3182 if (string[i] == '}')
3183 break;
3184 if (i != size) {
3185 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00003186 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02003187 if (tag == NULL) {
3188 Py_DECREF(key);
3189 return NULL;
3190 }
Christian Heimes72b710a2008-05-26 13:28:38 +00003191 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003192 p[0] = '{';
3193 memcpy(p+1, string, size);
3194 size++;
3195 } else {
3196 /* plain name; use key as tag */
3197 Py_INCREF(key);
3198 tag = key;
3199 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003200
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003201 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00003202 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00003203 value = PyUnicode_DecodeUTF8(p, size, "strict");
3204 Py_DECREF(tag);
3205 if (!value) {
3206 Py_DECREF(key);
3207 return NULL;
3208 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003209
3210 /* add to names dictionary */
3211 if (PyDict_SetItem(self->names, key, value) < 0) {
3212 Py_DECREF(key);
3213 Py_DECREF(value);
3214 return NULL;
3215 }
3216 }
3217
3218 Py_DECREF(key);
3219 return value;
3220}
3221
Eli Bendersky5b77d812012-03-16 08:20:05 +02003222/* Set the ParseError exception with the given parameters.
3223 * If message is not NULL, it's used as the error string. Otherwise, the
3224 * message string is the default for the given error_code.
3225*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003226static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003227expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
3228 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003229{
Eli Bendersky5b77d812012-03-16 08:20:05 +02003230 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003231 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003232
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003233 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02003234 message ? message : EXPAT(ErrorString)(error_code),
3235 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01003236 if (errmsg == NULL)
3237 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003238
Stefan Behnel43851a22019-05-01 21:20:38 +02003239 error = _PyObject_FastCall(st->parseerror_obj, &errmsg, 1);
Victor Stinner499dfcf2011-03-21 13:26:24 +01003240 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003241 if (!error)
3242 return;
3243
Eli Bendersky5b77d812012-03-16 08:20:05 +02003244 /* Add code and position attributes */
3245 code = PyLong_FromLong((long)error_code);
3246 if (!code) {
3247 Py_DECREF(error);
3248 return;
3249 }
3250 if (PyObject_SetAttrString(error, "code", code) == -1) {
3251 Py_DECREF(error);
3252 Py_DECREF(code);
3253 return;
3254 }
3255 Py_DECREF(code);
3256
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003257 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003258 if (!position) {
3259 Py_DECREF(error);
3260 return;
3261 }
3262 if (PyObject_SetAttrString(error, "position", position) == -1) {
3263 Py_DECREF(error);
3264 Py_DECREF(position);
3265 return;
3266 }
3267 Py_DECREF(position);
3268
Eli Bendersky532d03e2013-08-10 08:00:39 -07003269 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003270 Py_DECREF(error);
3271}
3272
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003273/* -------------------------------------------------------------------- */
3274/* handlers */
3275
3276static void
3277expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
3278 int data_len)
3279{
3280 PyObject* key;
3281 PyObject* value;
3282 PyObject* res;
3283
3284 if (data_len < 2 || data_in[0] != '&')
3285 return;
3286
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003287 if (PyErr_Occurred())
3288 return;
3289
Neal Norwitz0269b912007-08-08 06:56:02 +00003290 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003291 if (!key)
3292 return;
3293
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003294 value = PyDict_GetItemWithError(self->entity, key);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003295
3296 if (value) {
3297 if (TreeBuilder_CheckExact(self->target))
3298 res = treebuilder_handle_data(
3299 (TreeBuilderObject*) self->target, value
3300 );
3301 else if (self->handle_data)
Stefan Behnel43851a22019-05-01 21:20:38 +02003302 res = _PyObject_FastCall(self->handle_data, &value, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003303 else
3304 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003305 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003306 } else if (!PyErr_Occurred()) {
3307 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00003308 char message[128] = "undefined entity ";
3309 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003310 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003311 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003312 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003313 EXPAT(GetErrorColumnNumber)(self->parser),
3314 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003315 );
3316 }
3317
3318 Py_DECREF(key);
3319}
3320
3321static void
3322expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
3323 const XML_Char **attrib_in)
3324{
3325 PyObject* res;
3326 PyObject* tag;
3327 PyObject* attrib;
3328 int ok;
3329
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003330 if (PyErr_Occurred())
3331 return;
3332
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003333 /* tag name */
3334 tag = makeuniversal(self, tag_in);
3335 if (!tag)
3336 return; /* parser will look for errors */
3337
3338 /* attributes */
3339 if (attrib_in[0]) {
3340 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003341 if (!attrib) {
3342 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003343 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003344 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003345 while (attrib_in[0] && attrib_in[1]) {
3346 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00003347 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003348 if (!key || !value) {
3349 Py_XDECREF(value);
3350 Py_XDECREF(key);
3351 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003352 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003353 return;
3354 }
3355 ok = PyDict_SetItem(attrib, key, value);
3356 Py_DECREF(value);
3357 Py_DECREF(key);
3358 if (ok < 0) {
3359 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003360 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003361 return;
3362 }
3363 attrib_in += 2;
3364 }
3365 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003366 Py_INCREF(Py_None);
3367 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03003368 }
3369
3370 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003371 /* shortcut */
3372 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3373 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003374 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003375 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003376 if (attrib == Py_None) {
3377 Py_DECREF(attrib);
3378 attrib = PyDict_New();
3379 if (!attrib) {
3380 Py_DECREF(tag);
3381 return;
3382 }
3383 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003384 res = PyObject_CallFunctionObjArgs(self->handle_start,
3385 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003386 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003387 res = NULL;
3388
3389 Py_DECREF(tag);
3390 Py_DECREF(attrib);
3391
3392 Py_XDECREF(res);
3393}
3394
3395static void
3396expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3397 int data_len)
3398{
3399 PyObject* data;
3400 PyObject* res;
3401
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003402 if (PyErr_Occurred())
3403 return;
3404
Neal Norwitz0269b912007-08-08 06:56:02 +00003405 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003406 if (!data)
3407 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003408
3409 if (TreeBuilder_CheckExact(self->target))
3410 /* shortcut */
3411 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3412 else if (self->handle_data)
Stefan Behnel43851a22019-05-01 21:20:38 +02003413 res = _PyObject_FastCall(self->handle_data, &data, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003414 else
3415 res = NULL;
3416
3417 Py_DECREF(data);
3418
3419 Py_XDECREF(res);
3420}
3421
3422static void
3423expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3424{
3425 PyObject* tag;
3426 PyObject* res = NULL;
3427
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003428 if (PyErr_Occurred())
3429 return;
3430
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003431 if (TreeBuilder_CheckExact(self->target))
3432 /* shortcut */
3433 /* the standard tree builder doesn't look at the end tag */
3434 res = treebuilder_handle_end(
3435 (TreeBuilderObject*) self->target, Py_None
3436 );
3437 else if (self->handle_end) {
3438 tag = makeuniversal(self, tag_in);
3439 if (tag) {
Stefan Behnel43851a22019-05-01 21:20:38 +02003440 res = _PyObject_FastCall(self->handle_end, &tag, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003441 Py_DECREF(tag);
3442 }
3443 }
3444
3445 Py_XDECREF(res);
3446}
3447
3448static void
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003449expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix_in,
3450 const XML_Char *uri_in)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003451{
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003452 PyObject* res = NULL;
3453 PyObject* uri;
3454 PyObject* prefix;
3455 PyObject* stack[2];
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003456
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003457 if (PyErr_Occurred())
3458 return;
3459
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003460 if (!uri_in)
3461 uri_in = "";
3462 if (!prefix_in)
3463 prefix_in = "";
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003464
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003465 if (TreeBuilder_CheckExact(self->target)) {
3466 /* shortcut - TreeBuilder does not actually implement .start_ns() */
3467 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003468
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003469 if (target->events_append && target->start_ns_event_obj) {
3470 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3471 if (!prefix)
3472 return;
3473 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3474 if (!uri) {
3475 Py_DECREF(prefix);
3476 return;
3477 }
3478
3479 res = treebuilder_handle_start_ns(target, prefix, uri);
3480 Py_DECREF(uri);
3481 Py_DECREF(prefix);
3482 }
3483 } else if (self->handle_start_ns) {
3484 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3485 if (!prefix)
3486 return;
3487 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3488 if (!uri) {
3489 Py_DECREF(prefix);
3490 return;
3491 }
3492
3493 stack[0] = prefix;
3494 stack[1] = uri;
3495 res = _PyObject_FastCall(self->handle_start_ns, stack, 2);
3496 Py_DECREF(uri);
3497 Py_DECREF(prefix);
3498 }
3499
3500 Py_XDECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003501}
3502
3503static void
3504expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3505{
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003506 PyObject *res = NULL;
3507 PyObject* prefix;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003508
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003509 if (PyErr_Occurred())
3510 return;
3511
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003512 if (!prefix_in)
3513 prefix_in = "";
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003514
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003515 if (TreeBuilder_CheckExact(self->target)) {
3516 /* shortcut - TreeBuilder does not actually implement .end_ns() */
3517 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3518
3519 if (target->events_append && target->end_ns_event_obj) {
3520 res = treebuilder_handle_end_ns(target, Py_None);
3521 }
3522 } else if (self->handle_end_ns) {
3523 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3524 if (!prefix)
3525 return;
3526
3527 res = _PyObject_FastCall(self->handle_end_ns, &prefix, 1);
3528 Py_DECREF(prefix);
3529 }
3530
3531 Py_XDECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003532}
3533
3534static void
3535expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3536{
Stefan Behnelbb697892019-07-24 20:46:01 +02003537 PyObject* comment;
3538 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003539
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003540 if (PyErr_Occurred())
3541 return;
3542
Stefan Behnel43851a22019-05-01 21:20:38 +02003543 if (TreeBuilder_CheckExact(self->target)) {
3544 /* shortcut */
3545 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3546
Neal Norwitz0269b912007-08-08 06:56:02 +00003547 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Stefan Behnel43851a22019-05-01 21:20:38 +02003548 if (!comment)
3549 return; /* parser will look for errors */
3550
3551 res = treebuilder_handle_comment(target, comment);
Stefan Behnelbb697892019-07-24 20:46:01 +02003552 Py_XDECREF(res);
3553 Py_DECREF(comment);
Stefan Behnel43851a22019-05-01 21:20:38 +02003554 } else if (self->handle_comment) {
3555 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3556 if (!comment)
3557 return;
3558
3559 res = _PyObject_FastCall(self->handle_comment, &comment, 1);
Stefan Behnelbb697892019-07-24 20:46:01 +02003560 Py_XDECREF(res);
3561 Py_DECREF(comment);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003562 }
3563}
3564
Eli Bendersky45839902013-01-13 05:14:47 -08003565static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003566expat_start_doctype_handler(XMLParserObject *self,
3567 const XML_Char *doctype_name,
3568 const XML_Char *sysid,
3569 const XML_Char *pubid,
3570 int has_internal_subset)
3571{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003572 _Py_IDENTIFIER(doctype);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003573 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003574 PyObject *res;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003575
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003576 if (PyErr_Occurred())
3577 return;
3578
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003579 doctype_name_obj = makeuniversal(self, doctype_name);
3580 if (!doctype_name_obj)
3581 return;
3582
3583 if (sysid) {
3584 sysid_obj = makeuniversal(self, sysid);
3585 if (!sysid_obj) {
3586 Py_DECREF(doctype_name_obj);
3587 return;
3588 }
3589 } else {
3590 Py_INCREF(Py_None);
3591 sysid_obj = Py_None;
3592 }
3593
3594 if (pubid) {
3595 pubid_obj = makeuniversal(self, pubid);
3596 if (!pubid_obj) {
3597 Py_DECREF(doctype_name_obj);
3598 Py_DECREF(sysid_obj);
3599 return;
3600 }
3601 } else {
3602 Py_INCREF(Py_None);
3603 pubid_obj = Py_None;
3604 }
3605
3606 /* If the target has a handler for doctype, call it. */
3607 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003608 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3609 doctype_name_obj, pubid_obj,
3610 sysid_obj, NULL);
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003611 Py_XDECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003612 }
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003613 else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3614 (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3615 "The doctype() method of XMLParser is ignored. "
3616 "Define doctype() method on the TreeBuilder target.",
3617 1);
Serhiy Storchakaee98e7b2018-07-25 14:52:45 +03003618 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003619 }
3620
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003621 Py_DECREF(doctype_name_obj);
3622 Py_DECREF(pubid_obj);
3623 Py_DECREF(sysid_obj);
3624}
3625
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003626static void
3627expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3628 const XML_Char* data_in)
3629{
Stefan Behnelbb697892019-07-24 20:46:01 +02003630 PyObject* pi_target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003631 PyObject* data;
3632 PyObject* res;
Stefan Behnel43851a22019-05-01 21:20:38 +02003633 PyObject* stack[2];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003634
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003635 if (PyErr_Occurred())
3636 return;
3637
Stefan Behnel43851a22019-05-01 21:20:38 +02003638 if (TreeBuilder_CheckExact(self->target)) {
3639 /* shortcut */
3640 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3641
Stefan Behnelbb697892019-07-24 20:46:01 +02003642 if ((target->events_append && target->pi_event_obj) || target->insert_pis) {
Stefan Behnel43851a22019-05-01 21:20:38 +02003643 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3644 if (!pi_target)
3645 goto error;
3646 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3647 if (!data)
3648 goto error;
3649 res = treebuilder_handle_pi(target, pi_target, data);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003650 Py_XDECREF(res);
3651 Py_DECREF(data);
Stefan Behnel43851a22019-05-01 21:20:38 +02003652 Py_DECREF(pi_target);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003653 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003654 } else if (self->handle_pi) {
3655 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3656 if (!pi_target)
3657 goto error;
3658 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3659 if (!data)
3660 goto error;
3661
3662 stack[0] = pi_target;
3663 stack[1] = data;
3664 res = _PyObject_FastCall(self->handle_pi, stack, 2);
3665 Py_XDECREF(res);
3666 Py_DECREF(data);
3667 Py_DECREF(pi_target);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003668 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003669
3670 return;
3671
3672 error:
3673 Py_XDECREF(pi_target);
3674 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003675}
3676
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003677/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003678
Eli Bendersky52467b12012-06-01 07:13:08 +03003679static PyObject *
3680xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003681{
Eli Bendersky52467b12012-06-01 07:13:08 +03003682 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3683 if (self) {
3684 self->parser = NULL;
3685 self->target = self->entity = self->names = NULL;
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003686 self->handle_start_ns = self->handle_end_ns = NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003687 self->handle_start = self->handle_data = self->handle_end = NULL;
3688 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003689 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003690 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003691 return (PyObject *)self;
3692}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003693
scoderc8d8e152017-09-14 22:00:03 +02003694static int
3695ignore_attribute_error(PyObject *value)
3696{
3697 if (value == NULL) {
3698 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3699 return -1;
3700 }
3701 PyErr_Clear();
3702 }
3703 return 0;
3704}
3705
Serhiy Storchakacb985562015-05-04 15:32:48 +03003706/*[clinic input]
3707_elementtree.XMLParser.__init__
3708
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003709 *
Serhiy Storchakacb985562015-05-04 15:32:48 +03003710 target: object = NULL
Serhiy Storchakad322abb2019-09-14 13:31:50 +03003711 encoding: str(accept={str, NoneType}) = None
Serhiy Storchakacb985562015-05-04 15:32:48 +03003712
3713[clinic start generated code]*/
3714
Eli Bendersky52467b12012-06-01 07:13:08 +03003715static int
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003716_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3717 const char *encoding)
Serhiy Storchakad322abb2019-09-14 13:31:50 +03003718/*[clinic end generated code: output=3ae45ec6cdf344e4 input=53e35a829ae043e8]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003719{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003720 self->entity = PyDict_New();
3721 if (!self->entity)
3722 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003723
Serhiy Storchakacb985562015-05-04 15:32:48 +03003724 self->names = PyDict_New();
3725 if (!self->names) {
3726 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003727 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003728 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003729
Serhiy Storchakacb985562015-05-04 15:32:48 +03003730 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3731 if (!self->parser) {
3732 Py_CLEAR(self->entity);
3733 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003734 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003735 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003736 }
Christian Heimescb5778f2018-09-18 14:38:58 +02003737 /* expat < 2.1.0 has no XML_SetHashSalt() */
3738 if (EXPAT(SetHashSalt) != NULL) {
3739 EXPAT(SetHashSalt)(self->parser,
3740 (unsigned long)_Py_HashSecret.expat.hashsalt);
3741 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003742
Eli Bendersky52467b12012-06-01 07:13:08 +03003743 if (target) {
3744 Py_INCREF(target);
3745 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003746 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003747 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003748 Py_CLEAR(self->entity);
3749 Py_CLEAR(self->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003750 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003751 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003752 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003753 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003754
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003755 self->handle_start_ns = PyObject_GetAttrString(target, "start_ns");
3756 if (ignore_attribute_error(self->handle_start_ns)) {
3757 return -1;
3758 }
3759 self->handle_end_ns = PyObject_GetAttrString(target, "end_ns");
3760 if (ignore_attribute_error(self->handle_end_ns)) {
3761 return -1;
3762 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003763 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003764 if (ignore_attribute_error(self->handle_start)) {
3765 return -1;
3766 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003767 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003768 if (ignore_attribute_error(self->handle_data)) {
3769 return -1;
3770 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003771 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003772 if (ignore_attribute_error(self->handle_end)) {
3773 return -1;
3774 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003775 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003776 if (ignore_attribute_error(self->handle_comment)) {
3777 return -1;
3778 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003779 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003780 if (ignore_attribute_error(self->handle_pi)) {
3781 return -1;
3782 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003783 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003784 if (ignore_attribute_error(self->handle_close)) {
3785 return -1;
3786 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003787 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003788 if (ignore_attribute_error(self->handle_doctype)) {
3789 return -1;
3790 }
Eli Bendersky45839902013-01-13 05:14:47 -08003791
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003792 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003793 EXPAT(SetUserData)(self->parser, self);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003794 if (self->handle_start_ns || self->handle_end_ns)
3795 EXPAT(SetNamespaceDeclHandler)(
3796 self->parser,
3797 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3798 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3799 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003800 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003801 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003802 (XML_StartElementHandler) expat_start_handler,
3803 (XML_EndElementHandler) expat_end_handler
3804 );
3805 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003806 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003807 (XML_DefaultHandler) expat_default_handler
3808 );
3809 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003810 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003811 (XML_CharacterDataHandler) expat_data_handler
3812 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003813 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003814 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003815 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003816 (XML_CommentHandler) expat_comment_handler
3817 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003818 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003819 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003820 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003821 (XML_ProcessingInstructionHandler) expat_pi_handler
3822 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003823 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003824 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003825 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3826 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003827 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003828 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003829 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003830 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003831
Eli Bendersky52467b12012-06-01 07:13:08 +03003832 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003833}
3834
Eli Bendersky52467b12012-06-01 07:13:08 +03003835static int
3836xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3837{
3838 Py_VISIT(self->handle_close);
3839 Py_VISIT(self->handle_pi);
3840 Py_VISIT(self->handle_comment);
3841 Py_VISIT(self->handle_end);
3842 Py_VISIT(self->handle_data);
3843 Py_VISIT(self->handle_start);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003844 Py_VISIT(self->handle_start_ns);
3845 Py_VISIT(self->handle_end_ns);
3846 Py_VISIT(self->handle_doctype);
Eli Bendersky52467b12012-06-01 07:13:08 +03003847
3848 Py_VISIT(self->target);
3849 Py_VISIT(self->entity);
3850 Py_VISIT(self->names);
3851
3852 return 0;
3853}
3854
3855static int
3856xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003857{
Victor Stinnere727d412017-09-18 05:29:37 -07003858 if (self->parser != NULL) {
3859 XML_Parser parser = self->parser;
3860 self->parser = NULL;
3861 EXPAT(ParserFree)(parser);
3862 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003863
Antoine Pitrouc1948842012-10-01 23:40:37 +02003864 Py_CLEAR(self->handle_close);
3865 Py_CLEAR(self->handle_pi);
3866 Py_CLEAR(self->handle_comment);
3867 Py_CLEAR(self->handle_end);
3868 Py_CLEAR(self->handle_data);
3869 Py_CLEAR(self->handle_start);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003870 Py_CLEAR(self->handle_start_ns);
3871 Py_CLEAR(self->handle_end_ns);
Antoine Pitrouc1948842012-10-01 23:40:37 +02003872 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003873
Antoine Pitrouc1948842012-10-01 23:40:37 +02003874 Py_CLEAR(self->target);
3875 Py_CLEAR(self->entity);
3876 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003877
Eli Bendersky52467b12012-06-01 07:13:08 +03003878 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003879}
3880
Eli Bendersky52467b12012-06-01 07:13:08 +03003881static void
3882xmlparser_dealloc(XMLParserObject* self)
3883{
3884 PyObject_GC_UnTrack(self);
3885 xmlparser_gc_clear(self);
3886 Py_TYPE(self)->tp_free((PyObject *)self);
3887}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003888
3889LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003890expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003891{
3892 int ok;
3893
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003894 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003895 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3896
3897 if (PyErr_Occurred())
3898 return NULL;
3899
3900 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003901 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003902 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003903 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003904 EXPAT(GetErrorColumnNumber)(self->parser),
3905 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003906 );
3907 return NULL;
3908 }
3909
3910 Py_RETURN_NONE;
3911}
3912
Serhiy Storchakacb985562015-05-04 15:32:48 +03003913/*[clinic input]
3914_elementtree.XMLParser.close
3915
3916[clinic start generated code]*/
3917
3918static PyObject *
3919_elementtree_XMLParser_close_impl(XMLParserObject *self)
3920/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003921{
3922 /* end feeding data to parser */
3923
3924 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003925 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003926 if (!res)
3927 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003928
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003929 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003930 Py_DECREF(res);
3931 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003932 }
3933 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003934 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003935 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003936 }
3937 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003938 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003939 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003940}
3941
Serhiy Storchakacb985562015-05-04 15:32:48 +03003942/*[clinic input]
3943_elementtree.XMLParser.feed
3944
3945 data: object
3946 /
3947
3948[clinic start generated code]*/
3949
3950static PyObject *
3951_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3952/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003953{
3954 /* feed data to parser */
3955
Serhiy Storchakacb985562015-05-04 15:32:48 +03003956 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003957 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003958 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3959 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003960 return NULL;
3961 if (data_len > INT_MAX) {
3962 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3963 return NULL;
3964 }
3965 /* Explicitly set UTF-8 encoding. Return code ignored. */
3966 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003967 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003968 }
3969 else {
3970 Py_buffer view;
3971 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003972 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003973 return NULL;
3974 if (view.len > INT_MAX) {
3975 PyBuffer_Release(&view);
3976 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3977 return NULL;
3978 }
3979 res = expat_parse(self, view.buf, (int)view.len, 0);
3980 PyBuffer_Release(&view);
3981 return res;
3982 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003983}
3984
Serhiy Storchakacb985562015-05-04 15:32:48 +03003985/*[clinic input]
3986_elementtree.XMLParser._parse_whole
3987
3988 file: object
3989 /
3990
3991[clinic start generated code]*/
3992
3993static PyObject *
3994_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3995/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003996{
Eli Benderskya3699232013-05-19 18:47:23 -07003997 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003998 PyObject* reader;
3999 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02004000 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004001 PyObject* res;
4002
Serhiy Storchakacb985562015-05-04 15:32:48 +03004003 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004004 if (!reader)
4005 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01004006
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004007 /* read from open file object */
4008 for (;;) {
4009
4010 buffer = PyObject_CallFunction(reader, "i", 64*1024);
4011
4012 if (!buffer) {
4013 /* read failed (e.g. due to KeyboardInterrupt) */
4014 Py_DECREF(reader);
4015 return NULL;
4016 }
4017
Eli Benderskyf996e772012-03-16 05:53:30 +02004018 if (PyUnicode_CheckExact(buffer)) {
4019 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01004020 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02004021 Py_DECREF(buffer);
4022 break;
4023 }
4024 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02004025 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02004026 if (!temp) {
4027 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02004028 Py_DECREF(reader);
4029 return NULL;
4030 }
Eli Benderskyf996e772012-03-16 05:53:30 +02004031 buffer = temp;
4032 }
4033 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004034 Py_DECREF(buffer);
4035 break;
4036 }
4037
Serhiy Storchaka26861b02015-02-16 20:52:17 +02004038 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
4039 Py_DECREF(buffer);
4040 Py_DECREF(reader);
4041 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
4042 return NULL;
4043 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004044 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02004045 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004046 );
4047
4048 Py_DECREF(buffer);
4049
4050 if (!res) {
4051 Py_DECREF(reader);
4052 return NULL;
4053 }
4054 Py_DECREF(res);
4055
4056 }
4057
4058 Py_DECREF(reader);
4059
4060 res = expat_parse(self, "", 0, 1);
4061
4062 if (res && TreeBuilder_CheckExact(self->target)) {
4063 Py_DECREF(res);
4064 return treebuilder_done((TreeBuilderObject*) self->target);
4065 }
4066
4067 return res;
4068}
4069
Serhiy Storchakacb985562015-05-04 15:32:48 +03004070/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03004071_elementtree.XMLParser._setevents
4072
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004073 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03004074 events_to_report: object = None
4075 /
4076
4077[clinic start generated code]*/
4078
4079static PyObject *
4080_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
4081 PyObject *events_queue,
4082 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004083/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004084{
4085 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004086 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004087 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004088 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004089
4090 if (!TreeBuilder_CheckExact(self->target)) {
4091 PyErr_SetString(
4092 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004093 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004094 "targets"
4095 );
4096 return NULL;
4097 }
4098
4099 target = (TreeBuilderObject*) self->target;
4100
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004101 events_append = PyObject_GetAttrString(events_queue, "append");
4102 if (events_append == NULL)
4103 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03004104 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004105
4106 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02004107 Py_CLEAR(target->start_event_obj);
4108 Py_CLEAR(target->end_event_obj);
4109 Py_CLEAR(target->start_ns_event_obj);
4110 Py_CLEAR(target->end_ns_event_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +02004111 Py_CLEAR(target->comment_event_obj);
4112 Py_CLEAR(target->pi_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004113
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004114 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004115 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004116 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004117 Py_RETURN_NONE;
4118 }
4119
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004120 if (!(events_seq = PySequence_Fast(events_to_report,
4121 "events must be a sequence"))) {
4122 return NULL;
4123 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004124
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03004125 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004126 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02004127 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004128 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004129 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004130 } else if (PyBytes_Check(event_name_obj)) {
4131 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004132 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004133 if (event_name == NULL) {
4134 Py_DECREF(events_seq);
4135 PyErr_Format(PyExc_ValueError, "invalid events sequence");
4136 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004137 }
4138
4139 Py_INCREF(event_name_obj);
4140 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004141 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004142 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004143 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004144 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004145 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004146 EXPAT(SetNamespaceDeclHandler)(
4147 self->parser,
4148 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4149 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4150 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004151 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004152 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004153 EXPAT(SetNamespaceDeclHandler)(
4154 self->parser,
4155 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4156 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4157 );
Stefan Behnel43851a22019-05-01 21:20:38 +02004158 } else if (strcmp(event_name, "comment") == 0) {
4159 Py_XSETREF(target->comment_event_obj, event_name_obj);
4160 EXPAT(SetCommentHandler)(
4161 self->parser,
4162 (XML_CommentHandler) expat_comment_handler
4163 );
4164 } else if (strcmp(event_name, "pi") == 0) {
4165 Py_XSETREF(target->pi_event_obj, event_name_obj);
4166 EXPAT(SetProcessingInstructionHandler)(
4167 self->parser,
4168 (XML_ProcessingInstructionHandler) expat_pi_handler
4169 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004170 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004171 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004172 Py_DECREF(events_seq);
4173 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004174 return NULL;
4175 }
4176 }
4177
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004178 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004179 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004180}
4181
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004182static PyMemberDef xmlparser_members[] = {
4183 {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
4184 {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
4185 {NULL}
4186};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004187
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004188static PyObject*
4189xmlparser_version_getter(XMLParserObject *self, void *closure)
4190{
4191 return PyUnicode_FromFormat(
4192 "Expat %d.%d.%d", XML_MAJOR_VERSION,
4193 XML_MINOR_VERSION, XML_MICRO_VERSION);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004194}
4195
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004196static PyGetSetDef xmlparser_getsetlist[] = {
4197 {"version", (getter)xmlparser_version_getter, NULL, NULL},
4198 {NULL},
4199};
4200
Serhiy Storchakacb985562015-05-04 15:32:48 +03004201#include "clinic/_elementtree.c.h"
4202
4203static PyMethodDef element_methods[] = {
4204
4205 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
4206
4207 _ELEMENTTREE_ELEMENT_GET_METHODDEF
4208 _ELEMENTTREE_ELEMENT_SET_METHODDEF
4209
4210 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
4211 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
4212 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
4213
4214 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
4215 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
4216 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
4217 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
4218
4219 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
4220 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
4221 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
4222
Serhiy Storchaka762ec972017-03-30 18:12:06 +03004223 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004224 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
4225
4226 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
4227 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
4228
4229 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
4230
4231 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
4232 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
4233 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
4234 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
4235 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
4236
4237 {NULL, NULL}
4238};
4239
4240static PyMappingMethods element_as_mapping = {
4241 (lenfunc) element_length,
4242 (binaryfunc) element_subscr,
4243 (objobjargproc) element_ass_subscr,
4244};
4245
Serhiy Storchakadde08152015-11-25 15:28:13 +02004246static PyGetSetDef element_getsetlist[] = {
4247 {"tag",
4248 (getter)element_tag_getter,
4249 (setter)element_tag_setter,
4250 "A string identifying what kind of data this element represents"},
4251 {"text",
4252 (getter)element_text_getter,
4253 (setter)element_text_setter,
4254 "A string of text directly after the start tag, or None"},
4255 {"tail",
4256 (getter)element_tail_getter,
4257 (setter)element_tail_setter,
4258 "A string of text directly after the end tag, or None"},
4259 {"attrib",
4260 (getter)element_attrib_getter,
4261 (setter)element_attrib_setter,
4262 "A dictionary containing the element's attributes"},
4263 {NULL},
4264};
4265
Serhiy Storchakacb985562015-05-04 15:32:48 +03004266static PyTypeObject Element_Type = {
4267 PyVarObject_HEAD_INIT(NULL, 0)
4268 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
4269 /* methods */
4270 (destructor)element_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004271 0, /* tp_vectorcall_offset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004272 0, /* tp_getattr */
4273 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004274 0, /* tp_as_async */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004275 (reprfunc)element_repr, /* tp_repr */
4276 0, /* tp_as_number */
4277 &element_as_sequence, /* tp_as_sequence */
4278 &element_as_mapping, /* tp_as_mapping */
4279 0, /* tp_hash */
4280 0, /* tp_call */
4281 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02004282 PyObject_GenericGetAttr, /* tp_getattro */
4283 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004284 0, /* tp_as_buffer */
4285 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4286 /* tp_flags */
4287 0, /* tp_doc */
4288 (traverseproc)element_gc_traverse, /* tp_traverse */
4289 (inquiry)element_gc_clear, /* tp_clear */
4290 0, /* tp_richcompare */
4291 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
4292 0, /* tp_iter */
4293 0, /* tp_iternext */
4294 element_methods, /* tp_methods */
4295 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02004296 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004297 0, /* tp_base */
4298 0, /* tp_dict */
4299 0, /* tp_descr_get */
4300 0, /* tp_descr_set */
4301 0, /* tp_dictoffset */
4302 (initproc)element_init, /* tp_init */
4303 PyType_GenericAlloc, /* tp_alloc */
4304 element_new, /* tp_new */
4305 0, /* tp_free */
4306};
4307
4308static PyMethodDef treebuilder_methods[] = {
4309 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
4310 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
4311 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
Stefan Behnel43851a22019-05-01 21:20:38 +02004312 _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF
4313 _ELEMENTTREE_TREEBUILDER_PI_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004314 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
4315 {NULL, NULL}
4316};
4317
4318static PyTypeObject TreeBuilder_Type = {
4319 PyVarObject_HEAD_INIT(NULL, 0)
4320 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
4321 /* methods */
4322 (destructor)treebuilder_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004323 0, /* tp_vectorcall_offset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004324 0, /* tp_getattr */
4325 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004326 0, /* tp_as_async */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004327 0, /* tp_repr */
4328 0, /* tp_as_number */
4329 0, /* tp_as_sequence */
4330 0, /* tp_as_mapping */
4331 0, /* tp_hash */
4332 0, /* tp_call */
4333 0, /* tp_str */
4334 0, /* tp_getattro */
4335 0, /* tp_setattro */
4336 0, /* tp_as_buffer */
4337 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4338 /* tp_flags */
4339 0, /* tp_doc */
4340 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
4341 (inquiry)treebuilder_gc_clear, /* tp_clear */
4342 0, /* tp_richcompare */
4343 0, /* tp_weaklistoffset */
4344 0, /* tp_iter */
4345 0, /* tp_iternext */
4346 treebuilder_methods, /* tp_methods */
4347 0, /* tp_members */
4348 0, /* tp_getset */
4349 0, /* tp_base */
4350 0, /* tp_dict */
4351 0, /* tp_descr_get */
4352 0, /* tp_descr_set */
4353 0, /* tp_dictoffset */
4354 _elementtree_TreeBuilder___init__, /* tp_init */
4355 PyType_GenericAlloc, /* tp_alloc */
4356 treebuilder_new, /* tp_new */
4357 0, /* tp_free */
4358};
4359
4360static PyMethodDef xmlparser_methods[] = {
4361 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
4362 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
4363 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
4364 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004365 {NULL, NULL}
4366};
4367
Neal Norwitz227b5332006-03-22 09:28:35 +00004368static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004369 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08004370 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004371 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03004372 (destructor)xmlparser_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004373 0, /* tp_vectorcall_offset */
Eli Bendersky52467b12012-06-01 07:13:08 +03004374 0, /* tp_getattr */
4375 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004376 0, /* tp_as_async */
Eli Bendersky52467b12012-06-01 07:13:08 +03004377 0, /* tp_repr */
4378 0, /* tp_as_number */
4379 0, /* tp_as_sequence */
4380 0, /* tp_as_mapping */
4381 0, /* tp_hash */
4382 0, /* tp_call */
4383 0, /* tp_str */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004384 0, /* tp_getattro */
Eli Bendersky52467b12012-06-01 07:13:08 +03004385 0, /* tp_setattro */
4386 0, /* tp_as_buffer */
4387 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4388 /* tp_flags */
4389 0, /* tp_doc */
4390 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
4391 (inquiry)xmlparser_gc_clear, /* tp_clear */
4392 0, /* tp_richcompare */
4393 0, /* tp_weaklistoffset */
4394 0, /* tp_iter */
4395 0, /* tp_iternext */
4396 xmlparser_methods, /* tp_methods */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004397 xmlparser_members, /* tp_members */
4398 xmlparser_getsetlist, /* tp_getset */
Eli Bendersky52467b12012-06-01 07:13:08 +03004399 0, /* tp_base */
4400 0, /* tp_dict */
4401 0, /* tp_descr_get */
4402 0, /* tp_descr_set */
4403 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004404 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03004405 PyType_GenericAlloc, /* tp_alloc */
4406 xmlparser_new, /* tp_new */
4407 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004408};
4409
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004410/* ==================================================================== */
4411/* python module interface */
4412
4413static PyMethodDef _functions[] = {
Serhiy Storchaka62be7422018-11-27 13:27:31 +02004414 {"SubElement", (PyCFunction)(void(*)(void)) subelement, METH_VARARGS | METH_KEYWORDS},
Stefan Behnel43851a22019-05-01 21:20:38 +02004415 _ELEMENTTREE__SET_FACTORIES_METHODDEF
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004416 {NULL, NULL}
4417};
4418
Martin v. Löwis1a214512008-06-11 05:26:20 +00004419
Eli Bendersky532d03e2013-08-10 08:00:39 -07004420static struct PyModuleDef elementtreemodule = {
4421 PyModuleDef_HEAD_INIT,
4422 "_elementtree",
4423 NULL,
4424 sizeof(elementtreestate),
4425 _functions,
4426 NULL,
4427 elementtree_traverse,
4428 elementtree_clear,
4429 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00004430};
4431
Neal Norwitzf6657e62006-12-28 04:47:50 +00004432PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00004433PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004434{
Eli Bendersky64d11e62012-06-15 07:42:50 +03004435 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004436 elementtreestate *st;
4437
4438 m = PyState_FindModule(&elementtreemodule);
4439 if (m) {
4440 Py_INCREF(m);
4441 return m;
4442 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004443
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004444 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02004445 if (PyType_Ready(&ElementIter_Type) < 0)
4446 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004447 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004448 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004449 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004450 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004451 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004452 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004453
Eli Bendersky532d03e2013-08-10 08:00:39 -07004454 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00004455 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00004456 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004457 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00004458
Eli Bendersky828efde2012-04-05 05:40:58 +03004459 if (!(temp = PyImport_ImportModule("copy")))
4460 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004461 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03004462 Py_XDECREF(temp);
4463
Victor Stinnerb136f112017-07-10 22:28:02 +02004464 if (st->deepcopy_obj == NULL) {
4465 return NULL;
4466 }
4467
4468 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004469 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004470 return NULL;
4471
Eli Bendersky20d41742012-06-01 09:48:37 +03004472 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004473 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4474 if (expat_capi) {
4475 /* check that it's usable */
4476 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004477 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004478 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4479 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004480 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004481 PyErr_SetString(PyExc_ImportError,
4482 "pyexpat version is incompatible");
4483 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004484 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004485 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004486 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004487 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004488
Eli Bendersky532d03e2013-08-10 08:00:39 -07004489 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004490 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004491 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004492 Py_INCREF(st->parseerror_obj);
4493 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004494
Eli Bendersky092af1f2012-03-04 07:14:03 +02004495 Py_INCREF((PyObject *)&Element_Type);
4496 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4497
Eli Bendersky58d548d2012-05-29 15:45:16 +03004498 Py_INCREF((PyObject *)&TreeBuilder_Type);
4499 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4500
Eli Bendersky52467b12012-06-01 07:13:08 +03004501 Py_INCREF((PyObject *)&XMLParser_Type);
4502 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004503
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004504 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004505}