blob: 1e58cd05b5123710c0b7695c61907ac251abf0a8 [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000022/* An element can hold this many children without extra memory
23 allocations. */
24#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Allocation tracing hooks.  The tracing variant below is compiled out;
   switch the "#if 0" to "#if 1" to keep a running byte total in `memory`
   and print it on every ALLOC/RELEASE.  In the default configuration both
   macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif

/* Compiler-specific spelling of "file-local helper returning `type`". */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55
/* Tagged-pointer helpers for the element's text and tail slots.  Bit 0 of
   the stored pointer is a 'join' flag, so the slot must never be
   dereferenced directly: always go through JOIN_OBJ to recover the real
   object pointer.  See the comments in the ElementObject definition for
   what the flag means.

   JOIN_OBJ(p)        - the object pointer with the flag bit masked off
   JOIN_GET(p)        - the flag bit (0 or 1)
   JOIN_SET(p, flag)  - the object pointer of p with `flag` or'ed in */
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
Ronald Oussoren138d0802013-07-19 11:11:25 +020083/* Types defined by this extension */
84static PyTypeObject Element_Type;
85static PyTypeObject ElementIter_Type;
86static PyTypeObject TreeBuilder_Type;
87static PyTypeObject XMLParser_Type;
88
89
Eli Bendersky532d03e2013-08-10 08:00:39 -070090/* Per-module state; PEP 3121 */
91typedef struct {
92 PyObject *parseerror_obj;
93 PyObject *deepcopy_obj;
94 PyObject *elementpath_obj;
95} elementtreestate;
96
97static struct PyModuleDef elementtreemodule;
98
99/* Given a module object (assumed to be _elementtree), get its per-module
100 * state.
101 */
102#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
103
104/* Find the module instance imported in the currently running sub-interpreter
105 * and get its state.
106 */
107#define ET_STATE_GLOBAL \
108 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
109
110static int
111elementtree_clear(PyObject *m)
112{
113 elementtreestate *st = ET_STATE(m);
114 Py_CLEAR(st->parseerror_obj);
115 Py_CLEAR(st->deepcopy_obj);
116 Py_CLEAR(st->elementpath_obj);
117 return 0;
118}
119
120static int
121elementtree_traverse(PyObject *m, visitproc visit, void *arg)
122{
123 elementtreestate *st = ET_STATE(m);
124 Py_VISIT(st->parseerror_obj);
125 Py_VISIT(st->deepcopy_obj);
126 Py_VISIT(st->elementpath_obj);
127 return 0;
128}
129
130static void
131elementtree_free(void *m)
132{
133 elementtree_clear((PyObject *)m);
134}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135
136/* helpers */
137
138LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139list_join(PyObject* list)
140{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300141 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000143 PyObject* result;
144
Antoine Pitrouc1948842012-10-01 23:40:37 +0200145 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000146 if (!joiner)
147 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200148 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000150 return result;
151}
152
Eli Bendersky48d358b2012-05-30 17:57:50 +0300153/* Is the given object an empty dictionary?
154*/
155static int
156is_empty_dict(PyObject *obj)
157{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200158 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300159}
160
161
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000162/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200163/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000164
165typedef struct {
166
167 /* attributes (a dictionary object), or None if no attributes */
168 PyObject* attrib;
169
170 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200171 Py_ssize_t length; /* actual number of items */
172 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000173
174 /* this either points to _children or to a malloced buffer */
175 PyObject* *children;
176
177 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179} ElementObjectExtra;
180
181typedef struct {
182 PyObject_HEAD
183
184 /* element tag (a string). */
185 PyObject* tag;
186
187 /* text before first child. note that this is a tagged pointer;
188 use JOIN_OBJ to get the object pointer. the join flag is used
189 to distinguish lists created by the tree builder from lists
190 assigned to the attribute by application code; the former
191 should be joined before being returned to the user, the latter
192 should be left intact. */
193 PyObject* text;
194
195 /* text after this element, in parent. note that this is a tagged
196 pointer; use JOIN_OBJ to get the object pointer. */
197 PyObject* tail;
198
199 ElementObjectExtra* extra;
200
Eli Benderskyebf37a22012-04-03 22:02:37 +0300201 PyObject *weakreflist; /* For tp_weaklistoffset */
202
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000203} ElementObject;
204
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
/* Element_CheckExact: the object's type is exactly Element_Type.
   Element_Check: PyObject_TypeCheck against Element_Type, which also
   accepts subtypes. */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
208
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209
210/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200211/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212
213LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200214create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215{
216 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200217 if (!self->extra) {
218 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000219 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200220 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000221
222 if (!attrib)
223 attrib = Py_None;
224
225 Py_INCREF(attrib);
226 self->extra->attrib = attrib;
227
228 self->extra->length = 0;
229 self->extra->allocated = STATIC_CHILDREN;
230 self->extra->children = self->extra->_children;
231
232 return 0;
233}
234
235LOCAL(void)
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300236dealloc_extra(ElementObjectExtra *extra)
237{
238 Py_ssize_t i;
239
240 if (!extra)
241 return;
242
243 Py_DECREF(extra->attrib);
244
245 for (i = 0; i < extra->length; i++)
246 Py_DECREF(extra->children[i]);
247
248 if (extra->children != extra->_children)
249 PyObject_Free(extra->children);
250
251 PyObject_Free(extra);
252}
253
254LOCAL(void)
255clear_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256{
Eli Bendersky08b85292012-04-04 15:55:07 +0300257 ElementObjectExtra *myextra;
Eli Bendersky08b85292012-04-04 15:55:07 +0300258
Eli Benderskyebf37a22012-04-03 22:02:37 +0300259 if (!self->extra)
260 return;
261
262 /* Avoid DECREFs calling into this code again (cycles, etc.)
263 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300264 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300265 self->extra = NULL;
266
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300267 dealloc_extra(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000268}
269
Eli Bendersky092af1f2012-03-04 07:14:03 +0200270/* Convenience internal function to create new Element objects with the given
271 * tag and attributes.
272*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200274create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000275{
276 ElementObject* self;
277
Eli Bendersky0192ba32012-03-30 16:38:33 +0300278 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000279 if (self == NULL)
280 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000281 self->extra = NULL;
282
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000283 Py_INCREF(tag);
284 self->tag = tag;
285
286 Py_INCREF(Py_None);
287 self->text = Py_None;
288
289 Py_INCREF(Py_None);
290 self->tail = Py_None;
291
Eli Benderskyebf37a22012-04-03 22:02:37 +0300292 self->weakreflist = NULL;
293
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200294 ALLOC(sizeof(ElementObject), "create element");
295 PyObject_GC_Track(self);
296
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200297 if (attrib != Py_None && !is_empty_dict(attrib)) {
298 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200299 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200300 return NULL;
301 }
302 }
303
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000304 return (PyObject*) self;
305}
306
Eli Bendersky092af1f2012-03-04 07:14:03 +0200307static PyObject *
308element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
309{
310 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
311 if (e != NULL) {
312 Py_INCREF(Py_None);
313 e->tag = Py_None;
314
315 Py_INCREF(Py_None);
316 e->text = Py_None;
317
318 Py_INCREF(Py_None);
319 e->tail = Py_None;
320
321 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300322 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200323 }
324 return (PyObject *)e;
325}
326
Eli Bendersky737b1732012-05-29 06:02:56 +0300327/* Helper function for extracting the attrib dictionary from a keywords dict.
328 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800329 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300330 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700331 *
332 * Return a dictionary with the content of kwds merged into the content of
333 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300334 */
335static PyObject*
336get_attrib_from_keywords(PyObject *kwds)
337{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700338 PyObject *attrib_str = PyUnicode_FromString("attrib");
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600339 if (attrib_str == NULL) {
340 return NULL;
341 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200342 PyObject *attrib = PyDict_GetItemWithError(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300343
344 if (attrib) {
345 /* If attrib was found in kwds, copy its value and remove it from
346 * kwds
347 */
348 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700349 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300350 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
351 Py_TYPE(attrib)->tp_name);
352 return NULL;
353 }
354 attrib = PyDict_Copy(attrib);
Serhiy Storchaka8905fcc2018-12-11 08:38:03 +0200355 if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
356 Py_DECREF(attrib);
357 attrib = NULL;
358 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200359 }
360 else if (!PyErr_Occurred()) {
Eli Bendersky737b1732012-05-29 06:02:56 +0300361 attrib = PyDict_New();
362 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700363
364 Py_DECREF(attrib_str);
365
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600366 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
367 Py_DECREF(attrib);
368 return NULL;
369 }
Eli Bendersky737b1732012-05-29 06:02:56 +0300370 return attrib;
371}
372
Serhiy Storchakacb985562015-05-04 15:32:48 +0300373/*[clinic input]
374module _elementtree
375class _elementtree.Element "ElementObject *" "&Element_Type"
376class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
377class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
378[clinic start generated code]*/
379/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
380
Eli Bendersky092af1f2012-03-04 07:14:03 +0200381static int
382element_init(PyObject *self, PyObject *args, PyObject *kwds)
383{
384 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200385 PyObject *attrib = NULL;
386 ElementObject *self_elem;
387
388 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
389 return -1;
390
Eli Bendersky737b1732012-05-29 06:02:56 +0300391 if (attrib) {
392 /* attrib passed as positional arg */
393 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200394 if (!attrib)
395 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300396 if (kwds) {
397 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200398 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300399 return -1;
400 }
401 }
402 } else if (kwds) {
403 /* have keywords args */
404 attrib = get_attrib_from_keywords(kwds);
405 if (!attrib)
406 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200407 }
408
409 self_elem = (ElementObject *)self;
410
Antoine Pitrouc1948842012-10-01 23:40:37 +0200411 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200412 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200413 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200414 return -1;
415 }
416 }
417
Eli Bendersky48d358b2012-05-30 17:57:50 +0300418 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200419 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200420
421 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200422 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300423 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200424
Eli Bendersky092af1f2012-03-04 07:14:03 +0200425 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300426 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200427
Eli Bendersky092af1f2012-03-04 07:14:03 +0200428 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300429 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200430
431 return 0;
432}
433
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000434LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200435element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000436{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200437 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000438 PyObject* *children;
439
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300440 assert(extra >= 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000441 /* make sure self->children can hold the given number of extra
442 elements. set an exception and return -1 if allocation failed */
443
Victor Stinner5f0af232013-07-11 23:01:36 +0200444 if (!self->extra) {
445 if (create_extra(self, NULL) < 0)
446 return -1;
447 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000448
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200449 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000450
451 if (size > self->extra->allocated) {
452 /* use Python 2.4's list growth strategy */
453 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000454 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100455 * which needs at least 4 bytes.
456 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000457 * be safe.
458 */
459 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200460 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
461 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000462 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000463 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100464 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000465 * false alarm always assume at least one child to be safe.
466 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000467 children = PyObject_Realloc(self->extra->children,
468 size * sizeof(PyObject*));
469 if (!children)
470 goto nomemory;
471 } else {
472 children = PyObject_Malloc(size * sizeof(PyObject*));
473 if (!children)
474 goto nomemory;
475 /* copy existing children from static area to malloc buffer */
476 memcpy(children, self->extra->children,
477 self->extra->length * sizeof(PyObject*));
478 }
479 self->extra->children = children;
480 self->extra->allocated = size;
481 }
482
483 return 0;
484
485 nomemory:
486 PyErr_NoMemory();
487 return -1;
488}
489
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300490LOCAL(void)
491raise_type_error(PyObject *element)
492{
493 PyErr_Format(PyExc_TypeError,
494 "expected an Element, not \"%.200s\"",
495 Py_TYPE(element)->tp_name);
496}
497
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000498LOCAL(int)
499element_add_subelement(ElementObject* self, PyObject* element)
500{
501 /* add a child element to a parent */
502
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300503 if (!Element_Check(element)) {
504 raise_type_error(element);
505 return -1;
506 }
507
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000508 if (element_resize(self, 1) < 0)
509 return -1;
510
511 Py_INCREF(element);
512 self->extra->children[self->extra->length] = element;
513
514 self->extra->length++;
515
516 return 0;
517}
518
519LOCAL(PyObject*)
520element_get_attrib(ElementObject* self)
521{
522 /* return borrowed reference to attrib dictionary */
523 /* note: this function assumes that the extra section exists */
524
525 PyObject* res = self->extra->attrib;
526
527 if (res == Py_None) {
528 /* create missing dictionary */
529 res = PyDict_New();
530 if (!res)
531 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200532 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000533 self->extra->attrib = res;
534 }
535
536 return res;
537}
538
539LOCAL(PyObject*)
540element_get_text(ElementObject* self)
541{
542 /* return borrowed reference to text attribute */
543
Serhiy Storchaka576def02017-03-30 09:47:31 +0300544 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000545
546 if (JOIN_GET(res)) {
547 res = JOIN_OBJ(res);
548 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300549 PyObject *tmp = list_join(res);
550 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000551 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300552 self->text = tmp;
553 Py_DECREF(res);
554 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000555 }
556 }
557
558 return res;
559}
560
561LOCAL(PyObject*)
562element_get_tail(ElementObject* self)
563{
564 /* return borrowed reference to text attribute */
565
Serhiy Storchaka576def02017-03-30 09:47:31 +0300566 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000567
568 if (JOIN_GET(res)) {
569 res = JOIN_OBJ(res);
570 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300571 PyObject *tmp = list_join(res);
572 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300574 self->tail = tmp;
575 Py_DECREF(res);
576 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000577 }
578 }
579
580 return res;
581}
582
583static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300584subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000585{
586 PyObject* elem;
587
588 ElementObject* parent;
589 PyObject* tag;
590 PyObject* attrib = NULL;
591 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
592 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800593 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000594 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800595 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000596
Eli Bendersky737b1732012-05-29 06:02:56 +0300597 if (attrib) {
598 /* attrib passed as positional arg */
599 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000600 if (!attrib)
601 return NULL;
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600602 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
603 Py_DECREF(attrib);
604 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300605 }
606 } else if (kwds) {
607 /* have keyword args */
608 attrib = get_attrib_from_keywords(kwds);
609 if (!attrib)
610 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000611 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300612 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000613 Py_INCREF(Py_None);
614 attrib = Py_None;
615 }
616
Eli Bendersky092af1f2012-03-04 07:14:03 +0200617 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000618 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200619 if (elem == NULL)
620 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000621
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000622 if (element_add_subelement(parent, elem) < 0) {
623 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000624 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000625 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000626
627 return elem;
628}
629
Eli Bendersky0192ba32012-03-30 16:38:33 +0300630static int
631element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
632{
633 Py_VISIT(self->tag);
634 Py_VISIT(JOIN_OBJ(self->text));
635 Py_VISIT(JOIN_OBJ(self->tail));
636
637 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200638 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300639 Py_VISIT(self->extra->attrib);
640
641 for (i = 0; i < self->extra->length; ++i)
642 Py_VISIT(self->extra->children[i]);
643 }
644 return 0;
645}
646
647static int
648element_gc_clear(ElementObject *self)
649{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300650 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700651 _clear_joined_ptr(&self->text);
652 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300653
654 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300655 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300656 */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300657 clear_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300658 return 0;
659}
660
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000661static void
662element_dealloc(ElementObject* self)
663{
INADA Naokia6296d32017-08-24 14:55:17 +0900664 /* bpo-31095: UnTrack is needed before calling any callbacks */
Eli Bendersky0192ba32012-03-30 16:38:33 +0300665 PyObject_GC_UnTrack(self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200666 Py_TRASHCAN_SAFE_BEGIN(self)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300667
668 if (self->weakreflist != NULL)
669 PyObject_ClearWeakRefs((PyObject *) self);
670
Eli Bendersky0192ba32012-03-30 16:38:33 +0300671 /* element_gc_clear clears all references and deallocates extra
672 */
673 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000674
675 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200676 Py_TYPE(self)->tp_free((PyObject *)self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200677 Py_TRASHCAN_SAFE_END(self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000678}
679
680/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000681
Serhiy Storchakacb985562015-05-04 15:32:48 +0300682/*[clinic input]
683_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000684
Serhiy Storchakacb985562015-05-04 15:32:48 +0300685 subelement: object(subclass_of='&Element_Type')
686 /
687
688[clinic start generated code]*/
689
690static PyObject *
691_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
692/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
693{
694 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000695 return NULL;
696
697 Py_RETURN_NONE;
698}
699
Serhiy Storchakacb985562015-05-04 15:32:48 +0300700/*[clinic input]
701_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000702
Serhiy Storchakacb985562015-05-04 15:32:48 +0300703[clinic start generated code]*/
704
705static PyObject *
706_elementtree_Element_clear_impl(ElementObject *self)
707/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
708{
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300709 clear_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000710
711 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300712 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000713
714 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300715 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716
717 Py_RETURN_NONE;
718}
719
Serhiy Storchakacb985562015-05-04 15:32:48 +0300720/*[clinic input]
721_elementtree.Element.__copy__
722
723[clinic start generated code]*/
724
725static PyObject *
726_elementtree_Element___copy___impl(ElementObject *self)
727/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000728{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200729 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000730 ElementObject* element;
731
Eli Bendersky092af1f2012-03-04 07:14:03 +0200732 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800733 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000734 if (!element)
735 return NULL;
736
Oren Milman39ecb9c2017-10-10 23:26:24 +0300737 Py_INCREF(JOIN_OBJ(self->text));
738 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000739
Oren Milman39ecb9c2017-10-10 23:26:24 +0300740 Py_INCREF(JOIN_OBJ(self->tail));
741 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000742
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300743 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000744 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000745 if (element_resize(element, self->extra->length) < 0) {
746 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000747 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000748 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000749
750 for (i = 0; i < self->extra->length; i++) {
751 Py_INCREF(self->extra->children[i]);
752 element->extra->children[i] = self->extra->children[i];
753 }
754
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300755 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000756 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000757 }
758
759 return (PyObject*) element;
760}
761
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200762/* Helper for a deep copy. */
763LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
764
Serhiy Storchakacb985562015-05-04 15:32:48 +0300765/*[clinic input]
766_elementtree.Element.__deepcopy__
767
Oren Milmand0568182017-09-12 17:39:15 +0300768 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300769 /
770
771[clinic start generated code]*/
772
773static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300774_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
775/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000776{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200777 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000778 ElementObject* element;
779 PyObject* tag;
780 PyObject* attrib;
781 PyObject* text;
782 PyObject* tail;
783 PyObject* id;
784
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000785 tag = deepcopy(self->tag, memo);
786 if (!tag)
787 return NULL;
788
789 if (self->extra) {
790 attrib = deepcopy(self->extra->attrib, memo);
791 if (!attrib) {
792 Py_DECREF(tag);
793 return NULL;
794 }
795 } else {
796 Py_INCREF(Py_None);
797 attrib = Py_None;
798 }
799
Eli Bendersky092af1f2012-03-04 07:14:03 +0200800 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000801
802 Py_DECREF(tag);
803 Py_DECREF(attrib);
804
805 if (!element)
806 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100807
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000808 text = deepcopy(JOIN_OBJ(self->text), memo);
809 if (!text)
810 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300811 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000812
813 tail = deepcopy(JOIN_OBJ(self->tail), memo);
814 if (!tail)
815 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300816 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000817
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300818 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000819 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000820 if (element_resize(element, self->extra->length) < 0)
821 goto error;
822
823 for (i = 0; i < self->extra->length; i++) {
824 PyObject* child = deepcopy(self->extra->children[i], memo);
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300825 if (!child || !Element_Check(child)) {
826 if (child) {
827 raise_type_error(child);
828 Py_DECREF(child);
829 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000830 element->extra->length = i;
831 goto error;
832 }
833 element->extra->children[i] = child;
834 }
835
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300836 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000837 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000838 }
839
840 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700841 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000842 if (!id)
843 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000844
845 i = PyDict_SetItem(memo, id, (PyObject*) element);
846
847 Py_DECREF(id);
848
849 if (i < 0)
850 goto error;
851
852 return (PyObject*) element;
853
854 error:
855 Py_DECREF(element);
856 return NULL;
857}
858
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200859LOCAL(PyObject *)
860deepcopy(PyObject *object, PyObject *memo)
861{
862 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200863 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200864 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200865
866 /* Fast paths */
867 if (object == Py_None || PyUnicode_CheckExact(object)) {
868 Py_INCREF(object);
869 return object;
870 }
871
872 if (Py_REFCNT(object) == 1) {
873 if (PyDict_CheckExact(object)) {
874 PyObject *key, *value;
875 Py_ssize_t pos = 0;
876 int simple = 1;
877 while (PyDict_Next(object, &pos, &key, &value)) {
878 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
879 simple = 0;
880 break;
881 }
882 }
883 if (simple)
884 return PyDict_Copy(object);
885 /* Fall through to general case */
886 }
887 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300888 return _elementtree_Element___deepcopy___impl(
889 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200890 }
891 }
892
893 /* General case */
894 st = ET_STATE_GLOBAL;
895 if (!st->deepcopy_obj) {
896 PyErr_SetString(PyExc_RuntimeError,
897 "deepcopy helper not found");
898 return NULL;
899 }
900
Victor Stinner7fbac452016-08-20 01:34:44 +0200901 stack[0] = object;
902 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200903 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200904}
905
906
Serhiy Storchakacb985562015-05-04 15:32:48 +0300907/*[clinic input]
908_elementtree.Element.__sizeof__ -> Py_ssize_t
909
910[clinic start generated code]*/
911
912static Py_ssize_t
913_elementtree_Element___sizeof___impl(ElementObject *self)
914/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200915{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200916 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200917 if (self->extra) {
918 result += sizeof(ElementObjectExtra);
919 if (self->extra->children != self->extra->_children)
920 result += sizeof(PyObject*) * self->extra->allocated;
921 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300922 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200923}
924
/* dict keys for getstate/setstate.
 *
 * __getstate__ uses these strings as the keys of the state dictionary it
 * builds, and element_setstate_from_Python() uses the same strings as the
 * keyword names it extracts from the state dictionary.
 */
#define PICKLED_TAG "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB "attrib"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
931
932/* __getstate__ returns a fabricated instance dict as in the pure-Python
933 * Element implementation, for interoperability/interchangeability. This
934 * makes the pure-Python implementation details an API, but (a) there aren't
935 * any unnecessary structures there; and (b) it buys compatibility with 3.2
936 * pickles. See issue #16076.
937 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300938/*[clinic input]
939_elementtree.Element.__getstate__
940
941[clinic start generated code]*/
942
Eli Bendersky698bdb22013-01-10 06:01:06 -0800943static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300944_elementtree_Element___getstate___impl(ElementObject *self)
945/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800946{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200947 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800948 PyObject *instancedict = NULL, *children;
949
950 /* Build a list of children. */
951 children = PyList_New(self->extra ? self->extra->length : 0);
952 if (!children)
953 return NULL;
954 for (i = 0; i < PyList_GET_SIZE(children); i++) {
955 PyObject *child = self->extra->children[i];
956 Py_INCREF(child);
957 PyList_SET_ITEM(children, i, child);
958 }
959
960 /* Construct the state object. */
961 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
962 if (noattrib)
963 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
964 PICKLED_TAG, self->tag,
965 PICKLED_CHILDREN, children,
966 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700967 PICKLED_TEXT, JOIN_OBJ(self->text),
968 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800969 else
970 instancedict = Py_BuildValue("{sOsOsOsOsO}",
971 PICKLED_TAG, self->tag,
972 PICKLED_CHILDREN, children,
973 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700974 PICKLED_TEXT, JOIN_OBJ(self->text),
975 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800976 if (instancedict) {
977 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800978 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800979 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800980 else {
981 for (i = 0; i < PyList_GET_SIZE(children); i++)
982 Py_DECREF(PyList_GET_ITEM(children, i));
983 Py_DECREF(children);
984
985 return NULL;
986 }
987}
988
989static PyObject *
990element_setstate_from_attributes(ElementObject *self,
991 PyObject *tag,
992 PyObject *attrib,
993 PyObject *text,
994 PyObject *tail,
995 PyObject *children)
996{
997 Py_ssize_t i, nchildren;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300998 ElementObjectExtra *oldextra = NULL;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800999
1000 if (!tag) {
1001 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
1002 return NULL;
1003 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001004
Serhiy Storchaka191321d2015-12-27 15:41:34 +02001005 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001006 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001007
Oren Milman39ecb9c2017-10-10 23:26:24 +03001008 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
1009 Py_INCREF(JOIN_OBJ(text));
1010 _set_joined_ptr(&self->text, text);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001011
Oren Milman39ecb9c2017-10-10 23:26:24 +03001012 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
1013 Py_INCREF(JOIN_OBJ(tail));
1014 _set_joined_ptr(&self->tail, tail);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001015
1016 /* Handle ATTRIB and CHILDREN. */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001017 if (!children && !attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001018 Py_RETURN_NONE;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001019 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001020
1021 /* Compute 'nchildren'. */
1022 if (children) {
1023 if (!PyList_Check(children)) {
1024 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1025 return NULL;
1026 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001027 nchildren = PyList_GET_SIZE(children);
1028
1029 /* (Re-)allocate 'extra'.
1030 Avoid DECREFs calling into this code again (cycles, etc.)
1031 */
1032 oldextra = self->extra;
1033 self->extra = NULL;
1034 if (element_resize(self, nchildren)) {
1035 assert(!self->extra || !self->extra->length);
1036 clear_extra(self);
1037 self->extra = oldextra;
1038 return NULL;
1039 }
1040 assert(self->extra);
1041 assert(self->extra->allocated >= nchildren);
1042 if (oldextra) {
1043 assert(self->extra->attrib == Py_None);
1044 self->extra->attrib = oldextra->attrib;
1045 oldextra->attrib = Py_None;
1046 }
1047
1048 /* Copy children */
1049 for (i = 0; i < nchildren; i++) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001050 PyObject *child = PyList_GET_ITEM(children, i);
1051 if (!Element_Check(child)) {
1052 raise_type_error(child);
1053 self->extra->length = i;
1054 dealloc_extra(oldextra);
1055 return NULL;
1056 }
1057 Py_INCREF(child);
1058 self->extra->children[i] = child;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001059 }
1060
1061 assert(!self->extra->length);
1062 self->extra->length = nchildren;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001063 }
1064 else {
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001065 if (element_resize(self, 0)) {
1066 return NULL;
1067 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001068 }
1069
Eli Bendersky698bdb22013-01-10 06:01:06 -08001070 /* Stash attrib. */
1071 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001072 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001073 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001074 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001075 dealloc_extra(oldextra);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001076
1077 Py_RETURN_NONE;
1078}
1079
1080/* __setstate__ for Element instance from the Python implementation.
1081 * 'state' should be the instance dict.
1082 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001083
Eli Bendersky698bdb22013-01-10 06:01:06 -08001084static PyObject *
1085element_setstate_from_Python(ElementObject *self, PyObject *state)
1086{
1087 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1088 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1089 PyObject *args;
1090 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001091 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001092
Eli Bendersky698bdb22013-01-10 06:01:06 -08001093 tag = attrib = text = tail = children = NULL;
1094 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001095 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001096 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001097
1098 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1099 &attrib, &text, &tail, &children))
1100 retval = element_setstate_from_attributes(self, tag, attrib, text,
1101 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001102 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001103 retval = NULL;
1104
1105 Py_DECREF(args);
1106 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001107}
1108
Serhiy Storchakacb985562015-05-04 15:32:48 +03001109/*[clinic input]
1110_elementtree.Element.__setstate__
1111
1112 state: object
1113 /
1114
1115[clinic start generated code]*/
1116
Eli Bendersky698bdb22013-01-10 06:01:06 -08001117static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001118_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1119/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001120{
1121 if (!PyDict_CheckExact(state)) {
1122 PyErr_Format(PyExc_TypeError,
1123 "Don't know how to unpickle \"%.200R\" as an Element",
1124 state);
1125 return NULL;
1126 }
1127 else
1128 return element_setstate_from_Python(self, state);
1129}
1130
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001131LOCAL(int)
1132checkpath(PyObject* tag)
1133{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001134 Py_ssize_t i;
1135 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001136
1137 /* check if a tag contains an xpath character */
1138
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001139#define PATHCHAR(ch) \
1140 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001141
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001142 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001143 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1144 void *data = PyUnicode_DATA(tag);
1145 unsigned int kind = PyUnicode_KIND(tag);
1146 for (i = 0; i < len; i++) {
1147 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1148 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001149 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001150 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001151 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001152 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001153 return 1;
1154 }
1155 return 0;
1156 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001157 if (PyBytes_Check(tag)) {
1158 char *p = PyBytes_AS_STRING(tag);
1159 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001160 if (p[i] == '{')
1161 check = 0;
1162 else if (p[i] == '}')
1163 check = 1;
1164 else if (check && PATHCHAR(p[i]))
1165 return 1;
1166 }
1167 return 0;
1168 }
1169
1170 return 1; /* unknown type; might be path expression */
1171}
1172
Serhiy Storchakacb985562015-05-04 15:32:48 +03001173/*[clinic input]
1174_elementtree.Element.extend
1175
1176 elements: object
1177 /
1178
1179[clinic start generated code]*/
1180
1181static PyObject *
1182_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1183/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001184{
1185 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001186 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001187
Serhiy Storchakacb985562015-05-04 15:32:48 +03001188 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001189 if (!seq) {
1190 PyErr_Format(
1191 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001192 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001193 );
1194 return NULL;
1195 }
1196
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001197 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001198 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001199 Py_INCREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001200 if (element_add_subelement(self, element) < 0) {
1201 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001202 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001203 return NULL;
1204 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001205 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001206 }
1207
1208 Py_DECREF(seq);
1209
1210 Py_RETURN_NONE;
1211}
1212
Serhiy Storchakacb985562015-05-04 15:32:48 +03001213/*[clinic input]
1214_elementtree.Element.find
1215
1216 path: object
1217 namespaces: object = None
1218
1219[clinic start generated code]*/
1220
1221static PyObject *
1222_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1223 PyObject *namespaces)
1224/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001225{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001226 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001227 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001228
Serhiy Storchakacb985562015-05-04 15:32:48 +03001229 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001230 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001231 return _PyObject_CallMethodIdObjArgs(
1232 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001233 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001234 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001235
1236 if (!self->extra)
1237 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001238
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001239 for (i = 0; i < self->extra->length; i++) {
1240 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001241 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001242 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001243 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001244 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001245 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001246 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001247 Py_DECREF(item);
1248 if (rc < 0)
1249 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001250 }
1251
1252 Py_RETURN_NONE;
1253}
1254
Serhiy Storchakacb985562015-05-04 15:32:48 +03001255/*[clinic input]
1256_elementtree.Element.findtext
1257
1258 path: object
1259 default: object = None
1260 namespaces: object = None
1261
1262[clinic start generated code]*/
1263
1264static PyObject *
1265_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1266 PyObject *default_value,
1267 PyObject *namespaces)
1268/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001269{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001270 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001271 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001272 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001273
Serhiy Storchakacb985562015-05-04 15:32:48 +03001274 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001275 return _PyObject_CallMethodIdObjArgs(
1276 st->elementpath_obj, &PyId_findtext,
1277 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001278 );
1279
1280 if (!self->extra) {
1281 Py_INCREF(default_value);
1282 return default_value;
1283 }
1284
1285 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001286 PyObject *item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001287 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001288 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001289 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001290 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001291 if (rc > 0) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001292 PyObject* text = element_get_text((ElementObject*)item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001293 if (text == Py_None) {
1294 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001295 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001296 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001297 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001298 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001299 return text;
1300 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001301 Py_DECREF(item);
1302 if (rc < 0)
1303 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001304 }
1305
1306 Py_INCREF(default_value);
1307 return default_value;
1308}
1309
Serhiy Storchakacb985562015-05-04 15:32:48 +03001310/*[clinic input]
1311_elementtree.Element.findall
1312
1313 path: object
1314 namespaces: object = None
1315
1316[clinic start generated code]*/
1317
1318static PyObject *
1319_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1320 PyObject *namespaces)
1321/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001322{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001323 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001324 PyObject* out;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001325 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001326
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001327 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001328 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001329 return _PyObject_CallMethodIdObjArgs(
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001330 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001331 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001332 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001333
1334 out = PyList_New(0);
1335 if (!out)
1336 return NULL;
1337
1338 if (!self->extra)
1339 return out;
1340
1341 for (i = 0; i < self->extra->length; i++) {
1342 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001343 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001344 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001345 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001346 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001347 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1348 Py_DECREF(item);
1349 Py_DECREF(out);
1350 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001351 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001352 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001353 }
1354
1355 return out;
1356}
1357
Serhiy Storchakacb985562015-05-04 15:32:48 +03001358/*[clinic input]
1359_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001360
Serhiy Storchakacb985562015-05-04 15:32:48 +03001361 path: object
1362 namespaces: object = None
1363
1364[clinic start generated code]*/
1365
1366static PyObject *
1367_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1368 PyObject *namespaces)
1369/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1370{
1371 PyObject* tag = path;
1372 _Py_IDENTIFIER(iterfind);
1373 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001374
Victor Stinnerf5616342016-12-09 15:26:00 +01001375 return _PyObject_CallMethodIdObjArgs(
1376 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001377}
1378
Serhiy Storchakacb985562015-05-04 15:32:48 +03001379/*[clinic input]
1380_elementtree.Element.get
1381
1382 key: object
1383 default: object = None
1384
1385[clinic start generated code]*/
1386
1387static PyObject *
1388_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1389 PyObject *default_value)
1390/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001391{
1392 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001393
1394 if (!self->extra || self->extra->attrib == Py_None)
1395 value = default_value;
1396 else {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001397 value = PyDict_GetItemWithError(self->extra->attrib, key);
1398 if (!value) {
1399 if (PyErr_Occurred()) {
1400 return NULL;
1401 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001402 value = default_value;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001403 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001404 }
1405
1406 Py_INCREF(value);
1407 return value;
1408}
1409
Serhiy Storchakacb985562015-05-04 15:32:48 +03001410/*[clinic input]
1411_elementtree.Element.getchildren
1412
1413[clinic start generated code]*/
1414
1415static PyObject *
1416_elementtree_Element_getchildren_impl(ElementObject *self)
1417/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001418{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001419 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001420 PyObject* list;
1421
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001422 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1423 "This method will be removed in future versions. "
1424 "Use 'list(elem)' or iteration over elem instead.",
1425 1) < 0) {
1426 return NULL;
1427 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001428
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001429 if (!self->extra)
1430 return PyList_New(0);
1431
1432 list = PyList_New(self->extra->length);
1433 if (!list)
1434 return NULL;
1435
1436 for (i = 0; i < self->extra->length; i++) {
1437 PyObject* item = self->extra->children[i];
1438 Py_INCREF(item);
1439 PyList_SET_ITEM(list, i, item);
1440 }
1441
1442 return list;
1443}
1444
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001445
/* Forward declaration; the definition is not part of this section of the
 * file.  Element.iter() below calls it with the requested tag and
 * gettext=0, Element.itertext() with tag=None and gettext=1.
 */
static PyObject *
create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1448
1449
Serhiy Storchakacb985562015-05-04 15:32:48 +03001450/*[clinic input]
1451_elementtree.Element.iter
1452
1453 tag: object = None
1454
1455[clinic start generated code]*/
1456
Eli Bendersky64d11e62012-06-15 07:42:50 +03001457static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001458_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1459/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001460{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001461 if (PyUnicode_Check(tag)) {
1462 if (PyUnicode_READY(tag) < 0)
1463 return NULL;
1464 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1465 tag = Py_None;
1466 }
1467 else if (PyBytes_Check(tag)) {
1468 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1469 tag = Py_None;
1470 }
1471
Eli Bendersky64d11e62012-06-15 07:42:50 +03001472 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001473}
1474
1475
Serhiy Storchakacb985562015-05-04 15:32:48 +03001476/*[clinic input]
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001477_elementtree.Element.getiterator
1478
1479 tag: object = None
1480
1481[clinic start generated code]*/
1482
1483static PyObject *
1484_elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1485/*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1486{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03001487 if (PyErr_WarnEx(PyExc_DeprecationWarning,
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001488 "This method will be removed in future versions. "
1489 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1490 1) < 0) {
1491 return NULL;
1492 }
1493 return _elementtree_Element_iter_impl(self, tag);
1494}
1495
1496
1497/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03001498_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001499
Serhiy Storchakacb985562015-05-04 15:32:48 +03001500[clinic start generated code]*/
1501
1502static PyObject *
1503_elementtree_Element_itertext_impl(ElementObject *self)
1504/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1505{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001506 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001507}
1508
Eli Bendersky64d11e62012-06-15 07:42:50 +03001509
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001510static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001511element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001512{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001513 ElementObject* self = (ElementObject*) self_;
1514
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001515 if (!self->extra || index < 0 || index >= self->extra->length) {
1516 PyErr_SetString(
1517 PyExc_IndexError,
1518 "child index out of range"
1519 );
1520 return NULL;
1521 }
1522
1523 Py_INCREF(self->extra->children[index]);
1524 return self->extra->children[index];
1525}
1526
Serhiy Storchakacb985562015-05-04 15:32:48 +03001527/*[clinic input]
1528_elementtree.Element.insert
1529
1530 index: Py_ssize_t
1531 subelement: object(subclass_of='&Element_Type')
1532 /
1533
1534[clinic start generated code]*/
1535
1536static PyObject *
1537_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1538 PyObject *subelement)
1539/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001540{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001541 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001542
Victor Stinner5f0af232013-07-11 23:01:36 +02001543 if (!self->extra) {
1544 if (create_extra(self, NULL) < 0)
1545 return NULL;
1546 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001547
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001548 if (index < 0) {
1549 index += self->extra->length;
1550 if (index < 0)
1551 index = 0;
1552 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001553 if (index > self->extra->length)
1554 index = self->extra->length;
1555
1556 if (element_resize(self, 1) < 0)
1557 return NULL;
1558
1559 for (i = self->extra->length; i > index; i--)
1560 self->extra->children[i] = self->extra->children[i-1];
1561
Serhiy Storchakacb985562015-05-04 15:32:48 +03001562 Py_INCREF(subelement);
1563 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001564
1565 self->extra->length++;
1566
1567 Py_RETURN_NONE;
1568}
1569
Serhiy Storchakacb985562015-05-04 15:32:48 +03001570/*[clinic input]
1571_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001572
Serhiy Storchakacb985562015-05-04 15:32:48 +03001573[clinic start generated code]*/
1574
1575static PyObject *
1576_elementtree_Element_items_impl(ElementObject *self)
1577/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1578{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001579 if (!self->extra || self->extra->attrib == Py_None)
1580 return PyList_New(0);
1581
1582 return PyDict_Items(self->extra->attrib);
1583}
1584
Serhiy Storchakacb985562015-05-04 15:32:48 +03001585/*[clinic input]
1586_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001587
Serhiy Storchakacb985562015-05-04 15:32:48 +03001588[clinic start generated code]*/
1589
1590static PyObject *
1591_elementtree_Element_keys_impl(ElementObject *self)
1592/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1593{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001594 if (!self->extra || self->extra->attrib == Py_None)
1595 return PyList_New(0);
1596
1597 return PyDict_Keys(self->extra->attrib);
1598}
1599
Martin v. Löwis18e16552006-02-15 17:27:45 +00001600static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001601element_length(ElementObject* self)
1602{
1603 if (!self->extra)
1604 return 0;
1605
1606 return self->extra->length;
1607}
1608
Serhiy Storchakacb985562015-05-04 15:32:48 +03001609/*[clinic input]
1610_elementtree.Element.makeelement
1611
1612 tag: object
1613 attrib: object
1614 /
1615
1616[clinic start generated code]*/
1617
1618static PyObject *
1619_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1620 PyObject *attrib)
1621/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001622{
1623 PyObject* elem;
1624
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001625 attrib = PyDict_Copy(attrib);
1626 if (!attrib)
1627 return NULL;
1628
Eli Bendersky092af1f2012-03-04 07:14:03 +02001629 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001630
1631 Py_DECREF(attrib);
1632
1633 return elem;
1634}
1635
Serhiy Storchakacb985562015-05-04 15:32:48 +03001636/*[clinic input]
1637_elementtree.Element.remove
1638
1639 subelement: object(subclass_of='&Element_Type')
1640 /
1641
1642[clinic start generated code]*/
1643
1644static PyObject *
1645_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1646/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001647{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001648 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001649 int rc;
1650 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001651
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001652 if (!self->extra) {
1653 /* element has no children, so raise exception */
1654 PyErr_SetString(
1655 PyExc_ValueError,
1656 "list.remove(x): x not in list"
1657 );
1658 return NULL;
1659 }
1660
1661 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001662 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001663 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001664 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001665 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001666 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001667 if (rc < 0)
1668 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001669 }
1670
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001671 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001672 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001673 PyErr_SetString(
1674 PyExc_ValueError,
1675 "list.remove(x): x not in list"
1676 );
1677 return NULL;
1678 }
1679
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001680 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001681
1682 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001683 for (; i < self->extra->length; i++)
1684 self->extra->children[i] = self->extra->children[i+1];
1685
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001686 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001687 Py_RETURN_NONE;
1688}
1689
1690static PyObject*
1691element_repr(ElementObject* self)
1692{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001693 int status;
1694
1695 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001696 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001697
1698 status = Py_ReprEnter((PyObject *)self);
1699 if (status == 0) {
1700 PyObject *res;
1701 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1702 Py_ReprLeave((PyObject *)self);
1703 return res;
1704 }
1705 if (status > 0)
1706 PyErr_Format(PyExc_RuntimeError,
1707 "reentrant call inside %s.__repr__",
1708 Py_TYPE(self)->tp_name);
1709 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001710}
1711
Serhiy Storchakacb985562015-05-04 15:32:48 +03001712/*[clinic input]
1713_elementtree.Element.set
1714
1715 key: object
1716 value: object
1717 /
1718
1719[clinic start generated code]*/
1720
1721static PyObject *
1722_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1723 PyObject *value)
1724/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001725{
1726 PyObject* attrib;
1727
Victor Stinner5f0af232013-07-11 23:01:36 +02001728 if (!self->extra) {
1729 if (create_extra(self, NULL) < 0)
1730 return NULL;
1731 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001732
1733 attrib = element_get_attrib(self);
1734 if (!attrib)
1735 return NULL;
1736
1737 if (PyDict_SetItem(attrib, key, value) < 0)
1738 return NULL;
1739
1740 Py_RETURN_NONE;
1741}
1742
1743static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001744element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001745{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001746 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001747 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001748 PyObject* old;
1749
1750 if (!self->extra || index < 0 || index >= self->extra->length) {
1751 PyErr_SetString(
1752 PyExc_IndexError,
1753 "child assignment index out of range");
1754 return -1;
1755 }
1756
1757 old = self->extra->children[index];
1758
1759 if (item) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001760 if (!Element_Check(item)) {
1761 raise_type_error(item);
1762 return -1;
1763 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001764 Py_INCREF(item);
1765 self->extra->children[index] = item;
1766 } else {
1767 self->extra->length--;
1768 for (i = index; i < self->extra->length; i++)
1769 self->extra->children[i] = self->extra->children[i+1];
1770 }
1771
1772 Py_DECREF(old);
1773
1774 return 0;
1775}
1776
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001777static PyObject*
1778element_subscr(PyObject* self_, PyObject* item)
1779{
1780 ElementObject* self = (ElementObject*) self_;
1781
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001782 if (PyIndex_Check(item)) {
1783 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001784
1785 if (i == -1 && PyErr_Occurred()) {
1786 return NULL;
1787 }
1788 if (i < 0 && self->extra)
1789 i += self->extra->length;
1790 return element_getitem(self_, i);
1791 }
1792 else if (PySlice_Check(item)) {
1793 Py_ssize_t start, stop, step, slicelen, cur, i;
1794 PyObject* list;
1795
1796 if (!self->extra)
1797 return PyList_New(0);
1798
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001799 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001800 return NULL;
1801 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001802 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1803 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001804
1805 if (slicelen <= 0)
1806 return PyList_New(0);
1807 else {
1808 list = PyList_New(slicelen);
1809 if (!list)
1810 return NULL;
1811
1812 for (cur = start, i = 0; i < slicelen;
1813 cur += step, i++) {
1814 PyObject* item = self->extra->children[cur];
1815 Py_INCREF(item);
1816 PyList_SET_ITEM(list, i, item);
1817 }
1818
1819 return list;
1820 }
1821 }
1822 else {
1823 PyErr_SetString(PyExc_TypeError,
1824 "element indices must be integers");
1825 return NULL;
1826 }
1827}
1828
1829static int
1830element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1831{
1832 ElementObject* self = (ElementObject*) self_;
1833
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001834 if (PyIndex_Check(item)) {
1835 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001836
1837 if (i == -1 && PyErr_Occurred()) {
1838 return -1;
1839 }
1840 if (i < 0 && self->extra)
1841 i += self->extra->length;
1842 return element_setitem(self_, i, value);
1843 }
1844 else if (PySlice_Check(item)) {
1845 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1846
1847 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001848 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001849
Victor Stinner5f0af232013-07-11 23:01:36 +02001850 if (!self->extra) {
1851 if (create_extra(self, NULL) < 0)
1852 return -1;
1853 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001854
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001855 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001856 return -1;
1857 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001858 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1859 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001860
Eli Bendersky865756a2012-03-09 13:38:15 +02001861 if (value == NULL) {
1862 /* Delete slice */
1863 size_t cur;
1864 Py_ssize_t i;
1865
1866 if (slicelen <= 0)
1867 return 0;
1868
1869 /* Since we're deleting, the direction of the range doesn't matter,
1870 * so for simplicity make it always ascending.
1871 */
1872 if (step < 0) {
1873 stop = start + 1;
1874 start = stop + step * (slicelen - 1) - 1;
1875 step = -step;
1876 }
1877
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001878 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001879
1880 /* recycle is a list that will contain all the children
1881 * scheduled for removal.
1882 */
1883 if (!(recycle = PyList_New(slicelen))) {
Eli Bendersky865756a2012-03-09 13:38:15 +02001884 return -1;
1885 }
1886
1887 /* This loop walks over all the children that have to be deleted,
1888 * with cur pointing at them. num_moved is the amount of children
1889 * until the next deleted child that have to be "shifted down" to
1890 * occupy the deleted's places.
1891 * Note that in the ith iteration, shifting is done i+i places down
1892 * because i children were already removed.
1893 */
1894 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1895 /* Compute how many children have to be moved, clipping at the
1896 * list end.
1897 */
1898 Py_ssize_t num_moved = step - 1;
1899 if (cur + step >= (size_t)self->extra->length) {
1900 num_moved = self->extra->length - cur - 1;
1901 }
1902
1903 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1904
1905 memmove(
1906 self->extra->children + cur - i,
1907 self->extra->children + cur + 1,
1908 num_moved * sizeof(PyObject *));
1909 }
1910
1911 /* Leftover "tail" after the last removed child */
1912 cur = start + (size_t)slicelen * step;
1913 if (cur < (size_t)self->extra->length) {
1914 memmove(
1915 self->extra->children + cur - slicelen,
1916 self->extra->children + cur,
1917 (self->extra->length - cur) * sizeof(PyObject *));
1918 }
1919
1920 self->extra->length -= slicelen;
1921
1922 /* Discard the recycle list with all the deleted sub-elements */
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -06001923 Py_DECREF(recycle);
Eli Bendersky865756a2012-03-09 13:38:15 +02001924 return 0;
1925 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001926
1927 /* A new slice is actually being assigned */
1928 seq = PySequence_Fast(value, "");
1929 if (!seq) {
1930 PyErr_Format(
1931 PyExc_TypeError,
1932 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1933 );
1934 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001935 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001936 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001937
1938 if (step != 1 && newlen != slicelen)
1939 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001940 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001941 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001942 "attempt to assign sequence of size %zd "
1943 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001944 newlen, slicelen
1945 );
1946 return -1;
1947 }
1948
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001949 /* Resize before creating the recycle bin, to prevent refleaks. */
1950 if (newlen > slicelen) {
1951 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001952 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001953 return -1;
1954 }
1955 }
1956
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001957 for (i = 0; i < newlen; i++) {
1958 PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1959 if (!Element_Check(element)) {
1960 raise_type_error(element);
1961 Py_DECREF(seq);
1962 return -1;
1963 }
1964 }
1965
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001966 if (slicelen > 0) {
1967 /* to avoid recursive calls to this method (via decref), move
1968 old items to the recycle bin here, and get rid of them when
1969 we're done modifying the element */
1970 recycle = PyList_New(slicelen);
1971 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001972 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001973 return -1;
1974 }
1975 for (cur = start, i = 0; i < slicelen;
1976 cur += step, i++)
1977 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1978 }
1979
1980 if (newlen < slicelen) {
1981 /* delete slice */
1982 for (i = stop; i < self->extra->length; i++)
1983 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1984 } else if (newlen > slicelen) {
1985 /* insert slice */
1986 for (i = self->extra->length-1; i >= stop; i--)
1987 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1988 }
1989
1990 /* replace the slice */
1991 for (cur = start, i = 0; i < newlen;
1992 cur += step, i++) {
1993 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1994 Py_INCREF(element);
1995 self->extra->children[cur] = element;
1996 }
1997
1998 self->extra->length += newlen - slicelen;
1999
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02002000 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002001
2002 /* discard the recycle bin, and everything in it */
2003 Py_XDECREF(recycle);
2004
2005 return 0;
2006 }
2007 else {
2008 PyErr_SetString(PyExc_TypeError,
2009 "element indices must be integers");
2010 return -1;
2011 }
2012}
2013
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002014static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02002015element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002016{
Serhiy Storchakadde08152015-11-25 15:28:13 +02002017 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002018 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002019 return res;
2020}
2021
Serhiy Storchakadde08152015-11-25 15:28:13 +02002022static PyObject*
2023element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002024{
Serhiy Storchakadde08152015-11-25 15:28:13 +02002025 PyObject *res = element_get_text(self);
2026 Py_XINCREF(res);
2027 return res;
2028}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002029
Serhiy Storchakadde08152015-11-25 15:28:13 +02002030static PyObject*
2031element_tail_getter(ElementObject *self, void *closure)
2032{
2033 PyObject *res = element_get_tail(self);
2034 Py_XINCREF(res);
2035 return res;
2036}
2037
2038static PyObject*
2039element_attrib_getter(ElementObject *self, void *closure)
2040{
2041 PyObject *res;
2042 if (!self->extra) {
2043 if (create_extra(self, NULL) < 0)
2044 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002045 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02002046 res = element_get_attrib(self);
2047 Py_XINCREF(res);
2048 return res;
2049}
Victor Stinner4d463432013-07-11 23:05:03 +02002050
/* Macro for setter validation: a setter called with V == NULL is an
   attribute deletion, which elements do not support.  Sets AttributeError
   and makes the enclosing function return -1.

   Wrapped in do { } while (0) so that the expansion is a single statement
   and cannot capture a following "else". */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2059
Serhiy Storchakadde08152015-11-25 15:28:13 +02002060static int
2061element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2062{
2063 _VALIDATE_ATTR_VALUE(value);
2064 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002065 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002066 return 0;
2067}
2068
2069static int
2070element_text_setter(ElementObject *self, PyObject *value, void *closure)
2071{
2072 _VALIDATE_ATTR_VALUE(value);
2073 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002074 _set_joined_ptr(&self->text, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002075 return 0;
2076}
2077
2078static int
2079element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2080{
2081 _VALIDATE_ATTR_VALUE(value);
2082 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002083 _set_joined_ptr(&self->tail, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002084 return 0;
2085}
2086
2087static int
2088element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2089{
2090 _VALIDATE_ATTR_VALUE(value);
2091 if (!self->extra) {
2092 if (create_extra(self, NULL) < 0)
2093 return -1;
2094 }
2095 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002096 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002097 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002098}
2099
2100static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002101 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002102 0, /* sq_concat */
2103 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002104 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002105 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002106 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002107 0,
2108};
2109
Eli Bendersky64d11e62012-06-15 07:42:50 +03002110/******************************* Element iterator ****************************/
2111
2112/* ElementIterObject represents the iteration state over an XML element in
2113 * pre-order traversal. To keep track of which sub-element should be returned
2114 * next, a stack of parents is maintained. This is a standard stack-based
2115 * iterative pre-order traversal of a tree.
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002116 * The stack is managed using a continuous array.
2117 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002118 * the current one is exhausted, and the next child to examine in that parent.
2119 */
2120typedef struct ParentLocator_t {
2121 ElementObject *parent;
2122 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002123} ParentLocator;
2124
2125typedef struct {
2126 PyObject_HEAD
2127 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002128 Py_ssize_t parent_stack_used;
2129 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002130 ElementObject *root_element;
2131 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002132 int gettext;
2133} ElementIterObject;
2134
2135
2136static void
2137elementiter_dealloc(ElementIterObject *it)
2138{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002139 Py_ssize_t i = it->parent_stack_used;
2140 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002141 /* bpo-31095: UnTrack is needed before calling any callbacks */
2142 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002143 while (i--)
2144 Py_XDECREF(it->parent_stack[i].parent);
2145 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002146
2147 Py_XDECREF(it->sought_tag);
2148 Py_XDECREF(it->root_element);
2149
Eli Bendersky64d11e62012-06-15 07:42:50 +03002150 PyObject_GC_Del(it);
2151}
2152
2153static int
2154elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2155{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002156 Py_ssize_t i = it->parent_stack_used;
2157 while (i--)
2158 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002159
2160 Py_VISIT(it->root_element);
2161 Py_VISIT(it->sought_tag);
2162 return 0;
2163}
2164
2165/* Helper function for elementiter_next. Add a new parent to the parent stack.
2166 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002167static int
2168parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002169{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002170 ParentLocator *item;
2171
2172 if (it->parent_stack_used >= it->parent_stack_size) {
2173 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2174 ParentLocator *parent_stack = it->parent_stack;
2175 PyMem_Resize(parent_stack, ParentLocator, new_size);
2176 if (parent_stack == NULL)
2177 return -1;
2178 it->parent_stack = parent_stack;
2179 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002180 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002181 item = it->parent_stack + it->parent_stack_used++;
2182 Py_INCREF(parent);
2183 item->parent = parent;
2184 item->child_index = 0;
2185 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002186}
2187
2188static PyObject *
2189elementiter_next(ElementIterObject *it)
2190{
2191 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002192 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002193 * A short note on gettext: this function serves both the iter() and
2194 * itertext() methods to avoid code duplication. However, there are a few
2195 * small differences in the way these iterations work. Namely:
2196 * - itertext() only yields text from nodes that have it, and continues
2197 * iterating when a node doesn't have text (so it doesn't return any
2198 * node like iter())
2199 * - itertext() also has to handle tail, after finishing with all the
2200 * children of a node.
2201 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002202 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002203 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002204 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002205
2206 while (1) {
2207 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002208 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002209 * iterator is exhausted.
2210 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002211 if (!it->parent_stack_used) {
2212 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002213 PyErr_SetNone(PyExc_StopIteration);
2214 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002215 }
2216
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002217 elem = it->root_element; /* steals a reference */
2218 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002219 }
2220 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002221 /* See if there are children left to traverse in the current parent. If
2222 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002223 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002224 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2225 Py_ssize_t child_index = item->child_index;
2226 ElementObjectExtra *extra;
2227 elem = item->parent;
2228 extra = elem->extra;
2229 if (!extra || child_index >= extra->length) {
2230 it->parent_stack_used--;
2231 /* Note that extra condition on it->parent_stack_used here;
2232 * this is because itertext() is supposed to only return *inner*
2233 * text, not text following the element it began iteration with.
2234 */
2235 if (it->gettext && it->parent_stack_used) {
2236 text = element_get_tail(elem);
2237 goto gettext;
2238 }
2239 Py_DECREF(elem);
2240 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002241 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002242
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03002243 assert(Element_Check(extra->children[child_index]));
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002244 elem = (ElementObject *)extra->children[child_index];
2245 item->child_index++;
2246 Py_INCREF(elem);
2247 }
2248
2249 if (parent_stack_push_new(it, elem) < 0) {
2250 Py_DECREF(elem);
2251 PyErr_NoMemory();
2252 return NULL;
2253 }
2254 if (it->gettext) {
2255 text = element_get_text(elem);
2256 goto gettext;
2257 }
2258
2259 if (it->sought_tag == Py_None)
2260 return (PyObject *)elem;
2261
2262 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2263 if (rc > 0)
2264 return (PyObject *)elem;
2265
2266 Py_DECREF(elem);
2267 if (rc < 0)
2268 return NULL;
2269 continue;
2270
2271gettext:
2272 if (!text) {
2273 Py_DECREF(elem);
2274 return NULL;
2275 }
2276 if (text == Py_None) {
2277 Py_DECREF(elem);
2278 }
2279 else {
2280 Py_INCREF(text);
2281 Py_DECREF(elem);
2282 rc = PyObject_IsTrue(text);
2283 if (rc > 0)
2284 return text;
2285 Py_DECREF(text);
2286 if (rc < 0)
2287 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002288 }
2289 }
2290
2291 return NULL;
2292}
2293
2294
2295static PyTypeObject ElementIter_Type = {
2296 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002297 /* Using the module's name since the pure-Python implementation does not
2298 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002299 "_elementtree._element_iterator", /* tp_name */
2300 sizeof(ElementIterObject), /* tp_basicsize */
2301 0, /* tp_itemsize */
2302 /* methods */
2303 (destructor)elementiter_dealloc, /* tp_dealloc */
2304 0, /* tp_print */
2305 0, /* tp_getattr */
2306 0, /* tp_setattr */
2307 0, /* tp_reserved */
2308 0, /* tp_repr */
2309 0, /* tp_as_number */
2310 0, /* tp_as_sequence */
2311 0, /* tp_as_mapping */
2312 0, /* tp_hash */
2313 0, /* tp_call */
2314 0, /* tp_str */
2315 0, /* tp_getattro */
2316 0, /* tp_setattro */
2317 0, /* tp_as_buffer */
2318 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2319 0, /* tp_doc */
2320 (traverseproc)elementiter_traverse, /* tp_traverse */
2321 0, /* tp_clear */
2322 0, /* tp_richcompare */
2323 0, /* tp_weaklistoffset */
2324 PyObject_SelfIter, /* tp_iter */
2325 (iternextfunc)elementiter_next, /* tp_iternext */
2326 0, /* tp_methods */
2327 0, /* tp_members */
2328 0, /* tp_getset */
2329 0, /* tp_base */
2330 0, /* tp_dict */
2331 0, /* tp_descr_get */
2332 0, /* tp_descr_set */
2333 0, /* tp_dictoffset */
2334 0, /* tp_init */
2335 0, /* tp_alloc */
2336 0, /* tp_new */
2337};
2338
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002339#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002340
2341static PyObject *
2342create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2343{
2344 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002345
2346 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2347 if (!it)
2348 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002349
Victor Stinner4d463432013-07-11 23:05:03 +02002350 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002351 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002352 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002353 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002354 it->root_element = self;
2355
Eli Bendersky64d11e62012-06-15 07:42:50 +03002356 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002357
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002358 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002359 if (it->parent_stack == NULL) {
2360 Py_DECREF(it);
2361 PyErr_NoMemory();
2362 return NULL;
2363 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002364 it->parent_stack_used = 0;
2365 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002366
Eli Bendersky64d11e62012-06-15 07:42:50 +03002367 return (PyObject *)it;
2368}
2369
2370
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002371/* ==================================================================== */
2372/* the tree builder type */
2373
2374typedef struct {
2375 PyObject_HEAD
2376
Eli Bendersky58d548d2012-05-29 15:45:16 +03002377 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002378
Antoine Pitrouee329312012-10-04 19:53:29 +02002379 PyObject *this; /* current node */
2380 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002381
Eli Bendersky58d548d2012-05-29 15:45:16 +03002382 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002383
Eli Bendersky58d548d2012-05-29 15:45:16 +03002384 PyObject *stack; /* element stack */
2385 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002386
Eli Bendersky48d358b2012-05-30 17:57:50 +03002387 PyObject *element_factory;
2388
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002389 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002390 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002391 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2392 PyObject *end_event_obj;
2393 PyObject *start_ns_event_obj;
2394 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002395} TreeBuilderObject;
2396
Christian Heimes90aa7642007-12-19 02:45:37 +00002397#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002398
2399/* -------------------------------------------------------------------- */
2400/* constructor and destructor */
2401
Eli Bendersky58d548d2012-05-29 15:45:16 +03002402static PyObject *
2403treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002404{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002405 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2406 if (t != NULL) {
2407 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002408
Eli Bendersky58d548d2012-05-29 15:45:16 +03002409 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002410 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002411 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002412 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002413
Eli Bendersky58d548d2012-05-29 15:45:16 +03002414 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002415 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002416 t->stack = PyList_New(20);
2417 if (!t->stack) {
2418 Py_DECREF(t->this);
2419 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002420 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002421 return NULL;
2422 }
2423 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002424
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002425 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002426 t->start_event_obj = t->end_event_obj = NULL;
2427 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2428 }
2429 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002430}
2431
Serhiy Storchakacb985562015-05-04 15:32:48 +03002432/*[clinic input]
2433_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002434
Serhiy Storchakacb985562015-05-04 15:32:48 +03002435 element_factory: object = NULL
2436
2437[clinic start generated code]*/
2438
2439static int
2440_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2441 PyObject *element_factory)
2442/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2443{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002444 if (element_factory) {
2445 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002446 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002447 }
2448
Eli Bendersky58d548d2012-05-29 15:45:16 +03002449 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002450}
2451
Eli Bendersky48d358b2012-05-30 17:57:50 +03002452static int
2453treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2454{
Serhiy Storchakad2a75c62018-12-18 22:29:14 +02002455 Py_VISIT(self->end_ns_event_obj);
2456 Py_VISIT(self->start_ns_event_obj);
2457 Py_VISIT(self->end_event_obj);
2458 Py_VISIT(self->start_event_obj);
2459 Py_VISIT(self->events_append);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002460 Py_VISIT(self->root);
2461 Py_VISIT(self->this);
2462 Py_VISIT(self->last);
2463 Py_VISIT(self->data);
2464 Py_VISIT(self->stack);
2465 Py_VISIT(self->element_factory);
2466 return 0;
2467}
2468
2469static int
2470treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002471{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002472 Py_CLEAR(self->end_ns_event_obj);
2473 Py_CLEAR(self->start_ns_event_obj);
2474 Py_CLEAR(self->end_event_obj);
2475 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002476 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002477 Py_CLEAR(self->stack);
2478 Py_CLEAR(self->data);
2479 Py_CLEAR(self->last);
2480 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002481 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002482 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002483 return 0;
2484}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002485
Eli Bendersky48d358b2012-05-30 17:57:50 +03002486static void
2487treebuilder_dealloc(TreeBuilderObject *self)
2488{
2489 PyObject_GC_UnTrack(self);
2490 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002491 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002492}
2493
2494/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002495/* helpers for handling of arbitrary element-like objects */
2496
2497static int
Serhiy Storchaka576def02017-03-30 09:47:31 +03002498treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
Antoine Pitrouee329312012-10-04 19:53:29 +02002499 PyObject **dest, _Py_Identifier *name)
2500{
2501 if (Element_CheckExact(element)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002502 PyObject *tmp = JOIN_OBJ(*dest);
2503 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2504 *data = NULL;
2505 Py_DECREF(tmp);
Antoine Pitrouee329312012-10-04 19:53:29 +02002506 return 0;
2507 }
2508 else {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002509 PyObject *joined = list_join(*data);
Antoine Pitrouee329312012-10-04 19:53:29 +02002510 int r;
2511 if (joined == NULL)
2512 return -1;
2513 r = _PyObject_SetAttrId(element, name, joined);
2514 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002515 if (r < 0)
2516 return -1;
2517 Py_CLEAR(*data);
2518 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002519 }
2520}
2521
Serhiy Storchaka576def02017-03-30 09:47:31 +03002522LOCAL(int)
2523treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002524{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002525 PyObject *element = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002526
Serhiy Storchaka576def02017-03-30 09:47:31 +03002527 if (!self->data) {
2528 return 0;
2529 }
2530
2531 if (self->this == element) {
2532 _Py_IDENTIFIER(text);
2533 return treebuilder_set_element_text_or_tail(
2534 element, &self->data,
2535 &((ElementObject *) element)->text, &PyId_text);
2536 }
2537 else {
2538 _Py_IDENTIFIER(tail);
2539 return treebuilder_set_element_text_or_tail(
2540 element, &self->data,
2541 &((ElementObject *) element)->tail, &PyId_tail);
2542 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002543}
2544
2545static int
2546treebuilder_add_subelement(PyObject *element, PyObject *child)
2547{
2548 _Py_IDENTIFIER(append);
2549 if (Element_CheckExact(element)) {
2550 ElementObject *elem = (ElementObject *) element;
2551 return element_add_subelement(elem, child);
2552 }
2553 else {
2554 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002555 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002556 if (res == NULL)
2557 return -1;
2558 Py_DECREF(res);
2559 return 0;
2560 }
2561}
2562
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002563LOCAL(int)
2564treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2565 PyObject *node)
2566{
2567 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002568 PyObject *res;
2569 PyObject *event = PyTuple_Pack(2, action, node);
2570 if (event == NULL)
2571 return -1;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002572 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002573 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002574 if (res == NULL)
2575 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002576 Py_DECREF(res);
2577 }
2578 return 0;
2579}
2580
Antoine Pitrouee329312012-10-04 19:53:29 +02002581/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002582/* handlers */
2583
2584LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002585treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2586 PyObject* attrib)
2587{
2588 PyObject* node;
2589 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002590 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002591
Serhiy Storchaka576def02017-03-30 09:47:31 +03002592 if (treebuilder_flush_data(self) < 0) {
2593 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002594 }
2595
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002596 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002597 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002598 } else if (attrib == Py_None) {
2599 attrib = PyDict_New();
2600 if (!attrib)
2601 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002602 node = PyObject_CallFunctionObjArgs(self->element_factory,
2603 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002604 Py_DECREF(attrib);
2605 }
2606 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002607 node = PyObject_CallFunctionObjArgs(self->element_factory,
2608 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002609 }
2610 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002611 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002612 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002613
Antoine Pitrouee329312012-10-04 19:53:29 +02002614 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002615
2616 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002617 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002618 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002619 } else {
2620 if (self->root) {
2621 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002622 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002623 "multiple elements on top level"
2624 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002625 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002626 }
2627 Py_INCREF(node);
2628 self->root = node;
2629 }
2630
2631 if (self->index < PyList_GET_SIZE(self->stack)) {
2632 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002633 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002634 Py_INCREF(this);
2635 } else {
2636 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002637 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002638 }
2639 self->index++;
2640
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002641 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002642 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002643 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002644 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002645
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002646 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2647 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002648
2649 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002650
2651 error:
2652 Py_DECREF(node);
2653 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002654}
2655
2656LOCAL(PyObject*)
2657treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2658{
2659 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002660 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002661 /* ignore calls to data before the first call to start */
2662 Py_RETURN_NONE;
2663 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002664 /* store the first item as is */
2665 Py_INCREF(data); self->data = data;
2666 } else {
2667 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002668 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2669 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002670 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002671 /* expat often generates single character data sections; handle
2672 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002673 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2674 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002675 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002676 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002677 } else if (PyList_CheckExact(self->data)) {
2678 if (PyList_Append(self->data, data) < 0)
2679 return NULL;
2680 } else {
2681 PyObject* list = PyList_New(2);
2682 if (!list)
2683 return NULL;
2684 PyList_SET_ITEM(list, 0, self->data);
2685 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2686 self->data = list;
2687 }
2688 }
2689
2690 Py_RETURN_NONE;
2691}
2692
2693LOCAL(PyObject*)
2694treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2695{
2696 PyObject* item;
2697
Serhiy Storchaka576def02017-03-30 09:47:31 +03002698 if (treebuilder_flush_data(self) < 0) {
2699 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002700 }
2701
2702 if (self->index == 0) {
2703 PyErr_SetString(
2704 PyExc_IndexError,
2705 "pop from empty stack"
2706 );
2707 return NULL;
2708 }
2709
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002710 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002711 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002712 self->index--;
2713 self->this = PyList_GET_ITEM(self->stack, self->index);
2714 Py_INCREF(self->this);
2715 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002716
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002717 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2718 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002719
2720 Py_INCREF(self->last);
2721 return (PyObject*) self->last;
2722}
2723
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002724/* -------------------------------------------------------------------- */
2725/* methods (in alphabetical order) */
2726
Serhiy Storchakacb985562015-05-04 15:32:48 +03002727/*[clinic input]
2728_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002729
Serhiy Storchakacb985562015-05-04 15:32:48 +03002730 data: object
2731 /
2732
2733[clinic start generated code]*/
2734
2735static PyObject *
2736_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2737/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2738{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002739 return treebuilder_handle_data(self, data);
2740}
2741
Serhiy Storchakacb985562015-05-04 15:32:48 +03002742/*[clinic input]
2743_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002744
Serhiy Storchakacb985562015-05-04 15:32:48 +03002745 tag: object
2746 /
2747
2748[clinic start generated code]*/
2749
2750static PyObject *
2751_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2752/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2753{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002754 return treebuilder_handle_end(self, tag);
2755}
2756
2757LOCAL(PyObject*)
2758treebuilder_done(TreeBuilderObject* self)
2759{
2760 PyObject* res;
2761
2762 /* FIXME: check stack size? */
2763
2764 if (self->root)
2765 res = self->root;
2766 else
2767 res = Py_None;
2768
2769 Py_INCREF(res);
2770 return res;
2771}
2772
Serhiy Storchakacb985562015-05-04 15:32:48 +03002773/*[clinic input]
2774_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002775
Serhiy Storchakacb985562015-05-04 15:32:48 +03002776[clinic start generated code]*/
2777
2778static PyObject *
2779_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2780/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2781{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002782 return treebuilder_done(self);
2783}
2784
Serhiy Storchakacb985562015-05-04 15:32:48 +03002785/*[clinic input]
2786_elementtree.TreeBuilder.start
2787
2788 tag: object
2789 attrs: object = None
2790 /
2791
2792[clinic start generated code]*/
2793
2794static PyObject *
2795_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2796 PyObject *attrs)
2797/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002798{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002799 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002800}
2801
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002802/* ==================================================================== */
2803/* the expat interface */
2804
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002805#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002806#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002807
2808/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2809 * cached globally without being in per-module state.
2810 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002811static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002812#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002813
Eli Bendersky52467b12012-06-01 07:13:08 +03002814static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2815 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2816
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002817typedef struct {
2818 PyObject_HEAD
2819
2820 XML_Parser parser;
2821
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002822 PyObject *target;
2823 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002824
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002825 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002826
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002827 PyObject *handle_start;
2828 PyObject *handle_data;
2829 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002830
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002831 PyObject *handle_comment;
2832 PyObject *handle_pi;
2833 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002834
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002835 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002836
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002837} XMLParserObject;
2838
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002839/* helpers */
2840
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002841LOCAL(PyObject*)
2842makeuniversal(XMLParserObject* self, const char* string)
2843{
2844 /* convert a UTF-8 tag/attribute name from the expat parser
2845 to a universal name string */
2846
Antoine Pitrouc1948842012-10-01 23:40:37 +02002847 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002848 PyObject* key;
2849 PyObject* value;
2850
2851 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002852 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002853 if (!key)
2854 return NULL;
2855
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02002856 value = PyDict_GetItemWithError(self->names, key);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002857
2858 if (value) {
2859 Py_INCREF(value);
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02002860 }
2861 else if (!PyErr_Occurred()) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002862 /* new name. convert to universal name, and decode as
2863 necessary */
2864
2865 PyObject* tag;
2866 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002867 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002868
2869 /* look for namespace separator */
2870 for (i = 0; i < size; i++)
2871 if (string[i] == '}')
2872 break;
2873 if (i != size) {
2874 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002875 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002876 if (tag == NULL) {
2877 Py_DECREF(key);
2878 return NULL;
2879 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002880 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002881 p[0] = '{';
2882 memcpy(p+1, string, size);
2883 size++;
2884 } else {
2885 /* plain name; use key as tag */
2886 Py_INCREF(key);
2887 tag = key;
2888 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002889
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002890 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002891 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002892 value = PyUnicode_DecodeUTF8(p, size, "strict");
2893 Py_DECREF(tag);
2894 if (!value) {
2895 Py_DECREF(key);
2896 return NULL;
2897 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002898
2899 /* add to names dictionary */
2900 if (PyDict_SetItem(self->names, key, value) < 0) {
2901 Py_DECREF(key);
2902 Py_DECREF(value);
2903 return NULL;
2904 }
2905 }
2906
2907 Py_DECREF(key);
2908 return value;
2909}
2910
Eli Bendersky5b77d812012-03-16 08:20:05 +02002911/* Set the ParseError exception with the given parameters.
2912 * If message is not NULL, it's used as the error string. Otherwise, the
2913 * message string is the default for the given error_code.
2914*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002915static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002916expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2917 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002918{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002919 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002920 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002921
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002922 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002923 message ? message : EXPAT(ErrorString)(error_code),
2924 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002925 if (errmsg == NULL)
2926 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002927
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002928 error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002929 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002930 if (!error)
2931 return;
2932
Eli Bendersky5b77d812012-03-16 08:20:05 +02002933 /* Add code and position attributes */
2934 code = PyLong_FromLong((long)error_code);
2935 if (!code) {
2936 Py_DECREF(error);
2937 return;
2938 }
2939 if (PyObject_SetAttrString(error, "code", code) == -1) {
2940 Py_DECREF(error);
2941 Py_DECREF(code);
2942 return;
2943 }
2944 Py_DECREF(code);
2945
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002946 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002947 if (!position) {
2948 Py_DECREF(error);
2949 return;
2950 }
2951 if (PyObject_SetAttrString(error, "position", position) == -1) {
2952 Py_DECREF(error);
2953 Py_DECREF(position);
2954 return;
2955 }
2956 Py_DECREF(position);
2957
Eli Bendersky532d03e2013-08-10 08:00:39 -07002958 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002959 Py_DECREF(error);
2960}
2961
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002962/* -------------------------------------------------------------------- */
2963/* handlers */
2964
2965static void
2966expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2967 int data_len)
2968{
2969 PyObject* key;
2970 PyObject* value;
2971 PyObject* res;
2972
2973 if (data_len < 2 || data_in[0] != '&')
2974 return;
2975
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002976 if (PyErr_Occurred())
2977 return;
2978
Neal Norwitz0269b912007-08-08 06:56:02 +00002979 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002980 if (!key)
2981 return;
2982
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02002983 value = PyDict_GetItemWithError(self->entity, key);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002984
2985 if (value) {
2986 if (TreeBuilder_CheckExact(self->target))
2987 res = treebuilder_handle_data(
2988 (TreeBuilderObject*) self->target, value
2989 );
2990 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002991 res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002992 else
2993 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002994 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002995 } else if (!PyErr_Occurred()) {
2996 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002997 char message[128] = "undefined entity ";
2998 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002999 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003000 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003001 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003002 EXPAT(GetErrorColumnNumber)(self->parser),
3003 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003004 );
3005 }
3006
3007 Py_DECREF(key);
3008}
3009
3010static void
3011expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
3012 const XML_Char **attrib_in)
3013{
3014 PyObject* res;
3015 PyObject* tag;
3016 PyObject* attrib;
3017 int ok;
3018
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003019 if (PyErr_Occurred())
3020 return;
3021
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003022 /* tag name */
3023 tag = makeuniversal(self, tag_in);
3024 if (!tag)
3025 return; /* parser will look for errors */
3026
3027 /* attributes */
3028 if (attrib_in[0]) {
3029 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003030 if (!attrib) {
3031 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003032 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003033 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003034 while (attrib_in[0] && attrib_in[1]) {
3035 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00003036 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003037 if (!key || !value) {
3038 Py_XDECREF(value);
3039 Py_XDECREF(key);
3040 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003041 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003042 return;
3043 }
3044 ok = PyDict_SetItem(attrib, key, value);
3045 Py_DECREF(value);
3046 Py_DECREF(key);
3047 if (ok < 0) {
3048 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003049 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003050 return;
3051 }
3052 attrib_in += 2;
3053 }
3054 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003055 Py_INCREF(Py_None);
3056 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03003057 }
3058
3059 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003060 /* shortcut */
3061 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3062 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003063 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003064 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003065 if (attrib == Py_None) {
3066 Py_DECREF(attrib);
3067 attrib = PyDict_New();
3068 if (!attrib) {
3069 Py_DECREF(tag);
3070 return;
3071 }
3072 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003073 res = PyObject_CallFunctionObjArgs(self->handle_start,
3074 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003075 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003076 res = NULL;
3077
3078 Py_DECREF(tag);
3079 Py_DECREF(attrib);
3080
3081 Py_XDECREF(res);
3082}
3083
3084static void
3085expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3086 int data_len)
3087{
3088 PyObject* data;
3089 PyObject* res;
3090
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003091 if (PyErr_Occurred())
3092 return;
3093
Neal Norwitz0269b912007-08-08 06:56:02 +00003094 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003095 if (!data)
3096 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003097
3098 if (TreeBuilder_CheckExact(self->target))
3099 /* shortcut */
3100 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3101 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003102 res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003103 else
3104 res = NULL;
3105
3106 Py_DECREF(data);
3107
3108 Py_XDECREF(res);
3109}
3110
3111static void
3112expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3113{
3114 PyObject* tag;
3115 PyObject* res = NULL;
3116
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003117 if (PyErr_Occurred())
3118 return;
3119
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003120 if (TreeBuilder_CheckExact(self->target))
3121 /* shortcut */
3122 /* the standard tree builder doesn't look at the end tag */
3123 res = treebuilder_handle_end(
3124 (TreeBuilderObject*) self->target, Py_None
3125 );
3126 else if (self->handle_end) {
3127 tag = makeuniversal(self, tag_in);
3128 if (tag) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003129 res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003130 Py_DECREF(tag);
3131 }
3132 }
3133
3134 Py_XDECREF(res);
3135}
3136
3137static void
3138expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3139 const XML_Char *uri)
3140{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003141 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3142 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003143
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003144 if (PyErr_Occurred())
3145 return;
3146
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003147 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003148 return;
3149
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003150 if (!uri)
3151 uri = "";
3152 if (!prefix)
3153 prefix = "";
3154
3155 parcel = Py_BuildValue("ss", prefix, uri);
3156 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003157 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003158 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3159 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003160}
3161
3162static void
3163expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3164{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003165 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3166
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003167 if (PyErr_Occurred())
3168 return;
3169
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003170 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003171 return;
3172
3173 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003174}
3175
3176static void
3177expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3178{
3179 PyObject* comment;
3180 PyObject* res;
3181
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003182 if (PyErr_Occurred())
3183 return;
3184
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003185 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003186 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003187 if (comment) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003188 res = PyObject_CallFunctionObjArgs(self->handle_comment,
3189 comment, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003190 Py_XDECREF(res);
3191 Py_DECREF(comment);
3192 }
3193 }
3194}
3195
Eli Bendersky45839902013-01-13 05:14:47 -08003196static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003197expat_start_doctype_handler(XMLParserObject *self,
3198 const XML_Char *doctype_name,
3199 const XML_Char *sysid,
3200 const XML_Char *pubid,
3201 int has_internal_subset)
3202{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003203 _Py_IDENTIFIER(doctype);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003204 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003205 PyObject *res;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003206
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003207 if (PyErr_Occurred())
3208 return;
3209
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003210 doctype_name_obj = makeuniversal(self, doctype_name);
3211 if (!doctype_name_obj)
3212 return;
3213
3214 if (sysid) {
3215 sysid_obj = makeuniversal(self, sysid);
3216 if (!sysid_obj) {
3217 Py_DECREF(doctype_name_obj);
3218 return;
3219 }
3220 } else {
3221 Py_INCREF(Py_None);
3222 sysid_obj = Py_None;
3223 }
3224
3225 if (pubid) {
3226 pubid_obj = makeuniversal(self, pubid);
3227 if (!pubid_obj) {
3228 Py_DECREF(doctype_name_obj);
3229 Py_DECREF(sysid_obj);
3230 return;
3231 }
3232 } else {
3233 Py_INCREF(Py_None);
3234 pubid_obj = Py_None;
3235 }
3236
3237 /* If the target has a handler for doctype, call it. */
3238 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003239 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3240 doctype_name_obj, pubid_obj,
3241 sysid_obj, NULL);
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003242 Py_XDECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003243 }
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003244 else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3245 (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3246 "The doctype() method of XMLParser is ignored. "
3247 "Define doctype() method on the TreeBuilder target.",
3248 1);
Serhiy Storchakaee98e7b2018-07-25 14:52:45 +03003249 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003250 }
3251
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003252 Py_DECREF(doctype_name_obj);
3253 Py_DECREF(pubid_obj);
3254 Py_DECREF(sysid_obj);
3255}
3256
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003257static void
3258expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3259 const XML_Char* data_in)
3260{
3261 PyObject* target;
3262 PyObject* data;
3263 PyObject* res;
3264
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003265 if (PyErr_Occurred())
3266 return;
3267
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003268 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003269 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3270 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003271 if (target && data) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003272 res = PyObject_CallFunctionObjArgs(self->handle_pi,
3273 target, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003274 Py_XDECREF(res);
3275 Py_DECREF(data);
3276 Py_DECREF(target);
3277 } else {
3278 Py_XDECREF(data);
3279 Py_XDECREF(target);
3280 }
3281 }
3282}
3283
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003284/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003285
Eli Bendersky52467b12012-06-01 07:13:08 +03003286static PyObject *
3287xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003288{
Eli Bendersky52467b12012-06-01 07:13:08 +03003289 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3290 if (self) {
3291 self->parser = NULL;
3292 self->target = self->entity = self->names = NULL;
3293 self->handle_start = self->handle_data = self->handle_end = NULL;
3294 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003295 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003296 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003297 return (PyObject *)self;
3298}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003299
scoderc8d8e152017-09-14 22:00:03 +02003300static int
3301ignore_attribute_error(PyObject *value)
3302{
3303 if (value == NULL) {
3304 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3305 return -1;
3306 }
3307 PyErr_Clear();
3308 }
3309 return 0;
3310}
3311
Serhiy Storchakacb985562015-05-04 15:32:48 +03003312/*[clinic input]
3313_elementtree.XMLParser.__init__
3314
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003315 *
Serhiy Storchakacb985562015-05-04 15:32:48 +03003316 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003317 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003318
3319[clinic start generated code]*/
3320
Eli Bendersky52467b12012-06-01 07:13:08 +03003321static int
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003322_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3323 const char *encoding)
3324/*[clinic end generated code: output=3ae45ec6cdf344e4 input=96288fcba916cfce]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003325{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003326 self->entity = PyDict_New();
3327 if (!self->entity)
3328 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003329
Serhiy Storchakacb985562015-05-04 15:32:48 +03003330 self->names = PyDict_New();
3331 if (!self->names) {
3332 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003333 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003334 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003335
Serhiy Storchakacb985562015-05-04 15:32:48 +03003336 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3337 if (!self->parser) {
3338 Py_CLEAR(self->entity);
3339 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003340 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003341 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003342 }
Christian Heimescb5778f2018-09-18 14:38:58 +02003343 /* expat < 2.1.0 has no XML_SetHashSalt() */
3344 if (EXPAT(SetHashSalt) != NULL) {
3345 EXPAT(SetHashSalt)(self->parser,
3346 (unsigned long)_Py_HashSecret.expat.hashsalt);
3347 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003348
Eli Bendersky52467b12012-06-01 07:13:08 +03003349 if (target) {
3350 Py_INCREF(target);
3351 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003352 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003353 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003354 Py_CLEAR(self->entity);
3355 Py_CLEAR(self->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003356 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003357 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003358 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003359 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003360
Serhiy Storchakacb985562015-05-04 15:32:48 +03003361 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003362 if (ignore_attribute_error(self->handle_start)) {
3363 return -1;
3364 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003365 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003366 if (ignore_attribute_error(self->handle_data)) {
3367 return -1;
3368 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003369 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003370 if (ignore_attribute_error(self->handle_end)) {
3371 return -1;
3372 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003373 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003374 if (ignore_attribute_error(self->handle_comment)) {
3375 return -1;
3376 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003377 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003378 if (ignore_attribute_error(self->handle_pi)) {
3379 return -1;
3380 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003381 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003382 if (ignore_attribute_error(self->handle_close)) {
3383 return -1;
3384 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003385 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003386 if (ignore_attribute_error(self->handle_doctype)) {
3387 return -1;
3388 }
Eli Bendersky45839902013-01-13 05:14:47 -08003389
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003390 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003391 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003392 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003393 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003394 (XML_StartElementHandler) expat_start_handler,
3395 (XML_EndElementHandler) expat_end_handler
3396 );
3397 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003398 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003399 (XML_DefaultHandler) expat_default_handler
3400 );
3401 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003402 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003403 (XML_CharacterDataHandler) expat_data_handler
3404 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003405 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003406 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003407 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003408 (XML_CommentHandler) expat_comment_handler
3409 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003410 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003411 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003412 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003413 (XML_ProcessingInstructionHandler) expat_pi_handler
3414 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003415 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003416 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003417 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3418 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003419 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003420 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003421 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003422 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003423
Eli Bendersky52467b12012-06-01 07:13:08 +03003424 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003425}
3426
Eli Bendersky52467b12012-06-01 07:13:08 +03003427static int
3428xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3429{
3430 Py_VISIT(self->handle_close);
3431 Py_VISIT(self->handle_pi);
3432 Py_VISIT(self->handle_comment);
3433 Py_VISIT(self->handle_end);
3434 Py_VISIT(self->handle_data);
3435 Py_VISIT(self->handle_start);
3436
3437 Py_VISIT(self->target);
3438 Py_VISIT(self->entity);
3439 Py_VISIT(self->names);
3440
3441 return 0;
3442}
3443
3444static int
3445xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003446{
Victor Stinnere727d412017-09-18 05:29:37 -07003447 if (self->parser != NULL) {
3448 XML_Parser parser = self->parser;
3449 self->parser = NULL;
3450 EXPAT(ParserFree)(parser);
3451 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003452
Antoine Pitrouc1948842012-10-01 23:40:37 +02003453 Py_CLEAR(self->handle_close);
3454 Py_CLEAR(self->handle_pi);
3455 Py_CLEAR(self->handle_comment);
3456 Py_CLEAR(self->handle_end);
3457 Py_CLEAR(self->handle_data);
3458 Py_CLEAR(self->handle_start);
3459 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003460
Antoine Pitrouc1948842012-10-01 23:40:37 +02003461 Py_CLEAR(self->target);
3462 Py_CLEAR(self->entity);
3463 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003464
Eli Bendersky52467b12012-06-01 07:13:08 +03003465 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003466}
3467
Eli Bendersky52467b12012-06-01 07:13:08 +03003468static void
3469xmlparser_dealloc(XMLParserObject* self)
3470{
3471 PyObject_GC_UnTrack(self);
3472 xmlparser_gc_clear(self);
3473 Py_TYPE(self)->tp_free((PyObject *)self);
3474}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003475
3476LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003477expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003478{
3479 int ok;
3480
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003481 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003482 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3483
3484 if (PyErr_Occurred())
3485 return NULL;
3486
3487 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003488 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003489 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003490 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003491 EXPAT(GetErrorColumnNumber)(self->parser),
3492 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003493 );
3494 return NULL;
3495 }
3496
3497 Py_RETURN_NONE;
3498}
3499
Serhiy Storchakacb985562015-05-04 15:32:48 +03003500/*[clinic input]
3501_elementtree.XMLParser.close
3502
3503[clinic start generated code]*/
3504
3505static PyObject *
3506_elementtree_XMLParser_close_impl(XMLParserObject *self)
3507/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003508{
3509 /* end feeding data to parser */
3510
3511 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003512 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003513 if (!res)
3514 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003515
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003516 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003517 Py_DECREF(res);
3518 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003519 }
3520 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003521 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003522 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003523 }
3524 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003525 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003526 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003527}
3528
Serhiy Storchakacb985562015-05-04 15:32:48 +03003529/*[clinic input]
3530_elementtree.XMLParser.feed
3531
3532 data: object
3533 /
3534
3535[clinic start generated code]*/
3536
3537static PyObject *
3538_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3539/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003540{
3541 /* feed data to parser */
3542
Serhiy Storchakacb985562015-05-04 15:32:48 +03003543 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003544 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003545 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3546 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003547 return NULL;
3548 if (data_len > INT_MAX) {
3549 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3550 return NULL;
3551 }
3552 /* Explicitly set UTF-8 encoding. Return code ignored. */
3553 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003554 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003555 }
3556 else {
3557 Py_buffer view;
3558 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003559 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003560 return NULL;
3561 if (view.len > INT_MAX) {
3562 PyBuffer_Release(&view);
3563 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3564 return NULL;
3565 }
3566 res = expat_parse(self, view.buf, (int)view.len, 0);
3567 PyBuffer_Release(&view);
3568 return res;
3569 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003570}
3571
Serhiy Storchakacb985562015-05-04 15:32:48 +03003572/*[clinic input]
3573_elementtree.XMLParser._parse_whole
3574
3575 file: object
3576 /
3577
3578[clinic start generated code]*/
3579
3580static PyObject *
3581_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3582/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003583{
Eli Benderskya3699232013-05-19 18:47:23 -07003584 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003585 PyObject* reader;
3586 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003587 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003588 PyObject* res;
3589
Serhiy Storchakacb985562015-05-04 15:32:48 +03003590 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003591 if (!reader)
3592 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003593
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003594 /* read from open file object */
3595 for (;;) {
3596
3597 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3598
3599 if (!buffer) {
3600 /* read failed (e.g. due to KeyboardInterrupt) */
3601 Py_DECREF(reader);
3602 return NULL;
3603 }
3604
Eli Benderskyf996e772012-03-16 05:53:30 +02003605 if (PyUnicode_CheckExact(buffer)) {
3606 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003607 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003608 Py_DECREF(buffer);
3609 break;
3610 }
3611 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003612 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003613 if (!temp) {
3614 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003615 Py_DECREF(reader);
3616 return NULL;
3617 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003618 buffer = temp;
3619 }
3620 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003621 Py_DECREF(buffer);
3622 break;
3623 }
3624
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003625 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3626 Py_DECREF(buffer);
3627 Py_DECREF(reader);
3628 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3629 return NULL;
3630 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003631 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003632 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003633 );
3634
3635 Py_DECREF(buffer);
3636
3637 if (!res) {
3638 Py_DECREF(reader);
3639 return NULL;
3640 }
3641 Py_DECREF(res);
3642
3643 }
3644
3645 Py_DECREF(reader);
3646
3647 res = expat_parse(self, "", 0, 1);
3648
3649 if (res && TreeBuilder_CheckExact(self->target)) {
3650 Py_DECREF(res);
3651 return treebuilder_done((TreeBuilderObject*) self->target);
3652 }
3653
3654 return res;
3655}
3656
Serhiy Storchakacb985562015-05-04 15:32:48 +03003657/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03003658_elementtree.XMLParser._setevents
3659
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003660 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003661 events_to_report: object = None
3662 /
3663
3664[clinic start generated code]*/
3665
3666static PyObject *
3667_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3668 PyObject *events_queue,
3669 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003670/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003671{
3672 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003673 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003674 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003675 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003676
3677 if (!TreeBuilder_CheckExact(self->target)) {
3678 PyErr_SetString(
3679 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003680 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003681 "targets"
3682 );
3683 return NULL;
3684 }
3685
3686 target = (TreeBuilderObject*) self->target;
3687
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003688 events_append = PyObject_GetAttrString(events_queue, "append");
3689 if (events_append == NULL)
3690 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003691 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003692
3693 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003694 Py_CLEAR(target->start_event_obj);
3695 Py_CLEAR(target->end_event_obj);
3696 Py_CLEAR(target->start_ns_event_obj);
3697 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003698
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003699 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003700 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003701 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003702 Py_RETURN_NONE;
3703 }
3704
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003705 if (!(events_seq = PySequence_Fast(events_to_report,
3706 "events must be a sequence"))) {
3707 return NULL;
3708 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003709
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03003710 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003711 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02003712 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003713 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003714 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003715 } else if (PyBytes_Check(event_name_obj)) {
3716 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003717 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003718 if (event_name == NULL) {
3719 Py_DECREF(events_seq);
3720 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3721 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003722 }
3723
3724 Py_INCREF(event_name_obj);
3725 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003726 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003727 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003728 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003729 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003730 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003731 EXPAT(SetNamespaceDeclHandler)(
3732 self->parser,
3733 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3734 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3735 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003736 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003737 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003738 EXPAT(SetNamespaceDeclHandler)(
3739 self->parser,
3740 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3741 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3742 );
3743 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003744 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003745 Py_DECREF(events_seq);
3746 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003747 return NULL;
3748 }
3749 }
3750
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003751 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003752 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003753}
3754
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003755static PyMemberDef xmlparser_members[] = {
3756 {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
3757 {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
3758 {NULL}
3759};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003760
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003761static PyObject*
3762xmlparser_version_getter(XMLParserObject *self, void *closure)
3763{
3764 return PyUnicode_FromFormat(
3765 "Expat %d.%d.%d", XML_MAJOR_VERSION,
3766 XML_MINOR_VERSION, XML_MICRO_VERSION);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003767}
3768
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003769static PyGetSetDef xmlparser_getsetlist[] = {
3770 {"version", (getter)xmlparser_version_getter, NULL, NULL},
3771 {NULL},
3772};
3773
Serhiy Storchakacb985562015-05-04 15:32:48 +03003774#include "clinic/_elementtree.c.h"
3775
3776static PyMethodDef element_methods[] = {
3777
3778 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3779
3780 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3781 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3782
3783 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3784 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3785 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3786
3787 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3788 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3789 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3790 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3791
3792 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3793 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3794 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3795
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003796 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003797 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3798
3799 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3800 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3801
3802 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3803
3804 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3805 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3806 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3807 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3808 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3809
3810 {NULL, NULL}
3811};
3812
3813static PyMappingMethods element_as_mapping = {
3814 (lenfunc) element_length,
3815 (binaryfunc) element_subscr,
3816 (objobjargproc) element_ass_subscr,
3817};
3818
Serhiy Storchakadde08152015-11-25 15:28:13 +02003819static PyGetSetDef element_getsetlist[] = {
3820 {"tag",
3821 (getter)element_tag_getter,
3822 (setter)element_tag_setter,
3823 "A string identifying what kind of data this element represents"},
3824 {"text",
3825 (getter)element_text_getter,
3826 (setter)element_text_setter,
3827 "A string of text directly after the start tag, or None"},
3828 {"tail",
3829 (getter)element_tail_getter,
3830 (setter)element_tail_setter,
3831 "A string of text directly after the end tag, or None"},
3832 {"attrib",
3833 (getter)element_attrib_getter,
3834 (setter)element_attrib_setter,
3835 "A dictionary containing the element's attributes"},
3836 {NULL},
3837};
3838
Serhiy Storchakacb985562015-05-04 15:32:48 +03003839static PyTypeObject Element_Type = {
3840 PyVarObject_HEAD_INIT(NULL, 0)
3841 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3842 /* methods */
3843 (destructor)element_dealloc, /* tp_dealloc */
3844 0, /* tp_print */
3845 0, /* tp_getattr */
3846 0, /* tp_setattr */
3847 0, /* tp_reserved */
3848 (reprfunc)element_repr, /* tp_repr */
3849 0, /* tp_as_number */
3850 &element_as_sequence, /* tp_as_sequence */
3851 &element_as_mapping, /* tp_as_mapping */
3852 0, /* tp_hash */
3853 0, /* tp_call */
3854 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003855 PyObject_GenericGetAttr, /* tp_getattro */
3856 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003857 0, /* tp_as_buffer */
3858 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3859 /* tp_flags */
3860 0, /* tp_doc */
3861 (traverseproc)element_gc_traverse, /* tp_traverse */
3862 (inquiry)element_gc_clear, /* tp_clear */
3863 0, /* tp_richcompare */
3864 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3865 0, /* tp_iter */
3866 0, /* tp_iternext */
3867 element_methods, /* tp_methods */
3868 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003869 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003870 0, /* tp_base */
3871 0, /* tp_dict */
3872 0, /* tp_descr_get */
3873 0, /* tp_descr_set */
3874 0, /* tp_dictoffset */
3875 (initproc)element_init, /* tp_init */
3876 PyType_GenericAlloc, /* tp_alloc */
3877 element_new, /* tp_new */
3878 0, /* tp_free */
3879};
3880
3881static PyMethodDef treebuilder_methods[] = {
3882 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3883 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3884 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3885 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3886 {NULL, NULL}
3887};
3888
3889static PyTypeObject TreeBuilder_Type = {
3890 PyVarObject_HEAD_INIT(NULL, 0)
3891 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3892 /* methods */
3893 (destructor)treebuilder_dealloc, /* tp_dealloc */
3894 0, /* tp_print */
3895 0, /* tp_getattr */
3896 0, /* tp_setattr */
3897 0, /* tp_reserved */
3898 0, /* tp_repr */
3899 0, /* tp_as_number */
3900 0, /* tp_as_sequence */
3901 0, /* tp_as_mapping */
3902 0, /* tp_hash */
3903 0, /* tp_call */
3904 0, /* tp_str */
3905 0, /* tp_getattro */
3906 0, /* tp_setattro */
3907 0, /* tp_as_buffer */
3908 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3909 /* tp_flags */
3910 0, /* tp_doc */
3911 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3912 (inquiry)treebuilder_gc_clear, /* tp_clear */
3913 0, /* tp_richcompare */
3914 0, /* tp_weaklistoffset */
3915 0, /* tp_iter */
3916 0, /* tp_iternext */
3917 treebuilder_methods, /* tp_methods */
3918 0, /* tp_members */
3919 0, /* tp_getset */
3920 0, /* tp_base */
3921 0, /* tp_dict */
3922 0, /* tp_descr_get */
3923 0, /* tp_descr_set */
3924 0, /* tp_dictoffset */
3925 _elementtree_TreeBuilder___init__, /* tp_init */
3926 PyType_GenericAlloc, /* tp_alloc */
3927 treebuilder_new, /* tp_new */
3928 0, /* tp_free */
3929};
3930
3931static PyMethodDef xmlparser_methods[] = {
3932 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3933 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3934 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3935 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003936 {NULL, NULL}
3937};
3938
Neal Norwitz227b5332006-03-22 09:28:35 +00003939static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003940 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003941 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003942 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003943 (destructor)xmlparser_dealloc, /* tp_dealloc */
3944 0, /* tp_print */
3945 0, /* tp_getattr */
3946 0, /* tp_setattr */
3947 0, /* tp_reserved */
3948 0, /* tp_repr */
3949 0, /* tp_as_number */
3950 0, /* tp_as_sequence */
3951 0, /* tp_as_mapping */
3952 0, /* tp_hash */
3953 0, /* tp_call */
3954 0, /* tp_str */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003955 0, /* tp_getattro */
Eli Bendersky52467b12012-06-01 07:13:08 +03003956 0, /* tp_setattro */
3957 0, /* tp_as_buffer */
3958 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3959 /* tp_flags */
3960 0, /* tp_doc */
3961 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3962 (inquiry)xmlparser_gc_clear, /* tp_clear */
3963 0, /* tp_richcompare */
3964 0, /* tp_weaklistoffset */
3965 0, /* tp_iter */
3966 0, /* tp_iternext */
3967 xmlparser_methods, /* tp_methods */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003968 xmlparser_members, /* tp_members */
3969 xmlparser_getsetlist, /* tp_getset */
Eli Bendersky52467b12012-06-01 07:13:08 +03003970 0, /* tp_base */
3971 0, /* tp_dict */
3972 0, /* tp_descr_get */
3973 0, /* tp_descr_set */
3974 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003975 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003976 PyType_GenericAlloc, /* tp_alloc */
3977 xmlparser_new, /* tp_new */
3978 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003979};
3980
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003981/* ==================================================================== */
3982/* python module interface */
3983
3984static PyMethodDef _functions[] = {
Serhiy Storchaka62be7422018-11-27 13:27:31 +02003985 {"SubElement", (PyCFunction)(void(*)(void)) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003986 {NULL, NULL}
3987};
3988
Martin v. Löwis1a214512008-06-11 05:26:20 +00003989
Eli Bendersky532d03e2013-08-10 08:00:39 -07003990static struct PyModuleDef elementtreemodule = {
3991 PyModuleDef_HEAD_INIT,
3992 "_elementtree",
3993 NULL,
3994 sizeof(elementtreestate),
3995 _functions,
3996 NULL,
3997 elementtree_traverse,
3998 elementtree_clear,
3999 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00004000};
4001
Neal Norwitzf6657e62006-12-28 04:47:50 +00004002PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00004003PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004004{
Eli Bendersky64d11e62012-06-15 07:42:50 +03004005 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004006 elementtreestate *st;
4007
4008 m = PyState_FindModule(&elementtreemodule);
4009 if (m) {
4010 Py_INCREF(m);
4011 return m;
4012 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004013
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004014 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02004015 if (PyType_Ready(&ElementIter_Type) < 0)
4016 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004017 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004018 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004019 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004020 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004021 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004022 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004023
Eli Bendersky532d03e2013-08-10 08:00:39 -07004024 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00004025 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00004026 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004027 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00004028
Eli Bendersky828efde2012-04-05 05:40:58 +03004029 if (!(temp = PyImport_ImportModule("copy")))
4030 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004031 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03004032 Py_XDECREF(temp);
4033
Victor Stinnerb136f112017-07-10 22:28:02 +02004034 if (st->deepcopy_obj == NULL) {
4035 return NULL;
4036 }
4037
4038 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004039 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004040 return NULL;
4041
Eli Bendersky20d41742012-06-01 09:48:37 +03004042 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004043 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4044 if (expat_capi) {
4045 /* check that it's usable */
4046 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004047 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004048 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4049 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004050 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004051 PyErr_SetString(PyExc_ImportError,
4052 "pyexpat version is incompatible");
4053 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004054 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004055 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004056 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004057 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004058
Eli Bendersky532d03e2013-08-10 08:00:39 -07004059 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004060 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004061 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004062 Py_INCREF(st->parseerror_obj);
4063 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004064
Eli Bendersky092af1f2012-03-04 07:14:03 +02004065 Py_INCREF((PyObject *)&Element_Type);
4066 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4067
Eli Bendersky58d548d2012-05-29 15:45:16 +03004068 Py_INCREF((PyObject *)&TreeBuilder_Type);
4069 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4070
Eli Bendersky52467b12012-06-01 07:13:08 +03004071 Py_INCREF((PyObject *)&XMLParser_Type);
4072 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004073
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004074 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004075}