blob: 2d6f26b3df0877713ea329732ea6f3b4803fa3cb [file] [log] [blame]
/*--------------------------------------------------------------------
 * Licensed to PSF under a Contributor Agreement.
 * See http://www.python.org/psf/license for licensing details.
 *
 * _elementtree - C accelerator for xml.etree.ElementTree
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 *--------------------------------------------------------------------
 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* Number of child slots embedded directly in the element's extra
   structure; an element with at most this many children needs no
   separate child-array allocation. */
#define STATIC_CHILDREN 4

/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  Flip the condition below to 1 to keep a
   running byte count and print it on every ALLOC/RELEASE; in the default
   configuration both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
/* tracing disabled: no-ops */
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) declares a file-private function returning
   `type`; under MSVC it additionally requests inlining and the fastcall
   calling convention. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55
/* The lowest bit of the text/tail object pointers doubles as a 'join'
   flag, so every use of those fields as object pointers must go through
   JOIN_OBJ.  See the comments in the ElementObject definition for more
   info.

     JOIN_GET(p)       - the flag bit stored in p (0 or 1)
     JOIN_OBJ(p)       - p with the flag bit masked off, as a PyObject*
     JOIN_SET(p, flag) - the object pointer of p with `flag` or'ed in */
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
Ronald Oussoren138d0802013-07-19 11:11:25 +020083/* Types defined by this extension */
84static PyTypeObject Element_Type;
85static PyTypeObject ElementIter_Type;
86static PyTypeObject TreeBuilder_Type;
87static PyTypeObject XMLParser_Type;
88
89
Eli Bendersky532d03e2013-08-10 08:00:39 -070090/* Per-module state; PEP 3121 */
91typedef struct {
92 PyObject *parseerror_obj;
93 PyObject *deepcopy_obj;
94 PyObject *elementpath_obj;
Stefan Behnel43851a22019-05-01 21:20:38 +020095 PyObject *comment_factory;
96 PyObject *pi_factory;
Eli Bendersky532d03e2013-08-10 08:00:39 -070097} elementtreestate;
98
99static struct PyModuleDef elementtreemodule;
100
101/* Given a module object (assumed to be _elementtree), get its per-module
102 * state.
103 */
104#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
105
106/* Find the module instance imported in the currently running sub-interpreter
107 * and get its state.
108 */
109#define ET_STATE_GLOBAL \
110 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
111
112static int
113elementtree_clear(PyObject *m)
114{
115 elementtreestate *st = ET_STATE(m);
116 Py_CLEAR(st->parseerror_obj);
117 Py_CLEAR(st->deepcopy_obj);
118 Py_CLEAR(st->elementpath_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +0200119 Py_CLEAR(st->comment_factory);
120 Py_CLEAR(st->pi_factory);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700121 return 0;
122}
123
124static int
125elementtree_traverse(PyObject *m, visitproc visit, void *arg)
126{
127 elementtreestate *st = ET_STATE(m);
128 Py_VISIT(st->parseerror_obj);
129 Py_VISIT(st->deepcopy_obj);
130 Py_VISIT(st->elementpath_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +0200131 Py_VISIT(st->comment_factory);
132 Py_VISIT(st->pi_factory);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700133 return 0;
134}
135
136static void
137elementtree_free(void *m)
138{
139 elementtree_clear((PyObject *)m);
140}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000141
142/* helpers */
143
144LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145list_join(PyObject* list)
146{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300147 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000148 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 PyObject* result;
150
Antoine Pitrouc1948842012-10-01 23:40:37 +0200151 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000152 if (!joiner)
153 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200154 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000155 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000156 return result;
157}
158
Eli Bendersky48d358b2012-05-30 17:57:50 +0300159/* Is the given object an empty dictionary?
160*/
161static int
162is_empty_dict(PyObject *obj)
163{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200164 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300165}
166
167
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000168/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200169/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000170
171typedef struct {
172
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200173 /* attributes (a dictionary object), or NULL if no attributes */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000174 PyObject* attrib;
175
176 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200177 Py_ssize_t length; /* actual number of items */
178 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179
180 /* this either points to _children or to a malloced buffer */
181 PyObject* *children;
182
183 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100184
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000185} ElementObjectExtra;
186
187typedef struct {
188 PyObject_HEAD
189
190 /* element tag (a string). */
191 PyObject* tag;
192
193 /* text before first child. note that this is a tagged pointer;
194 use JOIN_OBJ to get the object pointer. the join flag is used
195 to distinguish lists created by the tree builder from lists
196 assigned to the attribute by application code; the former
197 should be joined before being returned to the user, the latter
198 should be left intact. */
199 PyObject* text;
200
201 /* text after this element, in parent. note that this is a tagged
202 pointer; use JOIN_OBJ to get the object pointer. */
203 PyObject* tail;
204
205 ElementObjectExtra* extra;
206
Eli Benderskyebf37a22012-04-03 22:02:37 +0300207 PyObject *weakreflist; /* For tp_weaklistoffset */
208
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209} ElementObject;
210
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000211
/* Element_CheckExact: op's type is exactly Element_Type.
   Element_Check: op's type passes PyObject_TypeCheck against Element_Type. */
#define Element_CheckExact(op) Py_IS_TYPE(op, &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
214
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215
216/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200217/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000218
219LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200220create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000221{
222 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200223 if (!self->extra) {
224 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000225 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200226 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000227
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200228 Py_XINCREF(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000229 self->extra->attrib = attrib;
230
231 self->extra->length = 0;
232 self->extra->allocated = STATIC_CHILDREN;
233 self->extra->children = self->extra->_children;
234
235 return 0;
236}
237
238LOCAL(void)
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300239dealloc_extra(ElementObjectExtra *extra)
240{
241 Py_ssize_t i;
242
243 if (!extra)
244 return;
245
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200246 Py_XDECREF(extra->attrib);
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300247
248 for (i = 0; i < extra->length; i++)
249 Py_DECREF(extra->children[i]);
250
251 if (extra->children != extra->_children)
252 PyObject_Free(extra->children);
253
254 PyObject_Free(extra);
255}
256
257LOCAL(void)
258clear_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000259{
Eli Bendersky08b85292012-04-04 15:55:07 +0300260 ElementObjectExtra *myextra;
Eli Bendersky08b85292012-04-04 15:55:07 +0300261
Eli Benderskyebf37a22012-04-03 22:02:37 +0300262 if (!self->extra)
263 return;
264
265 /* Avoid DECREFs calling into this code again (cycles, etc.)
266 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300267 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300268 self->extra = NULL;
269
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300270 dealloc_extra(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000271}
272
Eli Bendersky092af1f2012-03-04 07:14:03 +0200273/* Convenience internal function to create new Element objects with the given
274 * tag and attributes.
275*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000276LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200277create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000278{
279 ElementObject* self;
280
Eli Bendersky0192ba32012-03-30 16:38:33 +0300281 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000282 if (self == NULL)
283 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000284 self->extra = NULL;
285
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 Py_INCREF(tag);
287 self->tag = tag;
288
289 Py_INCREF(Py_None);
290 self->text = Py_None;
291
292 Py_INCREF(Py_None);
293 self->tail = Py_None;
294
Eli Benderskyebf37a22012-04-03 22:02:37 +0300295 self->weakreflist = NULL;
296
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200297 ALLOC(sizeof(ElementObject), "create element");
298 PyObject_GC_Track(self);
299
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200300 if (attrib != NULL && !is_empty_dict(attrib)) {
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200301 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200302 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200303 return NULL;
304 }
305 }
306
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000307 return (PyObject*) self;
308}
309
Eli Bendersky092af1f2012-03-04 07:14:03 +0200310static PyObject *
311element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
312{
313 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
314 if (e != NULL) {
315 Py_INCREF(Py_None);
316 e->tag = Py_None;
317
318 Py_INCREF(Py_None);
319 e->text = Py_None;
320
321 Py_INCREF(Py_None);
322 e->tail = Py_None;
323
324 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300325 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200326 }
327 return (PyObject *)e;
328}
329
Eli Bendersky737b1732012-05-29 06:02:56 +0300330/* Helper function for extracting the attrib dictionary from a keywords dict.
331 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800332 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300333 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700334 *
335 * Return a dictionary with the content of kwds merged into the content of
336 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300337 */
338static PyObject*
339get_attrib_from_keywords(PyObject *kwds)
340{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700341 PyObject *attrib_str = PyUnicode_FromString("attrib");
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600342 if (attrib_str == NULL) {
343 return NULL;
344 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200345 PyObject *attrib = PyDict_GetItemWithError(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300346
347 if (attrib) {
348 /* If attrib was found in kwds, copy its value and remove it from
349 * kwds
350 */
351 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700352 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300353 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
354 Py_TYPE(attrib)->tp_name);
355 return NULL;
356 }
357 attrib = PyDict_Copy(attrib);
Serhiy Storchaka8905fcc2018-12-11 08:38:03 +0200358 if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
359 Py_DECREF(attrib);
360 attrib = NULL;
361 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200362 }
363 else if (!PyErr_Occurred()) {
Eli Bendersky737b1732012-05-29 06:02:56 +0300364 attrib = PyDict_New();
365 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700366
367 Py_DECREF(attrib_str);
368
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600369 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
370 Py_DECREF(attrib);
371 return NULL;
372 }
Eli Bendersky737b1732012-05-29 06:02:56 +0300373 return attrib;
374}
375
Serhiy Storchakacb985562015-05-04 15:32:48 +0300376/*[clinic input]
377module _elementtree
378class _elementtree.Element "ElementObject *" "&Element_Type"
379class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
380class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
381[clinic start generated code]*/
382/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
383
Eli Bendersky092af1f2012-03-04 07:14:03 +0200384static int
385element_init(PyObject *self, PyObject *args, PyObject *kwds)
386{
387 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200388 PyObject *attrib = NULL;
389 ElementObject *self_elem;
390
391 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
392 return -1;
393
Eli Bendersky737b1732012-05-29 06:02:56 +0300394 if (attrib) {
395 /* attrib passed as positional arg */
396 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200397 if (!attrib)
398 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300399 if (kwds) {
400 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200401 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300402 return -1;
403 }
404 }
405 } else if (kwds) {
406 /* have keywords args */
407 attrib = get_attrib_from_keywords(kwds);
408 if (!attrib)
409 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200410 }
411
412 self_elem = (ElementObject *)self;
413
Antoine Pitrouc1948842012-10-01 23:40:37 +0200414 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200415 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200416 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200417 return -1;
418 }
419 }
420
Eli Bendersky48d358b2012-05-30 17:57:50 +0300421 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200422 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200423
424 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200425 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300426 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200427
Eli Bendersky092af1f2012-03-04 07:14:03 +0200428 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300429 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200430
Eli Bendersky092af1f2012-03-04 07:14:03 +0200431 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300432 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200433
434 return 0;
435}
436
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000437LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200438element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000439{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200440 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000441 PyObject* *children;
442
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300443 assert(extra >= 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000444 /* make sure self->children can hold the given number of extra
445 elements. set an exception and return -1 if allocation failed */
446
Victor Stinner5f0af232013-07-11 23:01:36 +0200447 if (!self->extra) {
448 if (create_extra(self, NULL) < 0)
449 return -1;
450 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000451
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200452 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000453
454 if (size > self->extra->allocated) {
455 /* use Python 2.4's list growth strategy */
456 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000457 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100458 * which needs at least 4 bytes.
459 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000460 * be safe.
461 */
462 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200463 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
464 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000465 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000466 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100467 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000468 * false alarm always assume at least one child to be safe.
469 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000470 children = PyObject_Realloc(self->extra->children,
471 size * sizeof(PyObject*));
472 if (!children)
473 goto nomemory;
474 } else {
475 children = PyObject_Malloc(size * sizeof(PyObject*));
476 if (!children)
477 goto nomemory;
478 /* copy existing children from static area to malloc buffer */
479 memcpy(children, self->extra->children,
480 self->extra->length * sizeof(PyObject*));
481 }
482 self->extra->children = children;
483 self->extra->allocated = size;
484 }
485
486 return 0;
487
488 nomemory:
489 PyErr_NoMemory();
490 return -1;
491}
492
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300493LOCAL(void)
494raise_type_error(PyObject *element)
495{
496 PyErr_Format(PyExc_TypeError,
497 "expected an Element, not \"%.200s\"",
498 Py_TYPE(element)->tp_name);
499}
500
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000501LOCAL(int)
502element_add_subelement(ElementObject* self, PyObject* element)
503{
504 /* add a child element to a parent */
505
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300506 if (!Element_Check(element)) {
507 raise_type_error(element);
508 return -1;
509 }
510
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000511 if (element_resize(self, 1) < 0)
512 return -1;
513
514 Py_INCREF(element);
515 self->extra->children[self->extra->length] = element;
516
517 self->extra->length++;
518
519 return 0;
520}
521
522LOCAL(PyObject*)
523element_get_attrib(ElementObject* self)
524{
525 /* return borrowed reference to attrib dictionary */
526 /* note: this function assumes that the extra section exists */
527
528 PyObject* res = self->extra->attrib;
529
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200530 if (!res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000531 /* create missing dictionary */
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200532 res = self->extra->attrib = PyDict_New();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000533 }
534
535 return res;
536}
537
538LOCAL(PyObject*)
539element_get_text(ElementObject* self)
540{
541 /* return borrowed reference to text attribute */
542
Serhiy Storchaka576def02017-03-30 09:47:31 +0300543 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000544
545 if (JOIN_GET(res)) {
546 res = JOIN_OBJ(res);
547 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300548 PyObject *tmp = list_join(res);
549 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000550 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300551 self->text = tmp;
552 Py_DECREF(res);
553 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000554 }
555 }
556
557 return res;
558}
559
560LOCAL(PyObject*)
561element_get_tail(ElementObject* self)
562{
563 /* return borrowed reference to text attribute */
564
Serhiy Storchaka576def02017-03-30 09:47:31 +0300565 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000566
567 if (JOIN_GET(res)) {
568 res = JOIN_OBJ(res);
569 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300570 PyObject *tmp = list_join(res);
571 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000572 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300573 self->tail = tmp;
574 Py_DECREF(res);
575 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000576 }
577 }
578
579 return res;
580}
581
582static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300583subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000584{
585 PyObject* elem;
586
587 ElementObject* parent;
588 PyObject* tag;
589 PyObject* attrib = NULL;
590 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
591 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800592 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000593 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800594 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000595
Eli Bendersky737b1732012-05-29 06:02:56 +0300596 if (attrib) {
597 /* attrib passed as positional arg */
598 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000599 if (!attrib)
600 return NULL;
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600601 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
602 Py_DECREF(attrib);
603 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300604 }
605 } else if (kwds) {
606 /* have keyword args */
607 attrib = get_attrib_from_keywords(kwds);
608 if (!attrib)
609 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000610 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300611 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000612 }
613
Eli Bendersky092af1f2012-03-04 07:14:03 +0200614 elem = create_new_element(tag, attrib);
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200615 Py_XDECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200616 if (elem == NULL)
617 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000618
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000619 if (element_add_subelement(parent, elem) < 0) {
620 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000621 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000622 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000623
624 return elem;
625}
626
Eli Bendersky0192ba32012-03-30 16:38:33 +0300627static int
628element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
629{
630 Py_VISIT(self->tag);
631 Py_VISIT(JOIN_OBJ(self->text));
632 Py_VISIT(JOIN_OBJ(self->tail));
633
634 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200635 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300636 Py_VISIT(self->extra->attrib);
637
638 for (i = 0; i < self->extra->length; ++i)
639 Py_VISIT(self->extra->children[i]);
640 }
641 return 0;
642}
643
644static int
645element_gc_clear(ElementObject *self)
646{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300647 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700648 _clear_joined_ptr(&self->text);
649 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300650
651 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300652 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300653 */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300654 clear_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300655 return 0;
656}
657
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000658static void
659element_dealloc(ElementObject* self)
660{
INADA Naokia6296d32017-08-24 14:55:17 +0900661 /* bpo-31095: UnTrack is needed before calling any callbacks */
Eli Bendersky0192ba32012-03-30 16:38:33 +0300662 PyObject_GC_UnTrack(self);
Jeroen Demeyer351c6742019-05-10 19:21:11 +0200663 Py_TRASHCAN_BEGIN(self, element_dealloc)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300664
665 if (self->weakreflist != NULL)
666 PyObject_ClearWeakRefs((PyObject *) self);
667
Eli Bendersky0192ba32012-03-30 16:38:33 +0300668 /* element_gc_clear clears all references and deallocates extra
669 */
670 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000671
672 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200673 Py_TYPE(self)->tp_free((PyObject *)self);
Jeroen Demeyer351c6742019-05-10 19:21:11 +0200674 Py_TRASHCAN_END
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000675}
676
677/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000678
Serhiy Storchakacb985562015-05-04 15:32:48 +0300679/*[clinic input]
680_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000681
Serhiy Storchakacb985562015-05-04 15:32:48 +0300682 subelement: object(subclass_of='&Element_Type')
683 /
684
685[clinic start generated code]*/
686
687static PyObject *
688_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
689/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
690{
691 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000692 return NULL;
693
694 Py_RETURN_NONE;
695}
696
Serhiy Storchakacb985562015-05-04 15:32:48 +0300697/*[clinic input]
698_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699
Serhiy Storchakacb985562015-05-04 15:32:48 +0300700[clinic start generated code]*/
701
702static PyObject *
703_elementtree_Element_clear_impl(ElementObject *self)
704/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
705{
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300706 clear_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000707
708 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300709 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000710
711 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300712 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000713
714 Py_RETURN_NONE;
715}
716
Serhiy Storchakacb985562015-05-04 15:32:48 +0300717/*[clinic input]
718_elementtree.Element.__copy__
719
720[clinic start generated code]*/
721
722static PyObject *
723_elementtree_Element___copy___impl(ElementObject *self)
724/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000725{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200726 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000727 ElementObject* element;
728
Eli Bendersky092af1f2012-03-04 07:14:03 +0200729 element = (ElementObject*) create_new_element(
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200730 self->tag, self->extra ? self->extra->attrib : NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000731 if (!element)
732 return NULL;
733
Oren Milman39ecb9c2017-10-10 23:26:24 +0300734 Py_INCREF(JOIN_OBJ(self->text));
735 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000736
Oren Milman39ecb9c2017-10-10 23:26:24 +0300737 Py_INCREF(JOIN_OBJ(self->tail));
738 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000739
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300740 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000741 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000742 if (element_resize(element, self->extra->length) < 0) {
743 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000744 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000745 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000746
747 for (i = 0; i < self->extra->length; i++) {
748 Py_INCREF(self->extra->children[i]);
749 element->extra->children[i] = self->extra->children[i];
750 }
751
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300752 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000753 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000754 }
755
756 return (PyObject*) element;
757}
758
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200759/* Helper for a deep copy. */
760LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
761
Serhiy Storchakacb985562015-05-04 15:32:48 +0300762/*[clinic input]
763_elementtree.Element.__deepcopy__
764
Oren Milmand0568182017-09-12 17:39:15 +0300765 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300766 /
767
768[clinic start generated code]*/
769
770static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300771_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
772/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000773{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200774 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000775 ElementObject* element;
776 PyObject* tag;
777 PyObject* attrib;
778 PyObject* text;
779 PyObject* tail;
780 PyObject* id;
781
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000782 tag = deepcopy(self->tag, memo);
783 if (!tag)
784 return NULL;
785
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200786 if (self->extra && self->extra->attrib) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000787 attrib = deepcopy(self->extra->attrib, memo);
788 if (!attrib) {
789 Py_DECREF(tag);
790 return NULL;
791 }
792 } else {
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200793 attrib = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000794 }
795
Eli Bendersky092af1f2012-03-04 07:14:03 +0200796 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000797
798 Py_DECREF(tag);
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200799 Py_XDECREF(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000800
801 if (!element)
802 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100803
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000804 text = deepcopy(JOIN_OBJ(self->text), memo);
805 if (!text)
806 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300807 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000808
809 tail = deepcopy(JOIN_OBJ(self->tail), memo);
810 if (!tail)
811 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300812 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000813
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300814 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000815 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000816 if (element_resize(element, self->extra->length) < 0)
817 goto error;
818
819 for (i = 0; i < self->extra->length; i++) {
820 PyObject* child = deepcopy(self->extra->children[i], memo);
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300821 if (!child || !Element_Check(child)) {
822 if (child) {
823 raise_type_error(child);
824 Py_DECREF(child);
825 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000826 element->extra->length = i;
827 goto error;
828 }
829 element->extra->children[i] = child;
830 }
831
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300832 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000833 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000834 }
835
836 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700837 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000838 if (!id)
839 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000840
841 i = PyDict_SetItem(memo, id, (PyObject*) element);
842
843 Py_DECREF(id);
844
845 if (i < 0)
846 goto error;
847
848 return (PyObject*) element;
849
850 error:
851 Py_DECREF(element);
852 return NULL;
853}
854
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200855LOCAL(PyObject *)
856deepcopy(PyObject *object, PyObject *memo)
857{
858 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200859 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200860 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200861
862 /* Fast paths */
863 if (object == Py_None || PyUnicode_CheckExact(object)) {
864 Py_INCREF(object);
865 return object;
866 }
867
868 if (Py_REFCNT(object) == 1) {
869 if (PyDict_CheckExact(object)) {
870 PyObject *key, *value;
871 Py_ssize_t pos = 0;
872 int simple = 1;
873 while (PyDict_Next(object, &pos, &key, &value)) {
874 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
875 simple = 0;
876 break;
877 }
878 }
879 if (simple)
880 return PyDict_Copy(object);
881 /* Fall through to general case */
882 }
883 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300884 return _elementtree_Element___deepcopy___impl(
885 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200886 }
887 }
888
889 /* General case */
890 st = ET_STATE_GLOBAL;
891 if (!st->deepcopy_obj) {
892 PyErr_SetString(PyExc_RuntimeError,
893 "deepcopy helper not found");
894 return NULL;
895 }
896
Victor Stinner7fbac452016-08-20 01:34:44 +0200897 stack[0] = object;
898 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200899 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200900}
901
902
Serhiy Storchakacb985562015-05-04 15:32:48 +0300903/*[clinic input]
904_elementtree.Element.__sizeof__ -> Py_ssize_t
905
906[clinic start generated code]*/
907
908static Py_ssize_t
909_elementtree_Element___sizeof___impl(ElementObject *self)
910/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200911{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200912 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200913 if (self->extra) {
914 result += sizeof(ElementObjectExtra);
915 if (self->extra->children != self->extra->_children)
916 result += sizeof(PyObject*) * self->extra->allocated;
917 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300918 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200919}
920
Eli Bendersky698bdb22013-01-10 06:01:06 -0800921/* dict keys for getstate/setstate. */
922#define PICKLED_TAG "tag"
923#define PICKLED_CHILDREN "_children"
924#define PICKLED_ATTRIB "attrib"
925#define PICKLED_TAIL "tail"
926#define PICKLED_TEXT "text"
927
928/* __getstate__ returns a fabricated instance dict as in the pure-Python
929 * Element implementation, for interoperability/interchangeability. This
930 * makes the pure-Python implementation details an API, but (a) there aren't
931 * any unnecessary structures there; and (b) it buys compatibility with 3.2
932 * pickles. See issue #16076.
933 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300934/*[clinic input]
935_elementtree.Element.__getstate__
936
937[clinic start generated code]*/
938
Eli Bendersky698bdb22013-01-10 06:01:06 -0800939static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300940_elementtree_Element___getstate___impl(ElementObject *self)
941/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800942{
Serhiy Storchaka88944a42020-03-09 14:37:08 +0200943 Py_ssize_t i;
944 PyObject *children, *attrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800945
946 /* Build a list of children. */
947 children = PyList_New(self->extra ? self->extra->length : 0);
948 if (!children)
949 return NULL;
950 for (i = 0; i < PyList_GET_SIZE(children); i++) {
951 PyObject *child = self->extra->children[i];
952 Py_INCREF(child);
953 PyList_SET_ITEM(children, i, child);
954 }
955
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200956 if (self->extra && self->extra->attrib) {
Serhiy Storchaka88944a42020-03-09 14:37:08 +0200957 attrib = self->extra->attrib;
958 Py_INCREF(attrib);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800959 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800960 else {
Serhiy Storchaka88944a42020-03-09 14:37:08 +0200961 attrib = PyDict_New();
962 if (!attrib) {
963 Py_DECREF(children);
964 return NULL;
965 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800966 }
Serhiy Storchaka88944a42020-03-09 14:37:08 +0200967
968 return Py_BuildValue("{sOsNsNsOsO}",
969 PICKLED_TAG, self->tag,
970 PICKLED_CHILDREN, children,
971 PICKLED_ATTRIB, attrib,
972 PICKLED_TEXT, JOIN_OBJ(self->text),
973 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800974}
975
976static PyObject *
977element_setstate_from_attributes(ElementObject *self,
978 PyObject *tag,
979 PyObject *attrib,
980 PyObject *text,
981 PyObject *tail,
982 PyObject *children)
983{
984 Py_ssize_t i, nchildren;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300985 ElementObjectExtra *oldextra = NULL;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800986
987 if (!tag) {
988 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
989 return NULL;
990 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800991
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200992 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300993 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800994
Oren Milman39ecb9c2017-10-10 23:26:24 +0300995 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
996 Py_INCREF(JOIN_OBJ(text));
997 _set_joined_ptr(&self->text, text);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800998
Oren Milman39ecb9c2017-10-10 23:26:24 +0300999 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
1000 Py_INCREF(JOIN_OBJ(tail));
1001 _set_joined_ptr(&self->tail, tail);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001002
1003 /* Handle ATTRIB and CHILDREN. */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001004 if (!children && !attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001005 Py_RETURN_NONE;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001006 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001007
1008 /* Compute 'nchildren'. */
1009 if (children) {
1010 if (!PyList_Check(children)) {
1011 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1012 return NULL;
1013 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001014 nchildren = PyList_GET_SIZE(children);
1015
1016 /* (Re-)allocate 'extra'.
1017 Avoid DECREFs calling into this code again (cycles, etc.)
1018 */
1019 oldextra = self->extra;
1020 self->extra = NULL;
1021 if (element_resize(self, nchildren)) {
1022 assert(!self->extra || !self->extra->length);
1023 clear_extra(self);
1024 self->extra = oldextra;
1025 return NULL;
1026 }
1027 assert(self->extra);
1028 assert(self->extra->allocated >= nchildren);
1029 if (oldextra) {
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02001030 assert(self->extra->attrib == NULL);
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001031 self->extra->attrib = oldextra->attrib;
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02001032 oldextra->attrib = NULL;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001033 }
1034
1035 /* Copy children */
1036 for (i = 0; i < nchildren; i++) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001037 PyObject *child = PyList_GET_ITEM(children, i);
1038 if (!Element_Check(child)) {
1039 raise_type_error(child);
1040 self->extra->length = i;
1041 dealloc_extra(oldextra);
1042 return NULL;
1043 }
1044 Py_INCREF(child);
1045 self->extra->children[i] = child;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001046 }
1047
1048 assert(!self->extra->length);
1049 self->extra->length = nchildren;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001050 }
1051 else {
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001052 if (element_resize(self, 0)) {
1053 return NULL;
1054 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001055 }
1056
Eli Bendersky698bdb22013-01-10 06:01:06 -08001057 /* Stash attrib. */
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02001058 Py_XINCREF(attrib);
1059 Py_XSETREF(self->extra->attrib, attrib);
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001060 dealloc_extra(oldextra);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001061
1062 Py_RETURN_NONE;
1063}
1064
1065/* __setstate__ for Element instance from the Python implementation.
1066 * 'state' should be the instance dict.
1067 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001068
Eli Bendersky698bdb22013-01-10 06:01:06 -08001069static PyObject *
1070element_setstate_from_Python(ElementObject *self, PyObject *state)
1071{
1072 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1073 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1074 PyObject *args;
1075 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001076 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001077
Eli Bendersky698bdb22013-01-10 06:01:06 -08001078 tag = attrib = text = tail = children = NULL;
1079 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001080 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001081 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001082
1083 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1084 &attrib, &text, &tail, &children))
1085 retval = element_setstate_from_attributes(self, tag, attrib, text,
1086 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001087 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001088 retval = NULL;
1089
1090 Py_DECREF(args);
1091 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001092}
1093
Serhiy Storchakacb985562015-05-04 15:32:48 +03001094/*[clinic input]
1095_elementtree.Element.__setstate__
1096
1097 state: object
1098 /
1099
1100[clinic start generated code]*/
1101
Eli Bendersky698bdb22013-01-10 06:01:06 -08001102static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001103_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1104/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001105{
1106 if (!PyDict_CheckExact(state)) {
1107 PyErr_Format(PyExc_TypeError,
1108 "Don't know how to unpickle \"%.200R\" as an Element",
1109 state);
1110 return NULL;
1111 }
1112 else
1113 return element_setstate_from_Python(self, state);
1114}
1115
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001116LOCAL(int)
1117checkpath(PyObject* tag)
1118{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001119 Py_ssize_t i;
1120 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001121
1122 /* check if a tag contains an xpath character */
1123
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001124#define PATHCHAR(ch) \
1125 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001126
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001127 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001128 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1129 void *data = PyUnicode_DATA(tag);
1130 unsigned int kind = PyUnicode_KIND(tag);
Stefan Behnel47541682019-05-03 20:58:16 +02001131 if (len >= 3 && PyUnicode_READ(kind, data, 0) == '{' && (
1132 PyUnicode_READ(kind, data, 1) == '}' || (
1133 PyUnicode_READ(kind, data, 1) == '*' &&
1134 PyUnicode_READ(kind, data, 2) == '}'))) {
1135 /* wildcard: '{}tag' or '{*}tag' */
1136 return 1;
1137 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001138 for (i = 0; i < len; i++) {
1139 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1140 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001141 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001142 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001143 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001144 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001145 return 1;
1146 }
1147 return 0;
1148 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001149 if (PyBytes_Check(tag)) {
1150 char *p = PyBytes_AS_STRING(tag);
Stefan Behnel47541682019-05-03 20:58:16 +02001151 const Py_ssize_t len = PyBytes_GET_SIZE(tag);
1152 if (len >= 3 && p[0] == '{' && (
Stefan Behnel6b951492019-05-06 17:36:35 +02001153 p[1] == '}' || (p[1] == '*' && p[2] == '}'))) {
Stefan Behnel47541682019-05-03 20:58:16 +02001154 /* wildcard: '{}tag' or '{*}tag' */
1155 return 1;
1156 }
1157 for (i = 0; i < len; i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001158 if (p[i] == '{')
1159 check = 0;
1160 else if (p[i] == '}')
1161 check = 1;
1162 else if (check && PATHCHAR(p[i]))
1163 return 1;
1164 }
1165 return 0;
1166 }
1167
1168 return 1; /* unknown type; might be path expression */
1169}
1170
Serhiy Storchakacb985562015-05-04 15:32:48 +03001171/*[clinic input]
1172_elementtree.Element.extend
1173
1174 elements: object
1175 /
1176
1177[clinic start generated code]*/
1178
1179static PyObject *
1180_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1181/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001182{
1183 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001184 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001185
Serhiy Storchakacb985562015-05-04 15:32:48 +03001186 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001187 if (!seq) {
1188 PyErr_Format(
1189 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001190 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001191 );
1192 return NULL;
1193 }
1194
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001195 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001196 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001197 Py_INCREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001198 if (element_add_subelement(self, element) < 0) {
1199 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001200 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001201 return NULL;
1202 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001203 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001204 }
1205
1206 Py_DECREF(seq);
1207
1208 Py_RETURN_NONE;
1209}
1210
Serhiy Storchakacb985562015-05-04 15:32:48 +03001211/*[clinic input]
1212_elementtree.Element.find
1213
1214 path: object
1215 namespaces: object = None
1216
1217[clinic start generated code]*/
1218
1219static PyObject *
1220_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1221 PyObject *namespaces)
1222/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001223{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001224 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001225 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001226
Serhiy Storchakacb985562015-05-04 15:32:48 +03001227 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001228 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001229 return _PyObject_CallMethodIdObjArgs(
1230 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001231 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001232 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001233
1234 if (!self->extra)
1235 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001236
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001237 for (i = 0; i < self->extra->length; i++) {
1238 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001239 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001240 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001241 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001242 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001243 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001244 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001245 Py_DECREF(item);
1246 if (rc < 0)
1247 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001248 }
1249
1250 Py_RETURN_NONE;
1251}
1252
Serhiy Storchakacb985562015-05-04 15:32:48 +03001253/*[clinic input]
1254_elementtree.Element.findtext
1255
1256 path: object
1257 default: object = None
1258 namespaces: object = None
1259
1260[clinic start generated code]*/
1261
1262static PyObject *
1263_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1264 PyObject *default_value,
1265 PyObject *namespaces)
1266/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001267{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001268 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001269 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001270 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001271
Serhiy Storchakacb985562015-05-04 15:32:48 +03001272 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001273 return _PyObject_CallMethodIdObjArgs(
1274 st->elementpath_obj, &PyId_findtext,
1275 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001276 );
1277
1278 if (!self->extra) {
1279 Py_INCREF(default_value);
1280 return default_value;
1281 }
1282
1283 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001284 PyObject *item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001285 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001286 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001287 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001288 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001289 if (rc > 0) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001290 PyObject* text = element_get_text((ElementObject*)item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001291 if (text == Py_None) {
1292 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001293 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001294 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001295 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001296 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001297 return text;
1298 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001299 Py_DECREF(item);
1300 if (rc < 0)
1301 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001302 }
1303
1304 Py_INCREF(default_value);
1305 return default_value;
1306}
1307
Serhiy Storchakacb985562015-05-04 15:32:48 +03001308/*[clinic input]
1309_elementtree.Element.findall
1310
1311 path: object
1312 namespaces: object = None
1313
1314[clinic start generated code]*/
1315
1316static PyObject *
1317_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1318 PyObject *namespaces)
1319/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001320{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001321 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001322 PyObject* out;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001323 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001324
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001325 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001326 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001327 return _PyObject_CallMethodIdObjArgs(
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001328 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001329 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001330 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001331
1332 out = PyList_New(0);
1333 if (!out)
1334 return NULL;
1335
1336 if (!self->extra)
1337 return out;
1338
1339 for (i = 0; i < self->extra->length; i++) {
1340 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001341 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001342 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001343 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001344 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001345 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1346 Py_DECREF(item);
1347 Py_DECREF(out);
1348 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001349 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001350 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001351 }
1352
1353 return out;
1354}
1355
Serhiy Storchakacb985562015-05-04 15:32:48 +03001356/*[clinic input]
1357_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001358
Serhiy Storchakacb985562015-05-04 15:32:48 +03001359 path: object
1360 namespaces: object = None
1361
1362[clinic start generated code]*/
1363
1364static PyObject *
1365_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1366 PyObject *namespaces)
1367/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1368{
1369 PyObject* tag = path;
1370 _Py_IDENTIFIER(iterfind);
1371 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001372
Victor Stinnerf5616342016-12-09 15:26:00 +01001373 return _PyObject_CallMethodIdObjArgs(
1374 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001375}
1376
Serhiy Storchakacb985562015-05-04 15:32:48 +03001377/*[clinic input]
1378_elementtree.Element.get
1379
1380 key: object
1381 default: object = None
1382
1383[clinic start generated code]*/
1384
1385static PyObject *
1386_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1387 PyObject *default_value)
1388/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001389{
1390 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001391
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02001392 if (!self->extra || !self->extra->attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001393 value = default_value;
1394 else {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001395 value = PyDict_GetItemWithError(self->extra->attrib, key);
1396 if (!value) {
1397 if (PyErr_Occurred()) {
1398 return NULL;
1399 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001400 value = default_value;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001401 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001402 }
1403
1404 Py_INCREF(value);
1405 return value;
1406}
1407
Eli Bendersky64d11e62012-06-15 07:42:50 +03001408static PyObject *
1409create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1410
1411
Serhiy Storchakacb985562015-05-04 15:32:48 +03001412/*[clinic input]
1413_elementtree.Element.iter
1414
1415 tag: object = None
1416
1417[clinic start generated code]*/
1418
Eli Bendersky64d11e62012-06-15 07:42:50 +03001419static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001420_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1421/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001422{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001423 if (PyUnicode_Check(tag)) {
1424 if (PyUnicode_READY(tag) < 0)
1425 return NULL;
1426 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1427 tag = Py_None;
1428 }
1429 else if (PyBytes_Check(tag)) {
1430 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1431 tag = Py_None;
1432 }
1433
Eli Bendersky64d11e62012-06-15 07:42:50 +03001434 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001435}
1436
1437
Serhiy Storchakacb985562015-05-04 15:32:48 +03001438/*[clinic input]
1439_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001440
Serhiy Storchakacb985562015-05-04 15:32:48 +03001441[clinic start generated code]*/
1442
1443static PyObject *
1444_elementtree_Element_itertext_impl(ElementObject *self)
1445/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1446{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001447 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001448}
1449
Eli Bendersky64d11e62012-06-15 07:42:50 +03001450
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001451static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001452element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001453{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001454 ElementObject* self = (ElementObject*) self_;
1455
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001456 if (!self->extra || index < 0 || index >= self->extra->length) {
1457 PyErr_SetString(
1458 PyExc_IndexError,
1459 "child index out of range"
1460 );
1461 return NULL;
1462 }
1463
1464 Py_INCREF(self->extra->children[index]);
1465 return self->extra->children[index];
1466}
1467
Serhiy Storchakacb985562015-05-04 15:32:48 +03001468/*[clinic input]
1469_elementtree.Element.insert
1470
1471 index: Py_ssize_t
1472 subelement: object(subclass_of='&Element_Type')
1473 /
1474
1475[clinic start generated code]*/
1476
1477static PyObject *
1478_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1479 PyObject *subelement)
1480/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001481{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001482 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001483
Victor Stinner5f0af232013-07-11 23:01:36 +02001484 if (!self->extra) {
1485 if (create_extra(self, NULL) < 0)
1486 return NULL;
1487 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001488
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001489 if (index < 0) {
1490 index += self->extra->length;
1491 if (index < 0)
1492 index = 0;
1493 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001494 if (index > self->extra->length)
1495 index = self->extra->length;
1496
1497 if (element_resize(self, 1) < 0)
1498 return NULL;
1499
1500 for (i = self->extra->length; i > index; i--)
1501 self->extra->children[i] = self->extra->children[i-1];
1502
Serhiy Storchakacb985562015-05-04 15:32:48 +03001503 Py_INCREF(subelement);
1504 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001505
1506 self->extra->length++;
1507
1508 Py_RETURN_NONE;
1509}
1510
Serhiy Storchakacb985562015-05-04 15:32:48 +03001511/*[clinic input]
1512_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001513
Serhiy Storchakacb985562015-05-04 15:32:48 +03001514[clinic start generated code]*/
1515
1516static PyObject *
1517_elementtree_Element_items_impl(ElementObject *self)
1518/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1519{
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02001520 if (!self->extra || !self->extra->attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001521 return PyList_New(0);
1522
1523 return PyDict_Items(self->extra->attrib);
1524}
1525
Serhiy Storchakacb985562015-05-04 15:32:48 +03001526/*[clinic input]
1527_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001528
Serhiy Storchakacb985562015-05-04 15:32:48 +03001529[clinic start generated code]*/
1530
1531static PyObject *
1532_elementtree_Element_keys_impl(ElementObject *self)
1533/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1534{
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02001535 if (!self->extra || !self->extra->attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001536 return PyList_New(0);
1537
1538 return PyDict_Keys(self->extra->attrib);
1539}
1540
Martin v. Löwis18e16552006-02-15 17:27:45 +00001541static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001542element_length(ElementObject* self)
1543{
1544 if (!self->extra)
1545 return 0;
1546
1547 return self->extra->length;
1548}
1549
Serhiy Storchakacb985562015-05-04 15:32:48 +03001550/*[clinic input]
1551_elementtree.Element.makeelement
1552
1553 tag: object
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02001554 attrib: object(subclass_of='&PyDict_Type')
Serhiy Storchakacb985562015-05-04 15:32:48 +03001555 /
1556
1557[clinic start generated code]*/
1558
1559static PyObject *
1560_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1561 PyObject *attrib)
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02001562/*[clinic end generated code: output=4109832d5bb789ef input=2279d974529c3861]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001563{
1564 PyObject* elem;
1565
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001566 attrib = PyDict_Copy(attrib);
1567 if (!attrib)
1568 return NULL;
1569
Eli Bendersky092af1f2012-03-04 07:14:03 +02001570 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001571
1572 Py_DECREF(attrib);
1573
1574 return elem;
1575}
1576
Serhiy Storchakacb985562015-05-04 15:32:48 +03001577/*[clinic input]
1578_elementtree.Element.remove
1579
1580 subelement: object(subclass_of='&Element_Type')
1581 /
1582
1583[clinic start generated code]*/
1584
1585static PyObject *
1586_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1587/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001588{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001589 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001590 int rc;
1591 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001592
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001593 if (!self->extra) {
1594 /* element has no children, so raise exception */
1595 PyErr_SetString(
1596 PyExc_ValueError,
1597 "list.remove(x): x not in list"
1598 );
1599 return NULL;
1600 }
1601
1602 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001603 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001604 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001605 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001606 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001607 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001608 if (rc < 0)
1609 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001610 }
1611
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001612 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001613 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001614 PyErr_SetString(
1615 PyExc_ValueError,
1616 "list.remove(x): x not in list"
1617 );
1618 return NULL;
1619 }
1620
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001621 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001622
1623 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001624 for (; i < self->extra->length; i++)
1625 self->extra->children[i] = self->extra->children[i+1];
1626
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001627 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001628 Py_RETURN_NONE;
1629}
1630
1631static PyObject*
1632element_repr(ElementObject* self)
1633{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001634 int status;
1635
1636 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001637 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001638
1639 status = Py_ReprEnter((PyObject *)self);
1640 if (status == 0) {
1641 PyObject *res;
1642 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1643 Py_ReprLeave((PyObject *)self);
1644 return res;
1645 }
1646 if (status > 0)
1647 PyErr_Format(PyExc_RuntimeError,
1648 "reentrant call inside %s.__repr__",
1649 Py_TYPE(self)->tp_name);
1650 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001651}
1652
Serhiy Storchakacb985562015-05-04 15:32:48 +03001653/*[clinic input]
1654_elementtree.Element.set
1655
1656 key: object
1657 value: object
1658 /
1659
1660[clinic start generated code]*/
1661
1662static PyObject *
1663_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1664 PyObject *value)
1665/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001666{
1667 PyObject* attrib;
1668
Victor Stinner5f0af232013-07-11 23:01:36 +02001669 if (!self->extra) {
1670 if (create_extra(self, NULL) < 0)
1671 return NULL;
1672 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001673
1674 attrib = element_get_attrib(self);
1675 if (!attrib)
1676 return NULL;
1677
1678 if (PyDict_SetItem(attrib, key, value) < 0)
1679 return NULL;
1680
1681 Py_RETURN_NONE;
1682}
1683
1684static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001685element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001686{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001687 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001688 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001689 PyObject* old;
1690
1691 if (!self->extra || index < 0 || index >= self->extra->length) {
1692 PyErr_SetString(
1693 PyExc_IndexError,
1694 "child assignment index out of range");
1695 return -1;
1696 }
1697
1698 old = self->extra->children[index];
1699
1700 if (item) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001701 if (!Element_Check(item)) {
1702 raise_type_error(item);
1703 return -1;
1704 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001705 Py_INCREF(item);
1706 self->extra->children[index] = item;
1707 } else {
1708 self->extra->length--;
1709 for (i = index; i < self->extra->length; i++)
1710 self->extra->children[i] = self->extra->children[i+1];
1711 }
1712
1713 Py_DECREF(old);
1714
1715 return 0;
1716}
1717
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001718static PyObject*
1719element_subscr(PyObject* self_, PyObject* item)
1720{
1721 ElementObject* self = (ElementObject*) self_;
1722
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001723 if (PyIndex_Check(item)) {
1724 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001725
1726 if (i == -1 && PyErr_Occurred()) {
1727 return NULL;
1728 }
1729 if (i < 0 && self->extra)
1730 i += self->extra->length;
1731 return element_getitem(self_, i);
1732 }
1733 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001734 Py_ssize_t start, stop, step, slicelen, i;
1735 size_t cur;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001736 PyObject* list;
1737
1738 if (!self->extra)
1739 return PyList_New(0);
1740
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001741 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001742 return NULL;
1743 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001744 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1745 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001746
1747 if (slicelen <= 0)
1748 return PyList_New(0);
1749 else {
1750 list = PyList_New(slicelen);
1751 if (!list)
1752 return NULL;
1753
1754 for (cur = start, i = 0; i < slicelen;
1755 cur += step, i++) {
1756 PyObject* item = self->extra->children[cur];
1757 Py_INCREF(item);
1758 PyList_SET_ITEM(list, i, item);
1759 }
1760
1761 return list;
1762 }
1763 }
1764 else {
1765 PyErr_SetString(PyExc_TypeError,
1766 "element indices must be integers");
1767 return NULL;
1768 }
1769}
1770
1771static int
1772element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1773{
1774 ElementObject* self = (ElementObject*) self_;
1775
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001776 if (PyIndex_Check(item)) {
1777 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001778
1779 if (i == -1 && PyErr_Occurred()) {
1780 return -1;
1781 }
1782 if (i < 0 && self->extra)
1783 i += self->extra->length;
1784 return element_setitem(self_, i, value);
1785 }
1786 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001787 Py_ssize_t start, stop, step, slicelen, newlen, i;
1788 size_t cur;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001789
1790 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001791 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001792
Victor Stinner5f0af232013-07-11 23:01:36 +02001793 if (!self->extra) {
1794 if (create_extra(self, NULL) < 0)
1795 return -1;
1796 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001797
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001798 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001799 return -1;
1800 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001801 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1802 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001803
Eli Bendersky865756a2012-03-09 13:38:15 +02001804 if (value == NULL) {
1805 /* Delete slice */
1806 size_t cur;
1807 Py_ssize_t i;
1808
1809 if (slicelen <= 0)
1810 return 0;
1811
1812 /* Since we're deleting, the direction of the range doesn't matter,
1813 * so for simplicity make it always ascending.
1814 */
1815 if (step < 0) {
1816 stop = start + 1;
1817 start = stop + step * (slicelen - 1) - 1;
1818 step = -step;
1819 }
1820
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001821 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001822
1823 /* recycle is a list that will contain all the children
1824 * scheduled for removal.
1825 */
1826 if (!(recycle = PyList_New(slicelen))) {
Eli Bendersky865756a2012-03-09 13:38:15 +02001827 return -1;
1828 }
1829
1830 /* This loop walks over all the children that have to be deleted,
1831 * with cur pointing at them. num_moved is the amount of children
1832 * until the next deleted child that have to be "shifted down" to
1833 * occupy the deleted's places.
1834 * Note that in the ith iteration, shifting is done i+i places down
1835 * because i children were already removed.
1836 */
1837 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1838 /* Compute how many children have to be moved, clipping at the
1839 * list end.
1840 */
1841 Py_ssize_t num_moved = step - 1;
1842 if (cur + step >= (size_t)self->extra->length) {
1843 num_moved = self->extra->length - cur - 1;
1844 }
1845
1846 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1847
1848 memmove(
1849 self->extra->children + cur - i,
1850 self->extra->children + cur + 1,
1851 num_moved * sizeof(PyObject *));
1852 }
1853
1854 /* Leftover "tail" after the last removed child */
1855 cur = start + (size_t)slicelen * step;
1856 if (cur < (size_t)self->extra->length) {
1857 memmove(
1858 self->extra->children + cur - slicelen,
1859 self->extra->children + cur,
1860 (self->extra->length - cur) * sizeof(PyObject *));
1861 }
1862
1863 self->extra->length -= slicelen;
1864
1865 /* Discard the recycle list with all the deleted sub-elements */
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -06001866 Py_DECREF(recycle);
Eli Bendersky865756a2012-03-09 13:38:15 +02001867 return 0;
1868 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001869
1870 /* A new slice is actually being assigned */
1871 seq = PySequence_Fast(value, "");
1872 if (!seq) {
1873 PyErr_Format(
1874 PyExc_TypeError,
1875 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1876 );
1877 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001878 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001879 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001880
1881 if (step != 1 && newlen != slicelen)
1882 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001883 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001884 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001885 "attempt to assign sequence of size %zd "
1886 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001887 newlen, slicelen
1888 );
1889 return -1;
1890 }
1891
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001892 /* Resize before creating the recycle bin, to prevent refleaks. */
1893 if (newlen > slicelen) {
1894 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001895 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001896 return -1;
1897 }
1898 }
1899
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001900 for (i = 0; i < newlen; i++) {
1901 PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1902 if (!Element_Check(element)) {
1903 raise_type_error(element);
1904 Py_DECREF(seq);
1905 return -1;
1906 }
1907 }
1908
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001909 if (slicelen > 0) {
1910 /* to avoid recursive calls to this method (via decref), move
1911 old items to the recycle bin here, and get rid of them when
1912 we're done modifying the element */
1913 recycle = PyList_New(slicelen);
1914 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001915 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001916 return -1;
1917 }
1918 for (cur = start, i = 0; i < slicelen;
1919 cur += step, i++)
1920 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1921 }
1922
1923 if (newlen < slicelen) {
1924 /* delete slice */
1925 for (i = stop; i < self->extra->length; i++)
1926 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1927 } else if (newlen > slicelen) {
1928 /* insert slice */
1929 for (i = self->extra->length-1; i >= stop; i--)
1930 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1931 }
1932
1933 /* replace the slice */
1934 for (cur = start, i = 0; i < newlen;
1935 cur += step, i++) {
1936 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1937 Py_INCREF(element);
1938 self->extra->children[cur] = element;
1939 }
1940
1941 self->extra->length += newlen - slicelen;
1942
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001943 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001944
1945 /* discard the recycle bin, and everything in it */
1946 Py_XDECREF(recycle);
1947
1948 return 0;
1949 }
1950 else {
1951 PyErr_SetString(PyExc_TypeError,
1952 "element indices must be integers");
1953 return -1;
1954 }
1955}
1956
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001957static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001958element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001959{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001960 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001961 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001962 return res;
1963}
1964
Serhiy Storchakadde08152015-11-25 15:28:13 +02001965static PyObject*
1966element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001967{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001968 PyObject *res = element_get_text(self);
1969 Py_XINCREF(res);
1970 return res;
1971}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001972
Serhiy Storchakadde08152015-11-25 15:28:13 +02001973static PyObject*
1974element_tail_getter(ElementObject *self, void *closure)
1975{
1976 PyObject *res = element_get_tail(self);
1977 Py_XINCREF(res);
1978 return res;
1979}
1980
1981static PyObject*
1982element_attrib_getter(ElementObject *self, void *closure)
1983{
1984 PyObject *res;
1985 if (!self->extra) {
1986 if (create_extra(self, NULL) < 0)
1987 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001988 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001989 res = element_get_attrib(self);
1990 Py_XINCREF(res);
1991 return res;
1992}
Victor Stinner4d463432013-07-11 23:05:03 +02001993
/* macro for setter validation: a setter is called with a NULL value when
   the attribute is being deleted, which is not allowed here.  In that case
   the macro sets AttributeError and makes the enclosing function return -1.
   Wrapped in do { } while (0) so that it expands to exactly one statement
   and is safe inside an unbraced if/else. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2002
Serhiy Storchakadde08152015-11-25 15:28:13 +02002003static int
2004element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2005{
2006 _VALIDATE_ATTR_VALUE(value);
2007 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002008 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002009 return 0;
2010}
2011
2012static int
2013element_text_setter(ElementObject *self, PyObject *value, void *closure)
2014{
2015 _VALIDATE_ATTR_VALUE(value);
2016 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002017 _set_joined_ptr(&self->text, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002018 return 0;
2019}
2020
2021static int
2022element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2023{
2024 _VALIDATE_ATTR_VALUE(value);
2025 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002026 _set_joined_ptr(&self->tail, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002027 return 0;
2028}
2029
2030static int
2031element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2032{
2033 _VALIDATE_ATTR_VALUE(value);
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02002034 if (!PyDict_Check(value)) {
2035 PyErr_Format(PyExc_TypeError,
2036 "attrib must be dict, not %.200s",
2037 value->ob_type->tp_name);
2038 return -1;
2039 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02002040 if (!self->extra) {
2041 if (create_extra(self, NULL) < 0)
2042 return -1;
2043 }
2044 Py_INCREF(value);
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02002045 Py_XSETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002046 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002047}
2048
2049static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002050 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002051 0, /* sq_concat */
2052 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002053 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002054 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002055 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002056 0,
2057};
2058
Eli Bendersky64d11e62012-06-15 07:42:50 +03002059/******************************* Element iterator ****************************/
2060
2061/* ElementIterObject represents the iteration state over an XML element in
2062 * pre-order traversal. To keep track of which sub-element should be returned
2063 * next, a stack of parents is maintained. This is a standard stack-based
2064 * iterative pre-order traversal of a tree.
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002065 * The stack is managed using a continuous array.
2066 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002067 * the current one is exhausted, and the next child to examine in that parent.
2068 */
2069typedef struct ParentLocator_t {
2070 ElementObject *parent;
2071 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002072} ParentLocator;
2073
2074typedef struct {
2075 PyObject_HEAD
2076 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002077 Py_ssize_t parent_stack_used;
2078 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002079 ElementObject *root_element;
2080 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002081 int gettext;
2082} ElementIterObject;
2083
2084
2085static void
2086elementiter_dealloc(ElementIterObject *it)
2087{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002088 Py_ssize_t i = it->parent_stack_used;
2089 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002090 /* bpo-31095: UnTrack is needed before calling any callbacks */
2091 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002092 while (i--)
2093 Py_XDECREF(it->parent_stack[i].parent);
2094 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002095
2096 Py_XDECREF(it->sought_tag);
2097 Py_XDECREF(it->root_element);
2098
Eli Bendersky64d11e62012-06-15 07:42:50 +03002099 PyObject_GC_Del(it);
2100}
2101
2102static int
2103elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2104{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002105 Py_ssize_t i = it->parent_stack_used;
2106 while (i--)
2107 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002108
2109 Py_VISIT(it->root_element);
2110 Py_VISIT(it->sought_tag);
2111 return 0;
2112}
2113
2114/* Helper function for elementiter_next. Add a new parent to the parent stack.
2115 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002116static int
2117parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002118{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002119 ParentLocator *item;
2120
2121 if (it->parent_stack_used >= it->parent_stack_size) {
2122 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2123 ParentLocator *parent_stack = it->parent_stack;
2124 PyMem_Resize(parent_stack, ParentLocator, new_size);
2125 if (parent_stack == NULL)
2126 return -1;
2127 it->parent_stack = parent_stack;
2128 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002129 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002130 item = it->parent_stack + it->parent_stack_used++;
2131 Py_INCREF(parent);
2132 item->parent = parent;
2133 item->child_index = 0;
2134 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002135}
2136
2137static PyObject *
2138elementiter_next(ElementIterObject *it)
2139{
2140 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002141 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002142 * A short note on gettext: this function serves both the iter() and
2143 * itertext() methods to avoid code duplication. However, there are a few
2144 * small differences in the way these iterations work. Namely:
2145 * - itertext() only yields text from nodes that have it, and continues
2146 * iterating when a node doesn't have text (so it doesn't return any
2147 * node like iter())
2148 * - itertext() also has to handle tail, after finishing with all the
2149 * children of a node.
2150 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002151 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002152 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002153 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002154
2155 while (1) {
2156 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002157 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002158 * iterator is exhausted.
2159 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002160 if (!it->parent_stack_used) {
2161 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002162 PyErr_SetNone(PyExc_StopIteration);
2163 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002164 }
2165
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002166 elem = it->root_element; /* steals a reference */
2167 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002168 }
2169 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002170 /* See if there are children left to traverse in the current parent. If
2171 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002172 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002173 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2174 Py_ssize_t child_index = item->child_index;
2175 ElementObjectExtra *extra;
2176 elem = item->parent;
2177 extra = elem->extra;
2178 if (!extra || child_index >= extra->length) {
2179 it->parent_stack_used--;
2180 /* Note that extra condition on it->parent_stack_used here;
2181 * this is because itertext() is supposed to only return *inner*
2182 * text, not text following the element it began iteration with.
2183 */
2184 if (it->gettext && it->parent_stack_used) {
2185 text = element_get_tail(elem);
2186 goto gettext;
2187 }
2188 Py_DECREF(elem);
2189 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002190 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002191
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03002192 assert(Element_Check(extra->children[child_index]));
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002193 elem = (ElementObject *)extra->children[child_index];
2194 item->child_index++;
2195 Py_INCREF(elem);
2196 }
2197
2198 if (parent_stack_push_new(it, elem) < 0) {
2199 Py_DECREF(elem);
2200 PyErr_NoMemory();
2201 return NULL;
2202 }
2203 if (it->gettext) {
2204 text = element_get_text(elem);
2205 goto gettext;
2206 }
2207
2208 if (it->sought_tag == Py_None)
2209 return (PyObject *)elem;
2210
2211 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2212 if (rc > 0)
2213 return (PyObject *)elem;
2214
2215 Py_DECREF(elem);
2216 if (rc < 0)
2217 return NULL;
2218 continue;
2219
2220gettext:
2221 if (!text) {
2222 Py_DECREF(elem);
2223 return NULL;
2224 }
2225 if (text == Py_None) {
2226 Py_DECREF(elem);
2227 }
2228 else {
2229 Py_INCREF(text);
2230 Py_DECREF(elem);
2231 rc = PyObject_IsTrue(text);
2232 if (rc > 0)
2233 return text;
2234 Py_DECREF(text);
2235 if (rc < 0)
2236 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002237 }
2238 }
2239
2240 return NULL;
2241}
2242
2243
2244static PyTypeObject ElementIter_Type = {
2245 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002246 /* Using the module's name since the pure-Python implementation does not
2247 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002248 "_elementtree._element_iterator", /* tp_name */
2249 sizeof(ElementIterObject), /* tp_basicsize */
2250 0, /* tp_itemsize */
2251 /* methods */
2252 (destructor)elementiter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002253 0, /* tp_vectorcall_offset */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002254 0, /* tp_getattr */
2255 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002256 0, /* tp_as_async */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002257 0, /* tp_repr */
2258 0, /* tp_as_number */
2259 0, /* tp_as_sequence */
2260 0, /* tp_as_mapping */
2261 0, /* tp_hash */
2262 0, /* tp_call */
2263 0, /* tp_str */
2264 0, /* tp_getattro */
2265 0, /* tp_setattro */
2266 0, /* tp_as_buffer */
2267 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2268 0, /* tp_doc */
2269 (traverseproc)elementiter_traverse, /* tp_traverse */
2270 0, /* tp_clear */
2271 0, /* tp_richcompare */
2272 0, /* tp_weaklistoffset */
2273 PyObject_SelfIter, /* tp_iter */
2274 (iternextfunc)elementiter_next, /* tp_iternext */
2275 0, /* tp_methods */
2276 0, /* tp_members */
2277 0, /* tp_getset */
2278 0, /* tp_base */
2279 0, /* tp_dict */
2280 0, /* tp_descr_get */
2281 0, /* tp_descr_set */
2282 0, /* tp_dictoffset */
2283 0, /* tp_init */
2284 0, /* tp_alloc */
2285 0, /* tp_new */
2286};
2287
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002288#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002289
2290static PyObject *
2291create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2292{
2293 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002294
2295 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2296 if (!it)
2297 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002298
Victor Stinner4d463432013-07-11 23:05:03 +02002299 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002300 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002301 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002302 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002303 it->root_element = self;
2304
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002305 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002306 if (it->parent_stack == NULL) {
2307 Py_DECREF(it);
2308 PyErr_NoMemory();
2309 return NULL;
2310 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002311 it->parent_stack_used = 0;
2312 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002313
Victor Stinner1b184552019-10-08 00:09:31 +02002314 PyObject_GC_Track(it);
2315
Eli Bendersky64d11e62012-06-15 07:42:50 +03002316 return (PyObject *)it;
2317}
2318
2319
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002320/* ==================================================================== */
2321/* the tree builder type */
2322
2323typedef struct {
2324 PyObject_HEAD
2325
Eli Bendersky58d548d2012-05-29 15:45:16 +03002326 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002327
Antoine Pitrouee329312012-10-04 19:53:29 +02002328 PyObject *this; /* current node */
2329 PyObject *last; /* most recently created node */
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002330 PyObject *last_for_tail; /* most recently created node that takes a tail */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002331
Eli Bendersky58d548d2012-05-29 15:45:16 +03002332 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002333
Eli Bendersky58d548d2012-05-29 15:45:16 +03002334 PyObject *stack; /* element stack */
2335 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002336
Eli Bendersky48d358b2012-05-30 17:57:50 +03002337 PyObject *element_factory;
Stefan Behnel43851a22019-05-01 21:20:38 +02002338 PyObject *comment_factory;
2339 PyObject *pi_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002340
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002341 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002342 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002343 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2344 PyObject *end_event_obj;
2345 PyObject *start_ns_event_obj;
2346 PyObject *end_ns_event_obj;
Stefan Behnel43851a22019-05-01 21:20:38 +02002347 PyObject *comment_event_obj;
2348 PyObject *pi_event_obj;
2349
2350 char insert_comments;
2351 char insert_pis;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002352} TreeBuilderObject;
2353
Andy Lesterdffe4c02020-03-04 07:15:20 -06002354#define TreeBuilder_CheckExact(op) Py_IS_TYPE((op), &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002355
2356/* -------------------------------------------------------------------- */
2357/* constructor and destructor */
2358
Eli Bendersky58d548d2012-05-29 15:45:16 +03002359static PyObject *
2360treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002361{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002362 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2363 if (t != NULL) {
2364 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002365
Eli Bendersky58d548d2012-05-29 15:45:16 +03002366 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002367 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002368 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002369 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002370
Eli Bendersky58d548d2012-05-29 15:45:16 +03002371 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002372 t->element_factory = NULL;
Stefan Behnel43851a22019-05-01 21:20:38 +02002373 t->comment_factory = NULL;
2374 t->pi_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002375 t->stack = PyList_New(20);
2376 if (!t->stack) {
2377 Py_DECREF(t->this);
2378 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002379 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002380 return NULL;
2381 }
2382 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002383
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002384 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002385 t->start_event_obj = t->end_event_obj = NULL;
2386 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
Stefan Behnel43851a22019-05-01 21:20:38 +02002387 t->comment_event_obj = t->pi_event_obj = NULL;
2388 t->insert_comments = t->insert_pis = 0;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002389 }
2390 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002391}
2392
Serhiy Storchakacb985562015-05-04 15:32:48 +03002393/*[clinic input]
2394_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002395
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002396 element_factory: object = None
Stefan Behnel43851a22019-05-01 21:20:38 +02002397 *
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002398 comment_factory: object = None
2399 pi_factory: object = None
Stefan Behnel43851a22019-05-01 21:20:38 +02002400 insert_comments: bool = False
2401 insert_pis: bool = False
Serhiy Storchakacb985562015-05-04 15:32:48 +03002402
2403[clinic start generated code]*/
2404
2405static int
2406_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
Stefan Behnel43851a22019-05-01 21:20:38 +02002407 PyObject *element_factory,
2408 PyObject *comment_factory,
2409 PyObject *pi_factory,
2410 int insert_comments, int insert_pis)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002411/*[clinic end generated code: output=8571d4dcadfdf952 input=ae98a94df20b5cc3]*/
Serhiy Storchakacb985562015-05-04 15:32:48 +03002412{
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002413 if (element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002414 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002415 Py_XSETREF(self->element_factory, element_factory);
Stefan Behnel43851a22019-05-01 21:20:38 +02002416 } else {
2417 Py_CLEAR(self->element_factory);
2418 }
2419
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002420 if (comment_factory == Py_None) {
Stefan Behnel43851a22019-05-01 21:20:38 +02002421 elementtreestate *st = ET_STATE_GLOBAL;
2422 comment_factory = st->comment_factory;
2423 }
2424 if (comment_factory) {
2425 Py_INCREF(comment_factory);
2426 Py_XSETREF(self->comment_factory, comment_factory);
2427 self->insert_comments = insert_comments;
2428 } else {
2429 Py_CLEAR(self->comment_factory);
2430 self->insert_comments = 0;
2431 }
2432
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002433 if (pi_factory == Py_None) {
Stefan Behnel43851a22019-05-01 21:20:38 +02002434 elementtreestate *st = ET_STATE_GLOBAL;
2435 pi_factory = st->pi_factory;
2436 }
2437 if (pi_factory) {
2438 Py_INCREF(pi_factory);
2439 Py_XSETREF(self->pi_factory, pi_factory);
2440 self->insert_pis = insert_pis;
2441 } else {
2442 Py_CLEAR(self->pi_factory);
2443 self->insert_pis = 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002444 }
2445
Eli Bendersky58d548d2012-05-29 15:45:16 +03002446 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002447}
2448
Eli Bendersky48d358b2012-05-30 17:57:50 +03002449static int
2450treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2451{
Stefan Behnel43851a22019-05-01 21:20:38 +02002452 Py_VISIT(self->pi_event_obj);
2453 Py_VISIT(self->comment_event_obj);
Serhiy Storchakad2a75c62018-12-18 22:29:14 +02002454 Py_VISIT(self->end_ns_event_obj);
2455 Py_VISIT(self->start_ns_event_obj);
2456 Py_VISIT(self->end_event_obj);
2457 Py_VISIT(self->start_event_obj);
2458 Py_VISIT(self->events_append);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002459 Py_VISIT(self->root);
2460 Py_VISIT(self->this);
2461 Py_VISIT(self->last);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002462 Py_VISIT(self->last_for_tail);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002463 Py_VISIT(self->data);
2464 Py_VISIT(self->stack);
Stefan Behnel43851a22019-05-01 21:20:38 +02002465 Py_VISIT(self->pi_factory);
2466 Py_VISIT(self->comment_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002467 Py_VISIT(self->element_factory);
2468 return 0;
2469}
2470
2471static int
2472treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002473{
Stefan Behnel43851a22019-05-01 21:20:38 +02002474 Py_CLEAR(self->pi_event_obj);
2475 Py_CLEAR(self->comment_event_obj);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002476 Py_CLEAR(self->end_ns_event_obj);
2477 Py_CLEAR(self->start_ns_event_obj);
2478 Py_CLEAR(self->end_event_obj);
2479 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002480 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002481 Py_CLEAR(self->stack);
2482 Py_CLEAR(self->data);
2483 Py_CLEAR(self->last);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002484 Py_CLEAR(self->last_for_tail);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002485 Py_CLEAR(self->this);
Stefan Behnel43851a22019-05-01 21:20:38 +02002486 Py_CLEAR(self->pi_factory);
2487 Py_CLEAR(self->comment_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002488 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002489 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002490 return 0;
2491}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002492
Eli Bendersky48d358b2012-05-30 17:57:50 +03002493static void
2494treebuilder_dealloc(TreeBuilderObject *self)
2495{
2496 PyObject_GC_UnTrack(self);
2497 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002498 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002499}
2500
2501/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002502/* helpers for handling of arbitrary element-like objects */
2503
Stefan Behnel43851a22019-05-01 21:20:38 +02002504/*[clinic input]
2505_elementtree._set_factories
2506
2507 comment_factory: object
2508 pi_factory: object
2509 /
2510
2511Change the factories used to create comments and processing instructions.
2512
2513For internal use only.
2514[clinic start generated code]*/
2515
2516static PyObject *
2517_elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
2518 PyObject *pi_factory)
2519/*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/
2520{
2521 elementtreestate *st = ET_STATE_GLOBAL;
2522 PyObject *old;
2523
2524 if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) {
2525 PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s",
2526 Py_TYPE(comment_factory)->tp_name);
2527 return NULL;
2528 }
2529 if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) {
2530 PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s",
2531 Py_TYPE(pi_factory)->tp_name);
2532 return NULL;
2533 }
2534
2535 old = PyTuple_Pack(2,
2536 st->comment_factory ? st->comment_factory : Py_None,
2537 st->pi_factory ? st->pi_factory : Py_None);
2538
2539 if (comment_factory == Py_None) {
2540 Py_CLEAR(st->comment_factory);
2541 } else {
2542 Py_INCREF(comment_factory);
2543 Py_XSETREF(st->comment_factory, comment_factory);
2544 }
2545 if (pi_factory == Py_None) {
2546 Py_CLEAR(st->pi_factory);
2547 } else {
2548 Py_INCREF(pi_factory);
2549 Py_XSETREF(st->pi_factory, pi_factory);
2550 }
2551
2552 return old;
2553}
2554
/* Append the pending character data in *data to the text or tail of
 * `element`, then reset *data to NULL.
 *
 *   element  object whose text/tail is extended
 *   data     in/out: the pending data (a single object, or a list of
 *            pieces as collected by treebuilder_handle_data)
 *   dest     address of the matching slot inside an ElementObject; it is
 *            only dereferenced when `element` passes Element_CheckExact
 *   name     identifier of the attribute ("text" or "tail" at the call
 *            sites in this file) used by the generic fallback
 *
 * Returns 0 on success and -1 on failure.
 */
static int
treebuilder_extend_element_text_or_tail(PyObject *element, PyObject **data,
                                        PyObject **dest, _Py_Identifier *name)
{
    /* Fast paths for the "almost always" cases. */
    if (Element_CheckExact(element)) {
        /* NOTE(review): the JOIN_* macros are defined outside this chunk;
           they appear to pack a "list still to be joined" flag into the
           slot pointer -- confirm against their definition. */
        PyObject *dest_obj = JOIN_OBJ(*dest);
        if (dest_obj == Py_None) {
            /* Slot holds None: hand the data reference over to the slot
               (flagged when the data is a list) and drop the None. */
            *dest = JOIN_SET(*data, PyList_CheckExact(*data));
            *data = NULL;
            Py_DECREF(dest_obj);
            return 0;
        }
        else if (JOIN_GET(*dest)) {
            /* Slot is flagged: extend the object it holds in place by
               assigning the new data to the empty slice at its end. */
            if (PyList_SetSlice(dest_obj, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, *data) < 0) {
                return -1;
            }
            Py_CLEAR(*data);
            return 0;
        }
        /* otherwise fall through to the generic attribute-based path */
    }

    /* Fallback for the non-Element / non-trivial cases. */
    {
        int r;
        PyObject* joined;
        /* read the current value through normal attribute access */
        PyObject* previous = _PyObject_GetAttrId(element, name);
        if (!previous)
            return -1;
        joined = list_join(*data);
        if (!joined) {
            Py_DECREF(previous);
            return -1;
        }
        if (previous != Py_None) {
            /* concatenate existing value and new data with the + operator */
            PyObject *tmp = PyNumber_Add(previous, joined);
            Py_DECREF(joined);
            Py_DECREF(previous);
            if (!tmp)
                return -1;
            joined = tmp;
        } else {
            Py_DECREF(previous);
        }

        r = _PyObject_SetAttrId(element, name, joined);
        Py_DECREF(joined);
        if (r < 0)
            return -1;
        /* the pending data is only discarded once it has been stored */
        Py_CLEAR(*data);
        return 0;
    }
}
2608
Serhiy Storchaka576def02017-03-30 09:47:31 +03002609LOCAL(int)
2610treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002611{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002612 if (!self->data) {
2613 return 0;
2614 }
2615
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002616 if (!self->last_for_tail) {
2617 PyObject *element = self->last;
Serhiy Storchaka576def02017-03-30 09:47:31 +03002618 _Py_IDENTIFIER(text);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002619 return treebuilder_extend_element_text_or_tail(
Serhiy Storchaka576def02017-03-30 09:47:31 +03002620 element, &self->data,
2621 &((ElementObject *) element)->text, &PyId_text);
2622 }
2623 else {
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002624 PyObject *element = self->last_for_tail;
Serhiy Storchaka576def02017-03-30 09:47:31 +03002625 _Py_IDENTIFIER(tail);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002626 return treebuilder_extend_element_text_or_tail(
Serhiy Storchaka576def02017-03-30 09:47:31 +03002627 element, &self->data,
2628 &((ElementObject *) element)->tail, &PyId_tail);
2629 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002630}
2631
2632static int
2633treebuilder_add_subelement(PyObject *element, PyObject *child)
2634{
2635 _Py_IDENTIFIER(append);
2636 if (Element_CheckExact(element)) {
2637 ElementObject *elem = (ElementObject *) element;
2638 return element_add_subelement(elem, child);
2639 }
2640 else {
2641 PyObject *res;
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02002642 res = _PyObject_CallMethodIdOneArg(element, &PyId_append, child);
Antoine Pitrouee329312012-10-04 19:53:29 +02002643 if (res == NULL)
2644 return -1;
2645 Py_DECREF(res);
2646 return 0;
2647 }
2648}
2649
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002650LOCAL(int)
2651treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2652 PyObject *node)
2653{
2654 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002655 PyObject *res;
2656 PyObject *event = PyTuple_Pack(2, action, node);
2657 if (event == NULL)
2658 return -1;
Petr Viktorinffd97532020-02-11 17:46:57 +01002659 res = PyObject_CallOneArg(self->events_append, event);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002660 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002661 if (res == NULL)
2662 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002663 Py_DECREF(res);
2664 }
2665 return 0;
2666}
2667
Antoine Pitrouee329312012-10-04 19:53:29 +02002668/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002669/* handlers */
2670
2671LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002672treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2673 PyObject* attrib)
2674{
2675 PyObject* node;
2676 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002677 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002678
Serhiy Storchaka576def02017-03-30 09:47:31 +03002679 if (treebuilder_flush_data(self) < 0) {
2680 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002681 }
2682
Stefan Behnel43851a22019-05-01 21:20:38 +02002683 if (!self->element_factory) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002684 node = create_new_element(tag, attrib);
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02002685 } else if (attrib == NULL) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002686 attrib = PyDict_New();
2687 if (!attrib)
2688 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002689 node = PyObject_CallFunctionObjArgs(self->element_factory,
2690 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002691 Py_DECREF(attrib);
2692 }
2693 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002694 node = PyObject_CallFunctionObjArgs(self->element_factory,
2695 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002696 }
2697 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002698 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002699 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002700
Antoine Pitrouee329312012-10-04 19:53:29 +02002701 this = self->this;
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002702 Py_CLEAR(self->last_for_tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002703
2704 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002705 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002706 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002707 } else {
2708 if (self->root) {
2709 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002710 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002711 "multiple elements on top level"
2712 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002713 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002714 }
2715 Py_INCREF(node);
2716 self->root = node;
2717 }
2718
2719 if (self->index < PyList_GET_SIZE(self->stack)) {
2720 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002721 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002722 Py_INCREF(this);
2723 } else {
2724 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002725 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002726 }
2727 self->index++;
2728
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002729 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002730 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002731 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002732 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002733
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002734 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2735 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002736
2737 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002738
2739 error:
2740 Py_DECREF(node);
2741 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002742}
2743
2744LOCAL(PyObject*)
2745treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2746{
2747 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002748 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002749 /* ignore calls to data before the first call to start */
2750 Py_RETURN_NONE;
2751 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002752 /* store the first item as is */
2753 Py_INCREF(data); self->data = data;
2754 } else {
2755 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002756 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2757 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002758 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002759 /* expat often generates single character data sections; handle
2760 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002761 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2762 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002763 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002764 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002765 } else if (PyList_CheckExact(self->data)) {
2766 if (PyList_Append(self->data, data) < 0)
2767 return NULL;
2768 } else {
2769 PyObject* list = PyList_New(2);
2770 if (!list)
2771 return NULL;
2772 PyList_SET_ITEM(list, 0, self->data);
2773 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2774 self->data = list;
2775 }
2776 }
2777
2778 Py_RETURN_NONE;
2779}
2780
2781LOCAL(PyObject*)
2782treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2783{
2784 PyObject* item;
2785
Serhiy Storchaka576def02017-03-30 09:47:31 +03002786 if (treebuilder_flush_data(self) < 0) {
2787 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002788 }
2789
2790 if (self->index == 0) {
2791 PyErr_SetString(
2792 PyExc_IndexError,
2793 "pop from empty stack"
2794 );
2795 return NULL;
2796 }
2797
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002798 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002799 self->last = self->this;
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002800 Py_INCREF(self->last);
2801 Py_XSETREF(self->last_for_tail, self->last);
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002802 self->index--;
2803 self->this = PyList_GET_ITEM(self->stack, self->index);
2804 Py_INCREF(self->this);
2805 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002806
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002807 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2808 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002809
2810 Py_INCREF(self->last);
2811 return (PyObject*) self->last;
2812}
2813
Stefan Behnel43851a22019-05-01 21:20:38 +02002814LOCAL(PyObject*)
2815treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
2816{
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002817 PyObject* comment;
Stefan Behnel43851a22019-05-01 21:20:38 +02002818 PyObject* this;
2819
2820 if (treebuilder_flush_data(self) < 0) {
2821 return NULL;
2822 }
2823
2824 if (self->comment_factory) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002825 comment = PyObject_CallOneArg(self->comment_factory, text);
Stefan Behnel43851a22019-05-01 21:20:38 +02002826 if (!comment)
2827 return NULL;
2828
2829 this = self->this;
2830 if (self->insert_comments && this != Py_None) {
2831 if (treebuilder_add_subelement(this, comment) < 0)
2832 goto error;
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002833 Py_INCREF(comment);
2834 Py_XSETREF(self->last_for_tail, comment);
Stefan Behnel43851a22019-05-01 21:20:38 +02002835 }
2836 } else {
2837 Py_INCREF(text);
2838 comment = text;
2839 }
2840
2841 if (self->events_append && self->comment_event_obj) {
2842 if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0)
2843 goto error;
2844 }
2845
2846 return comment;
2847
2848 error:
2849 Py_DECREF(comment);
2850 return NULL;
2851}
2852
2853LOCAL(PyObject*)
2854treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
2855{
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002856 PyObject* pi;
Stefan Behnel43851a22019-05-01 21:20:38 +02002857 PyObject* this;
2858 PyObject* stack[2] = {target, text};
2859
2860 if (treebuilder_flush_data(self) < 0) {
2861 return NULL;
2862 }
2863
2864 if (self->pi_factory) {
2865 pi = _PyObject_FastCall(self->pi_factory, stack, 2);
2866 if (!pi) {
2867 return NULL;
2868 }
2869
2870 this = self->this;
2871 if (self->insert_pis && this != Py_None) {
2872 if (treebuilder_add_subelement(this, pi) < 0)
2873 goto error;
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002874 Py_INCREF(pi);
2875 Py_XSETREF(self->last_for_tail, pi);
Stefan Behnel43851a22019-05-01 21:20:38 +02002876 }
2877 } else {
2878 pi = PyTuple_Pack(2, target, text);
2879 if (!pi) {
2880 return NULL;
2881 }
2882 }
2883
2884 if (self->events_append && self->pi_event_obj) {
2885 if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0)
2886 goto error;
2887 }
2888
2889 return pi;
2890
2891 error:
2892 Py_DECREF(pi);
2893 return NULL;
2894}
2895
Stefan Behneldde3eeb2019-05-01 21:49:58 +02002896LOCAL(PyObject*)
2897treebuilder_handle_start_ns(TreeBuilderObject* self, PyObject* prefix, PyObject* uri)
2898{
2899 PyObject* parcel;
2900
2901 if (self->events_append && self->start_ns_event_obj) {
2902 parcel = PyTuple_Pack(2, prefix, uri);
2903 if (!parcel) {
2904 return NULL;
2905 }
2906
2907 if (treebuilder_append_event(self, self->start_ns_event_obj, parcel) < 0) {
2908 Py_DECREF(parcel);
2909 return NULL;
2910 }
2911 Py_DECREF(parcel);
2912 }
2913
2914 Py_RETURN_NONE;
2915}
2916
2917LOCAL(PyObject*)
2918treebuilder_handle_end_ns(TreeBuilderObject* self, PyObject* prefix)
2919{
2920 if (self->events_append && self->end_ns_event_obj) {
2921 if (treebuilder_append_event(self, self->end_ns_event_obj, prefix) < 0) {
2922 return NULL;
2923 }
2924 }
2925
2926 Py_RETURN_NONE;
2927}
2928
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002929/* -------------------------------------------------------------------- */
2930/* methods (in alphabetical order) */
2931
Serhiy Storchakacb985562015-05-04 15:32:48 +03002932/*[clinic input]
2933_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002934
Serhiy Storchakacb985562015-05-04 15:32:48 +03002935 data: object
2936 /
2937
2938[clinic start generated code]*/
2939
2940static PyObject *
2941_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2942/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2943{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002944 return treebuilder_handle_data(self, data);
2945}
2946
Serhiy Storchakacb985562015-05-04 15:32:48 +03002947/*[clinic input]
2948_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002949
Serhiy Storchakacb985562015-05-04 15:32:48 +03002950 tag: object
2951 /
2952
2953[clinic start generated code]*/
2954
2955static PyObject *
2956_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2957/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2958{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002959 return treebuilder_handle_end(self, tag);
2960}
2961
Stefan Behnel43851a22019-05-01 21:20:38 +02002962/*[clinic input]
2963_elementtree.TreeBuilder.comment
2964
2965 text: object
2966 /
2967
2968[clinic start generated code]*/
2969
2970static PyObject *
2971_elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text)
2972/*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/
2973{
2974 return treebuilder_handle_comment(self, text);
2975}
2976
2977/*[clinic input]
2978_elementtree.TreeBuilder.pi
2979
2980 target: object
2981 text: object = None
2982 /
2983
2984[clinic start generated code]*/
2985
2986static PyObject *
2987_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target,
2988 PyObject *text)
2989/*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/
2990{
2991 return treebuilder_handle_pi(self, target, text);
2992}
2993
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002994LOCAL(PyObject*)
2995treebuilder_done(TreeBuilderObject* self)
2996{
2997 PyObject* res;
2998
2999 /* FIXME: check stack size? */
3000
3001 if (self->root)
3002 res = self->root;
3003 else
3004 res = Py_None;
3005
3006 Py_INCREF(res);
3007 return res;
3008}
3009
Serhiy Storchakacb985562015-05-04 15:32:48 +03003010/*[clinic input]
3011_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003012
Serhiy Storchakacb985562015-05-04 15:32:48 +03003013[clinic start generated code]*/
3014
3015static PyObject *
3016_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
3017/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
3018{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003019 return treebuilder_done(self);
3020}
3021
Serhiy Storchakacb985562015-05-04 15:32:48 +03003022/*[clinic input]
3023_elementtree.TreeBuilder.start
3024
3025 tag: object
Shantanu4edc95c2020-03-01 22:33:24 -08003026 attrs: object(subclass_of='&PyDict_Type')
Serhiy Storchakacb985562015-05-04 15:32:48 +03003027 /
3028
3029[clinic start generated code]*/
3030
3031static PyObject *
3032_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
3033 PyObject *attrs)
Shantanu4edc95c2020-03-01 22:33:24 -08003034/*[clinic end generated code: output=e7e9dc2861349411 input=7288e9e38e63b2b6]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003035{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003036 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003037}
3038
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003039/* ==================================================================== */
3040/* the expat interface */
3041
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003042#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003043#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07003044
3045/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
3046 * cached globally without being in per-module state.
3047 */
static struct PyExpat_CAPI *expat_capi;
/* EXPAT(func) names the member `func` of the cached dispatch table, so
   EXPAT(ErrorString)(code) calls through expat_capi. */
#define EXPAT(func) (expat_capi->func)

/* Allocator triple built from PyObject_Malloc/Realloc/Free.
   NOTE(review): presumably handed to expat when a parser is created --
   confirm at the call site. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
3053
/* Instance layout of the XMLParser object. */
typedef struct {
    PyObject_HEAD

    XML_Parser parser;          /* expat parser handle */

    PyObject *target;           /* object receiving the parse events; an exact
                                   TreeBuilder is driven through the
                                   treebuilder_handle_* functions directly */
    PyObject *entity;           /* mapping consulted by entity name in
                                   expat_default_handler */

    PyObject *names;            /* cache used by makeuniversal: raw name
                                   (bytes) -> decoded universal name (str) */

    /* Callables invoked for the corresponding parse events.
       NOTE(review): presumably looked up on `target`, and NULL when the
       target does not provide them -- confirm in the parser initializer. */
    PyObject *handle_start_ns;
    PyObject *handle_end_ns;
    PyObject *handle_start;
    PyObject *handle_data;
    PyObject *handle_end;

    PyObject *handle_comment;
    PyObject *handle_pi;
    PyObject *handle_doctype;

    PyObject *handle_close;

} XMLParserObject;
3077
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003078/* helpers */
3079
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003080LOCAL(PyObject*)
3081makeuniversal(XMLParserObject* self, const char* string)
3082{
3083 /* convert a UTF-8 tag/attribute name from the expat parser
3084 to a universal name string */
3085
Antoine Pitrouc1948842012-10-01 23:40:37 +02003086 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003087 PyObject* key;
3088 PyObject* value;
3089
3090 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00003091 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003092 if (!key)
3093 return NULL;
3094
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003095 value = PyDict_GetItemWithError(self->names, key);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003096
3097 if (value) {
3098 Py_INCREF(value);
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003099 }
3100 else if (!PyErr_Occurred()) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003101 /* new name. convert to universal name, and decode as
3102 necessary */
3103
3104 PyObject* tag;
3105 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02003106 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003107
3108 /* look for namespace separator */
3109 for (i = 0; i < size; i++)
3110 if (string[i] == '}')
3111 break;
3112 if (i != size) {
3113 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00003114 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02003115 if (tag == NULL) {
3116 Py_DECREF(key);
3117 return NULL;
3118 }
Christian Heimes72b710a2008-05-26 13:28:38 +00003119 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003120 p[0] = '{';
3121 memcpy(p+1, string, size);
3122 size++;
3123 } else {
3124 /* plain name; use key as tag */
3125 Py_INCREF(key);
3126 tag = key;
3127 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003128
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003129 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00003130 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00003131 value = PyUnicode_DecodeUTF8(p, size, "strict");
3132 Py_DECREF(tag);
3133 if (!value) {
3134 Py_DECREF(key);
3135 return NULL;
3136 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003137
3138 /* add to names dictionary */
3139 if (PyDict_SetItem(self->names, key, value) < 0) {
3140 Py_DECREF(key);
3141 Py_DECREF(value);
3142 return NULL;
3143 }
3144 }
3145
3146 Py_DECREF(key);
3147 return value;
3148}
3149
Eli Bendersky5b77d812012-03-16 08:20:05 +02003150/* Set the ParseError exception with the given parameters.
3151 * If message is not NULL, it's used as the error string. Otherwise, the
3152 * message string is the default for the given error_code.
3153*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003154static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003155expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
3156 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003157{
Eli Bendersky5b77d812012-03-16 08:20:05 +02003158 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003159 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003160
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003161 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02003162 message ? message : EXPAT(ErrorString)(error_code),
3163 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01003164 if (errmsg == NULL)
3165 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003166
Petr Viktorinffd97532020-02-11 17:46:57 +01003167 error = PyObject_CallOneArg(st->parseerror_obj, errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01003168 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003169 if (!error)
3170 return;
3171
Eli Bendersky5b77d812012-03-16 08:20:05 +02003172 /* Add code and position attributes */
3173 code = PyLong_FromLong((long)error_code);
3174 if (!code) {
3175 Py_DECREF(error);
3176 return;
3177 }
3178 if (PyObject_SetAttrString(error, "code", code) == -1) {
3179 Py_DECREF(error);
3180 Py_DECREF(code);
3181 return;
3182 }
3183 Py_DECREF(code);
3184
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003185 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003186 if (!position) {
3187 Py_DECREF(error);
3188 return;
3189 }
3190 if (PyObject_SetAttrString(error, "position", position) == -1) {
3191 Py_DECREF(error);
3192 Py_DECREF(position);
3193 return;
3194 }
3195 Py_DECREF(position);
3196
Eli Bendersky532d03e2013-08-10 08:00:39 -07003197 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003198 Py_DECREF(error);
3199}
3200
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003201/* -------------------------------------------------------------------- */
3202/* handlers */
3203
3204static void
3205expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
3206 int data_len)
3207{
3208 PyObject* key;
3209 PyObject* value;
3210 PyObject* res;
3211
3212 if (data_len < 2 || data_in[0] != '&')
3213 return;
3214
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003215 if (PyErr_Occurred())
3216 return;
3217
Neal Norwitz0269b912007-08-08 06:56:02 +00003218 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003219 if (!key)
3220 return;
3221
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003222 value = PyDict_GetItemWithError(self->entity, key);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003223
3224 if (value) {
3225 if (TreeBuilder_CheckExact(self->target))
3226 res = treebuilder_handle_data(
3227 (TreeBuilderObject*) self->target, value
3228 );
3229 else if (self->handle_data)
Petr Viktorinffd97532020-02-11 17:46:57 +01003230 res = PyObject_CallOneArg(self->handle_data, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003231 else
3232 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003233 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003234 } else if (!PyErr_Occurred()) {
3235 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00003236 char message[128] = "undefined entity ";
3237 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003238 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003239 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003240 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003241 EXPAT(GetErrorColumnNumber)(self->parser),
3242 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003243 );
3244 }
3245
3246 Py_DECREF(key);
3247}
3248
3249static void
3250expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
3251 const XML_Char **attrib_in)
3252{
3253 PyObject* res;
3254 PyObject* tag;
3255 PyObject* attrib;
3256 int ok;
3257
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003258 if (PyErr_Occurred())
3259 return;
3260
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003261 /* tag name */
3262 tag = makeuniversal(self, tag_in);
3263 if (!tag)
3264 return; /* parser will look for errors */
3265
3266 /* attributes */
3267 if (attrib_in[0]) {
3268 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003269 if (!attrib) {
3270 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003271 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003272 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003273 while (attrib_in[0] && attrib_in[1]) {
3274 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00003275 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003276 if (!key || !value) {
3277 Py_XDECREF(value);
3278 Py_XDECREF(key);
3279 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003280 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003281 return;
3282 }
3283 ok = PyDict_SetItem(attrib, key, value);
3284 Py_DECREF(value);
3285 Py_DECREF(key);
3286 if (ok < 0) {
3287 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003288 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003289 return;
3290 }
3291 attrib_in += 2;
3292 }
3293 } else {
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02003294 attrib = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03003295 }
3296
3297 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003298 /* shortcut */
3299 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3300 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003301 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003302 else if (self->handle_start) {
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02003303 if (attrib == NULL) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003304 attrib = PyDict_New();
3305 if (!attrib) {
3306 Py_DECREF(tag);
3307 return;
3308 }
3309 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003310 res = PyObject_CallFunctionObjArgs(self->handle_start,
3311 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003312 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003313 res = NULL;
3314
3315 Py_DECREF(tag);
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02003316 Py_XDECREF(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003317
3318 Py_XDECREF(res);
3319}
3320
3321static void
3322expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3323 int data_len)
3324{
3325 PyObject* data;
3326 PyObject* res;
3327
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003328 if (PyErr_Occurred())
3329 return;
3330
Neal Norwitz0269b912007-08-08 06:56:02 +00003331 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003332 if (!data)
3333 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003334
3335 if (TreeBuilder_CheckExact(self->target))
3336 /* shortcut */
3337 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3338 else if (self->handle_data)
Petr Viktorinffd97532020-02-11 17:46:57 +01003339 res = PyObject_CallOneArg(self->handle_data, data);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003340 else
3341 res = NULL;
3342
3343 Py_DECREF(data);
3344
3345 Py_XDECREF(res);
3346}
3347
3348static void
3349expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3350{
3351 PyObject* tag;
3352 PyObject* res = NULL;
3353
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003354 if (PyErr_Occurred())
3355 return;
3356
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003357 if (TreeBuilder_CheckExact(self->target))
3358 /* shortcut */
3359 /* the standard tree builder doesn't look at the end tag */
3360 res = treebuilder_handle_end(
3361 (TreeBuilderObject*) self->target, Py_None
3362 );
3363 else if (self->handle_end) {
3364 tag = makeuniversal(self, tag_in);
3365 if (tag) {
Petr Viktorinffd97532020-02-11 17:46:57 +01003366 res = PyObject_CallOneArg(self->handle_end, tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003367 Py_DECREF(tag);
3368 }
3369 }
3370
3371 Py_XDECREF(res);
3372}
3373
3374static void
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003375expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix_in,
3376 const XML_Char *uri_in)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003377{
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003378 PyObject* res = NULL;
3379 PyObject* uri;
3380 PyObject* prefix;
3381 PyObject* stack[2];
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003382
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003383 if (PyErr_Occurred())
3384 return;
3385
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003386 if (!uri_in)
3387 uri_in = "";
3388 if (!prefix_in)
3389 prefix_in = "";
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003390
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003391 if (TreeBuilder_CheckExact(self->target)) {
3392 /* shortcut - TreeBuilder does not actually implement .start_ns() */
3393 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003394
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003395 if (target->events_append && target->start_ns_event_obj) {
3396 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3397 if (!prefix)
3398 return;
3399 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3400 if (!uri) {
3401 Py_DECREF(prefix);
3402 return;
3403 }
3404
3405 res = treebuilder_handle_start_ns(target, prefix, uri);
3406 Py_DECREF(uri);
3407 Py_DECREF(prefix);
3408 }
3409 } else if (self->handle_start_ns) {
3410 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3411 if (!prefix)
3412 return;
3413 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3414 if (!uri) {
3415 Py_DECREF(prefix);
3416 return;
3417 }
3418
3419 stack[0] = prefix;
3420 stack[1] = uri;
3421 res = _PyObject_FastCall(self->handle_start_ns, stack, 2);
3422 Py_DECREF(uri);
3423 Py_DECREF(prefix);
3424 }
3425
3426 Py_XDECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003427}
3428
3429static void
3430expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3431{
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003432 PyObject *res = NULL;
3433 PyObject* prefix;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003434
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003435 if (PyErr_Occurred())
3436 return;
3437
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003438 if (!prefix_in)
3439 prefix_in = "";
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003440
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003441 if (TreeBuilder_CheckExact(self->target)) {
3442 /* shortcut - TreeBuilder does not actually implement .end_ns() */
3443 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3444
3445 if (target->events_append && target->end_ns_event_obj) {
3446 res = treebuilder_handle_end_ns(target, Py_None);
3447 }
3448 } else if (self->handle_end_ns) {
3449 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3450 if (!prefix)
3451 return;
3452
Petr Viktorinffd97532020-02-11 17:46:57 +01003453 res = PyObject_CallOneArg(self->handle_end_ns, prefix);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003454 Py_DECREF(prefix);
3455 }
3456
3457 Py_XDECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003458}
3459
3460static void
3461expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3462{
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02003463 PyObject* comment;
3464 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003465
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003466 if (PyErr_Occurred())
3467 return;
3468
Stefan Behnel43851a22019-05-01 21:20:38 +02003469 if (TreeBuilder_CheckExact(self->target)) {
3470 /* shortcut */
3471 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3472
Neal Norwitz0269b912007-08-08 06:56:02 +00003473 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Stefan Behnel43851a22019-05-01 21:20:38 +02003474 if (!comment)
3475 return; /* parser will look for errors */
3476
3477 res = treebuilder_handle_comment(target, comment);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02003478 Py_XDECREF(res);
3479 Py_DECREF(comment);
Stefan Behnel43851a22019-05-01 21:20:38 +02003480 } else if (self->handle_comment) {
3481 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3482 if (!comment)
3483 return;
3484
Petr Viktorinffd97532020-02-11 17:46:57 +01003485 res = PyObject_CallOneArg(self->handle_comment, comment);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02003486 Py_XDECREF(res);
3487 Py_DECREF(comment);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003488 }
3489}
3490
Eli Bendersky45839902013-01-13 05:14:47 -08003491static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003492expat_start_doctype_handler(XMLParserObject *self,
3493 const XML_Char *doctype_name,
3494 const XML_Char *sysid,
3495 const XML_Char *pubid,
3496 int has_internal_subset)
3497{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003498 _Py_IDENTIFIER(doctype);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003499 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003500 PyObject *res;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003501
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003502 if (PyErr_Occurred())
3503 return;
3504
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003505 doctype_name_obj = makeuniversal(self, doctype_name);
3506 if (!doctype_name_obj)
3507 return;
3508
3509 if (sysid) {
3510 sysid_obj = makeuniversal(self, sysid);
3511 if (!sysid_obj) {
3512 Py_DECREF(doctype_name_obj);
3513 return;
3514 }
3515 } else {
3516 Py_INCREF(Py_None);
3517 sysid_obj = Py_None;
3518 }
3519
3520 if (pubid) {
3521 pubid_obj = makeuniversal(self, pubid);
3522 if (!pubid_obj) {
3523 Py_DECREF(doctype_name_obj);
3524 Py_DECREF(sysid_obj);
3525 return;
3526 }
3527 } else {
3528 Py_INCREF(Py_None);
3529 pubid_obj = Py_None;
3530 }
3531
3532 /* If the target has a handler for doctype, call it. */
3533 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003534 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3535 doctype_name_obj, pubid_obj,
3536 sysid_obj, NULL);
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003537 Py_XDECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003538 }
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003539 else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3540 (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3541 "The doctype() method of XMLParser is ignored. "
3542 "Define doctype() method on the TreeBuilder target.",
3543 1);
Serhiy Storchakaee98e7b2018-07-25 14:52:45 +03003544 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003545 }
3546
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003547 Py_DECREF(doctype_name_obj);
3548 Py_DECREF(pubid_obj);
3549 Py_DECREF(sysid_obj);
3550}
3551
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003552static void
3553expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3554 const XML_Char* data_in)
3555{
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02003556 PyObject* pi_target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003557 PyObject* data;
3558 PyObject* res;
Stefan Behnel43851a22019-05-01 21:20:38 +02003559 PyObject* stack[2];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003560
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003561 if (PyErr_Occurred())
3562 return;
3563
Stefan Behnel43851a22019-05-01 21:20:38 +02003564 if (TreeBuilder_CheckExact(self->target)) {
3565 /* shortcut */
3566 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3567
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02003568 if ((target->events_append && target->pi_event_obj) || target->insert_pis) {
Stefan Behnel43851a22019-05-01 21:20:38 +02003569 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3570 if (!pi_target)
3571 goto error;
3572 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3573 if (!data)
3574 goto error;
3575 res = treebuilder_handle_pi(target, pi_target, data);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003576 Py_XDECREF(res);
3577 Py_DECREF(data);
Stefan Behnel43851a22019-05-01 21:20:38 +02003578 Py_DECREF(pi_target);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003579 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003580 } else if (self->handle_pi) {
3581 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3582 if (!pi_target)
3583 goto error;
3584 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3585 if (!data)
3586 goto error;
3587
3588 stack[0] = pi_target;
3589 stack[1] = data;
3590 res = _PyObject_FastCall(self->handle_pi, stack, 2);
3591 Py_XDECREF(res);
3592 Py_DECREF(data);
3593 Py_DECREF(pi_target);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003594 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003595
3596 return;
3597
3598 error:
3599 Py_XDECREF(pi_target);
3600 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003601}
3602
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003603/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003604
Eli Bendersky52467b12012-06-01 07:13:08 +03003605static PyObject *
3606xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003607{
Eli Bendersky52467b12012-06-01 07:13:08 +03003608 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3609 if (self) {
3610 self->parser = NULL;
3611 self->target = self->entity = self->names = NULL;
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003612 self->handle_start_ns = self->handle_end_ns = NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003613 self->handle_start = self->handle_data = self->handle_end = NULL;
3614 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003615 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003616 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003617 return (PyObject *)self;
3618}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003619
scoderc8d8e152017-09-14 22:00:03 +02003620static int
3621ignore_attribute_error(PyObject *value)
3622{
3623 if (value == NULL) {
3624 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3625 return -1;
3626 }
3627 PyErr_Clear();
3628 }
3629 return 0;
3630}
3631
Serhiy Storchakacb985562015-05-04 15:32:48 +03003632/*[clinic input]
3633_elementtree.XMLParser.__init__
3634
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003635 *
Serhiy Storchakacb985562015-05-04 15:32:48 +03003636 target: object = NULL
Serhiy Storchaka279f4462019-09-14 12:24:05 +03003637 encoding: str(accept={str, NoneType}) = None
Serhiy Storchakacb985562015-05-04 15:32:48 +03003638
3639[clinic start generated code]*/
3640
Eli Bendersky52467b12012-06-01 07:13:08 +03003641static int
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003642_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3643 const char *encoding)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03003644/*[clinic end generated code: output=3ae45ec6cdf344e4 input=53e35a829ae043e8]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003645{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003646 self->entity = PyDict_New();
3647 if (!self->entity)
3648 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003649
Serhiy Storchakacb985562015-05-04 15:32:48 +03003650 self->names = PyDict_New();
3651 if (!self->names) {
3652 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003653 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003654 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003655
Serhiy Storchakacb985562015-05-04 15:32:48 +03003656 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3657 if (!self->parser) {
3658 Py_CLEAR(self->entity);
3659 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003660 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003661 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003662 }
Christian Heimescb5778f2018-09-18 14:38:58 +02003663 /* expat < 2.1.0 has no XML_SetHashSalt() */
3664 if (EXPAT(SetHashSalt) != NULL) {
3665 EXPAT(SetHashSalt)(self->parser,
3666 (unsigned long)_Py_HashSecret.expat.hashsalt);
3667 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003668
Eli Bendersky52467b12012-06-01 07:13:08 +03003669 if (target) {
3670 Py_INCREF(target);
3671 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003672 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003673 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003674 Py_CLEAR(self->entity);
3675 Py_CLEAR(self->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003676 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003677 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003678 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003679 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003680
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003681 self->handle_start_ns = PyObject_GetAttrString(target, "start_ns");
3682 if (ignore_attribute_error(self->handle_start_ns)) {
3683 return -1;
3684 }
3685 self->handle_end_ns = PyObject_GetAttrString(target, "end_ns");
3686 if (ignore_attribute_error(self->handle_end_ns)) {
3687 return -1;
3688 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003689 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003690 if (ignore_attribute_error(self->handle_start)) {
3691 return -1;
3692 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003693 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003694 if (ignore_attribute_error(self->handle_data)) {
3695 return -1;
3696 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003697 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003698 if (ignore_attribute_error(self->handle_end)) {
3699 return -1;
3700 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003701 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003702 if (ignore_attribute_error(self->handle_comment)) {
3703 return -1;
3704 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003705 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003706 if (ignore_attribute_error(self->handle_pi)) {
3707 return -1;
3708 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003709 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003710 if (ignore_attribute_error(self->handle_close)) {
3711 return -1;
3712 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003713 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003714 if (ignore_attribute_error(self->handle_doctype)) {
3715 return -1;
3716 }
Eli Bendersky45839902013-01-13 05:14:47 -08003717
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003718 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003719 EXPAT(SetUserData)(self->parser, self);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003720 if (self->handle_start_ns || self->handle_end_ns)
3721 EXPAT(SetNamespaceDeclHandler)(
3722 self->parser,
3723 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3724 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3725 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003726 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003727 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003728 (XML_StartElementHandler) expat_start_handler,
3729 (XML_EndElementHandler) expat_end_handler
3730 );
3731 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003732 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003733 (XML_DefaultHandler) expat_default_handler
3734 );
3735 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003736 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003737 (XML_CharacterDataHandler) expat_data_handler
3738 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003739 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003740 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003741 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003742 (XML_CommentHandler) expat_comment_handler
3743 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003744 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003745 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003746 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003747 (XML_ProcessingInstructionHandler) expat_pi_handler
3748 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003749 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003750 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003751 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3752 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003753 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003754 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003755 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003756 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003757
Eli Bendersky52467b12012-06-01 07:13:08 +03003758 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003759}
3760
Eli Bendersky52467b12012-06-01 07:13:08 +03003761static int
3762xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3763{
3764 Py_VISIT(self->handle_close);
3765 Py_VISIT(self->handle_pi);
3766 Py_VISIT(self->handle_comment);
3767 Py_VISIT(self->handle_end);
3768 Py_VISIT(self->handle_data);
3769 Py_VISIT(self->handle_start);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003770 Py_VISIT(self->handle_start_ns);
3771 Py_VISIT(self->handle_end_ns);
3772 Py_VISIT(self->handle_doctype);
Eli Bendersky52467b12012-06-01 07:13:08 +03003773
3774 Py_VISIT(self->target);
3775 Py_VISIT(self->entity);
3776 Py_VISIT(self->names);
3777
3778 return 0;
3779}
3780
3781static int
3782xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003783{
Victor Stinnere727d412017-09-18 05:29:37 -07003784 if (self->parser != NULL) {
3785 XML_Parser parser = self->parser;
3786 self->parser = NULL;
3787 EXPAT(ParserFree)(parser);
3788 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003789
Antoine Pitrouc1948842012-10-01 23:40:37 +02003790 Py_CLEAR(self->handle_close);
3791 Py_CLEAR(self->handle_pi);
3792 Py_CLEAR(self->handle_comment);
3793 Py_CLEAR(self->handle_end);
3794 Py_CLEAR(self->handle_data);
3795 Py_CLEAR(self->handle_start);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003796 Py_CLEAR(self->handle_start_ns);
3797 Py_CLEAR(self->handle_end_ns);
Antoine Pitrouc1948842012-10-01 23:40:37 +02003798 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003799
Antoine Pitrouc1948842012-10-01 23:40:37 +02003800 Py_CLEAR(self->target);
3801 Py_CLEAR(self->entity);
3802 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003803
Eli Bendersky52467b12012-06-01 07:13:08 +03003804 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003805}
3806
Eli Bendersky52467b12012-06-01 07:13:08 +03003807static void
3808xmlparser_dealloc(XMLParserObject* self)
3809{
3810 PyObject_GC_UnTrack(self);
3811 xmlparser_gc_clear(self);
3812 Py_TYPE(self)->tp_free((PyObject *)self);
3813}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003814
3815LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003816expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003817{
3818 int ok;
3819
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003820 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003821 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3822
3823 if (PyErr_Occurred())
3824 return NULL;
3825
3826 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003827 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003828 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003829 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003830 EXPAT(GetErrorColumnNumber)(self->parser),
3831 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003832 );
3833 return NULL;
3834 }
3835
3836 Py_RETURN_NONE;
3837}
3838
Serhiy Storchakacb985562015-05-04 15:32:48 +03003839/*[clinic input]
3840_elementtree.XMLParser.close
3841
3842[clinic start generated code]*/
3843
3844static PyObject *
3845_elementtree_XMLParser_close_impl(XMLParserObject *self)
3846/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003847{
3848 /* end feeding data to parser */
3849
3850 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003851 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003852 if (!res)
3853 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003854
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003855 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003856 Py_DECREF(res);
3857 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003858 }
3859 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003860 Py_DECREF(res);
Victor Stinner2ff58a22019-06-17 14:27:23 +02003861 return PyObject_CallNoArgs(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003862 }
3863 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003864 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003865 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003866}
3867
Serhiy Storchakacb985562015-05-04 15:32:48 +03003868/*[clinic input]
3869_elementtree.XMLParser.feed
3870
3871 data: object
3872 /
3873
3874[clinic start generated code]*/
3875
3876static PyObject *
3877_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3878/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003879{
3880 /* feed data to parser */
3881
Serhiy Storchakacb985562015-05-04 15:32:48 +03003882 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003883 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003884 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3885 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003886 return NULL;
3887 if (data_len > INT_MAX) {
3888 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3889 return NULL;
3890 }
3891 /* Explicitly set UTF-8 encoding. Return code ignored. */
3892 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003893 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003894 }
3895 else {
3896 Py_buffer view;
3897 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003898 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003899 return NULL;
3900 if (view.len > INT_MAX) {
3901 PyBuffer_Release(&view);
3902 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3903 return NULL;
3904 }
3905 res = expat_parse(self, view.buf, (int)view.len, 0);
3906 PyBuffer_Release(&view);
3907 return res;
3908 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003909}
3910
Serhiy Storchakacb985562015-05-04 15:32:48 +03003911/*[clinic input]
3912_elementtree.XMLParser._parse_whole
3913
3914 file: object
3915 /
3916
3917[clinic start generated code]*/
3918
3919static PyObject *
3920_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3921/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003922{
Eli Benderskya3699232013-05-19 18:47:23 -07003923 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003924 PyObject* reader;
3925 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003926 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003927 PyObject* res;
3928
Serhiy Storchakacb985562015-05-04 15:32:48 +03003929 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003930 if (!reader)
3931 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003932
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003933 /* read from open file object */
3934 for (;;) {
3935
3936 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3937
3938 if (!buffer) {
3939 /* read failed (e.g. due to KeyboardInterrupt) */
3940 Py_DECREF(reader);
3941 return NULL;
3942 }
3943
Eli Benderskyf996e772012-03-16 05:53:30 +02003944 if (PyUnicode_CheckExact(buffer)) {
3945 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003946 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003947 Py_DECREF(buffer);
3948 break;
3949 }
3950 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003951 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003952 if (!temp) {
3953 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003954 Py_DECREF(reader);
3955 return NULL;
3956 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003957 buffer = temp;
3958 }
3959 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003960 Py_DECREF(buffer);
3961 break;
3962 }
3963
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003964 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3965 Py_DECREF(buffer);
3966 Py_DECREF(reader);
3967 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3968 return NULL;
3969 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003970 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003971 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003972 );
3973
3974 Py_DECREF(buffer);
3975
3976 if (!res) {
3977 Py_DECREF(reader);
3978 return NULL;
3979 }
3980 Py_DECREF(res);
3981
3982 }
3983
3984 Py_DECREF(reader);
3985
3986 res = expat_parse(self, "", 0, 1);
3987
3988 if (res && TreeBuilder_CheckExact(self->target)) {
3989 Py_DECREF(res);
3990 return treebuilder_done((TreeBuilderObject*) self->target);
3991 }
3992
3993 return res;
3994}
3995
Serhiy Storchakacb985562015-05-04 15:32:48 +03003996/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03003997_elementtree.XMLParser._setevents
3998
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003999 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03004000 events_to_report: object = None
4001 /
4002
4003[clinic start generated code]*/
4004
4005static PyObject *
4006_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
4007 PyObject *events_queue,
4008 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004009/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004010{
4011 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004012 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004013 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004014 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004015
4016 if (!TreeBuilder_CheckExact(self->target)) {
4017 PyErr_SetString(
4018 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004019 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004020 "targets"
4021 );
4022 return NULL;
4023 }
4024
4025 target = (TreeBuilderObject*) self->target;
4026
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004027 events_append = PyObject_GetAttrString(events_queue, "append");
4028 if (events_append == NULL)
4029 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03004030 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004031
4032 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02004033 Py_CLEAR(target->start_event_obj);
4034 Py_CLEAR(target->end_event_obj);
4035 Py_CLEAR(target->start_ns_event_obj);
4036 Py_CLEAR(target->end_ns_event_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +02004037 Py_CLEAR(target->comment_event_obj);
4038 Py_CLEAR(target->pi_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004039
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004040 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004041 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004042 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004043 Py_RETURN_NONE;
4044 }
4045
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004046 if (!(events_seq = PySequence_Fast(events_to_report,
4047 "events must be a sequence"))) {
4048 return NULL;
4049 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004050
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03004051 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004052 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02004053 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004054 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004055 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004056 } else if (PyBytes_Check(event_name_obj)) {
4057 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004058 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004059 if (event_name == NULL) {
4060 Py_DECREF(events_seq);
4061 PyErr_Format(PyExc_ValueError, "invalid events sequence");
4062 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004063 }
4064
4065 Py_INCREF(event_name_obj);
4066 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004067 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004068 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004069 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004070 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004071 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004072 EXPAT(SetNamespaceDeclHandler)(
4073 self->parser,
4074 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4075 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4076 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004077 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004078 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004079 EXPAT(SetNamespaceDeclHandler)(
4080 self->parser,
4081 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4082 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4083 );
Stefan Behnel43851a22019-05-01 21:20:38 +02004084 } else if (strcmp(event_name, "comment") == 0) {
4085 Py_XSETREF(target->comment_event_obj, event_name_obj);
4086 EXPAT(SetCommentHandler)(
4087 self->parser,
4088 (XML_CommentHandler) expat_comment_handler
4089 );
4090 } else if (strcmp(event_name, "pi") == 0) {
4091 Py_XSETREF(target->pi_event_obj, event_name_obj);
4092 EXPAT(SetProcessingInstructionHandler)(
4093 self->parser,
4094 (XML_ProcessingInstructionHandler) expat_pi_handler
4095 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004096 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004097 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004098 Py_DECREF(events_seq);
4099 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004100 return NULL;
4101 }
4102 }
4103
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004104 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004105 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004106}
4107
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004108static PyMemberDef xmlparser_members[] = {
4109 {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
4110 {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
4111 {NULL}
4112};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004113
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004114static PyObject*
4115xmlparser_version_getter(XMLParserObject *self, void *closure)
4116{
4117 return PyUnicode_FromFormat(
4118 "Expat %d.%d.%d", XML_MAJOR_VERSION,
4119 XML_MINOR_VERSION, XML_MICRO_VERSION);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004120}
4121
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004122static PyGetSetDef xmlparser_getsetlist[] = {
4123 {"version", (getter)xmlparser_version_getter, NULL, NULL},
4124 {NULL},
4125};
4126
Serhiy Storchakacb985562015-05-04 15:32:48 +03004127#include "clinic/_elementtree.c.h"
4128
4129static PyMethodDef element_methods[] = {
4130
4131 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
4132
4133 _ELEMENTTREE_ELEMENT_GET_METHODDEF
4134 _ELEMENTTREE_ELEMENT_SET_METHODDEF
4135
4136 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
4137 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
4138 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
4139
4140 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
4141 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
4142 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
4143 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
4144
4145 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
4146 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
4147 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
4148
Serhiy Storchakacb985562015-05-04 15:32:48 +03004149 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
4150 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
4151
4152 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
4153
4154 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
4155 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
4156 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
4157 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
4158 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
4159
4160 {NULL, NULL}
4161};
4162
4163static PyMappingMethods element_as_mapping = {
4164 (lenfunc) element_length,
4165 (binaryfunc) element_subscr,
4166 (objobjargproc) element_ass_subscr,
4167};
4168
Serhiy Storchakadde08152015-11-25 15:28:13 +02004169static PyGetSetDef element_getsetlist[] = {
4170 {"tag",
4171 (getter)element_tag_getter,
4172 (setter)element_tag_setter,
4173 "A string identifying what kind of data this element represents"},
4174 {"text",
4175 (getter)element_text_getter,
4176 (setter)element_text_setter,
4177 "A string of text directly after the start tag, or None"},
4178 {"tail",
4179 (getter)element_tail_getter,
4180 (setter)element_tail_setter,
4181 "A string of text directly after the end tag, or None"},
4182 {"attrib",
4183 (getter)element_attrib_getter,
4184 (setter)element_attrib_setter,
4185 "A dictionary containing the element's attributes"},
4186 {NULL},
4187};
4188
Serhiy Storchakacb985562015-05-04 15:32:48 +03004189static PyTypeObject Element_Type = {
4190 PyVarObject_HEAD_INIT(NULL, 0)
4191 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
4192 /* methods */
4193 (destructor)element_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004194 0, /* tp_vectorcall_offset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004195 0, /* tp_getattr */
4196 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004197 0, /* tp_as_async */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004198 (reprfunc)element_repr, /* tp_repr */
4199 0, /* tp_as_number */
4200 &element_as_sequence, /* tp_as_sequence */
4201 &element_as_mapping, /* tp_as_mapping */
4202 0, /* tp_hash */
4203 0, /* tp_call */
4204 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02004205 PyObject_GenericGetAttr, /* tp_getattro */
4206 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004207 0, /* tp_as_buffer */
4208 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4209 /* tp_flags */
4210 0, /* tp_doc */
4211 (traverseproc)element_gc_traverse, /* tp_traverse */
4212 (inquiry)element_gc_clear, /* tp_clear */
4213 0, /* tp_richcompare */
4214 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
4215 0, /* tp_iter */
4216 0, /* tp_iternext */
4217 element_methods, /* tp_methods */
4218 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02004219 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004220 0, /* tp_base */
4221 0, /* tp_dict */
4222 0, /* tp_descr_get */
4223 0, /* tp_descr_set */
4224 0, /* tp_dictoffset */
4225 (initproc)element_init, /* tp_init */
4226 PyType_GenericAlloc, /* tp_alloc */
4227 element_new, /* tp_new */
4228 0, /* tp_free */
4229};
4230
4231static PyMethodDef treebuilder_methods[] = {
4232 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
4233 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
4234 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
Stefan Behnel43851a22019-05-01 21:20:38 +02004235 _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF
4236 _ELEMENTTREE_TREEBUILDER_PI_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004237 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
4238 {NULL, NULL}
4239};
4240
4241static PyTypeObject TreeBuilder_Type = {
4242 PyVarObject_HEAD_INIT(NULL, 0)
4243 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
4244 /* methods */
4245 (destructor)treebuilder_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004246 0, /* tp_vectorcall_offset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004247 0, /* tp_getattr */
4248 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004249 0, /* tp_as_async */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004250 0, /* tp_repr */
4251 0, /* tp_as_number */
4252 0, /* tp_as_sequence */
4253 0, /* tp_as_mapping */
4254 0, /* tp_hash */
4255 0, /* tp_call */
4256 0, /* tp_str */
4257 0, /* tp_getattro */
4258 0, /* tp_setattro */
4259 0, /* tp_as_buffer */
4260 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4261 /* tp_flags */
4262 0, /* tp_doc */
4263 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
4264 (inquiry)treebuilder_gc_clear, /* tp_clear */
4265 0, /* tp_richcompare */
4266 0, /* tp_weaklistoffset */
4267 0, /* tp_iter */
4268 0, /* tp_iternext */
4269 treebuilder_methods, /* tp_methods */
4270 0, /* tp_members */
4271 0, /* tp_getset */
4272 0, /* tp_base */
4273 0, /* tp_dict */
4274 0, /* tp_descr_get */
4275 0, /* tp_descr_set */
4276 0, /* tp_dictoffset */
4277 _elementtree_TreeBuilder___init__, /* tp_init */
4278 PyType_GenericAlloc, /* tp_alloc */
4279 treebuilder_new, /* tp_new */
4280 0, /* tp_free */
4281};
4282
4283static PyMethodDef xmlparser_methods[] = {
4284 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
4285 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
4286 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
4287 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004288 {NULL, NULL}
4289};
4290
Neal Norwitz227b5332006-03-22 09:28:35 +00004291static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004292 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08004293 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004294 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03004295 (destructor)xmlparser_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004296 0, /* tp_vectorcall_offset */
Eli Bendersky52467b12012-06-01 07:13:08 +03004297 0, /* tp_getattr */
4298 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004299 0, /* tp_as_async */
Eli Bendersky52467b12012-06-01 07:13:08 +03004300 0, /* tp_repr */
4301 0, /* tp_as_number */
4302 0, /* tp_as_sequence */
4303 0, /* tp_as_mapping */
4304 0, /* tp_hash */
4305 0, /* tp_call */
4306 0, /* tp_str */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004307 0, /* tp_getattro */
Eli Bendersky52467b12012-06-01 07:13:08 +03004308 0, /* tp_setattro */
4309 0, /* tp_as_buffer */
4310 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4311 /* tp_flags */
4312 0, /* tp_doc */
4313 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
4314 (inquiry)xmlparser_gc_clear, /* tp_clear */
4315 0, /* tp_richcompare */
4316 0, /* tp_weaklistoffset */
4317 0, /* tp_iter */
4318 0, /* tp_iternext */
4319 xmlparser_methods, /* tp_methods */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004320 xmlparser_members, /* tp_members */
4321 xmlparser_getsetlist, /* tp_getset */
Eli Bendersky52467b12012-06-01 07:13:08 +03004322 0, /* tp_base */
4323 0, /* tp_dict */
4324 0, /* tp_descr_get */
4325 0, /* tp_descr_set */
4326 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004327 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03004328 PyType_GenericAlloc, /* tp_alloc */
4329 xmlparser_new, /* tp_new */
4330 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004331};
4332
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004333/* ==================================================================== */
4334/* python module interface */
4335
4336static PyMethodDef _functions[] = {
Serhiy Storchaka62be7422018-11-27 13:27:31 +02004337 {"SubElement", (PyCFunction)(void(*)(void)) subelement, METH_VARARGS | METH_KEYWORDS},
Stefan Behnel43851a22019-05-01 21:20:38 +02004338 _ELEMENTTREE__SET_FACTORIES_METHODDEF
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004339 {NULL, NULL}
4340};
4341
Martin v. Löwis1a214512008-06-11 05:26:20 +00004342
Eli Bendersky532d03e2013-08-10 08:00:39 -07004343static struct PyModuleDef elementtreemodule = {
4344 PyModuleDef_HEAD_INIT,
4345 "_elementtree",
4346 NULL,
4347 sizeof(elementtreestate),
4348 _functions,
4349 NULL,
4350 elementtree_traverse,
4351 elementtree_clear,
4352 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00004353};
4354
Neal Norwitzf6657e62006-12-28 04:47:50 +00004355PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00004356PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004357{
Eli Bendersky64d11e62012-06-15 07:42:50 +03004358 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004359 elementtreestate *st;
4360
4361 m = PyState_FindModule(&elementtreemodule);
4362 if (m) {
4363 Py_INCREF(m);
4364 return m;
4365 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004366
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004367 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02004368 if (PyType_Ready(&ElementIter_Type) < 0)
4369 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004370 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004371 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004372 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004373 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004374 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004375 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004376
Eli Bendersky532d03e2013-08-10 08:00:39 -07004377 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00004378 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00004379 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004380 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00004381
Eli Bendersky828efde2012-04-05 05:40:58 +03004382 if (!(temp = PyImport_ImportModule("copy")))
4383 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004384 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03004385 Py_XDECREF(temp);
4386
Victor Stinnerb136f112017-07-10 22:28:02 +02004387 if (st->deepcopy_obj == NULL) {
4388 return NULL;
4389 }
4390
4391 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004392 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004393 return NULL;
4394
Eli Bendersky20d41742012-06-01 09:48:37 +03004395 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004396 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4397 if (expat_capi) {
4398 /* check that it's usable */
4399 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004400 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004401 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4402 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004403 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004404 PyErr_SetString(PyExc_ImportError,
4405 "pyexpat version is incompatible");
4406 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004407 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004408 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004409 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004410 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004411
Eli Bendersky532d03e2013-08-10 08:00:39 -07004412 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004413 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004414 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004415 Py_INCREF(st->parseerror_obj);
4416 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004417
Eli Bendersky092af1f2012-03-04 07:14:03 +02004418 Py_INCREF((PyObject *)&Element_Type);
4419 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4420
Eli Bendersky58d548d2012-05-29 15:45:16 +03004421 Py_INCREF((PyObject *)&TreeBuilder_Type);
4422 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4423
Eli Bendersky52467b12012-06-01 07:13:08 +03004424 Py_INCREF((PyObject *)&XMLParser_Type);
4425 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004426
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004427 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004428}