blob: 776b86cf51a9d1e361f21c773dbd85f79e48760c [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000022/* An element can hold this many children without extra memory
23 allocations. */
24#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children. Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
38#if 0
39static int memory = 0;
40#define ALLOC(size, comment)\
41do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
42#define RELEASE(size, comment)\
43do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
44#else
45#define ALLOC(size, comment)
46#define RELEASE(size, comment)
47#endif
48
49/* compiler tweaks */
50#if defined(_MSC_VER)
51#define LOCAL(type) static __inline type __fastcall
52#else
53#define LOCAL(type) static type
54#endif
55
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000056/* macros used to store 'join' flags in string object pointers. note
57 that all use of text and tail as object pointers must be wrapped in
58 JOIN_OBJ. see comments in the ElementObject definition for more
59 info. */
Benjamin Petersonca470632016-09-06 13:47:26 -070060#define JOIN_GET(p) ((uintptr_t) (p) & 1)
61#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
62#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
Ronald Oussoren138d0802013-07-19 11:11:25 +020083/* Types defined by this extension */
84static PyTypeObject Element_Type;
85static PyTypeObject ElementIter_Type;
86static PyTypeObject TreeBuilder_Type;
87static PyTypeObject XMLParser_Type;
88
89
Eli Bendersky532d03e2013-08-10 08:00:39 -070090/* Per-module state; PEP 3121 */
91typedef struct {
92 PyObject *parseerror_obj;
93 PyObject *deepcopy_obj;
94 PyObject *elementpath_obj;
Stefan Behnel43851a22019-05-01 21:20:38 +020095 PyObject *comment_factory;
96 PyObject *pi_factory;
Eli Bendersky532d03e2013-08-10 08:00:39 -070097} elementtreestate;
98
99static struct PyModuleDef elementtreemodule;
100
101/* Given a module object (assumed to be _elementtree), get its per-module
102 * state.
103 */
104#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
105
106/* Find the module instance imported in the currently running sub-interpreter
107 * and get its state.
108 */
109#define ET_STATE_GLOBAL \
110 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
111
112static int
113elementtree_clear(PyObject *m)
114{
115 elementtreestate *st = ET_STATE(m);
116 Py_CLEAR(st->parseerror_obj);
117 Py_CLEAR(st->deepcopy_obj);
118 Py_CLEAR(st->elementpath_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +0200119 Py_CLEAR(st->comment_factory);
120 Py_CLEAR(st->pi_factory);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700121 return 0;
122}
123
124static int
125elementtree_traverse(PyObject *m, visitproc visit, void *arg)
126{
127 elementtreestate *st = ET_STATE(m);
128 Py_VISIT(st->parseerror_obj);
129 Py_VISIT(st->deepcopy_obj);
130 Py_VISIT(st->elementpath_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +0200131 Py_VISIT(st->comment_factory);
132 Py_VISIT(st->pi_factory);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700133 return 0;
134}
135
136static void
137elementtree_free(void *m)
138{
139 elementtree_clear((PyObject *)m);
140}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000141
142/* helpers */
143
144LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145list_join(PyObject* list)
146{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300147 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000148 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 PyObject* result;
150
Antoine Pitrouc1948842012-10-01 23:40:37 +0200151 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000152 if (!joiner)
153 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200154 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000155 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000156 return result;
157}
158
Eli Bendersky48d358b2012-05-30 17:57:50 +0300159/* Is the given object an empty dictionary?
160*/
161static int
162is_empty_dict(PyObject *obj)
163{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200164 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300165}
166
167
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000168/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200169/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000170
171typedef struct {
172
173 /* attributes (a dictionary object), or None if no attributes */
174 PyObject* attrib;
175
176 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200177 Py_ssize_t length; /* actual number of items */
178 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179
180 /* this either points to _children or to a malloced buffer */
181 PyObject* *children;
182
183 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100184
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000185} ElementObjectExtra;
186
187typedef struct {
188 PyObject_HEAD
189
190 /* element tag (a string). */
191 PyObject* tag;
192
193 /* text before first child. note that this is a tagged pointer;
194 use JOIN_OBJ to get the object pointer. the join flag is used
195 to distinguish lists created by the tree builder from lists
196 assigned to the attribute by application code; the former
197 should be joined before being returned to the user, the latter
198 should be left intact. */
199 PyObject* text;
200
201 /* text after this element, in parent. note that this is a tagged
202 pointer; use JOIN_OBJ to get the object pointer. */
203 PyObject* tail;
204
205 ElementObjectExtra* extra;
206
Eli Benderskyebf37a22012-04-03 22:02:37 +0300207 PyObject *weakreflist; /* For tp_weaklistoffset */
208
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209} ElementObject;
210
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000211
Christian Heimes90aa7642007-12-19 02:45:37 +0000212#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Serhiy Storchakab11c5662018-10-14 10:32:19 +0300213#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
214
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215
216/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200217/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000218
219LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200220create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000221{
222 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200223 if (!self->extra) {
224 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000225 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200226 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000227
228 if (!attrib)
229 attrib = Py_None;
230
231 Py_INCREF(attrib);
232 self->extra->attrib = attrib;
233
234 self->extra->length = 0;
235 self->extra->allocated = STATIC_CHILDREN;
236 self->extra->children = self->extra->_children;
237
238 return 0;
239}
240
241LOCAL(void)
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300242dealloc_extra(ElementObjectExtra *extra)
243{
244 Py_ssize_t i;
245
246 if (!extra)
247 return;
248
249 Py_DECREF(extra->attrib);
250
251 for (i = 0; i < extra->length; i++)
252 Py_DECREF(extra->children[i]);
253
254 if (extra->children != extra->_children)
255 PyObject_Free(extra->children);
256
257 PyObject_Free(extra);
258}
259
260LOCAL(void)
261clear_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000262{
Eli Bendersky08b85292012-04-04 15:55:07 +0300263 ElementObjectExtra *myextra;
Eli Bendersky08b85292012-04-04 15:55:07 +0300264
Eli Benderskyebf37a22012-04-03 22:02:37 +0300265 if (!self->extra)
266 return;
267
268 /* Avoid DECREFs calling into this code again (cycles, etc.)
269 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300270 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300271 self->extra = NULL;
272
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300273 dealloc_extra(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000274}
275
Eli Bendersky092af1f2012-03-04 07:14:03 +0200276/* Convenience internal function to create new Element objects with the given
277 * tag and attributes.
278*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000279LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200280create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000281{
282 ElementObject* self;
283
Eli Bendersky0192ba32012-03-30 16:38:33 +0300284 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000285 if (self == NULL)
286 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000287 self->extra = NULL;
288
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000289 Py_INCREF(tag);
290 self->tag = tag;
291
292 Py_INCREF(Py_None);
293 self->text = Py_None;
294
295 Py_INCREF(Py_None);
296 self->tail = Py_None;
297
Eli Benderskyebf37a22012-04-03 22:02:37 +0300298 self->weakreflist = NULL;
299
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200300 ALLOC(sizeof(ElementObject), "create element");
301 PyObject_GC_Track(self);
302
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200303 if (attrib != Py_None && !is_empty_dict(attrib)) {
304 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200305 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200306 return NULL;
307 }
308 }
309
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000310 return (PyObject*) self;
311}
312
Eli Bendersky092af1f2012-03-04 07:14:03 +0200313static PyObject *
314element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
315{
316 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
317 if (e != NULL) {
318 Py_INCREF(Py_None);
319 e->tag = Py_None;
320
321 Py_INCREF(Py_None);
322 e->text = Py_None;
323
324 Py_INCREF(Py_None);
325 e->tail = Py_None;
326
327 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300328 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200329 }
330 return (PyObject *)e;
331}
332
Eli Bendersky737b1732012-05-29 06:02:56 +0300333/* Helper function for extracting the attrib dictionary from a keywords dict.
334 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800335 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300336 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700337 *
338 * Return a dictionary with the content of kwds merged into the content of
339 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300340 */
341static PyObject*
342get_attrib_from_keywords(PyObject *kwds)
343{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700344 PyObject *attrib_str = PyUnicode_FromString("attrib");
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600345 if (attrib_str == NULL) {
346 return NULL;
347 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200348 PyObject *attrib = PyDict_GetItemWithError(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300349
350 if (attrib) {
351 /* If attrib was found in kwds, copy its value and remove it from
352 * kwds
353 */
354 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700355 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300356 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
357 Py_TYPE(attrib)->tp_name);
358 return NULL;
359 }
360 attrib = PyDict_Copy(attrib);
Serhiy Storchaka8905fcc2018-12-11 08:38:03 +0200361 if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
362 Py_DECREF(attrib);
363 attrib = NULL;
364 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200365 }
366 else if (!PyErr_Occurred()) {
Eli Bendersky737b1732012-05-29 06:02:56 +0300367 attrib = PyDict_New();
368 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700369
370 Py_DECREF(attrib_str);
371
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600372 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
373 Py_DECREF(attrib);
374 return NULL;
375 }
Eli Bendersky737b1732012-05-29 06:02:56 +0300376 return attrib;
377}
378
Serhiy Storchakacb985562015-05-04 15:32:48 +0300379/*[clinic input]
380module _elementtree
381class _elementtree.Element "ElementObject *" "&Element_Type"
382class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
383class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
384[clinic start generated code]*/
385/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
386
Eli Bendersky092af1f2012-03-04 07:14:03 +0200387static int
388element_init(PyObject *self, PyObject *args, PyObject *kwds)
389{
390 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 PyObject *attrib = NULL;
392 ElementObject *self_elem;
393
394 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
395 return -1;
396
Eli Bendersky737b1732012-05-29 06:02:56 +0300397 if (attrib) {
398 /* attrib passed as positional arg */
399 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200400 if (!attrib)
401 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300402 if (kwds) {
403 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300405 return -1;
406 }
407 }
408 } else if (kwds) {
409 /* have keywords args */
410 attrib = get_attrib_from_keywords(kwds);
411 if (!attrib)
412 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413 }
414
415 self_elem = (ElementObject *)self;
416
Antoine Pitrouc1948842012-10-01 23:40:37 +0200417 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200418 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200419 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200420 return -1;
421 }
422 }
423
Eli Bendersky48d358b2012-05-30 17:57:50 +0300424 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200425 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426
427 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200428 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300429 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200430
Eli Bendersky092af1f2012-03-04 07:14:03 +0200431 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300432 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200433
Eli Bendersky092af1f2012-03-04 07:14:03 +0200434 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300435 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200436
437 return 0;
438}
439
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000440LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200441element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000442{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200443 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000444 PyObject* *children;
445
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300446 assert(extra >= 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447 /* make sure self->children can hold the given number of extra
448 elements. set an exception and return -1 if allocation failed */
449
Victor Stinner5f0af232013-07-11 23:01:36 +0200450 if (!self->extra) {
451 if (create_extra(self, NULL) < 0)
452 return -1;
453 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000454
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200455 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000456
457 if (size > self->extra->allocated) {
458 /* use Python 2.4's list growth strategy */
459 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000460 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100461 * which needs at least 4 bytes.
462 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000463 * be safe.
464 */
465 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200466 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
467 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000468 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000469 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100470 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000471 * false alarm always assume at least one child to be safe.
472 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000473 children = PyObject_Realloc(self->extra->children,
474 size * sizeof(PyObject*));
475 if (!children)
476 goto nomemory;
477 } else {
478 children = PyObject_Malloc(size * sizeof(PyObject*));
479 if (!children)
480 goto nomemory;
481 /* copy existing children from static area to malloc buffer */
482 memcpy(children, self->extra->children,
483 self->extra->length * sizeof(PyObject*));
484 }
485 self->extra->children = children;
486 self->extra->allocated = size;
487 }
488
489 return 0;
490
491 nomemory:
492 PyErr_NoMemory();
493 return -1;
494}
495
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300496LOCAL(void)
497raise_type_error(PyObject *element)
498{
499 PyErr_Format(PyExc_TypeError,
500 "expected an Element, not \"%.200s\"",
501 Py_TYPE(element)->tp_name);
502}
503
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000504LOCAL(int)
505element_add_subelement(ElementObject* self, PyObject* element)
506{
507 /* add a child element to a parent */
508
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300509 if (!Element_Check(element)) {
510 raise_type_error(element);
511 return -1;
512 }
513
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000514 if (element_resize(self, 1) < 0)
515 return -1;
516
517 Py_INCREF(element);
518 self->extra->children[self->extra->length] = element;
519
520 self->extra->length++;
521
522 return 0;
523}
524
525LOCAL(PyObject*)
526element_get_attrib(ElementObject* self)
527{
528 /* return borrowed reference to attrib dictionary */
529 /* note: this function assumes that the extra section exists */
530
531 PyObject* res = self->extra->attrib;
532
533 if (res == Py_None) {
534 /* create missing dictionary */
535 res = PyDict_New();
536 if (!res)
537 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200538 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000539 self->extra->attrib = res;
540 }
541
542 return res;
543}
544
545LOCAL(PyObject*)
546element_get_text(ElementObject* self)
547{
548 /* return borrowed reference to text attribute */
549
Serhiy Storchaka576def02017-03-30 09:47:31 +0300550 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000551
552 if (JOIN_GET(res)) {
553 res = JOIN_OBJ(res);
554 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300555 PyObject *tmp = list_join(res);
556 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000557 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300558 self->text = tmp;
559 Py_DECREF(res);
560 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561 }
562 }
563
564 return res;
565}
566
567LOCAL(PyObject*)
568element_get_tail(ElementObject* self)
569{
570 /* return borrowed reference to text attribute */
571
Serhiy Storchaka576def02017-03-30 09:47:31 +0300572 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573
574 if (JOIN_GET(res)) {
575 res = JOIN_OBJ(res);
576 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300577 PyObject *tmp = list_join(res);
578 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000579 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300580 self->tail = tmp;
581 Py_DECREF(res);
582 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000583 }
584 }
585
586 return res;
587}
588
589static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300590subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000591{
592 PyObject* elem;
593
594 ElementObject* parent;
595 PyObject* tag;
596 PyObject* attrib = NULL;
597 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
598 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800599 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000600 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800601 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000602
Eli Bendersky737b1732012-05-29 06:02:56 +0300603 if (attrib) {
604 /* attrib passed as positional arg */
605 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000606 if (!attrib)
607 return NULL;
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600608 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
609 Py_DECREF(attrib);
610 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300611 }
612 } else if (kwds) {
613 /* have keyword args */
614 attrib = get_attrib_from_keywords(kwds);
615 if (!attrib)
616 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000617 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300618 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000619 Py_INCREF(Py_None);
620 attrib = Py_None;
621 }
622
Eli Bendersky092af1f2012-03-04 07:14:03 +0200623 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000624 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200625 if (elem == NULL)
626 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000627
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000628 if (element_add_subelement(parent, elem) < 0) {
629 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000630 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000631 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000632
633 return elem;
634}
635
Eli Bendersky0192ba32012-03-30 16:38:33 +0300636static int
637element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
638{
639 Py_VISIT(self->tag);
640 Py_VISIT(JOIN_OBJ(self->text));
641 Py_VISIT(JOIN_OBJ(self->tail));
642
643 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200644 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300645 Py_VISIT(self->extra->attrib);
646
647 for (i = 0; i < self->extra->length; ++i)
648 Py_VISIT(self->extra->children[i]);
649 }
650 return 0;
651}
652
653static int
654element_gc_clear(ElementObject *self)
655{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300656 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700657 _clear_joined_ptr(&self->text);
658 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300659
660 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300661 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300662 */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300663 clear_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300664 return 0;
665}
666
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000667static void
668element_dealloc(ElementObject* self)
669{
INADA Naokia6296d32017-08-24 14:55:17 +0900670 /* bpo-31095: UnTrack is needed before calling any callbacks */
Eli Bendersky0192ba32012-03-30 16:38:33 +0300671 PyObject_GC_UnTrack(self);
Jeroen Demeyer351c6742019-05-10 19:21:11 +0200672 Py_TRASHCAN_BEGIN(self, element_dealloc)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300673
674 if (self->weakreflist != NULL)
675 PyObject_ClearWeakRefs((PyObject *) self);
676
Eli Bendersky0192ba32012-03-30 16:38:33 +0300677 /* element_gc_clear clears all references and deallocates extra
678 */
679 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000680
681 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200682 Py_TYPE(self)->tp_free((PyObject *)self);
Jeroen Demeyer351c6742019-05-10 19:21:11 +0200683 Py_TRASHCAN_END
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000684}
685
686/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000687
Serhiy Storchakacb985562015-05-04 15:32:48 +0300688/*[clinic input]
689_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000690
Serhiy Storchakacb985562015-05-04 15:32:48 +0300691 subelement: object(subclass_of='&Element_Type')
692 /
693
694[clinic start generated code]*/
695
696static PyObject *
697_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
698/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
699{
700 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000701 return NULL;
702
703 Py_RETURN_NONE;
704}
705
Serhiy Storchakacb985562015-05-04 15:32:48 +0300706/*[clinic input]
707_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000708
Serhiy Storchakacb985562015-05-04 15:32:48 +0300709[clinic start generated code]*/
710
711static PyObject *
712_elementtree_Element_clear_impl(ElementObject *self)
713/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
714{
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300715 clear_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716
717 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300718 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719
720 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300721 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000722
723 Py_RETURN_NONE;
724}
725
Serhiy Storchakacb985562015-05-04 15:32:48 +0300726/*[clinic input]
727_elementtree.Element.__copy__
728
729[clinic start generated code]*/
730
731static PyObject *
732_elementtree_Element___copy___impl(ElementObject *self)
733/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000734{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200735 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000736 ElementObject* element;
737
Eli Bendersky092af1f2012-03-04 07:14:03 +0200738 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800739 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000740 if (!element)
741 return NULL;
742
Oren Milman39ecb9c2017-10-10 23:26:24 +0300743 Py_INCREF(JOIN_OBJ(self->text));
744 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000745
Oren Milman39ecb9c2017-10-10 23:26:24 +0300746 Py_INCREF(JOIN_OBJ(self->tail));
747 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300749 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000750 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000751 if (element_resize(element, self->extra->length) < 0) {
752 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000753 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000754 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000755
756 for (i = 0; i < self->extra->length; i++) {
757 Py_INCREF(self->extra->children[i]);
758 element->extra->children[i] = self->extra->children[i];
759 }
760
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300761 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000763 }
764
765 return (PyObject*) element;
766}
767
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200768/* Helper for a deep copy. */
769LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
770
Serhiy Storchakacb985562015-05-04 15:32:48 +0300771/*[clinic input]
772_elementtree.Element.__deepcopy__
773
Oren Milmand0568182017-09-12 17:39:15 +0300774 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300775 /
776
777[clinic start generated code]*/
778
779static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300780_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
781/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000782{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200783 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000784 ElementObject* element;
785 PyObject* tag;
786 PyObject* attrib;
787 PyObject* text;
788 PyObject* tail;
789 PyObject* id;
790
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000791 tag = deepcopy(self->tag, memo);
792 if (!tag)
793 return NULL;
794
795 if (self->extra) {
796 attrib = deepcopy(self->extra->attrib, memo);
797 if (!attrib) {
798 Py_DECREF(tag);
799 return NULL;
800 }
801 } else {
802 Py_INCREF(Py_None);
803 attrib = Py_None;
804 }
805
Eli Bendersky092af1f2012-03-04 07:14:03 +0200806 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807
808 Py_DECREF(tag);
809 Py_DECREF(attrib);
810
811 if (!element)
812 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100813
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000814 text = deepcopy(JOIN_OBJ(self->text), memo);
815 if (!text)
816 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300817 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000818
819 tail = deepcopy(JOIN_OBJ(self->tail), memo);
820 if (!tail)
821 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300822 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000823
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300824 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000825 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000826 if (element_resize(element, self->extra->length) < 0)
827 goto error;
828
829 for (i = 0; i < self->extra->length; i++) {
830 PyObject* child = deepcopy(self->extra->children[i], memo);
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300831 if (!child || !Element_Check(child)) {
832 if (child) {
833 raise_type_error(child);
834 Py_DECREF(child);
835 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000836 element->extra->length = i;
837 goto error;
838 }
839 element->extra->children[i] = child;
840 }
841
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300842 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000843 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000844 }
845
846 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700847 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000848 if (!id)
849 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000850
851 i = PyDict_SetItem(memo, id, (PyObject*) element);
852
853 Py_DECREF(id);
854
855 if (i < 0)
856 goto error;
857
858 return (PyObject*) element;
859
860 error:
861 Py_DECREF(element);
862 return NULL;
863}
864
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200865LOCAL(PyObject *)
866deepcopy(PyObject *object, PyObject *memo)
867{
868 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200869 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200870 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200871
872 /* Fast paths */
873 if (object == Py_None || PyUnicode_CheckExact(object)) {
874 Py_INCREF(object);
875 return object;
876 }
877
878 if (Py_REFCNT(object) == 1) {
879 if (PyDict_CheckExact(object)) {
880 PyObject *key, *value;
881 Py_ssize_t pos = 0;
882 int simple = 1;
883 while (PyDict_Next(object, &pos, &key, &value)) {
884 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
885 simple = 0;
886 break;
887 }
888 }
889 if (simple)
890 return PyDict_Copy(object);
891 /* Fall through to general case */
892 }
893 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300894 return _elementtree_Element___deepcopy___impl(
895 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200896 }
897 }
898
899 /* General case */
900 st = ET_STATE_GLOBAL;
901 if (!st->deepcopy_obj) {
902 PyErr_SetString(PyExc_RuntimeError,
903 "deepcopy helper not found");
904 return NULL;
905 }
906
Victor Stinner7fbac452016-08-20 01:34:44 +0200907 stack[0] = object;
908 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200909 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200910}
911
912
Serhiy Storchakacb985562015-05-04 15:32:48 +0300913/*[clinic input]
914_elementtree.Element.__sizeof__ -> Py_ssize_t
915
916[clinic start generated code]*/
917
918static Py_ssize_t
919_elementtree_Element___sizeof___impl(ElementObject *self)
920/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200921{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200922 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200923 if (self->extra) {
924 result += sizeof(ElementObjectExtra);
925 if (self->extra->children != self->extra->_children)
926 result += sizeof(PyObject*) * self->extra->allocated;
927 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300928 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200929}
930
Eli Bendersky698bdb22013-01-10 06:01:06 -0800931/* dict keys for getstate/setstate. */
932#define PICKLED_TAG "tag"
933#define PICKLED_CHILDREN "_children"
934#define PICKLED_ATTRIB "attrib"
935#define PICKLED_TAIL "tail"
936#define PICKLED_TEXT "text"
937
938/* __getstate__ returns a fabricated instance dict as in the pure-Python
939 * Element implementation, for interoperability/interchangeability. This
940 * makes the pure-Python implementation details an API, but (a) there aren't
941 * any unnecessary structures there; and (b) it buys compatibility with 3.2
942 * pickles. See issue #16076.
943 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300944/*[clinic input]
945_elementtree.Element.__getstate__
946
947[clinic start generated code]*/
948
Eli Bendersky698bdb22013-01-10 06:01:06 -0800949static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300950_elementtree_Element___getstate___impl(ElementObject *self)
951/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800952{
Miss Islington (bot)97bbdb22020-03-09 05:55:17 -0700953 Py_ssize_t i;
954 PyObject *children, *attrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800955
956 /* Build a list of children. */
957 children = PyList_New(self->extra ? self->extra->length : 0);
958 if (!children)
959 return NULL;
960 for (i = 0; i < PyList_GET_SIZE(children); i++) {
961 PyObject *child = self->extra->children[i];
962 Py_INCREF(child);
963 PyList_SET_ITEM(children, i, child);
964 }
965
Miss Islington (bot)97bbdb22020-03-09 05:55:17 -0700966 if (self->extra && self->extra->attrib != Py_None) {
967 attrib = self->extra->attrib;
968 Py_INCREF(attrib);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800969 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800970 else {
Miss Islington (bot)97bbdb22020-03-09 05:55:17 -0700971 attrib = PyDict_New();
972 if (!attrib) {
973 Py_DECREF(children);
974 return NULL;
975 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800976 }
Miss Islington (bot)97bbdb22020-03-09 05:55:17 -0700977
978 return Py_BuildValue("{sOsNsNsOsO}",
979 PICKLED_TAG, self->tag,
980 PICKLED_CHILDREN, children,
981 PICKLED_ATTRIB, attrib,
982 PICKLED_TEXT, JOIN_OBJ(self->text),
983 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800984}
985
986static PyObject *
987element_setstate_from_attributes(ElementObject *self,
988 PyObject *tag,
989 PyObject *attrib,
990 PyObject *text,
991 PyObject *tail,
992 PyObject *children)
993{
994 Py_ssize_t i, nchildren;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300995 ElementObjectExtra *oldextra = NULL;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800996
997 if (!tag) {
998 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
999 return NULL;
1000 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001001
Serhiy Storchaka191321d2015-12-27 15:41:34 +02001002 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001003 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001004
Oren Milman39ecb9c2017-10-10 23:26:24 +03001005 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
1006 Py_INCREF(JOIN_OBJ(text));
1007 _set_joined_ptr(&self->text, text);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001008
Oren Milman39ecb9c2017-10-10 23:26:24 +03001009 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
1010 Py_INCREF(JOIN_OBJ(tail));
1011 _set_joined_ptr(&self->tail, tail);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001012
1013 /* Handle ATTRIB and CHILDREN. */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001014 if (!children && !attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001015 Py_RETURN_NONE;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001016 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001017
1018 /* Compute 'nchildren'. */
1019 if (children) {
1020 if (!PyList_Check(children)) {
1021 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1022 return NULL;
1023 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001024 nchildren = PyList_GET_SIZE(children);
1025
1026 /* (Re-)allocate 'extra'.
1027 Avoid DECREFs calling into this code again (cycles, etc.)
1028 */
1029 oldextra = self->extra;
1030 self->extra = NULL;
1031 if (element_resize(self, nchildren)) {
1032 assert(!self->extra || !self->extra->length);
1033 clear_extra(self);
1034 self->extra = oldextra;
1035 return NULL;
1036 }
1037 assert(self->extra);
1038 assert(self->extra->allocated >= nchildren);
1039 if (oldextra) {
1040 assert(self->extra->attrib == Py_None);
1041 self->extra->attrib = oldextra->attrib;
1042 oldextra->attrib = Py_None;
1043 }
1044
1045 /* Copy children */
1046 for (i = 0; i < nchildren; i++) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001047 PyObject *child = PyList_GET_ITEM(children, i);
1048 if (!Element_Check(child)) {
1049 raise_type_error(child);
1050 self->extra->length = i;
1051 dealloc_extra(oldextra);
1052 return NULL;
1053 }
1054 Py_INCREF(child);
1055 self->extra->children[i] = child;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001056 }
1057
1058 assert(!self->extra->length);
1059 self->extra->length = nchildren;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001060 }
1061 else {
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001062 if (element_resize(self, 0)) {
1063 return NULL;
1064 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001065 }
1066
Eli Bendersky698bdb22013-01-10 06:01:06 -08001067 /* Stash attrib. */
1068 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001069 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001070 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001071 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001072 dealloc_extra(oldextra);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001073
1074 Py_RETURN_NONE;
1075}
1076
1077/* __setstate__ for Element instance from the Python implementation.
1078 * 'state' should be the instance dict.
1079 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001080
Eli Bendersky698bdb22013-01-10 06:01:06 -08001081static PyObject *
1082element_setstate_from_Python(ElementObject *self, PyObject *state)
1083{
1084 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1085 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1086 PyObject *args;
1087 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001088 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001089
Eli Bendersky698bdb22013-01-10 06:01:06 -08001090 tag = attrib = text = tail = children = NULL;
1091 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001092 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001093 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001094
1095 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1096 &attrib, &text, &tail, &children))
1097 retval = element_setstate_from_attributes(self, tag, attrib, text,
1098 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001099 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001100 retval = NULL;
1101
1102 Py_DECREF(args);
1103 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001104}
1105
Serhiy Storchakacb985562015-05-04 15:32:48 +03001106/*[clinic input]
1107_elementtree.Element.__setstate__
1108
1109 state: object
1110 /
1111
1112[clinic start generated code]*/
1113
Eli Bendersky698bdb22013-01-10 06:01:06 -08001114static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001115_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1116/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001117{
1118 if (!PyDict_CheckExact(state)) {
1119 PyErr_Format(PyExc_TypeError,
1120 "Don't know how to unpickle \"%.200R\" as an Element",
1121 state);
1122 return NULL;
1123 }
1124 else
1125 return element_setstate_from_Python(self, state);
1126}
1127
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001128LOCAL(int)
1129checkpath(PyObject* tag)
1130{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001131 Py_ssize_t i;
1132 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001133
1134 /* check if a tag contains an xpath character */
1135
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001136#define PATHCHAR(ch) \
1137 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001138
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001139 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001140 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1141 void *data = PyUnicode_DATA(tag);
1142 unsigned int kind = PyUnicode_KIND(tag);
Stefan Behnel47541682019-05-03 20:58:16 +02001143 if (len >= 3 && PyUnicode_READ(kind, data, 0) == '{' && (
1144 PyUnicode_READ(kind, data, 1) == '}' || (
1145 PyUnicode_READ(kind, data, 1) == '*' &&
1146 PyUnicode_READ(kind, data, 2) == '}'))) {
1147 /* wildcard: '{}tag' or '{*}tag' */
1148 return 1;
1149 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001150 for (i = 0; i < len; i++) {
1151 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1152 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001153 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001154 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001155 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001156 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001157 return 1;
1158 }
1159 return 0;
1160 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001161 if (PyBytes_Check(tag)) {
1162 char *p = PyBytes_AS_STRING(tag);
Stefan Behnel47541682019-05-03 20:58:16 +02001163 const Py_ssize_t len = PyBytes_GET_SIZE(tag);
1164 if (len >= 3 && p[0] == '{' && (
Stefan Behnel6b951492019-05-06 17:36:35 +02001165 p[1] == '}' || (p[1] == '*' && p[2] == '}'))) {
Stefan Behnel47541682019-05-03 20:58:16 +02001166 /* wildcard: '{}tag' or '{*}tag' */
1167 return 1;
1168 }
1169 for (i = 0; i < len; i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001170 if (p[i] == '{')
1171 check = 0;
1172 else if (p[i] == '}')
1173 check = 1;
1174 else if (check && PATHCHAR(p[i]))
1175 return 1;
1176 }
1177 return 0;
1178 }
1179
1180 return 1; /* unknown type; might be path expression */
1181}
1182
Serhiy Storchakacb985562015-05-04 15:32:48 +03001183/*[clinic input]
1184_elementtree.Element.extend
1185
1186 elements: object
1187 /
1188
1189[clinic start generated code]*/
1190
1191static PyObject *
1192_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1193/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001194{
1195 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001196 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001197
Serhiy Storchakacb985562015-05-04 15:32:48 +03001198 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001199 if (!seq) {
1200 PyErr_Format(
1201 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001202 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001203 );
1204 return NULL;
1205 }
1206
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001207 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001208 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001209 Py_INCREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001210 if (element_add_subelement(self, element) < 0) {
1211 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001212 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001213 return NULL;
1214 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001215 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001216 }
1217
1218 Py_DECREF(seq);
1219
1220 Py_RETURN_NONE;
1221}
1222
Serhiy Storchakacb985562015-05-04 15:32:48 +03001223/*[clinic input]
1224_elementtree.Element.find
1225
1226 path: object
1227 namespaces: object = None
1228
1229[clinic start generated code]*/
1230
1231static PyObject *
1232_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1233 PyObject *namespaces)
1234/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001235{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001236 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001237 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001238
Serhiy Storchakacb985562015-05-04 15:32:48 +03001239 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001240 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001241 return _PyObject_CallMethodIdObjArgs(
1242 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001243 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001244 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001245
1246 if (!self->extra)
1247 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001248
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001249 for (i = 0; i < self->extra->length; i++) {
1250 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001251 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001252 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001253 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001254 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001255 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001256 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001257 Py_DECREF(item);
1258 if (rc < 0)
1259 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001260 }
1261
1262 Py_RETURN_NONE;
1263}
1264
Serhiy Storchakacb985562015-05-04 15:32:48 +03001265/*[clinic input]
1266_elementtree.Element.findtext
1267
1268 path: object
1269 default: object = None
1270 namespaces: object = None
1271
1272[clinic start generated code]*/
1273
1274static PyObject *
1275_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1276 PyObject *default_value,
1277 PyObject *namespaces)
1278/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001279{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001280 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001281 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001282 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001283
Serhiy Storchakacb985562015-05-04 15:32:48 +03001284 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001285 return _PyObject_CallMethodIdObjArgs(
1286 st->elementpath_obj, &PyId_findtext,
1287 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001288 );
1289
1290 if (!self->extra) {
1291 Py_INCREF(default_value);
1292 return default_value;
1293 }
1294
1295 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001296 PyObject *item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001297 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001298 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001299 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001300 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001301 if (rc > 0) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001302 PyObject* text = element_get_text((ElementObject*)item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001303 if (text == Py_None) {
1304 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001305 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001306 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001307 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001308 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001309 return text;
1310 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001311 Py_DECREF(item);
1312 if (rc < 0)
1313 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001314 }
1315
1316 Py_INCREF(default_value);
1317 return default_value;
1318}
1319
Serhiy Storchakacb985562015-05-04 15:32:48 +03001320/*[clinic input]
1321_elementtree.Element.findall
1322
1323 path: object
1324 namespaces: object = None
1325
1326[clinic start generated code]*/
1327
1328static PyObject *
1329_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1330 PyObject *namespaces)
1331/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001332{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001333 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001334 PyObject* out;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001335 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001336
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001337 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001338 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001339 return _PyObject_CallMethodIdObjArgs(
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001340 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001341 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001342 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001343
1344 out = PyList_New(0);
1345 if (!out)
1346 return NULL;
1347
1348 if (!self->extra)
1349 return out;
1350
1351 for (i = 0; i < self->extra->length; i++) {
1352 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001353 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001354 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001355 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001356 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001357 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1358 Py_DECREF(item);
1359 Py_DECREF(out);
1360 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001361 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001362 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001363 }
1364
1365 return out;
1366}
1367
Serhiy Storchakacb985562015-05-04 15:32:48 +03001368/*[clinic input]
1369_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001370
Serhiy Storchakacb985562015-05-04 15:32:48 +03001371 path: object
1372 namespaces: object = None
1373
1374[clinic start generated code]*/
1375
1376static PyObject *
1377_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1378 PyObject *namespaces)
1379/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1380{
1381 PyObject* tag = path;
1382 _Py_IDENTIFIER(iterfind);
1383 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001384
Victor Stinnerf5616342016-12-09 15:26:00 +01001385 return _PyObject_CallMethodIdObjArgs(
1386 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001387}
1388
Serhiy Storchakacb985562015-05-04 15:32:48 +03001389/*[clinic input]
1390_elementtree.Element.get
1391
1392 key: object
1393 default: object = None
1394
1395[clinic start generated code]*/
1396
1397static PyObject *
1398_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1399 PyObject *default_value)
1400/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001401{
1402 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001403
1404 if (!self->extra || self->extra->attrib == Py_None)
1405 value = default_value;
1406 else {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001407 value = PyDict_GetItemWithError(self->extra->attrib, key);
1408 if (!value) {
1409 if (PyErr_Occurred()) {
1410 return NULL;
1411 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001412 value = default_value;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001413 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001414 }
1415
1416 Py_INCREF(value);
1417 return value;
1418}
1419
Serhiy Storchakacb985562015-05-04 15:32:48 +03001420/*[clinic input]
1421_elementtree.Element.getchildren
1422
1423[clinic start generated code]*/
1424
1425static PyObject *
1426_elementtree_Element_getchildren_impl(ElementObject *self)
1427/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001428{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001429 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001430 PyObject* list;
1431
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001432 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1433 "This method will be removed in future versions. "
1434 "Use 'list(elem)' or iteration over elem instead.",
1435 1) < 0) {
1436 return NULL;
1437 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001438
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001439 if (!self->extra)
1440 return PyList_New(0);
1441
1442 list = PyList_New(self->extra->length);
1443 if (!list)
1444 return NULL;
1445
1446 for (i = 0; i < self->extra->length; i++) {
1447 PyObject* item = self->extra->children[i];
1448 Py_INCREF(item);
1449 PyList_SET_ITEM(list, i, item);
1450 }
1451
1452 return list;
1453}
1454
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001455
Eli Bendersky64d11e62012-06-15 07:42:50 +03001456static PyObject *
1457create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1458
1459
Serhiy Storchakacb985562015-05-04 15:32:48 +03001460/*[clinic input]
1461_elementtree.Element.iter
1462
1463 tag: object = None
1464
1465[clinic start generated code]*/
1466
Eli Bendersky64d11e62012-06-15 07:42:50 +03001467static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001468_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1469/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001470{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001471 if (PyUnicode_Check(tag)) {
1472 if (PyUnicode_READY(tag) < 0)
1473 return NULL;
1474 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1475 tag = Py_None;
1476 }
1477 else if (PyBytes_Check(tag)) {
1478 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1479 tag = Py_None;
1480 }
1481
Eli Bendersky64d11e62012-06-15 07:42:50 +03001482 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001483}
1484
1485
Serhiy Storchakacb985562015-05-04 15:32:48 +03001486/*[clinic input]
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001487_elementtree.Element.getiterator
1488
1489 tag: object = None
1490
1491[clinic start generated code]*/
1492
1493static PyObject *
1494_elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1495/*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1496{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03001497 if (PyErr_WarnEx(PyExc_DeprecationWarning,
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001498 "This method will be removed in future versions. "
1499 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1500 1) < 0) {
1501 return NULL;
1502 }
1503 return _elementtree_Element_iter_impl(self, tag);
1504}
1505
1506
1507/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03001508_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001509
Serhiy Storchakacb985562015-05-04 15:32:48 +03001510[clinic start generated code]*/
1511
1512static PyObject *
1513_elementtree_Element_itertext_impl(ElementObject *self)
1514/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1515{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001516 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001517}
1518
Eli Bendersky64d11e62012-06-15 07:42:50 +03001519
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001520static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001521element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001522{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001523 ElementObject* self = (ElementObject*) self_;
1524
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001525 if (!self->extra || index < 0 || index >= self->extra->length) {
1526 PyErr_SetString(
1527 PyExc_IndexError,
1528 "child index out of range"
1529 );
1530 return NULL;
1531 }
1532
1533 Py_INCREF(self->extra->children[index]);
1534 return self->extra->children[index];
1535}
1536
Serhiy Storchakacb985562015-05-04 15:32:48 +03001537/*[clinic input]
1538_elementtree.Element.insert
1539
1540 index: Py_ssize_t
1541 subelement: object(subclass_of='&Element_Type')
1542 /
1543
1544[clinic start generated code]*/
1545
1546static PyObject *
1547_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1548 PyObject *subelement)
1549/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001550{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001551 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001552
Victor Stinner5f0af232013-07-11 23:01:36 +02001553 if (!self->extra) {
1554 if (create_extra(self, NULL) < 0)
1555 return NULL;
1556 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001557
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001558 if (index < 0) {
1559 index += self->extra->length;
1560 if (index < 0)
1561 index = 0;
1562 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001563 if (index > self->extra->length)
1564 index = self->extra->length;
1565
1566 if (element_resize(self, 1) < 0)
1567 return NULL;
1568
1569 for (i = self->extra->length; i > index; i--)
1570 self->extra->children[i] = self->extra->children[i-1];
1571
Serhiy Storchakacb985562015-05-04 15:32:48 +03001572 Py_INCREF(subelement);
1573 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001574
1575 self->extra->length++;
1576
1577 Py_RETURN_NONE;
1578}
1579
Serhiy Storchakacb985562015-05-04 15:32:48 +03001580/*[clinic input]
1581_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001582
Serhiy Storchakacb985562015-05-04 15:32:48 +03001583[clinic start generated code]*/
1584
1585static PyObject *
1586_elementtree_Element_items_impl(ElementObject *self)
1587/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1588{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001589 if (!self->extra || self->extra->attrib == Py_None)
1590 return PyList_New(0);
1591
1592 return PyDict_Items(self->extra->attrib);
1593}
1594
Serhiy Storchakacb985562015-05-04 15:32:48 +03001595/*[clinic input]
1596_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001597
Serhiy Storchakacb985562015-05-04 15:32:48 +03001598[clinic start generated code]*/
1599
1600static PyObject *
1601_elementtree_Element_keys_impl(ElementObject *self)
1602/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1603{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001604 if (!self->extra || self->extra->attrib == Py_None)
1605 return PyList_New(0);
1606
1607 return PyDict_Keys(self->extra->attrib);
1608}
1609
Martin v. Löwis18e16552006-02-15 17:27:45 +00001610static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001611element_length(ElementObject* self)
1612{
1613 if (!self->extra)
1614 return 0;
1615
1616 return self->extra->length;
1617}
1618
Serhiy Storchakacb985562015-05-04 15:32:48 +03001619/*[clinic input]
1620_elementtree.Element.makeelement
1621
1622 tag: object
1623 attrib: object
1624 /
1625
1626[clinic start generated code]*/
1627
1628static PyObject *
1629_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1630 PyObject *attrib)
1631/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001632{
1633 PyObject* elem;
1634
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001635 attrib = PyDict_Copy(attrib);
1636 if (!attrib)
1637 return NULL;
1638
Eli Bendersky092af1f2012-03-04 07:14:03 +02001639 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001640
1641 Py_DECREF(attrib);
1642
1643 return elem;
1644}
1645
Serhiy Storchakacb985562015-05-04 15:32:48 +03001646/*[clinic input]
1647_elementtree.Element.remove
1648
1649 subelement: object(subclass_of='&Element_Type')
1650 /
1651
1652[clinic start generated code]*/
1653
1654static PyObject *
1655_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1656/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001657{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001658 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001659 int rc;
1660 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001661
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001662 if (!self->extra) {
1663 /* element has no children, so raise exception */
1664 PyErr_SetString(
1665 PyExc_ValueError,
1666 "list.remove(x): x not in list"
1667 );
1668 return NULL;
1669 }
1670
1671 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001672 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001673 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001674 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001675 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001676 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001677 if (rc < 0)
1678 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001679 }
1680
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001681 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001682 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001683 PyErr_SetString(
1684 PyExc_ValueError,
1685 "list.remove(x): x not in list"
1686 );
1687 return NULL;
1688 }
1689
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001690 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001691
1692 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001693 for (; i < self->extra->length; i++)
1694 self->extra->children[i] = self->extra->children[i+1];
1695
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001696 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001697 Py_RETURN_NONE;
1698}
1699
1700static PyObject*
1701element_repr(ElementObject* self)
1702{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001703 int status;
1704
1705 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001706 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001707
1708 status = Py_ReprEnter((PyObject *)self);
1709 if (status == 0) {
1710 PyObject *res;
1711 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1712 Py_ReprLeave((PyObject *)self);
1713 return res;
1714 }
1715 if (status > 0)
1716 PyErr_Format(PyExc_RuntimeError,
1717 "reentrant call inside %s.__repr__",
1718 Py_TYPE(self)->tp_name);
1719 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001720}
1721
Serhiy Storchakacb985562015-05-04 15:32:48 +03001722/*[clinic input]
1723_elementtree.Element.set
1724
1725 key: object
1726 value: object
1727 /
1728
1729[clinic start generated code]*/
1730
1731static PyObject *
1732_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1733 PyObject *value)
1734/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001735{
1736 PyObject* attrib;
1737
Victor Stinner5f0af232013-07-11 23:01:36 +02001738 if (!self->extra) {
1739 if (create_extra(self, NULL) < 0)
1740 return NULL;
1741 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001742
1743 attrib = element_get_attrib(self);
1744 if (!attrib)
1745 return NULL;
1746
1747 if (PyDict_SetItem(attrib, key, value) < 0)
1748 return NULL;
1749
1750 Py_RETURN_NONE;
1751}
1752
1753static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001754element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001755{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001756 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001757 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001758 PyObject* old;
1759
1760 if (!self->extra || index < 0 || index >= self->extra->length) {
1761 PyErr_SetString(
1762 PyExc_IndexError,
1763 "child assignment index out of range");
1764 return -1;
1765 }
1766
1767 old = self->extra->children[index];
1768
1769 if (item) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001770 if (!Element_Check(item)) {
1771 raise_type_error(item);
1772 return -1;
1773 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001774 Py_INCREF(item);
1775 self->extra->children[index] = item;
1776 } else {
1777 self->extra->length--;
1778 for (i = index; i < self->extra->length; i++)
1779 self->extra->children[i] = self->extra->children[i+1];
1780 }
1781
1782 Py_DECREF(old);
1783
1784 return 0;
1785}
1786
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001787static PyObject*
1788element_subscr(PyObject* self_, PyObject* item)
1789{
1790 ElementObject* self = (ElementObject*) self_;
1791
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001792 if (PyIndex_Check(item)) {
1793 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001794
1795 if (i == -1 && PyErr_Occurred()) {
1796 return NULL;
1797 }
1798 if (i < 0 && self->extra)
1799 i += self->extra->length;
1800 return element_getitem(self_, i);
1801 }
1802 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001803 Py_ssize_t start, stop, step, slicelen, i;
1804 size_t cur;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001805 PyObject* list;
1806
1807 if (!self->extra)
1808 return PyList_New(0);
1809
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001810 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001811 return NULL;
1812 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001813 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1814 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001815
1816 if (slicelen <= 0)
1817 return PyList_New(0);
1818 else {
1819 list = PyList_New(slicelen);
1820 if (!list)
1821 return NULL;
1822
1823 for (cur = start, i = 0; i < slicelen;
1824 cur += step, i++) {
1825 PyObject* item = self->extra->children[cur];
1826 Py_INCREF(item);
1827 PyList_SET_ITEM(list, i, item);
1828 }
1829
1830 return list;
1831 }
1832 }
1833 else {
1834 PyErr_SetString(PyExc_TypeError,
1835 "element indices must be integers");
1836 return NULL;
1837 }
1838}
1839
1840static int
1841element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1842{
1843 ElementObject* self = (ElementObject*) self_;
1844
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001845 if (PyIndex_Check(item)) {
1846 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001847
1848 if (i == -1 && PyErr_Occurred()) {
1849 return -1;
1850 }
1851 if (i < 0 && self->extra)
1852 i += self->extra->length;
1853 return element_setitem(self_, i, value);
1854 }
1855 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001856 Py_ssize_t start, stop, step, slicelen, newlen, i;
1857 size_t cur;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001858
1859 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001860 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001861
Victor Stinner5f0af232013-07-11 23:01:36 +02001862 if (!self->extra) {
1863 if (create_extra(self, NULL) < 0)
1864 return -1;
1865 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001866
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001867 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001868 return -1;
1869 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001870 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1871 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001872
Eli Bendersky865756a2012-03-09 13:38:15 +02001873 if (value == NULL) {
1874 /* Delete slice */
1875 size_t cur;
1876 Py_ssize_t i;
1877
1878 if (slicelen <= 0)
1879 return 0;
1880
1881 /* Since we're deleting, the direction of the range doesn't matter,
1882 * so for simplicity make it always ascending.
1883 */
1884 if (step < 0) {
1885 stop = start + 1;
1886 start = stop + step * (slicelen - 1) - 1;
1887 step = -step;
1888 }
1889
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001890 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001891
1892 /* recycle is a list that will contain all the children
1893 * scheduled for removal.
1894 */
1895 if (!(recycle = PyList_New(slicelen))) {
Eli Bendersky865756a2012-03-09 13:38:15 +02001896 return -1;
1897 }
1898
1899 /* This loop walks over all the children that have to be deleted,
1900 * with cur pointing at them. num_moved is the amount of children
1901 * until the next deleted child that have to be "shifted down" to
1902 * occupy the deleted's places.
1903 * Note that in the ith iteration, shifting is done i+i places down
1904 * because i children were already removed.
1905 */
1906 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1907 /* Compute how many children have to be moved, clipping at the
1908 * list end.
1909 */
1910 Py_ssize_t num_moved = step - 1;
1911 if (cur + step >= (size_t)self->extra->length) {
1912 num_moved = self->extra->length - cur - 1;
1913 }
1914
1915 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1916
1917 memmove(
1918 self->extra->children + cur - i,
1919 self->extra->children + cur + 1,
1920 num_moved * sizeof(PyObject *));
1921 }
1922
1923 /* Leftover "tail" after the last removed child */
1924 cur = start + (size_t)slicelen * step;
1925 if (cur < (size_t)self->extra->length) {
1926 memmove(
1927 self->extra->children + cur - slicelen,
1928 self->extra->children + cur,
1929 (self->extra->length - cur) * sizeof(PyObject *));
1930 }
1931
1932 self->extra->length -= slicelen;
1933
1934 /* Discard the recycle list with all the deleted sub-elements */
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -06001935 Py_DECREF(recycle);
Eli Bendersky865756a2012-03-09 13:38:15 +02001936 return 0;
1937 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001938
1939 /* A new slice is actually being assigned */
1940 seq = PySequence_Fast(value, "");
1941 if (!seq) {
1942 PyErr_Format(
1943 PyExc_TypeError,
1944 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1945 );
1946 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001947 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001948 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001949
1950 if (step != 1 && newlen != slicelen)
1951 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001952 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001953 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001954 "attempt to assign sequence of size %zd "
1955 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001956 newlen, slicelen
1957 );
1958 return -1;
1959 }
1960
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001961 /* Resize before creating the recycle bin, to prevent refleaks. */
1962 if (newlen > slicelen) {
1963 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001964 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001965 return -1;
1966 }
1967 }
1968
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001969 for (i = 0; i < newlen; i++) {
1970 PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1971 if (!Element_Check(element)) {
1972 raise_type_error(element);
1973 Py_DECREF(seq);
1974 return -1;
1975 }
1976 }
1977
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001978 if (slicelen > 0) {
1979 /* to avoid recursive calls to this method (via decref), move
1980 old items to the recycle bin here, and get rid of them when
1981 we're done modifying the element */
1982 recycle = PyList_New(slicelen);
1983 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001984 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001985 return -1;
1986 }
1987 for (cur = start, i = 0; i < slicelen;
1988 cur += step, i++)
1989 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1990 }
1991
1992 if (newlen < slicelen) {
1993 /* delete slice */
1994 for (i = stop; i < self->extra->length; i++)
1995 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1996 } else if (newlen > slicelen) {
1997 /* insert slice */
1998 for (i = self->extra->length-1; i >= stop; i--)
1999 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
2000 }
2001
2002 /* replace the slice */
2003 for (cur = start, i = 0; i < newlen;
2004 cur += step, i++) {
2005 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
2006 Py_INCREF(element);
2007 self->extra->children[cur] = element;
2008 }
2009
2010 self->extra->length += newlen - slicelen;
2011
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02002012 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002013
2014 /* discard the recycle bin, and everything in it */
2015 Py_XDECREF(recycle);
2016
2017 return 0;
2018 }
2019 else {
2020 PyErr_SetString(PyExc_TypeError,
2021 "element indices must be integers");
2022 return -1;
2023 }
2024}
2025
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002026static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02002027element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002028{
Serhiy Storchakadde08152015-11-25 15:28:13 +02002029 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002030 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002031 return res;
2032}
2033
Serhiy Storchakadde08152015-11-25 15:28:13 +02002034static PyObject*
2035element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002036{
Serhiy Storchakadde08152015-11-25 15:28:13 +02002037 PyObject *res = element_get_text(self);
2038 Py_XINCREF(res);
2039 return res;
2040}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002041
Serhiy Storchakadde08152015-11-25 15:28:13 +02002042static PyObject*
2043element_tail_getter(ElementObject *self, void *closure)
2044{
2045 PyObject *res = element_get_tail(self);
2046 Py_XINCREF(res);
2047 return res;
2048}
2049
2050static PyObject*
2051element_attrib_getter(ElementObject *self, void *closure)
2052{
2053 PyObject *res;
2054 if (!self->extra) {
2055 if (create_extra(self, NULL) < 0)
2056 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002057 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02002058 res = element_get_attrib(self);
2059 Py_XINCREF(res);
2060 return res;
2061}
Victor Stinner4d463432013-07-11 23:05:03 +02002062
/* Macro for setter validation: a NULL value means "delete the attribute",
 * which is not supported, so set AttributeError and return -1 from the
 * enclosing function.
 *
 * Wrapped in do { } while (0) so that `_VALIDATE_ATTR_VALUE(v);` is always
 * exactly one statement and cannot capture a following `else`. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2071
Serhiy Storchakadde08152015-11-25 15:28:13 +02002072static int
2073element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2074{
2075 _VALIDATE_ATTR_VALUE(value);
2076 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002077 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002078 return 0;
2079}
2080
2081static int
2082element_text_setter(ElementObject *self, PyObject *value, void *closure)
2083{
2084 _VALIDATE_ATTR_VALUE(value);
2085 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002086 _set_joined_ptr(&self->text, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002087 return 0;
2088}
2089
2090static int
2091element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2092{
2093 _VALIDATE_ATTR_VALUE(value);
2094 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002095 _set_joined_ptr(&self->tail, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002096 return 0;
2097}
2098
2099static int
2100element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2101{
2102 _VALIDATE_ATTR_VALUE(value);
2103 if (!self->extra) {
2104 if (create_extra(self, NULL) < 0)
2105 return -1;
2106 }
2107 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002108 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002109 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002110}
2111
2112static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002113 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002114 0, /* sq_concat */
2115 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002116 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002117 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002118 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002119 0,
2120};
2121
Eli Bendersky64d11e62012-06-15 07:42:50 +03002122/******************************* Element iterator ****************************/
2123
2124/* ElementIterObject represents the iteration state over an XML element in
2125 * pre-order traversal. To keep track of which sub-element should be returned
2126 * next, a stack of parents is maintained. This is a standard stack-based
2127 * iterative pre-order traversal of a tree.
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002128 * The stack is managed using a continuous array.
2129 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002130 * the current one is exhausted, and the next child to examine in that parent.
2131 */
2132typedef struct ParentLocator_t {
2133 ElementObject *parent;
2134 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002135} ParentLocator;
2136
2137typedef struct {
2138 PyObject_HEAD
2139 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002140 Py_ssize_t parent_stack_used;
2141 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002142 ElementObject *root_element;
2143 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002144 int gettext;
2145} ElementIterObject;
2146
2147
2148static void
2149elementiter_dealloc(ElementIterObject *it)
2150{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002151 Py_ssize_t i = it->parent_stack_used;
2152 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002153 /* bpo-31095: UnTrack is needed before calling any callbacks */
2154 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002155 while (i--)
2156 Py_XDECREF(it->parent_stack[i].parent);
2157 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002158
2159 Py_XDECREF(it->sought_tag);
2160 Py_XDECREF(it->root_element);
2161
Eli Bendersky64d11e62012-06-15 07:42:50 +03002162 PyObject_GC_Del(it);
2163}
2164
2165static int
2166elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2167{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002168 Py_ssize_t i = it->parent_stack_used;
2169 while (i--)
2170 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002171
2172 Py_VISIT(it->root_element);
2173 Py_VISIT(it->sought_tag);
2174 return 0;
2175}
2176
2177/* Helper function for elementiter_next. Add a new parent to the parent stack.
2178 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002179static int
2180parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002181{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002182 ParentLocator *item;
2183
2184 if (it->parent_stack_used >= it->parent_stack_size) {
2185 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2186 ParentLocator *parent_stack = it->parent_stack;
2187 PyMem_Resize(parent_stack, ParentLocator, new_size);
2188 if (parent_stack == NULL)
2189 return -1;
2190 it->parent_stack = parent_stack;
2191 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002192 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002193 item = it->parent_stack + it->parent_stack_used++;
2194 Py_INCREF(parent);
2195 item->parent = parent;
2196 item->child_index = 0;
2197 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002198}
2199
2200static PyObject *
2201elementiter_next(ElementIterObject *it)
2202{
2203 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002204 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002205 * A short note on gettext: this function serves both the iter() and
2206 * itertext() methods to avoid code duplication. However, there are a few
2207 * small differences in the way these iterations work. Namely:
2208 * - itertext() only yields text from nodes that have it, and continues
2209 * iterating when a node doesn't have text (so it doesn't return any
2210 * node like iter())
2211 * - itertext() also has to handle tail, after finishing with all the
2212 * children of a node.
2213 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002214 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002215 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002216 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002217
2218 while (1) {
2219 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002220 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002221 * iterator is exhausted.
2222 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002223 if (!it->parent_stack_used) {
2224 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002225 PyErr_SetNone(PyExc_StopIteration);
2226 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002227 }
2228
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002229 elem = it->root_element; /* steals a reference */
2230 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002231 }
2232 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002233 /* See if there are children left to traverse in the current parent. If
2234 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002235 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002236 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2237 Py_ssize_t child_index = item->child_index;
2238 ElementObjectExtra *extra;
2239 elem = item->parent;
2240 extra = elem->extra;
2241 if (!extra || child_index >= extra->length) {
2242 it->parent_stack_used--;
2243 /* Note that extra condition on it->parent_stack_used here;
2244 * this is because itertext() is supposed to only return *inner*
2245 * text, not text following the element it began iteration with.
2246 */
2247 if (it->gettext && it->parent_stack_used) {
2248 text = element_get_tail(elem);
2249 goto gettext;
2250 }
2251 Py_DECREF(elem);
2252 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002253 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002254
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03002255 assert(Element_Check(extra->children[child_index]));
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002256 elem = (ElementObject *)extra->children[child_index];
2257 item->child_index++;
2258 Py_INCREF(elem);
2259 }
2260
2261 if (parent_stack_push_new(it, elem) < 0) {
2262 Py_DECREF(elem);
2263 PyErr_NoMemory();
2264 return NULL;
2265 }
2266 if (it->gettext) {
2267 text = element_get_text(elem);
2268 goto gettext;
2269 }
2270
2271 if (it->sought_tag == Py_None)
2272 return (PyObject *)elem;
2273
2274 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2275 if (rc > 0)
2276 return (PyObject *)elem;
2277
2278 Py_DECREF(elem);
2279 if (rc < 0)
2280 return NULL;
2281 continue;
2282
2283gettext:
2284 if (!text) {
2285 Py_DECREF(elem);
2286 return NULL;
2287 }
2288 if (text == Py_None) {
2289 Py_DECREF(elem);
2290 }
2291 else {
2292 Py_INCREF(text);
2293 Py_DECREF(elem);
2294 rc = PyObject_IsTrue(text);
2295 if (rc > 0)
2296 return text;
2297 Py_DECREF(text);
2298 if (rc < 0)
2299 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002300 }
2301 }
2302
2303 return NULL;
2304}
2305
2306
2307static PyTypeObject ElementIter_Type = {
2308 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002309 /* Using the module's name since the pure-Python implementation does not
2310 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002311 "_elementtree._element_iterator", /* tp_name */
2312 sizeof(ElementIterObject), /* tp_basicsize */
2313 0, /* tp_itemsize */
2314 /* methods */
2315 (destructor)elementiter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002316 0, /* tp_vectorcall_offset */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002317 0, /* tp_getattr */
2318 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002319 0, /* tp_as_async */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002320 0, /* tp_repr */
2321 0, /* tp_as_number */
2322 0, /* tp_as_sequence */
2323 0, /* tp_as_mapping */
2324 0, /* tp_hash */
2325 0, /* tp_call */
2326 0, /* tp_str */
2327 0, /* tp_getattro */
2328 0, /* tp_setattro */
2329 0, /* tp_as_buffer */
2330 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2331 0, /* tp_doc */
2332 (traverseproc)elementiter_traverse, /* tp_traverse */
2333 0, /* tp_clear */
2334 0, /* tp_richcompare */
2335 0, /* tp_weaklistoffset */
2336 PyObject_SelfIter, /* tp_iter */
2337 (iternextfunc)elementiter_next, /* tp_iternext */
2338 0, /* tp_methods */
2339 0, /* tp_members */
2340 0, /* tp_getset */
2341 0, /* tp_base */
2342 0, /* tp_dict */
2343 0, /* tp_descr_get */
2344 0, /* tp_descr_set */
2345 0, /* tp_dictoffset */
2346 0, /* tp_init */
2347 0, /* tp_alloc */
2348 0, /* tp_new */
2349};
2350
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002351#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002352
2353static PyObject *
2354create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2355{
2356 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002357
2358 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2359 if (!it)
2360 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002361
Victor Stinner4d463432013-07-11 23:05:03 +02002362 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002363 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002364 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002365 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002366 it->root_element = self;
2367
Eli Bendersky64d11e62012-06-15 07:42:50 +03002368 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002369
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002370 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002371 if (it->parent_stack == NULL) {
2372 Py_DECREF(it);
2373 PyErr_NoMemory();
2374 return NULL;
2375 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002376 it->parent_stack_used = 0;
2377 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002378
Eli Bendersky64d11e62012-06-15 07:42:50 +03002379 return (PyObject *)it;
2380}
2381
2382
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002383/* ==================================================================== */
2384/* the tree builder type */
2385
2386typedef struct {
2387 PyObject_HEAD
2388
Eli Bendersky58d548d2012-05-29 15:45:16 +03002389 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002390
Antoine Pitrouee329312012-10-04 19:53:29 +02002391 PyObject *this; /* current node */
2392 PyObject *last; /* most recently created node */
Stefan Behnelbb697892019-07-24 20:46:01 +02002393 PyObject *last_for_tail; /* most recently created node that takes a tail */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002394
Eli Bendersky58d548d2012-05-29 15:45:16 +03002395 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002396
Eli Bendersky58d548d2012-05-29 15:45:16 +03002397 PyObject *stack; /* element stack */
2398 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002399
Eli Bendersky48d358b2012-05-30 17:57:50 +03002400 PyObject *element_factory;
Stefan Behnel43851a22019-05-01 21:20:38 +02002401 PyObject *comment_factory;
2402 PyObject *pi_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002403
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002404 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002405 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002406 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2407 PyObject *end_event_obj;
2408 PyObject *start_ns_event_obj;
2409 PyObject *end_ns_event_obj;
Stefan Behnel43851a22019-05-01 21:20:38 +02002410 PyObject *comment_event_obj;
2411 PyObject *pi_event_obj;
2412
2413 char insert_comments;
2414 char insert_pis;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002415} TreeBuilderObject;
2416
Christian Heimes90aa7642007-12-19 02:45:37 +00002417#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002418
2419/* -------------------------------------------------------------------- */
2420/* constructor and destructor */
2421
Eli Bendersky58d548d2012-05-29 15:45:16 +03002422static PyObject *
2423treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002424{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002425 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2426 if (t != NULL) {
2427 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002428
Eli Bendersky58d548d2012-05-29 15:45:16 +03002429 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002430 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002431 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002432 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002433
Eli Bendersky58d548d2012-05-29 15:45:16 +03002434 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002435 t->element_factory = NULL;
Stefan Behnel43851a22019-05-01 21:20:38 +02002436 t->comment_factory = NULL;
2437 t->pi_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002438 t->stack = PyList_New(20);
2439 if (!t->stack) {
2440 Py_DECREF(t->this);
2441 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002442 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002443 return NULL;
2444 }
2445 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002446
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002447 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002448 t->start_event_obj = t->end_event_obj = NULL;
2449 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
Stefan Behnel43851a22019-05-01 21:20:38 +02002450 t->comment_event_obj = t->pi_event_obj = NULL;
2451 t->insert_comments = t->insert_pis = 0;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002452 }
2453 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002454}
2455
Serhiy Storchakacb985562015-05-04 15:32:48 +03002456/*[clinic input]
2457_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002458
Serhiy Storchakad322abb2019-09-14 13:31:50 +03002459 element_factory: object = None
Stefan Behnel43851a22019-05-01 21:20:38 +02002460 *
Serhiy Storchakad322abb2019-09-14 13:31:50 +03002461 comment_factory: object = None
2462 pi_factory: object = None
Stefan Behnel43851a22019-05-01 21:20:38 +02002463 insert_comments: bool = False
2464 insert_pis: bool = False
Serhiy Storchakacb985562015-05-04 15:32:48 +03002465
2466[clinic start generated code]*/
2467
2468static int
2469_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
Stefan Behnel43851a22019-05-01 21:20:38 +02002470 PyObject *element_factory,
2471 PyObject *comment_factory,
2472 PyObject *pi_factory,
2473 int insert_comments, int insert_pis)
Serhiy Storchakad322abb2019-09-14 13:31:50 +03002474/*[clinic end generated code: output=8571d4dcadfdf952 input=ae98a94df20b5cc3]*/
Serhiy Storchakacb985562015-05-04 15:32:48 +03002475{
Serhiy Storchakad322abb2019-09-14 13:31:50 +03002476 if (element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002477 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002478 Py_XSETREF(self->element_factory, element_factory);
Stefan Behnel43851a22019-05-01 21:20:38 +02002479 } else {
2480 Py_CLEAR(self->element_factory);
2481 }
2482
Serhiy Storchakad322abb2019-09-14 13:31:50 +03002483 if (comment_factory == Py_None) {
Stefan Behnel43851a22019-05-01 21:20:38 +02002484 elementtreestate *st = ET_STATE_GLOBAL;
2485 comment_factory = st->comment_factory;
2486 }
2487 if (comment_factory) {
2488 Py_INCREF(comment_factory);
2489 Py_XSETREF(self->comment_factory, comment_factory);
2490 self->insert_comments = insert_comments;
2491 } else {
2492 Py_CLEAR(self->comment_factory);
2493 self->insert_comments = 0;
2494 }
2495
Serhiy Storchakad322abb2019-09-14 13:31:50 +03002496 if (pi_factory == Py_None) {
Stefan Behnel43851a22019-05-01 21:20:38 +02002497 elementtreestate *st = ET_STATE_GLOBAL;
2498 pi_factory = st->pi_factory;
2499 }
2500 if (pi_factory) {
2501 Py_INCREF(pi_factory);
2502 Py_XSETREF(self->pi_factory, pi_factory);
2503 self->insert_pis = insert_pis;
2504 } else {
2505 Py_CLEAR(self->pi_factory);
2506 self->insert_pis = 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002507 }
2508
Eli Bendersky58d548d2012-05-29 15:45:16 +03002509 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002510}
2511
Eli Bendersky48d358b2012-05-30 17:57:50 +03002512static int
2513treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2514{
Stefan Behnel43851a22019-05-01 21:20:38 +02002515 Py_VISIT(self->pi_event_obj);
2516 Py_VISIT(self->comment_event_obj);
Serhiy Storchakad2a75c62018-12-18 22:29:14 +02002517 Py_VISIT(self->end_ns_event_obj);
2518 Py_VISIT(self->start_ns_event_obj);
2519 Py_VISIT(self->end_event_obj);
2520 Py_VISIT(self->start_event_obj);
2521 Py_VISIT(self->events_append);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002522 Py_VISIT(self->root);
2523 Py_VISIT(self->this);
2524 Py_VISIT(self->last);
Stefan Behnelbb697892019-07-24 20:46:01 +02002525 Py_VISIT(self->last_for_tail);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002526 Py_VISIT(self->data);
2527 Py_VISIT(self->stack);
Stefan Behnel43851a22019-05-01 21:20:38 +02002528 Py_VISIT(self->pi_factory);
2529 Py_VISIT(self->comment_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002530 Py_VISIT(self->element_factory);
2531 return 0;
2532}
2533
2534static int
2535treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002536{
Stefan Behnel43851a22019-05-01 21:20:38 +02002537 Py_CLEAR(self->pi_event_obj);
2538 Py_CLEAR(self->comment_event_obj);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002539 Py_CLEAR(self->end_ns_event_obj);
2540 Py_CLEAR(self->start_ns_event_obj);
2541 Py_CLEAR(self->end_event_obj);
2542 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002543 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002544 Py_CLEAR(self->stack);
2545 Py_CLEAR(self->data);
2546 Py_CLEAR(self->last);
Stefan Behnelbb697892019-07-24 20:46:01 +02002547 Py_CLEAR(self->last_for_tail);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002548 Py_CLEAR(self->this);
Stefan Behnel43851a22019-05-01 21:20:38 +02002549 Py_CLEAR(self->pi_factory);
2550 Py_CLEAR(self->comment_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002551 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002552 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002553 return 0;
2554}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002555
Eli Bendersky48d358b2012-05-30 17:57:50 +03002556static void
2557treebuilder_dealloc(TreeBuilderObject *self)
2558{
2559 PyObject_GC_UnTrack(self);
2560 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002561 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002562}
2563
2564/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002565/* helpers for handling of arbitrary element-like objects */
2566
Stefan Behnel43851a22019-05-01 21:20:38 +02002567/*[clinic input]
2568_elementtree._set_factories
2569
2570 comment_factory: object
2571 pi_factory: object
2572 /
2573
2574Change the factories used to create comments and processing instructions.
2575
2576For internal use only.
2577[clinic start generated code]*/
2578
2579static PyObject *
2580_elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
2581 PyObject *pi_factory)
2582/*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/
2583{
2584 elementtreestate *st = ET_STATE_GLOBAL;
2585 PyObject *old;
2586
2587 if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) {
2588 PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s",
2589 Py_TYPE(comment_factory)->tp_name);
2590 return NULL;
2591 }
2592 if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) {
2593 PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s",
2594 Py_TYPE(pi_factory)->tp_name);
2595 return NULL;
2596 }
2597
2598 old = PyTuple_Pack(2,
2599 st->comment_factory ? st->comment_factory : Py_None,
2600 st->pi_factory ? st->pi_factory : Py_None);
2601
2602 if (comment_factory == Py_None) {
2603 Py_CLEAR(st->comment_factory);
2604 } else {
2605 Py_INCREF(comment_factory);
2606 Py_XSETREF(st->comment_factory, comment_factory);
2607 }
2608 if (pi_factory == Py_None) {
2609 Py_CLEAR(st->pi_factory);
2610 } else {
2611 Py_INCREF(pi_factory);
2612 Py_XSETREF(st->pi_factory, pi_factory);
2613 }
2614
2615 return old;
2616}
2617
Antoine Pitrouee329312012-10-04 19:53:29 +02002618static int
Stefan Behnelbb697892019-07-24 20:46:01 +02002619treebuilder_extend_element_text_or_tail(PyObject *element, PyObject **data,
2620 PyObject **dest, _Py_Identifier *name)
Antoine Pitrouee329312012-10-04 19:53:29 +02002621{
Stefan Behnelbb697892019-07-24 20:46:01 +02002622 /* Fast paths for the "almost always" cases. */
Antoine Pitrouee329312012-10-04 19:53:29 +02002623 if (Element_CheckExact(element)) {
Stefan Behnelbb697892019-07-24 20:46:01 +02002624 PyObject *dest_obj = JOIN_OBJ(*dest);
2625 if (dest_obj == Py_None) {
2626 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2627 *data = NULL;
2628 Py_DECREF(dest_obj);
2629 return 0;
2630 }
2631 else if (JOIN_GET(*dest)) {
2632 if (PyList_SetSlice(dest_obj, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, *data) < 0) {
2633 return -1;
2634 }
2635 Py_CLEAR(*data);
2636 return 0;
2637 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002638 }
Stefan Behnelbb697892019-07-24 20:46:01 +02002639
2640 /* Fallback for the non-Element / non-trivial cases. */
2641 {
Antoine Pitrouee329312012-10-04 19:53:29 +02002642 int r;
Stefan Behnelbb697892019-07-24 20:46:01 +02002643 PyObject* joined;
2644 PyObject* previous = _PyObject_GetAttrId(element, name);
2645 if (!previous)
Antoine Pitrouee329312012-10-04 19:53:29 +02002646 return -1;
Stefan Behnelbb697892019-07-24 20:46:01 +02002647 joined = list_join(*data);
2648 if (!joined) {
2649 Py_DECREF(previous);
2650 return -1;
2651 }
2652 if (previous != Py_None) {
2653 PyObject *tmp = PyNumber_Add(previous, joined);
2654 Py_DECREF(joined);
2655 Py_DECREF(previous);
2656 if (!tmp)
2657 return -1;
2658 joined = tmp;
2659 } else {
2660 Py_DECREF(previous);
2661 }
2662
Antoine Pitrouee329312012-10-04 19:53:29 +02002663 r = _PyObject_SetAttrId(element, name, joined);
2664 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002665 if (r < 0)
2666 return -1;
2667 Py_CLEAR(*data);
2668 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002669 }
2670}
2671
Serhiy Storchaka576def02017-03-30 09:47:31 +03002672LOCAL(int)
2673treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002674{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002675 if (!self->data) {
2676 return 0;
2677 }
2678
Stefan Behnelbb697892019-07-24 20:46:01 +02002679 if (!self->last_for_tail) {
2680 PyObject *element = self->last;
Serhiy Storchaka576def02017-03-30 09:47:31 +03002681 _Py_IDENTIFIER(text);
Stefan Behnelbb697892019-07-24 20:46:01 +02002682 return treebuilder_extend_element_text_or_tail(
Serhiy Storchaka576def02017-03-30 09:47:31 +03002683 element, &self->data,
2684 &((ElementObject *) element)->text, &PyId_text);
2685 }
2686 else {
Stefan Behnelbb697892019-07-24 20:46:01 +02002687 PyObject *element = self->last_for_tail;
Serhiy Storchaka576def02017-03-30 09:47:31 +03002688 _Py_IDENTIFIER(tail);
Stefan Behnelbb697892019-07-24 20:46:01 +02002689 return treebuilder_extend_element_text_or_tail(
Serhiy Storchaka576def02017-03-30 09:47:31 +03002690 element, &self->data,
2691 &((ElementObject *) element)->tail, &PyId_tail);
2692 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002693}
2694
2695static int
2696treebuilder_add_subelement(PyObject *element, PyObject *child)
2697{
2698 _Py_IDENTIFIER(append);
2699 if (Element_CheckExact(element)) {
2700 ElementObject *elem = (ElementObject *) element;
2701 return element_add_subelement(elem, child);
2702 }
2703 else {
2704 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002705 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002706 if (res == NULL)
2707 return -1;
2708 Py_DECREF(res);
2709 return 0;
2710 }
2711}
2712
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002713LOCAL(int)
2714treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2715 PyObject *node)
2716{
2717 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002718 PyObject *res;
2719 PyObject *event = PyTuple_Pack(2, action, node);
2720 if (event == NULL)
2721 return -1;
Stefan Behnel43851a22019-05-01 21:20:38 +02002722 res = _PyObject_FastCall(self->events_append, &event, 1);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002723 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002724 if (res == NULL)
2725 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002726 Py_DECREF(res);
2727 }
2728 return 0;
2729}
2730
Antoine Pitrouee329312012-10-04 19:53:29 +02002731/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002732/* handlers */
2733
2734LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002735treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2736 PyObject* attrib)
2737{
2738 PyObject* node;
2739 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002740 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002741
Serhiy Storchaka576def02017-03-30 09:47:31 +03002742 if (treebuilder_flush_data(self) < 0) {
2743 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002744 }
2745
Stefan Behnel43851a22019-05-01 21:20:38 +02002746 if (!self->element_factory) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002747 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002748 } else if (attrib == Py_None) {
2749 attrib = PyDict_New();
2750 if (!attrib)
2751 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002752 node = PyObject_CallFunctionObjArgs(self->element_factory,
2753 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002754 Py_DECREF(attrib);
2755 }
2756 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002757 node = PyObject_CallFunctionObjArgs(self->element_factory,
2758 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002759 }
2760 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002761 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002762 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002763
Antoine Pitrouee329312012-10-04 19:53:29 +02002764 this = self->this;
Stefan Behnelbb697892019-07-24 20:46:01 +02002765 Py_CLEAR(self->last_for_tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002766
2767 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002768 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002769 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002770 } else {
2771 if (self->root) {
2772 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002773 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002774 "multiple elements on top level"
2775 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002776 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002777 }
2778 Py_INCREF(node);
2779 self->root = node;
2780 }
2781
2782 if (self->index < PyList_GET_SIZE(self->stack)) {
2783 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002784 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002785 Py_INCREF(this);
2786 } else {
2787 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002788 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002789 }
2790 self->index++;
2791
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002792 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002793 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002794 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002795 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002796
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002797 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2798 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002799
2800 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002801
2802 error:
2803 Py_DECREF(node);
2804 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002805}
2806
2807LOCAL(PyObject*)
2808treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2809{
2810 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002811 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002812 /* ignore calls to data before the first call to start */
2813 Py_RETURN_NONE;
2814 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002815 /* store the first item as is */
2816 Py_INCREF(data); self->data = data;
2817 } else {
2818 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002819 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2820 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002821 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002822 /* expat often generates single character data sections; handle
2823 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002824 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2825 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002826 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002827 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002828 } else if (PyList_CheckExact(self->data)) {
2829 if (PyList_Append(self->data, data) < 0)
2830 return NULL;
2831 } else {
2832 PyObject* list = PyList_New(2);
2833 if (!list)
2834 return NULL;
2835 PyList_SET_ITEM(list, 0, self->data);
2836 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2837 self->data = list;
2838 }
2839 }
2840
2841 Py_RETURN_NONE;
2842}
2843
2844LOCAL(PyObject*)
2845treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2846{
2847 PyObject* item;
2848
Serhiy Storchaka576def02017-03-30 09:47:31 +03002849 if (treebuilder_flush_data(self) < 0) {
2850 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002851 }
2852
2853 if (self->index == 0) {
2854 PyErr_SetString(
2855 PyExc_IndexError,
2856 "pop from empty stack"
2857 );
2858 return NULL;
2859 }
2860
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002861 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002862 self->last = self->this;
Stefan Behnelbb697892019-07-24 20:46:01 +02002863 Py_INCREF(self->last);
2864 Py_XSETREF(self->last_for_tail, self->last);
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002865 self->index--;
2866 self->this = PyList_GET_ITEM(self->stack, self->index);
2867 Py_INCREF(self->this);
2868 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002869
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002870 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2871 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002872
2873 Py_INCREF(self->last);
2874 return (PyObject*) self->last;
2875}
2876
Stefan Behnel43851a22019-05-01 21:20:38 +02002877LOCAL(PyObject*)
2878treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
2879{
Stefan Behnelbb697892019-07-24 20:46:01 +02002880 PyObject* comment;
Stefan Behnel43851a22019-05-01 21:20:38 +02002881 PyObject* this;
2882
2883 if (treebuilder_flush_data(self) < 0) {
2884 return NULL;
2885 }
2886
2887 if (self->comment_factory) {
2888 comment = _PyObject_FastCall(self->comment_factory, &text, 1);
2889 if (!comment)
2890 return NULL;
2891
2892 this = self->this;
2893 if (self->insert_comments && this != Py_None) {
2894 if (treebuilder_add_subelement(this, comment) < 0)
2895 goto error;
Stefan Behnelbb697892019-07-24 20:46:01 +02002896 Py_INCREF(comment);
2897 Py_XSETREF(self->last_for_tail, comment);
Stefan Behnel43851a22019-05-01 21:20:38 +02002898 }
2899 } else {
2900 Py_INCREF(text);
2901 comment = text;
2902 }
2903
2904 if (self->events_append && self->comment_event_obj) {
2905 if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0)
2906 goto error;
2907 }
2908
2909 return comment;
2910
2911 error:
2912 Py_DECREF(comment);
2913 return NULL;
2914}
2915
2916LOCAL(PyObject*)
2917treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
2918{
Stefan Behnelbb697892019-07-24 20:46:01 +02002919 PyObject* pi;
Stefan Behnel43851a22019-05-01 21:20:38 +02002920 PyObject* this;
2921 PyObject* stack[2] = {target, text};
2922
2923 if (treebuilder_flush_data(self) < 0) {
2924 return NULL;
2925 }
2926
2927 if (self->pi_factory) {
2928 pi = _PyObject_FastCall(self->pi_factory, stack, 2);
2929 if (!pi) {
2930 return NULL;
2931 }
2932
2933 this = self->this;
2934 if (self->insert_pis && this != Py_None) {
2935 if (treebuilder_add_subelement(this, pi) < 0)
2936 goto error;
Stefan Behnelbb697892019-07-24 20:46:01 +02002937 Py_INCREF(pi);
2938 Py_XSETREF(self->last_for_tail, pi);
Stefan Behnel43851a22019-05-01 21:20:38 +02002939 }
2940 } else {
2941 pi = PyTuple_Pack(2, target, text);
2942 if (!pi) {
2943 return NULL;
2944 }
2945 }
2946
2947 if (self->events_append && self->pi_event_obj) {
2948 if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0)
2949 goto error;
2950 }
2951
2952 return pi;
2953
2954 error:
2955 Py_DECREF(pi);
2956 return NULL;
2957}
2958
Stefan Behneldde3eeb2019-05-01 21:49:58 +02002959LOCAL(PyObject*)
2960treebuilder_handle_start_ns(TreeBuilderObject* self, PyObject* prefix, PyObject* uri)
2961{
2962 PyObject* parcel;
2963
2964 if (self->events_append && self->start_ns_event_obj) {
2965 parcel = PyTuple_Pack(2, prefix, uri);
2966 if (!parcel) {
2967 return NULL;
2968 }
2969
2970 if (treebuilder_append_event(self, self->start_ns_event_obj, parcel) < 0) {
2971 Py_DECREF(parcel);
2972 return NULL;
2973 }
2974 Py_DECREF(parcel);
2975 }
2976
2977 Py_RETURN_NONE;
2978}
2979
2980LOCAL(PyObject*)
2981treebuilder_handle_end_ns(TreeBuilderObject* self, PyObject* prefix)
2982{
2983 if (self->events_append && self->end_ns_event_obj) {
2984 if (treebuilder_append_event(self, self->end_ns_event_obj, prefix) < 0) {
2985 return NULL;
2986 }
2987 }
2988
2989 Py_RETURN_NONE;
2990}
2991
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002992/* -------------------------------------------------------------------- */
2993/* methods (in alphabetical order) */
2994
Serhiy Storchakacb985562015-05-04 15:32:48 +03002995/*[clinic input]
2996_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002997
Serhiy Storchakacb985562015-05-04 15:32:48 +03002998 data: object
2999 /
3000
3001[clinic start generated code]*/
3002
3003static PyObject *
3004_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
3005/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
3006{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003007 return treebuilder_handle_data(self, data);
3008}
3009
Serhiy Storchakacb985562015-05-04 15:32:48 +03003010/*[clinic input]
3011_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003012
Serhiy Storchakacb985562015-05-04 15:32:48 +03003013 tag: object
3014 /
3015
3016[clinic start generated code]*/
3017
3018static PyObject *
3019_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
3020/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
3021{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003022 return treebuilder_handle_end(self, tag);
3023}
3024
Stefan Behnel43851a22019-05-01 21:20:38 +02003025/*[clinic input]
3026_elementtree.TreeBuilder.comment
3027
3028 text: object
3029 /
3030
3031[clinic start generated code]*/
3032
3033static PyObject *
3034_elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text)
3035/*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/
3036{
3037 return treebuilder_handle_comment(self, text);
3038}
3039
3040/*[clinic input]
3041_elementtree.TreeBuilder.pi
3042
3043 target: object
3044 text: object = None
3045 /
3046
3047[clinic start generated code]*/
3048
3049static PyObject *
3050_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target,
3051 PyObject *text)
3052/*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/
3053{
3054 return treebuilder_handle_pi(self, target, text);
3055}
3056
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003057LOCAL(PyObject*)
3058treebuilder_done(TreeBuilderObject* self)
3059{
3060 PyObject* res;
3061
3062 /* FIXME: check stack size? */
3063
3064 if (self->root)
3065 res = self->root;
3066 else
3067 res = Py_None;
3068
3069 Py_INCREF(res);
3070 return res;
3071}
3072
Serhiy Storchakacb985562015-05-04 15:32:48 +03003073/*[clinic input]
3074_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003075
Serhiy Storchakacb985562015-05-04 15:32:48 +03003076[clinic start generated code]*/
3077
3078static PyObject *
3079_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
3080/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
3081{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003082 return treebuilder_done(self);
3083}
3084
Serhiy Storchakacb985562015-05-04 15:32:48 +03003085/*[clinic input]
3086_elementtree.TreeBuilder.start
3087
3088 tag: object
3089 attrs: object = None
3090 /
3091
3092[clinic start generated code]*/
3093
3094static PyObject *
3095_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
3096 PyObject *attrs)
3097/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003098{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003099 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003100}
3101
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003102/* ==================================================================== */
3103/* the expat interface */
3104
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003105#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003106#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07003107
3108/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
3109 * cached globally without being in per-module state.
3110 */
Eli Bendersky20d41742012-06-01 09:48:37 +03003111static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003112#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003113
Eli Bendersky52467b12012-06-01 07:13:08 +03003114static XML_Memory_Handling_Suite ExpatMemoryHandler = {
3115 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
3116
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003117typedef struct {
3118 PyObject_HEAD
3119
3120 XML_Parser parser;
3121
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003122 PyObject *target;
3123 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003124
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003125 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003126
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003127 PyObject *handle_start_ns;
3128 PyObject *handle_end_ns;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003129 PyObject *handle_start;
3130 PyObject *handle_data;
3131 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003132
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003133 PyObject *handle_comment;
3134 PyObject *handle_pi;
3135 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003136
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003137 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003138
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003139} XMLParserObject;
3140
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003141/* helpers */
3142
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003143LOCAL(PyObject*)
3144makeuniversal(XMLParserObject* self, const char* string)
3145{
3146 /* convert a UTF-8 tag/attribute name from the expat parser
3147 to a universal name string */
3148
Antoine Pitrouc1948842012-10-01 23:40:37 +02003149 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003150 PyObject* key;
3151 PyObject* value;
3152
3153 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00003154 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003155 if (!key)
3156 return NULL;
3157
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003158 value = PyDict_GetItemWithError(self->names, key);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003159
3160 if (value) {
3161 Py_INCREF(value);
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003162 }
3163 else if (!PyErr_Occurred()) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003164 /* new name. convert to universal name, and decode as
3165 necessary */
3166
3167 PyObject* tag;
3168 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02003169 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003170
3171 /* look for namespace separator */
3172 for (i = 0; i < size; i++)
3173 if (string[i] == '}')
3174 break;
3175 if (i != size) {
3176 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00003177 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02003178 if (tag == NULL) {
3179 Py_DECREF(key);
3180 return NULL;
3181 }
Christian Heimes72b710a2008-05-26 13:28:38 +00003182 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003183 p[0] = '{';
3184 memcpy(p+1, string, size);
3185 size++;
3186 } else {
3187 /* plain name; use key as tag */
3188 Py_INCREF(key);
3189 tag = key;
3190 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003191
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003192 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00003193 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00003194 value = PyUnicode_DecodeUTF8(p, size, "strict");
3195 Py_DECREF(tag);
3196 if (!value) {
3197 Py_DECREF(key);
3198 return NULL;
3199 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003200
3201 /* add to names dictionary */
3202 if (PyDict_SetItem(self->names, key, value) < 0) {
3203 Py_DECREF(key);
3204 Py_DECREF(value);
3205 return NULL;
3206 }
3207 }
3208
3209 Py_DECREF(key);
3210 return value;
3211}
3212
Eli Bendersky5b77d812012-03-16 08:20:05 +02003213/* Set the ParseError exception with the given parameters.
3214 * If message is not NULL, it's used as the error string. Otherwise, the
3215 * message string is the default for the given error_code.
3216*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003217static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003218expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
3219 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003220{
Eli Bendersky5b77d812012-03-16 08:20:05 +02003221 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003222 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003223
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003224 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02003225 message ? message : EXPAT(ErrorString)(error_code),
3226 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01003227 if (errmsg == NULL)
3228 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003229
Stefan Behnel43851a22019-05-01 21:20:38 +02003230 error = _PyObject_FastCall(st->parseerror_obj, &errmsg, 1);
Victor Stinner499dfcf2011-03-21 13:26:24 +01003231 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003232 if (!error)
3233 return;
3234
Eli Bendersky5b77d812012-03-16 08:20:05 +02003235 /* Add code and position attributes */
3236 code = PyLong_FromLong((long)error_code);
3237 if (!code) {
3238 Py_DECREF(error);
3239 return;
3240 }
3241 if (PyObject_SetAttrString(error, "code", code) == -1) {
3242 Py_DECREF(error);
3243 Py_DECREF(code);
3244 return;
3245 }
3246 Py_DECREF(code);
3247
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003248 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003249 if (!position) {
3250 Py_DECREF(error);
3251 return;
3252 }
3253 if (PyObject_SetAttrString(error, "position", position) == -1) {
3254 Py_DECREF(error);
3255 Py_DECREF(position);
3256 return;
3257 }
3258 Py_DECREF(position);
3259
Eli Bendersky532d03e2013-08-10 08:00:39 -07003260 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003261 Py_DECREF(error);
3262}
3263
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003264/* -------------------------------------------------------------------- */
3265/* handlers */
3266
3267static void
3268expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
3269 int data_len)
3270{
3271 PyObject* key;
3272 PyObject* value;
3273 PyObject* res;
3274
3275 if (data_len < 2 || data_in[0] != '&')
3276 return;
3277
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003278 if (PyErr_Occurred())
3279 return;
3280
Neal Norwitz0269b912007-08-08 06:56:02 +00003281 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003282 if (!key)
3283 return;
3284
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003285 value = PyDict_GetItemWithError(self->entity, key);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003286
3287 if (value) {
3288 if (TreeBuilder_CheckExact(self->target))
3289 res = treebuilder_handle_data(
3290 (TreeBuilderObject*) self->target, value
3291 );
3292 else if (self->handle_data)
Stefan Behnel43851a22019-05-01 21:20:38 +02003293 res = _PyObject_FastCall(self->handle_data, &value, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003294 else
3295 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003296 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003297 } else if (!PyErr_Occurred()) {
3298 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00003299 char message[128] = "undefined entity ";
3300 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003301 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003302 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003303 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003304 EXPAT(GetErrorColumnNumber)(self->parser),
3305 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003306 );
3307 }
3308
3309 Py_DECREF(key);
3310}
3311
3312static void
3313expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
3314 const XML_Char **attrib_in)
3315{
3316 PyObject* res;
3317 PyObject* tag;
3318 PyObject* attrib;
3319 int ok;
3320
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003321 if (PyErr_Occurred())
3322 return;
3323
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003324 /* tag name */
3325 tag = makeuniversal(self, tag_in);
3326 if (!tag)
3327 return; /* parser will look for errors */
3328
3329 /* attributes */
3330 if (attrib_in[0]) {
3331 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003332 if (!attrib) {
3333 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003334 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003335 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003336 while (attrib_in[0] && attrib_in[1]) {
3337 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00003338 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003339 if (!key || !value) {
3340 Py_XDECREF(value);
3341 Py_XDECREF(key);
3342 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003343 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003344 return;
3345 }
3346 ok = PyDict_SetItem(attrib, key, value);
3347 Py_DECREF(value);
3348 Py_DECREF(key);
3349 if (ok < 0) {
3350 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003351 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003352 return;
3353 }
3354 attrib_in += 2;
3355 }
3356 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003357 Py_INCREF(Py_None);
3358 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03003359 }
3360
3361 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003362 /* shortcut */
3363 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3364 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003365 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003366 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003367 if (attrib == Py_None) {
3368 Py_DECREF(attrib);
3369 attrib = PyDict_New();
3370 if (!attrib) {
3371 Py_DECREF(tag);
3372 return;
3373 }
3374 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003375 res = PyObject_CallFunctionObjArgs(self->handle_start,
3376 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003377 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003378 res = NULL;
3379
3380 Py_DECREF(tag);
3381 Py_DECREF(attrib);
3382
3383 Py_XDECREF(res);
3384}
3385
3386static void
3387expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3388 int data_len)
3389{
3390 PyObject* data;
3391 PyObject* res;
3392
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003393 if (PyErr_Occurred())
3394 return;
3395
Neal Norwitz0269b912007-08-08 06:56:02 +00003396 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003397 if (!data)
3398 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003399
3400 if (TreeBuilder_CheckExact(self->target))
3401 /* shortcut */
3402 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3403 else if (self->handle_data)
Stefan Behnel43851a22019-05-01 21:20:38 +02003404 res = _PyObject_FastCall(self->handle_data, &data, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003405 else
3406 res = NULL;
3407
3408 Py_DECREF(data);
3409
3410 Py_XDECREF(res);
3411}
3412
3413static void
3414expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3415{
3416 PyObject* tag;
3417 PyObject* res = NULL;
3418
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003419 if (PyErr_Occurred())
3420 return;
3421
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003422 if (TreeBuilder_CheckExact(self->target))
3423 /* shortcut */
3424 /* the standard tree builder doesn't look at the end tag */
3425 res = treebuilder_handle_end(
3426 (TreeBuilderObject*) self->target, Py_None
3427 );
3428 else if (self->handle_end) {
3429 tag = makeuniversal(self, tag_in);
3430 if (tag) {
Stefan Behnel43851a22019-05-01 21:20:38 +02003431 res = _PyObject_FastCall(self->handle_end, &tag, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003432 Py_DECREF(tag);
3433 }
3434 }
3435
3436 Py_XDECREF(res);
3437}
3438
3439static void
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003440expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix_in,
3441 const XML_Char *uri_in)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003442{
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003443 PyObject* res = NULL;
3444 PyObject* uri;
3445 PyObject* prefix;
3446 PyObject* stack[2];
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003447
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003448 if (PyErr_Occurred())
3449 return;
3450
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003451 if (!uri_in)
3452 uri_in = "";
3453 if (!prefix_in)
3454 prefix_in = "";
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003455
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003456 if (TreeBuilder_CheckExact(self->target)) {
3457 /* shortcut - TreeBuilder does not actually implement .start_ns() */
3458 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003459
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003460 if (target->events_append && target->start_ns_event_obj) {
3461 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3462 if (!prefix)
3463 return;
3464 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3465 if (!uri) {
3466 Py_DECREF(prefix);
3467 return;
3468 }
3469
3470 res = treebuilder_handle_start_ns(target, prefix, uri);
3471 Py_DECREF(uri);
3472 Py_DECREF(prefix);
3473 }
3474 } else if (self->handle_start_ns) {
3475 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3476 if (!prefix)
3477 return;
3478 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3479 if (!uri) {
3480 Py_DECREF(prefix);
3481 return;
3482 }
3483
3484 stack[0] = prefix;
3485 stack[1] = uri;
3486 res = _PyObject_FastCall(self->handle_start_ns, stack, 2);
3487 Py_DECREF(uri);
3488 Py_DECREF(prefix);
3489 }
3490
3491 Py_XDECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003492}
3493
3494static void
3495expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3496{
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003497 PyObject *res = NULL;
3498 PyObject* prefix;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003499
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003500 if (PyErr_Occurred())
3501 return;
3502
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003503 if (!prefix_in)
3504 prefix_in = "";
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003505
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003506 if (TreeBuilder_CheckExact(self->target)) {
3507 /* shortcut - TreeBuilder does not actually implement .end_ns() */
3508 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3509
3510 if (target->events_append && target->end_ns_event_obj) {
3511 res = treebuilder_handle_end_ns(target, Py_None);
3512 }
3513 } else if (self->handle_end_ns) {
3514 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3515 if (!prefix)
3516 return;
3517
3518 res = _PyObject_FastCall(self->handle_end_ns, &prefix, 1);
3519 Py_DECREF(prefix);
3520 }
3521
3522 Py_XDECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003523}
3524
3525static void
3526expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3527{
Stefan Behnelbb697892019-07-24 20:46:01 +02003528 PyObject* comment;
3529 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003530
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003531 if (PyErr_Occurred())
3532 return;
3533
Stefan Behnel43851a22019-05-01 21:20:38 +02003534 if (TreeBuilder_CheckExact(self->target)) {
3535 /* shortcut */
3536 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3537
Neal Norwitz0269b912007-08-08 06:56:02 +00003538 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Stefan Behnel43851a22019-05-01 21:20:38 +02003539 if (!comment)
3540 return; /* parser will look for errors */
3541
3542 res = treebuilder_handle_comment(target, comment);
Stefan Behnelbb697892019-07-24 20:46:01 +02003543 Py_XDECREF(res);
3544 Py_DECREF(comment);
Stefan Behnel43851a22019-05-01 21:20:38 +02003545 } else if (self->handle_comment) {
3546 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3547 if (!comment)
3548 return;
3549
3550 res = _PyObject_FastCall(self->handle_comment, &comment, 1);
Stefan Behnelbb697892019-07-24 20:46:01 +02003551 Py_XDECREF(res);
3552 Py_DECREF(comment);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003553 }
3554}
3555
Eli Bendersky45839902013-01-13 05:14:47 -08003556static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003557expat_start_doctype_handler(XMLParserObject *self,
3558 const XML_Char *doctype_name,
3559 const XML_Char *sysid,
3560 const XML_Char *pubid,
3561 int has_internal_subset)
3562{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003563 _Py_IDENTIFIER(doctype);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003564 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003565 PyObject *res;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003566
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003567 if (PyErr_Occurred())
3568 return;
3569
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003570 doctype_name_obj = makeuniversal(self, doctype_name);
3571 if (!doctype_name_obj)
3572 return;
3573
3574 if (sysid) {
3575 sysid_obj = makeuniversal(self, sysid);
3576 if (!sysid_obj) {
3577 Py_DECREF(doctype_name_obj);
3578 return;
3579 }
3580 } else {
3581 Py_INCREF(Py_None);
3582 sysid_obj = Py_None;
3583 }
3584
3585 if (pubid) {
3586 pubid_obj = makeuniversal(self, pubid);
3587 if (!pubid_obj) {
3588 Py_DECREF(doctype_name_obj);
3589 Py_DECREF(sysid_obj);
3590 return;
3591 }
3592 } else {
3593 Py_INCREF(Py_None);
3594 pubid_obj = Py_None;
3595 }
3596
3597 /* If the target has a handler for doctype, call it. */
3598 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003599 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3600 doctype_name_obj, pubid_obj,
3601 sysid_obj, NULL);
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003602 Py_XDECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003603 }
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003604 else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3605 (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3606 "The doctype() method of XMLParser is ignored. "
3607 "Define doctype() method on the TreeBuilder target.",
3608 1);
Serhiy Storchakaee98e7b2018-07-25 14:52:45 +03003609 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003610 }
3611
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003612 Py_DECREF(doctype_name_obj);
3613 Py_DECREF(pubid_obj);
3614 Py_DECREF(sysid_obj);
3615}
3616
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003617static void
3618expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3619 const XML_Char* data_in)
3620{
Stefan Behnelbb697892019-07-24 20:46:01 +02003621 PyObject* pi_target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003622 PyObject* data;
3623 PyObject* res;
Stefan Behnel43851a22019-05-01 21:20:38 +02003624 PyObject* stack[2];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003625
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003626 if (PyErr_Occurred())
3627 return;
3628
Stefan Behnel43851a22019-05-01 21:20:38 +02003629 if (TreeBuilder_CheckExact(self->target)) {
3630 /* shortcut */
3631 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3632
Stefan Behnelbb697892019-07-24 20:46:01 +02003633 if ((target->events_append && target->pi_event_obj) || target->insert_pis) {
Stefan Behnel43851a22019-05-01 21:20:38 +02003634 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3635 if (!pi_target)
3636 goto error;
3637 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3638 if (!data)
3639 goto error;
3640 res = treebuilder_handle_pi(target, pi_target, data);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003641 Py_XDECREF(res);
3642 Py_DECREF(data);
Stefan Behnel43851a22019-05-01 21:20:38 +02003643 Py_DECREF(pi_target);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003644 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003645 } else if (self->handle_pi) {
3646 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3647 if (!pi_target)
3648 goto error;
3649 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3650 if (!data)
3651 goto error;
3652
3653 stack[0] = pi_target;
3654 stack[1] = data;
3655 res = _PyObject_FastCall(self->handle_pi, stack, 2);
3656 Py_XDECREF(res);
3657 Py_DECREF(data);
3658 Py_DECREF(pi_target);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003659 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003660
3661 return;
3662
3663 error:
3664 Py_XDECREF(pi_target);
3665 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003666}
3667
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003668/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003669
Eli Bendersky52467b12012-06-01 07:13:08 +03003670static PyObject *
3671xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003672{
Eli Bendersky52467b12012-06-01 07:13:08 +03003673 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3674 if (self) {
3675 self->parser = NULL;
3676 self->target = self->entity = self->names = NULL;
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003677 self->handle_start_ns = self->handle_end_ns = NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003678 self->handle_start = self->handle_data = self->handle_end = NULL;
3679 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003680 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003681 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003682 return (PyObject *)self;
3683}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003684
scoderc8d8e152017-09-14 22:00:03 +02003685static int
3686ignore_attribute_error(PyObject *value)
3687{
3688 if (value == NULL) {
3689 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3690 return -1;
3691 }
3692 PyErr_Clear();
3693 }
3694 return 0;
3695}
3696
Serhiy Storchakacb985562015-05-04 15:32:48 +03003697/*[clinic input]
3698_elementtree.XMLParser.__init__
3699
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003700 *
Serhiy Storchakacb985562015-05-04 15:32:48 +03003701 target: object = NULL
Serhiy Storchakad322abb2019-09-14 13:31:50 +03003702 encoding: str(accept={str, NoneType}) = None
Serhiy Storchakacb985562015-05-04 15:32:48 +03003703
3704[clinic start generated code]*/
3705
Eli Bendersky52467b12012-06-01 07:13:08 +03003706static int
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003707_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3708 const char *encoding)
Serhiy Storchakad322abb2019-09-14 13:31:50 +03003709/*[clinic end generated code: output=3ae45ec6cdf344e4 input=53e35a829ae043e8]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003710{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003711 self->entity = PyDict_New();
3712 if (!self->entity)
3713 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003714
Serhiy Storchakacb985562015-05-04 15:32:48 +03003715 self->names = PyDict_New();
3716 if (!self->names) {
3717 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003718 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003719 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003720
Serhiy Storchakacb985562015-05-04 15:32:48 +03003721 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3722 if (!self->parser) {
3723 Py_CLEAR(self->entity);
3724 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003725 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003726 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003727 }
Christian Heimescb5778f2018-09-18 14:38:58 +02003728 /* expat < 2.1.0 has no XML_SetHashSalt() */
3729 if (EXPAT(SetHashSalt) != NULL) {
3730 EXPAT(SetHashSalt)(self->parser,
3731 (unsigned long)_Py_HashSecret.expat.hashsalt);
3732 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003733
Eli Bendersky52467b12012-06-01 07:13:08 +03003734 if (target) {
3735 Py_INCREF(target);
3736 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003737 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003738 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003739 Py_CLEAR(self->entity);
3740 Py_CLEAR(self->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003741 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003742 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003743 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003744 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003745
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003746 self->handle_start_ns = PyObject_GetAttrString(target, "start_ns");
3747 if (ignore_attribute_error(self->handle_start_ns)) {
3748 return -1;
3749 }
3750 self->handle_end_ns = PyObject_GetAttrString(target, "end_ns");
3751 if (ignore_attribute_error(self->handle_end_ns)) {
3752 return -1;
3753 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003754 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003755 if (ignore_attribute_error(self->handle_start)) {
3756 return -1;
3757 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003758 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003759 if (ignore_attribute_error(self->handle_data)) {
3760 return -1;
3761 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003762 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003763 if (ignore_attribute_error(self->handle_end)) {
3764 return -1;
3765 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003766 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003767 if (ignore_attribute_error(self->handle_comment)) {
3768 return -1;
3769 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003770 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003771 if (ignore_attribute_error(self->handle_pi)) {
3772 return -1;
3773 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003774 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003775 if (ignore_attribute_error(self->handle_close)) {
3776 return -1;
3777 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003778 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003779 if (ignore_attribute_error(self->handle_doctype)) {
3780 return -1;
3781 }
Eli Bendersky45839902013-01-13 05:14:47 -08003782
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003783 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003784 EXPAT(SetUserData)(self->parser, self);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003785 if (self->handle_start_ns || self->handle_end_ns)
3786 EXPAT(SetNamespaceDeclHandler)(
3787 self->parser,
3788 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3789 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3790 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003791 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003792 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003793 (XML_StartElementHandler) expat_start_handler,
3794 (XML_EndElementHandler) expat_end_handler
3795 );
3796 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003797 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003798 (XML_DefaultHandler) expat_default_handler
3799 );
3800 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003801 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003802 (XML_CharacterDataHandler) expat_data_handler
3803 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003804 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003805 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003806 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003807 (XML_CommentHandler) expat_comment_handler
3808 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003809 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003810 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003811 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003812 (XML_ProcessingInstructionHandler) expat_pi_handler
3813 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003814 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003815 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003816 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3817 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003818 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003819 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003820 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003821 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003822
Eli Bendersky52467b12012-06-01 07:13:08 +03003823 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003824}
3825
Eli Bendersky52467b12012-06-01 07:13:08 +03003826static int
3827xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3828{
3829 Py_VISIT(self->handle_close);
3830 Py_VISIT(self->handle_pi);
3831 Py_VISIT(self->handle_comment);
3832 Py_VISIT(self->handle_end);
3833 Py_VISIT(self->handle_data);
3834 Py_VISIT(self->handle_start);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003835 Py_VISIT(self->handle_start_ns);
3836 Py_VISIT(self->handle_end_ns);
3837 Py_VISIT(self->handle_doctype);
Eli Bendersky52467b12012-06-01 07:13:08 +03003838
3839 Py_VISIT(self->target);
3840 Py_VISIT(self->entity);
3841 Py_VISIT(self->names);
3842
3843 return 0;
3844}
3845
3846static int
3847xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003848{
Victor Stinnere727d412017-09-18 05:29:37 -07003849 if (self->parser != NULL) {
3850 XML_Parser parser = self->parser;
3851 self->parser = NULL;
3852 EXPAT(ParserFree)(parser);
3853 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003854
Antoine Pitrouc1948842012-10-01 23:40:37 +02003855 Py_CLEAR(self->handle_close);
3856 Py_CLEAR(self->handle_pi);
3857 Py_CLEAR(self->handle_comment);
3858 Py_CLEAR(self->handle_end);
3859 Py_CLEAR(self->handle_data);
3860 Py_CLEAR(self->handle_start);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003861 Py_CLEAR(self->handle_start_ns);
3862 Py_CLEAR(self->handle_end_ns);
Antoine Pitrouc1948842012-10-01 23:40:37 +02003863 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003864
Antoine Pitrouc1948842012-10-01 23:40:37 +02003865 Py_CLEAR(self->target);
3866 Py_CLEAR(self->entity);
3867 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003868
Eli Bendersky52467b12012-06-01 07:13:08 +03003869 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003870}
3871
Eli Bendersky52467b12012-06-01 07:13:08 +03003872static void
3873xmlparser_dealloc(XMLParserObject* self)
3874{
3875 PyObject_GC_UnTrack(self);
3876 xmlparser_gc_clear(self);
3877 Py_TYPE(self)->tp_free((PyObject *)self);
3878}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003879
3880LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003881expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003882{
3883 int ok;
3884
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003885 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003886 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3887
3888 if (PyErr_Occurred())
3889 return NULL;
3890
3891 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003892 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003893 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003894 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003895 EXPAT(GetErrorColumnNumber)(self->parser),
3896 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003897 );
3898 return NULL;
3899 }
3900
3901 Py_RETURN_NONE;
3902}
3903
Serhiy Storchakacb985562015-05-04 15:32:48 +03003904/*[clinic input]
3905_elementtree.XMLParser.close
3906
3907[clinic start generated code]*/
3908
3909static PyObject *
3910_elementtree_XMLParser_close_impl(XMLParserObject *self)
3911/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003912{
3913 /* end feeding data to parser */
3914
3915 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003916 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003917 if (!res)
3918 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003919
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003920 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003921 Py_DECREF(res);
3922 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003923 }
3924 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003925 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003926 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003927 }
3928 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003929 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003930 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003931}
3932
Serhiy Storchakacb985562015-05-04 15:32:48 +03003933/*[clinic input]
3934_elementtree.XMLParser.feed
3935
3936 data: object
3937 /
3938
3939[clinic start generated code]*/
3940
3941static PyObject *
3942_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3943/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003944{
3945 /* feed data to parser */
3946
Serhiy Storchakacb985562015-05-04 15:32:48 +03003947 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003948 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003949 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3950 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003951 return NULL;
3952 if (data_len > INT_MAX) {
3953 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3954 return NULL;
3955 }
3956 /* Explicitly set UTF-8 encoding. Return code ignored. */
3957 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003958 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003959 }
3960 else {
3961 Py_buffer view;
3962 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003963 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003964 return NULL;
3965 if (view.len > INT_MAX) {
3966 PyBuffer_Release(&view);
3967 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3968 return NULL;
3969 }
3970 res = expat_parse(self, view.buf, (int)view.len, 0);
3971 PyBuffer_Release(&view);
3972 return res;
3973 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003974}
3975
Serhiy Storchakacb985562015-05-04 15:32:48 +03003976/*[clinic input]
3977_elementtree.XMLParser._parse_whole
3978
3979 file: object
3980 /
3981
3982[clinic start generated code]*/
3983
3984static PyObject *
3985_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3986/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003987{
Eli Benderskya3699232013-05-19 18:47:23 -07003988 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003989 PyObject* reader;
3990 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003991 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003992 PyObject* res;
3993
Serhiy Storchakacb985562015-05-04 15:32:48 +03003994 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003995 if (!reader)
3996 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003997
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003998 /* read from open file object */
3999 for (;;) {
4000
4001 buffer = PyObject_CallFunction(reader, "i", 64*1024);
4002
4003 if (!buffer) {
4004 /* read failed (e.g. due to KeyboardInterrupt) */
4005 Py_DECREF(reader);
4006 return NULL;
4007 }
4008
Eli Benderskyf996e772012-03-16 05:53:30 +02004009 if (PyUnicode_CheckExact(buffer)) {
4010 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01004011 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02004012 Py_DECREF(buffer);
4013 break;
4014 }
4015 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02004016 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02004017 if (!temp) {
4018 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02004019 Py_DECREF(reader);
4020 return NULL;
4021 }
Eli Benderskyf996e772012-03-16 05:53:30 +02004022 buffer = temp;
4023 }
4024 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004025 Py_DECREF(buffer);
4026 break;
4027 }
4028
Serhiy Storchaka26861b02015-02-16 20:52:17 +02004029 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
4030 Py_DECREF(buffer);
4031 Py_DECREF(reader);
4032 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
4033 return NULL;
4034 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004035 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02004036 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004037 );
4038
4039 Py_DECREF(buffer);
4040
4041 if (!res) {
4042 Py_DECREF(reader);
4043 return NULL;
4044 }
4045 Py_DECREF(res);
4046
4047 }
4048
4049 Py_DECREF(reader);
4050
4051 res = expat_parse(self, "", 0, 1);
4052
4053 if (res && TreeBuilder_CheckExact(self->target)) {
4054 Py_DECREF(res);
4055 return treebuilder_done((TreeBuilderObject*) self->target);
4056 }
4057
4058 return res;
4059}
4060
Serhiy Storchakacb985562015-05-04 15:32:48 +03004061/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03004062_elementtree.XMLParser._setevents
4063
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004064 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03004065 events_to_report: object = None
4066 /
4067
4068[clinic start generated code]*/
4069
4070static PyObject *
4071_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
4072 PyObject *events_queue,
4073 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004074/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004075{
4076 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004077 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004078 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004079 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004080
4081 if (!TreeBuilder_CheckExact(self->target)) {
4082 PyErr_SetString(
4083 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004084 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004085 "targets"
4086 );
4087 return NULL;
4088 }
4089
4090 target = (TreeBuilderObject*) self->target;
4091
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004092 events_append = PyObject_GetAttrString(events_queue, "append");
4093 if (events_append == NULL)
4094 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03004095 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004096
4097 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02004098 Py_CLEAR(target->start_event_obj);
4099 Py_CLEAR(target->end_event_obj);
4100 Py_CLEAR(target->start_ns_event_obj);
4101 Py_CLEAR(target->end_ns_event_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +02004102 Py_CLEAR(target->comment_event_obj);
4103 Py_CLEAR(target->pi_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004104
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004105 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004106 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004107 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004108 Py_RETURN_NONE;
4109 }
4110
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004111 if (!(events_seq = PySequence_Fast(events_to_report,
4112 "events must be a sequence"))) {
4113 return NULL;
4114 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004115
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03004116 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004117 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02004118 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004119 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004120 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004121 } else if (PyBytes_Check(event_name_obj)) {
4122 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004123 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004124 if (event_name == NULL) {
4125 Py_DECREF(events_seq);
4126 PyErr_Format(PyExc_ValueError, "invalid events sequence");
4127 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004128 }
4129
4130 Py_INCREF(event_name_obj);
4131 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004132 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004133 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004134 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004135 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004136 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004137 EXPAT(SetNamespaceDeclHandler)(
4138 self->parser,
4139 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4140 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4141 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004142 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004143 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004144 EXPAT(SetNamespaceDeclHandler)(
4145 self->parser,
4146 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4147 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4148 );
Stefan Behnel43851a22019-05-01 21:20:38 +02004149 } else if (strcmp(event_name, "comment") == 0) {
4150 Py_XSETREF(target->comment_event_obj, event_name_obj);
4151 EXPAT(SetCommentHandler)(
4152 self->parser,
4153 (XML_CommentHandler) expat_comment_handler
4154 );
4155 } else if (strcmp(event_name, "pi") == 0) {
4156 Py_XSETREF(target->pi_event_obj, event_name_obj);
4157 EXPAT(SetProcessingInstructionHandler)(
4158 self->parser,
4159 (XML_ProcessingInstructionHandler) expat_pi_handler
4160 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004161 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004162 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004163 Py_DECREF(events_seq);
4164 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004165 return NULL;
4166 }
4167 }
4168
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004169 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004170 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004171}
4172
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004173static PyMemberDef xmlparser_members[] = {
4174 {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
4175 {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
4176 {NULL}
4177};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004178
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004179static PyObject*
4180xmlparser_version_getter(XMLParserObject *self, void *closure)
4181{
4182 return PyUnicode_FromFormat(
4183 "Expat %d.%d.%d", XML_MAJOR_VERSION,
4184 XML_MINOR_VERSION, XML_MICRO_VERSION);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004185}
4186
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004187static PyGetSetDef xmlparser_getsetlist[] = {
4188 {"version", (getter)xmlparser_version_getter, NULL, NULL},
4189 {NULL},
4190};
4191
Serhiy Storchakacb985562015-05-04 15:32:48 +03004192#include "clinic/_elementtree.c.h"
4193
4194static PyMethodDef element_methods[] = {
4195
4196 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
4197
4198 _ELEMENTTREE_ELEMENT_GET_METHODDEF
4199 _ELEMENTTREE_ELEMENT_SET_METHODDEF
4200
4201 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
4202 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
4203 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
4204
4205 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
4206 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
4207 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
4208 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
4209
4210 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
4211 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
4212 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
4213
Serhiy Storchaka762ec972017-03-30 18:12:06 +03004214 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004215 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
4216
4217 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
4218 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
4219
4220 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
4221
4222 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
4223 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
4224 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
4225 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
4226 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
4227
4228 {NULL, NULL}
4229};
4230
4231static PyMappingMethods element_as_mapping = {
4232 (lenfunc) element_length,
4233 (binaryfunc) element_subscr,
4234 (objobjargproc) element_ass_subscr,
4235};
4236
Serhiy Storchakadde08152015-11-25 15:28:13 +02004237static PyGetSetDef element_getsetlist[] = {
4238 {"tag",
4239 (getter)element_tag_getter,
4240 (setter)element_tag_setter,
4241 "A string identifying what kind of data this element represents"},
4242 {"text",
4243 (getter)element_text_getter,
4244 (setter)element_text_setter,
4245 "A string of text directly after the start tag, or None"},
4246 {"tail",
4247 (getter)element_tail_getter,
4248 (setter)element_tail_setter,
4249 "A string of text directly after the end tag, or None"},
4250 {"attrib",
4251 (getter)element_attrib_getter,
4252 (setter)element_attrib_setter,
4253 "A dictionary containing the element's attributes"},
4254 {NULL},
4255};
4256
Serhiy Storchakacb985562015-05-04 15:32:48 +03004257static PyTypeObject Element_Type = {
4258 PyVarObject_HEAD_INIT(NULL, 0)
4259 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
4260 /* methods */
4261 (destructor)element_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004262 0, /* tp_vectorcall_offset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004263 0, /* tp_getattr */
4264 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004265 0, /* tp_as_async */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004266 (reprfunc)element_repr, /* tp_repr */
4267 0, /* tp_as_number */
4268 &element_as_sequence, /* tp_as_sequence */
4269 &element_as_mapping, /* tp_as_mapping */
4270 0, /* tp_hash */
4271 0, /* tp_call */
4272 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02004273 PyObject_GenericGetAttr, /* tp_getattro */
4274 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004275 0, /* tp_as_buffer */
4276 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4277 /* tp_flags */
4278 0, /* tp_doc */
4279 (traverseproc)element_gc_traverse, /* tp_traverse */
4280 (inquiry)element_gc_clear, /* tp_clear */
4281 0, /* tp_richcompare */
4282 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
4283 0, /* tp_iter */
4284 0, /* tp_iternext */
4285 element_methods, /* tp_methods */
4286 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02004287 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004288 0, /* tp_base */
4289 0, /* tp_dict */
4290 0, /* tp_descr_get */
4291 0, /* tp_descr_set */
4292 0, /* tp_dictoffset */
4293 (initproc)element_init, /* tp_init */
4294 PyType_GenericAlloc, /* tp_alloc */
4295 element_new, /* tp_new */
4296 0, /* tp_free */
4297};
4298
4299static PyMethodDef treebuilder_methods[] = {
4300 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
4301 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
4302 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
Stefan Behnel43851a22019-05-01 21:20:38 +02004303 _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF
4304 _ELEMENTTREE_TREEBUILDER_PI_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004305 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
4306 {NULL, NULL}
4307};
4308
4309static PyTypeObject TreeBuilder_Type = {
4310 PyVarObject_HEAD_INIT(NULL, 0)
4311 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
4312 /* methods */
4313 (destructor)treebuilder_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004314 0, /* tp_vectorcall_offset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004315 0, /* tp_getattr */
4316 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004317 0, /* tp_as_async */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004318 0, /* tp_repr */
4319 0, /* tp_as_number */
4320 0, /* tp_as_sequence */
4321 0, /* tp_as_mapping */
4322 0, /* tp_hash */
4323 0, /* tp_call */
4324 0, /* tp_str */
4325 0, /* tp_getattro */
4326 0, /* tp_setattro */
4327 0, /* tp_as_buffer */
4328 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4329 /* tp_flags */
4330 0, /* tp_doc */
4331 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
4332 (inquiry)treebuilder_gc_clear, /* tp_clear */
4333 0, /* tp_richcompare */
4334 0, /* tp_weaklistoffset */
4335 0, /* tp_iter */
4336 0, /* tp_iternext */
4337 treebuilder_methods, /* tp_methods */
4338 0, /* tp_members */
4339 0, /* tp_getset */
4340 0, /* tp_base */
4341 0, /* tp_dict */
4342 0, /* tp_descr_get */
4343 0, /* tp_descr_set */
4344 0, /* tp_dictoffset */
4345 _elementtree_TreeBuilder___init__, /* tp_init */
4346 PyType_GenericAlloc, /* tp_alloc */
4347 treebuilder_new, /* tp_new */
4348 0, /* tp_free */
4349};
4350
4351static PyMethodDef xmlparser_methods[] = {
4352 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
4353 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
4354 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
4355 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004356 {NULL, NULL}
4357};
4358
Neal Norwitz227b5332006-03-22 09:28:35 +00004359static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004360 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08004361 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004362 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03004363 (destructor)xmlparser_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004364 0, /* tp_vectorcall_offset */
Eli Bendersky52467b12012-06-01 07:13:08 +03004365 0, /* tp_getattr */
4366 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004367 0, /* tp_as_async */
Eli Bendersky52467b12012-06-01 07:13:08 +03004368 0, /* tp_repr */
4369 0, /* tp_as_number */
4370 0, /* tp_as_sequence */
4371 0, /* tp_as_mapping */
4372 0, /* tp_hash */
4373 0, /* tp_call */
4374 0, /* tp_str */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004375 0, /* tp_getattro */
Eli Bendersky52467b12012-06-01 07:13:08 +03004376 0, /* tp_setattro */
4377 0, /* tp_as_buffer */
4378 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4379 /* tp_flags */
4380 0, /* tp_doc */
4381 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
4382 (inquiry)xmlparser_gc_clear, /* tp_clear */
4383 0, /* tp_richcompare */
4384 0, /* tp_weaklistoffset */
4385 0, /* tp_iter */
4386 0, /* tp_iternext */
4387 xmlparser_methods, /* tp_methods */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004388 xmlparser_members, /* tp_members */
4389 xmlparser_getsetlist, /* tp_getset */
Eli Bendersky52467b12012-06-01 07:13:08 +03004390 0, /* tp_base */
4391 0, /* tp_dict */
4392 0, /* tp_descr_get */
4393 0, /* tp_descr_set */
4394 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004395 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03004396 PyType_GenericAlloc, /* tp_alloc */
4397 xmlparser_new, /* tp_new */
4398 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004399};
4400
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004401/* ==================================================================== */
4402/* python module interface */
4403
4404static PyMethodDef _functions[] = {
Serhiy Storchaka62be7422018-11-27 13:27:31 +02004405 {"SubElement", (PyCFunction)(void(*)(void)) subelement, METH_VARARGS | METH_KEYWORDS},
Stefan Behnel43851a22019-05-01 21:20:38 +02004406 _ELEMENTTREE__SET_FACTORIES_METHODDEF
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004407 {NULL, NULL}
4408};
4409
Martin v. Löwis1a214512008-06-11 05:26:20 +00004410
Eli Bendersky532d03e2013-08-10 08:00:39 -07004411static struct PyModuleDef elementtreemodule = {
4412 PyModuleDef_HEAD_INIT,
4413 "_elementtree",
4414 NULL,
4415 sizeof(elementtreestate),
4416 _functions,
4417 NULL,
4418 elementtree_traverse,
4419 elementtree_clear,
4420 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00004421};
4422
Neal Norwitzf6657e62006-12-28 04:47:50 +00004423PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00004424PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004425{
Eli Bendersky64d11e62012-06-15 07:42:50 +03004426 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004427 elementtreestate *st;
4428
4429 m = PyState_FindModule(&elementtreemodule);
4430 if (m) {
4431 Py_INCREF(m);
4432 return m;
4433 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004434
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004435 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02004436 if (PyType_Ready(&ElementIter_Type) < 0)
4437 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004438 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004439 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004440 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004441 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004442 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004443 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004444
Eli Bendersky532d03e2013-08-10 08:00:39 -07004445 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00004446 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00004447 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004448 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00004449
Eli Bendersky828efde2012-04-05 05:40:58 +03004450 if (!(temp = PyImport_ImportModule("copy")))
4451 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004452 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03004453 Py_XDECREF(temp);
4454
Victor Stinnerb136f112017-07-10 22:28:02 +02004455 if (st->deepcopy_obj == NULL) {
4456 return NULL;
4457 }
4458
4459 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004460 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004461 return NULL;
4462
Eli Bendersky20d41742012-06-01 09:48:37 +03004463 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004464 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4465 if (expat_capi) {
4466 /* check that it's usable */
4467 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004468 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004469 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4470 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004471 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004472 PyErr_SetString(PyExc_ImportError,
4473 "pyexpat version is incompatible");
4474 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004475 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004476 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004477 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004478 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004479
Eli Bendersky532d03e2013-08-10 08:00:39 -07004480 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004481 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004482 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004483 Py_INCREF(st->parseerror_obj);
4484 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004485
Eli Bendersky092af1f2012-03-04 07:14:03 +02004486 Py_INCREF((PyObject *)&Element_Type);
4487 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4488
Eli Bendersky58d548d2012-05-29 15:45:16 +03004489 Py_INCREF((PyObject *)&TreeBuilder_Type);
4490 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4491
Eli Bendersky52467b12012-06-01 07:13:08 +03004492 Py_INCREF((PyObject *)&XMLParser_Type);
4493 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004494
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004495 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004496}