blob: 8119c8b1e2b10fb5ba21eaa97d4f008779fc5ca6 [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000022/* An element can hold this many children without extra memory
23 allocations. */
24#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  With the first branch enabled, ALLOC and
   RELEASE adjust a running byte counter and print it together with the
   given comment; in the default branch both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) introduces a file-private function that
   returns `type`.  For MSVC the declaration additionally carries
   __inline and __fastcall. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
55
/* The 'join' flag lives in the lowest bit of the text and tail object
   pointers, so every use of those fields as an object pointer has to go
   through JOIN_OBJ.  See the comments in the ElementObject definition
   for details.

   JOIN_OBJ(p)        the pointer with the flag bit masked off
   JOIN_GET(p)        the flag bit (0 or 1)
   JOIN_SET(p, flag)  the pointer with its flag bit replaced by `flag` */
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
Ronald Oussoren138d0802013-07-19 11:11:25 +020083/* Types defined by this extension */
84static PyTypeObject Element_Type;
85static PyTypeObject ElementIter_Type;
86static PyTypeObject TreeBuilder_Type;
87static PyTypeObject XMLParser_Type;
88
89
Eli Bendersky532d03e2013-08-10 08:00:39 -070090/* Per-module state; PEP 3121 */
91typedef struct {
92 PyObject *parseerror_obj;
93 PyObject *deepcopy_obj;
94 PyObject *elementpath_obj;
Stefan Behnel43851a22019-05-01 21:20:38 +020095 PyObject *comment_factory;
96 PyObject *pi_factory;
Eli Bendersky532d03e2013-08-10 08:00:39 -070097} elementtreestate;
98
99static struct PyModuleDef elementtreemodule;
100
101/* Given a module object (assumed to be _elementtree), get its per-module
102 * state.
103 */
104#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
105
106/* Find the module instance imported in the currently running sub-interpreter
107 * and get its state.
108 */
109#define ET_STATE_GLOBAL \
110 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
111
112static int
113elementtree_clear(PyObject *m)
114{
115 elementtreestate *st = ET_STATE(m);
116 Py_CLEAR(st->parseerror_obj);
117 Py_CLEAR(st->deepcopy_obj);
118 Py_CLEAR(st->elementpath_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +0200119 Py_CLEAR(st->comment_factory);
120 Py_CLEAR(st->pi_factory);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700121 return 0;
122}
123
124static int
125elementtree_traverse(PyObject *m, visitproc visit, void *arg)
126{
127 elementtreestate *st = ET_STATE(m);
128 Py_VISIT(st->parseerror_obj);
129 Py_VISIT(st->deepcopy_obj);
130 Py_VISIT(st->elementpath_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +0200131 Py_VISIT(st->comment_factory);
132 Py_VISIT(st->pi_factory);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700133 return 0;
134}
135
136static void
137elementtree_free(void *m)
138{
139 elementtree_clear((PyObject *)m);
140}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000141
142/* helpers */
143
144LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145list_join(PyObject* list)
146{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300147 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000148 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 PyObject* result;
150
Antoine Pitrouc1948842012-10-01 23:40:37 +0200151 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000152 if (!joiner)
153 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200154 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000155 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000156 return result;
157}
158
Eli Bendersky48d358b2012-05-30 17:57:50 +0300159/* Is the given object an empty dictionary?
160*/
161static int
162is_empty_dict(PyObject *obj)
163{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200164 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300165}
166
167
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000168/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200169/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000170
171typedef struct {
172
173 /* attributes (a dictionary object), or None if no attributes */
174 PyObject* attrib;
175
176 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200177 Py_ssize_t length; /* actual number of items */
178 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179
180 /* this either points to _children or to a malloced buffer */
181 PyObject* *children;
182
183 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100184
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000185} ElementObjectExtra;
186
187typedef struct {
188 PyObject_HEAD
189
190 /* element tag (a string). */
191 PyObject* tag;
192
193 /* text before first child. note that this is a tagged pointer;
194 use JOIN_OBJ to get the object pointer. the join flag is used
195 to distinguish lists created by the tree builder from lists
196 assigned to the attribute by application code; the former
197 should be joined before being returned to the user, the latter
198 should be left intact. */
199 PyObject* text;
200
201 /* text after this element, in parent. note that this is a tagged
202 pointer; use JOIN_OBJ to get the object pointer. */
203 PyObject* tail;
204
205 ElementObjectExtra* extra;
206
Eli Benderskyebf37a22012-04-03 22:02:37 +0300207 PyObject *weakreflist; /* For tp_weaklistoffset */
208
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209} ElementObject;
210
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000211
/* Element_CheckExact: the object's type is Element_Type itself.
   Element_Check: PyObject_TypeCheck against Element_Type. */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
214
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215
216/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200217/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000218
219LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200220create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000221{
222 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200223 if (!self->extra) {
224 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000225 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200226 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000227
228 if (!attrib)
229 attrib = Py_None;
230
231 Py_INCREF(attrib);
232 self->extra->attrib = attrib;
233
234 self->extra->length = 0;
235 self->extra->allocated = STATIC_CHILDREN;
236 self->extra->children = self->extra->_children;
237
238 return 0;
239}
240
241LOCAL(void)
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300242dealloc_extra(ElementObjectExtra *extra)
243{
244 Py_ssize_t i;
245
246 if (!extra)
247 return;
248
249 Py_DECREF(extra->attrib);
250
251 for (i = 0; i < extra->length; i++)
252 Py_DECREF(extra->children[i]);
253
254 if (extra->children != extra->_children)
255 PyObject_Free(extra->children);
256
257 PyObject_Free(extra);
258}
259
260LOCAL(void)
261clear_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000262{
Eli Bendersky08b85292012-04-04 15:55:07 +0300263 ElementObjectExtra *myextra;
Eli Bendersky08b85292012-04-04 15:55:07 +0300264
Eli Benderskyebf37a22012-04-03 22:02:37 +0300265 if (!self->extra)
266 return;
267
268 /* Avoid DECREFs calling into this code again (cycles, etc.)
269 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300270 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300271 self->extra = NULL;
272
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300273 dealloc_extra(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000274}
275
Eli Bendersky092af1f2012-03-04 07:14:03 +0200276/* Convenience internal function to create new Element objects with the given
277 * tag and attributes.
278*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000279LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200280create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000281{
282 ElementObject* self;
283
Eli Bendersky0192ba32012-03-30 16:38:33 +0300284 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000285 if (self == NULL)
286 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000287 self->extra = NULL;
288
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000289 Py_INCREF(tag);
290 self->tag = tag;
291
292 Py_INCREF(Py_None);
293 self->text = Py_None;
294
295 Py_INCREF(Py_None);
296 self->tail = Py_None;
297
Eli Benderskyebf37a22012-04-03 22:02:37 +0300298 self->weakreflist = NULL;
299
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200300 ALLOC(sizeof(ElementObject), "create element");
301 PyObject_GC_Track(self);
302
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200303 if (attrib != Py_None && !is_empty_dict(attrib)) {
304 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200305 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200306 return NULL;
307 }
308 }
309
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000310 return (PyObject*) self;
311}
312
Eli Bendersky092af1f2012-03-04 07:14:03 +0200313static PyObject *
314element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
315{
316 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
317 if (e != NULL) {
318 Py_INCREF(Py_None);
319 e->tag = Py_None;
320
321 Py_INCREF(Py_None);
322 e->text = Py_None;
323
324 Py_INCREF(Py_None);
325 e->tail = Py_None;
326
327 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300328 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200329 }
330 return (PyObject *)e;
331}
332
Eli Bendersky737b1732012-05-29 06:02:56 +0300333/* Helper function for extracting the attrib dictionary from a keywords dict.
334 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800335 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300336 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700337 *
338 * Return a dictionary with the content of kwds merged into the content of
339 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300340 */
341static PyObject*
342get_attrib_from_keywords(PyObject *kwds)
343{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700344 PyObject *attrib_str = PyUnicode_FromString("attrib");
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600345 if (attrib_str == NULL) {
346 return NULL;
347 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200348 PyObject *attrib = PyDict_GetItemWithError(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300349
350 if (attrib) {
351 /* If attrib was found in kwds, copy its value and remove it from
352 * kwds
353 */
354 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700355 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300356 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
357 Py_TYPE(attrib)->tp_name);
358 return NULL;
359 }
360 attrib = PyDict_Copy(attrib);
Serhiy Storchaka8905fcc2018-12-11 08:38:03 +0200361 if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
362 Py_DECREF(attrib);
363 attrib = NULL;
364 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200365 }
366 else if (!PyErr_Occurred()) {
Eli Bendersky737b1732012-05-29 06:02:56 +0300367 attrib = PyDict_New();
368 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700369
370 Py_DECREF(attrib_str);
371
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600372 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
373 Py_DECREF(attrib);
374 return NULL;
375 }
Eli Bendersky737b1732012-05-29 06:02:56 +0300376 return attrib;
377}
378
Serhiy Storchakacb985562015-05-04 15:32:48 +0300379/*[clinic input]
380module _elementtree
381class _elementtree.Element "ElementObject *" "&Element_Type"
382class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
383class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
384[clinic start generated code]*/
385/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
386
Eli Bendersky092af1f2012-03-04 07:14:03 +0200387static int
388element_init(PyObject *self, PyObject *args, PyObject *kwds)
389{
390 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 PyObject *attrib = NULL;
392 ElementObject *self_elem;
393
394 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
395 return -1;
396
Eli Bendersky737b1732012-05-29 06:02:56 +0300397 if (attrib) {
398 /* attrib passed as positional arg */
399 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200400 if (!attrib)
401 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300402 if (kwds) {
403 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300405 return -1;
406 }
407 }
408 } else if (kwds) {
409 /* have keywords args */
410 attrib = get_attrib_from_keywords(kwds);
411 if (!attrib)
412 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413 }
414
415 self_elem = (ElementObject *)self;
416
Antoine Pitrouc1948842012-10-01 23:40:37 +0200417 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200418 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200419 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200420 return -1;
421 }
422 }
423
Eli Bendersky48d358b2012-05-30 17:57:50 +0300424 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200425 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426
427 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200428 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300429 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200430
Eli Bendersky092af1f2012-03-04 07:14:03 +0200431 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300432 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200433
Eli Bendersky092af1f2012-03-04 07:14:03 +0200434 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300435 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200436
437 return 0;
438}
439
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000440LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200441element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000442{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200443 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000444 PyObject* *children;
445
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300446 assert(extra >= 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447 /* make sure self->children can hold the given number of extra
448 elements. set an exception and return -1 if allocation failed */
449
Victor Stinner5f0af232013-07-11 23:01:36 +0200450 if (!self->extra) {
451 if (create_extra(self, NULL) < 0)
452 return -1;
453 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000454
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200455 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000456
457 if (size > self->extra->allocated) {
458 /* use Python 2.4's list growth strategy */
459 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000460 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100461 * which needs at least 4 bytes.
462 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000463 * be safe.
464 */
465 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200466 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
467 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000468 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000469 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100470 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000471 * false alarm always assume at least one child to be safe.
472 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000473 children = PyObject_Realloc(self->extra->children,
474 size * sizeof(PyObject*));
475 if (!children)
476 goto nomemory;
477 } else {
478 children = PyObject_Malloc(size * sizeof(PyObject*));
479 if (!children)
480 goto nomemory;
481 /* copy existing children from static area to malloc buffer */
482 memcpy(children, self->extra->children,
483 self->extra->length * sizeof(PyObject*));
484 }
485 self->extra->children = children;
486 self->extra->allocated = size;
487 }
488
489 return 0;
490
491 nomemory:
492 PyErr_NoMemory();
493 return -1;
494}
495
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300496LOCAL(void)
497raise_type_error(PyObject *element)
498{
499 PyErr_Format(PyExc_TypeError,
500 "expected an Element, not \"%.200s\"",
501 Py_TYPE(element)->tp_name);
502}
503
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000504LOCAL(int)
505element_add_subelement(ElementObject* self, PyObject* element)
506{
507 /* add a child element to a parent */
508
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300509 if (!Element_Check(element)) {
510 raise_type_error(element);
511 return -1;
512 }
513
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000514 if (element_resize(self, 1) < 0)
515 return -1;
516
517 Py_INCREF(element);
518 self->extra->children[self->extra->length] = element;
519
520 self->extra->length++;
521
522 return 0;
523}
524
525LOCAL(PyObject*)
526element_get_attrib(ElementObject* self)
527{
528 /* return borrowed reference to attrib dictionary */
529 /* note: this function assumes that the extra section exists */
530
531 PyObject* res = self->extra->attrib;
532
533 if (res == Py_None) {
534 /* create missing dictionary */
535 res = PyDict_New();
536 if (!res)
537 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200538 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000539 self->extra->attrib = res;
540 }
541
542 return res;
543}
544
545LOCAL(PyObject*)
546element_get_text(ElementObject* self)
547{
548 /* return borrowed reference to text attribute */
549
Serhiy Storchaka576def02017-03-30 09:47:31 +0300550 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000551
552 if (JOIN_GET(res)) {
553 res = JOIN_OBJ(res);
554 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300555 PyObject *tmp = list_join(res);
556 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000557 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300558 self->text = tmp;
559 Py_DECREF(res);
560 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561 }
562 }
563
564 return res;
565}
566
567LOCAL(PyObject*)
568element_get_tail(ElementObject* self)
569{
570 /* return borrowed reference to text attribute */
571
Serhiy Storchaka576def02017-03-30 09:47:31 +0300572 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573
574 if (JOIN_GET(res)) {
575 res = JOIN_OBJ(res);
576 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300577 PyObject *tmp = list_join(res);
578 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000579 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300580 self->tail = tmp;
581 Py_DECREF(res);
582 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000583 }
584 }
585
586 return res;
587}
588
589static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300590subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000591{
592 PyObject* elem;
593
594 ElementObject* parent;
595 PyObject* tag;
596 PyObject* attrib = NULL;
597 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
598 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800599 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000600 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800601 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000602
Eli Bendersky737b1732012-05-29 06:02:56 +0300603 if (attrib) {
604 /* attrib passed as positional arg */
605 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000606 if (!attrib)
607 return NULL;
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600608 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
609 Py_DECREF(attrib);
610 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300611 }
612 } else if (kwds) {
613 /* have keyword args */
614 attrib = get_attrib_from_keywords(kwds);
615 if (!attrib)
616 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000617 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300618 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000619 Py_INCREF(Py_None);
620 attrib = Py_None;
621 }
622
Eli Bendersky092af1f2012-03-04 07:14:03 +0200623 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000624 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200625 if (elem == NULL)
626 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000627
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000628 if (element_add_subelement(parent, elem) < 0) {
629 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000630 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000631 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000632
633 return elem;
634}
635
Eli Bendersky0192ba32012-03-30 16:38:33 +0300636static int
637element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
638{
639 Py_VISIT(self->tag);
640 Py_VISIT(JOIN_OBJ(self->text));
641 Py_VISIT(JOIN_OBJ(self->tail));
642
643 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200644 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300645 Py_VISIT(self->extra->attrib);
646
647 for (i = 0; i < self->extra->length; ++i)
648 Py_VISIT(self->extra->children[i]);
649 }
650 return 0;
651}
652
653static int
654element_gc_clear(ElementObject *self)
655{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300656 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700657 _clear_joined_ptr(&self->text);
658 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300659
660 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300661 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300662 */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300663 clear_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300664 return 0;
665}
666
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000667static void
668element_dealloc(ElementObject* self)
669{
INADA Naokia6296d32017-08-24 14:55:17 +0900670 /* bpo-31095: UnTrack is needed before calling any callbacks */
Eli Bendersky0192ba32012-03-30 16:38:33 +0300671 PyObject_GC_UnTrack(self);
Jeroen Demeyer351c6742019-05-10 19:21:11 +0200672 Py_TRASHCAN_BEGIN(self, element_dealloc)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300673
674 if (self->weakreflist != NULL)
675 PyObject_ClearWeakRefs((PyObject *) self);
676
Eli Bendersky0192ba32012-03-30 16:38:33 +0300677 /* element_gc_clear clears all references and deallocates extra
678 */
679 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000680
681 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200682 Py_TYPE(self)->tp_free((PyObject *)self);
Jeroen Demeyer351c6742019-05-10 19:21:11 +0200683 Py_TRASHCAN_END
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000684}
685
686/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000687
Serhiy Storchakacb985562015-05-04 15:32:48 +0300688/*[clinic input]
689_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000690
Serhiy Storchakacb985562015-05-04 15:32:48 +0300691 subelement: object(subclass_of='&Element_Type')
692 /
693
694[clinic start generated code]*/
695
696static PyObject *
697_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
698/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
699{
700 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000701 return NULL;
702
703 Py_RETURN_NONE;
704}
705
Serhiy Storchakacb985562015-05-04 15:32:48 +0300706/*[clinic input]
707_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000708
Serhiy Storchakacb985562015-05-04 15:32:48 +0300709[clinic start generated code]*/
710
711static PyObject *
712_elementtree_Element_clear_impl(ElementObject *self)
713/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
714{
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300715 clear_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716
717 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300718 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719
720 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300721 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000722
723 Py_RETURN_NONE;
724}
725
Serhiy Storchakacb985562015-05-04 15:32:48 +0300726/*[clinic input]
727_elementtree.Element.__copy__
728
729[clinic start generated code]*/
730
731static PyObject *
732_elementtree_Element___copy___impl(ElementObject *self)
733/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000734{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200735 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000736 ElementObject* element;
737
Eli Bendersky092af1f2012-03-04 07:14:03 +0200738 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800739 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000740 if (!element)
741 return NULL;
742
Oren Milman39ecb9c2017-10-10 23:26:24 +0300743 Py_INCREF(JOIN_OBJ(self->text));
744 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000745
Oren Milman39ecb9c2017-10-10 23:26:24 +0300746 Py_INCREF(JOIN_OBJ(self->tail));
747 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300749 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000750 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000751 if (element_resize(element, self->extra->length) < 0) {
752 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000753 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000754 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000755
756 for (i = 0; i < self->extra->length; i++) {
757 Py_INCREF(self->extra->children[i]);
758 element->extra->children[i] = self->extra->children[i];
759 }
760
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300761 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000763 }
764
765 return (PyObject*) element;
766}
767
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200768/* Helper for a deep copy. */
769LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
770
Serhiy Storchakacb985562015-05-04 15:32:48 +0300771/*[clinic input]
772_elementtree.Element.__deepcopy__
773
Oren Milmand0568182017-09-12 17:39:15 +0300774 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300775 /
776
777[clinic start generated code]*/
778
779static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300780_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
781/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000782{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200783 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000784 ElementObject* element;
785 PyObject* tag;
786 PyObject* attrib;
787 PyObject* text;
788 PyObject* tail;
789 PyObject* id;
790
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000791 tag = deepcopy(self->tag, memo);
792 if (!tag)
793 return NULL;
794
795 if (self->extra) {
796 attrib = deepcopy(self->extra->attrib, memo);
797 if (!attrib) {
798 Py_DECREF(tag);
799 return NULL;
800 }
801 } else {
802 Py_INCREF(Py_None);
803 attrib = Py_None;
804 }
805
Eli Bendersky092af1f2012-03-04 07:14:03 +0200806 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807
808 Py_DECREF(tag);
809 Py_DECREF(attrib);
810
811 if (!element)
812 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100813
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000814 text = deepcopy(JOIN_OBJ(self->text), memo);
815 if (!text)
816 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300817 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000818
819 tail = deepcopy(JOIN_OBJ(self->tail), memo);
820 if (!tail)
821 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300822 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000823
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300824 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000825 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000826 if (element_resize(element, self->extra->length) < 0)
827 goto error;
828
829 for (i = 0; i < self->extra->length; i++) {
830 PyObject* child = deepcopy(self->extra->children[i], memo);
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300831 if (!child || !Element_Check(child)) {
832 if (child) {
833 raise_type_error(child);
834 Py_DECREF(child);
835 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000836 element->extra->length = i;
837 goto error;
838 }
839 element->extra->children[i] = child;
840 }
841
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300842 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000843 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000844 }
845
846 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700847 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000848 if (!id)
849 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000850
851 i = PyDict_SetItem(memo, id, (PyObject*) element);
852
853 Py_DECREF(id);
854
855 if (i < 0)
856 goto error;
857
858 return (PyObject*) element;
859
860 error:
861 Py_DECREF(element);
862 return NULL;
863}
864
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200865LOCAL(PyObject *)
866deepcopy(PyObject *object, PyObject *memo)
867{
868 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200869 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200870 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200871
872 /* Fast paths */
873 if (object == Py_None || PyUnicode_CheckExact(object)) {
874 Py_INCREF(object);
875 return object;
876 }
877
878 if (Py_REFCNT(object) == 1) {
879 if (PyDict_CheckExact(object)) {
880 PyObject *key, *value;
881 Py_ssize_t pos = 0;
882 int simple = 1;
883 while (PyDict_Next(object, &pos, &key, &value)) {
884 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
885 simple = 0;
886 break;
887 }
888 }
889 if (simple)
890 return PyDict_Copy(object);
891 /* Fall through to general case */
892 }
893 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300894 return _elementtree_Element___deepcopy___impl(
895 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200896 }
897 }
898
899 /* General case */
900 st = ET_STATE_GLOBAL;
901 if (!st->deepcopy_obj) {
902 PyErr_SetString(PyExc_RuntimeError,
903 "deepcopy helper not found");
904 return NULL;
905 }
906
Victor Stinner7fbac452016-08-20 01:34:44 +0200907 stack[0] = object;
908 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200909 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200910}
911
912
Serhiy Storchakacb985562015-05-04 15:32:48 +0300913/*[clinic input]
914_elementtree.Element.__sizeof__ -> Py_ssize_t
915
916[clinic start generated code]*/
917
918static Py_ssize_t
919_elementtree_Element___sizeof___impl(ElementObject *self)
920/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200921{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200922 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200923 if (self->extra) {
924 result += sizeof(ElementObjectExtra);
925 if (self->extra->children != self->extra->_children)
926 result += sizeof(PyObject*) * self->extra->allocated;
927 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300928 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200929}
930
Eli Bendersky698bdb22013-01-10 06:01:06 -0800931/* dict keys for getstate/setstate. */
932#define PICKLED_TAG "tag"
933#define PICKLED_CHILDREN "_children"
934#define PICKLED_ATTRIB "attrib"
935#define PICKLED_TAIL "tail"
936#define PICKLED_TEXT "text"
937
938/* __getstate__ returns a fabricated instance dict as in the pure-Python
939 * Element implementation, for interoperability/interchangeability. This
940 * makes the pure-Python implementation details an API, but (a) there aren't
941 * any unnecessary structures there; and (b) it buys compatibility with 3.2
942 * pickles. See issue #16076.
943 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300944/*[clinic input]
945_elementtree.Element.__getstate__
946
947[clinic start generated code]*/
948
Eli Bendersky698bdb22013-01-10 06:01:06 -0800949static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300950_elementtree_Element___getstate___impl(ElementObject *self)
951/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800952{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200953 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800954 PyObject *instancedict = NULL, *children;
955
956 /* Build a list of children. */
957 children = PyList_New(self->extra ? self->extra->length : 0);
958 if (!children)
959 return NULL;
960 for (i = 0; i < PyList_GET_SIZE(children); i++) {
961 PyObject *child = self->extra->children[i];
962 Py_INCREF(child);
963 PyList_SET_ITEM(children, i, child);
964 }
965
966 /* Construct the state object. */
967 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
968 if (noattrib)
969 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
970 PICKLED_TAG, self->tag,
971 PICKLED_CHILDREN, children,
972 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700973 PICKLED_TEXT, JOIN_OBJ(self->text),
974 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800975 else
976 instancedict = Py_BuildValue("{sOsOsOsOsO}",
977 PICKLED_TAG, self->tag,
978 PICKLED_CHILDREN, children,
979 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700980 PICKLED_TEXT, JOIN_OBJ(self->text),
981 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800982 if (instancedict) {
983 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800984 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800985 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800986 else {
987 for (i = 0; i < PyList_GET_SIZE(children); i++)
988 Py_DECREF(PyList_GET_ITEM(children, i));
989 Py_DECREF(children);
990
991 return NULL;
992 }
993}
994
995static PyObject *
996element_setstate_from_attributes(ElementObject *self,
997 PyObject *tag,
998 PyObject *attrib,
999 PyObject *text,
1000 PyObject *tail,
1001 PyObject *children)
1002{
1003 Py_ssize_t i, nchildren;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001004 ElementObjectExtra *oldextra = NULL;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001005
1006 if (!tag) {
1007 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
1008 return NULL;
1009 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001010
Serhiy Storchaka191321d2015-12-27 15:41:34 +02001011 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001012 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001013
Oren Milman39ecb9c2017-10-10 23:26:24 +03001014 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
1015 Py_INCREF(JOIN_OBJ(text));
1016 _set_joined_ptr(&self->text, text);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001017
Oren Milman39ecb9c2017-10-10 23:26:24 +03001018 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
1019 Py_INCREF(JOIN_OBJ(tail));
1020 _set_joined_ptr(&self->tail, tail);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001021
1022 /* Handle ATTRIB and CHILDREN. */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001023 if (!children && !attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001024 Py_RETURN_NONE;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001025 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001026
1027 /* Compute 'nchildren'. */
1028 if (children) {
1029 if (!PyList_Check(children)) {
1030 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1031 return NULL;
1032 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001033 nchildren = PyList_GET_SIZE(children);
1034
1035 /* (Re-)allocate 'extra'.
1036 Avoid DECREFs calling into this code again (cycles, etc.)
1037 */
1038 oldextra = self->extra;
1039 self->extra = NULL;
1040 if (element_resize(self, nchildren)) {
1041 assert(!self->extra || !self->extra->length);
1042 clear_extra(self);
1043 self->extra = oldextra;
1044 return NULL;
1045 }
1046 assert(self->extra);
1047 assert(self->extra->allocated >= nchildren);
1048 if (oldextra) {
1049 assert(self->extra->attrib == Py_None);
1050 self->extra->attrib = oldextra->attrib;
1051 oldextra->attrib = Py_None;
1052 }
1053
1054 /* Copy children */
1055 for (i = 0; i < nchildren; i++) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001056 PyObject *child = PyList_GET_ITEM(children, i);
1057 if (!Element_Check(child)) {
1058 raise_type_error(child);
1059 self->extra->length = i;
1060 dealloc_extra(oldextra);
1061 return NULL;
1062 }
1063 Py_INCREF(child);
1064 self->extra->children[i] = child;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001065 }
1066
1067 assert(!self->extra->length);
1068 self->extra->length = nchildren;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001069 }
1070 else {
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001071 if (element_resize(self, 0)) {
1072 return NULL;
1073 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001074 }
1075
Eli Bendersky698bdb22013-01-10 06:01:06 -08001076 /* Stash attrib. */
1077 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001078 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001079 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001080 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001081 dealloc_extra(oldextra);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001082
1083 Py_RETURN_NONE;
1084}
1085
1086/* __setstate__ for Element instance from the Python implementation.
1087 * 'state' should be the instance dict.
1088 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001089
Eli Bendersky698bdb22013-01-10 06:01:06 -08001090static PyObject *
1091element_setstate_from_Python(ElementObject *self, PyObject *state)
1092{
1093 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1094 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1095 PyObject *args;
1096 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001097 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001098
Eli Bendersky698bdb22013-01-10 06:01:06 -08001099 tag = attrib = text = tail = children = NULL;
1100 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001101 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001102 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001103
1104 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1105 &attrib, &text, &tail, &children))
1106 retval = element_setstate_from_attributes(self, tag, attrib, text,
1107 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001108 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001109 retval = NULL;
1110
1111 Py_DECREF(args);
1112 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001113}
1114
Serhiy Storchakacb985562015-05-04 15:32:48 +03001115/*[clinic input]
1116_elementtree.Element.__setstate__
1117
1118 state: object
1119 /
1120
1121[clinic start generated code]*/
1122
Eli Bendersky698bdb22013-01-10 06:01:06 -08001123static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001124_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1125/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001126{
1127 if (!PyDict_CheckExact(state)) {
1128 PyErr_Format(PyExc_TypeError,
1129 "Don't know how to unpickle \"%.200R\" as an Element",
1130 state);
1131 return NULL;
1132 }
1133 else
1134 return element_setstate_from_Python(self, state);
1135}
1136
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001137LOCAL(int)
1138checkpath(PyObject* tag)
1139{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001140 Py_ssize_t i;
1141 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001142
1143 /* check if a tag contains an xpath character */
1144
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001145#define PATHCHAR(ch) \
1146 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001147
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001148 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001149 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1150 void *data = PyUnicode_DATA(tag);
1151 unsigned int kind = PyUnicode_KIND(tag);
Stefan Behnel47541682019-05-03 20:58:16 +02001152 if (len >= 3 && PyUnicode_READ(kind, data, 0) == '{' && (
1153 PyUnicode_READ(kind, data, 1) == '}' || (
1154 PyUnicode_READ(kind, data, 1) == '*' &&
1155 PyUnicode_READ(kind, data, 2) == '}'))) {
1156 /* wildcard: '{}tag' or '{*}tag' */
1157 return 1;
1158 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001159 for (i = 0; i < len; i++) {
1160 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1161 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001162 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001163 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001164 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001165 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001166 return 1;
1167 }
1168 return 0;
1169 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001170 if (PyBytes_Check(tag)) {
1171 char *p = PyBytes_AS_STRING(tag);
Stefan Behnel47541682019-05-03 20:58:16 +02001172 const Py_ssize_t len = PyBytes_GET_SIZE(tag);
1173 if (len >= 3 && p[0] == '{' && (
Stefan Behnel6b951492019-05-06 17:36:35 +02001174 p[1] == '}' || (p[1] == '*' && p[2] == '}'))) {
Stefan Behnel47541682019-05-03 20:58:16 +02001175 /* wildcard: '{}tag' or '{*}tag' */
1176 return 1;
1177 }
1178 for (i = 0; i < len; i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001179 if (p[i] == '{')
1180 check = 0;
1181 else if (p[i] == '}')
1182 check = 1;
1183 else if (check && PATHCHAR(p[i]))
1184 return 1;
1185 }
1186 return 0;
1187 }
1188
1189 return 1; /* unknown type; might be path expression */
1190}
1191
Serhiy Storchakacb985562015-05-04 15:32:48 +03001192/*[clinic input]
1193_elementtree.Element.extend
1194
1195 elements: object
1196 /
1197
1198[clinic start generated code]*/
1199
1200static PyObject *
1201_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1202/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001203{
1204 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001205 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001206
Serhiy Storchakacb985562015-05-04 15:32:48 +03001207 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001208 if (!seq) {
1209 PyErr_Format(
1210 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001211 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001212 );
1213 return NULL;
1214 }
1215
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001216 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001217 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001218 Py_INCREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001219 if (element_add_subelement(self, element) < 0) {
1220 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001221 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001222 return NULL;
1223 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001224 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001225 }
1226
1227 Py_DECREF(seq);
1228
1229 Py_RETURN_NONE;
1230}
1231
Serhiy Storchakacb985562015-05-04 15:32:48 +03001232/*[clinic input]
1233_elementtree.Element.find
1234
1235 path: object
1236 namespaces: object = None
1237
1238[clinic start generated code]*/
1239
1240static PyObject *
1241_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1242 PyObject *namespaces)
1243/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001244{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001245 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001246 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001247
Serhiy Storchakacb985562015-05-04 15:32:48 +03001248 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001249 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001250 return _PyObject_CallMethodIdObjArgs(
1251 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001252 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001253 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001254
1255 if (!self->extra)
1256 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001257
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001258 for (i = 0; i < self->extra->length; i++) {
1259 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001260 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001261 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001262 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001263 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001264 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001265 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001266 Py_DECREF(item);
1267 if (rc < 0)
1268 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001269 }
1270
1271 Py_RETURN_NONE;
1272}
1273
Serhiy Storchakacb985562015-05-04 15:32:48 +03001274/*[clinic input]
1275_elementtree.Element.findtext
1276
1277 path: object
1278 default: object = None
1279 namespaces: object = None
1280
1281[clinic start generated code]*/
1282
1283static PyObject *
1284_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1285 PyObject *default_value,
1286 PyObject *namespaces)
1287/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001288{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001289 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001290 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001291 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001292
Serhiy Storchakacb985562015-05-04 15:32:48 +03001293 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001294 return _PyObject_CallMethodIdObjArgs(
1295 st->elementpath_obj, &PyId_findtext,
1296 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001297 );
1298
1299 if (!self->extra) {
1300 Py_INCREF(default_value);
1301 return default_value;
1302 }
1303
1304 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001305 PyObject *item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001306 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001307 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001308 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001309 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001310 if (rc > 0) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001311 PyObject* text = element_get_text((ElementObject*)item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001312 if (text == Py_None) {
1313 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001314 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001315 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001316 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001317 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001318 return text;
1319 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001320 Py_DECREF(item);
1321 if (rc < 0)
1322 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001323 }
1324
1325 Py_INCREF(default_value);
1326 return default_value;
1327}
1328
Serhiy Storchakacb985562015-05-04 15:32:48 +03001329/*[clinic input]
1330_elementtree.Element.findall
1331
1332 path: object
1333 namespaces: object = None
1334
1335[clinic start generated code]*/
1336
1337static PyObject *
1338_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1339 PyObject *namespaces)
1340/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001341{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001342 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001343 PyObject* out;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001344 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001345
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001346 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001347 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001348 return _PyObject_CallMethodIdObjArgs(
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001349 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001350 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001351 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001352
1353 out = PyList_New(0);
1354 if (!out)
1355 return NULL;
1356
1357 if (!self->extra)
1358 return out;
1359
1360 for (i = 0; i < self->extra->length; i++) {
1361 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001362 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001363 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001364 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001365 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001366 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1367 Py_DECREF(item);
1368 Py_DECREF(out);
1369 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001370 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001371 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001372 }
1373
1374 return out;
1375}
1376
Serhiy Storchakacb985562015-05-04 15:32:48 +03001377/*[clinic input]
1378_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001379
Serhiy Storchakacb985562015-05-04 15:32:48 +03001380 path: object
1381 namespaces: object = None
1382
1383[clinic start generated code]*/
1384
1385static PyObject *
1386_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1387 PyObject *namespaces)
1388/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1389{
1390 PyObject* tag = path;
1391 _Py_IDENTIFIER(iterfind);
1392 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001393
Victor Stinnerf5616342016-12-09 15:26:00 +01001394 return _PyObject_CallMethodIdObjArgs(
1395 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001396}
1397
Serhiy Storchakacb985562015-05-04 15:32:48 +03001398/*[clinic input]
1399_elementtree.Element.get
1400
1401 key: object
1402 default: object = None
1403
1404[clinic start generated code]*/
1405
1406static PyObject *
1407_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1408 PyObject *default_value)
1409/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001410{
1411 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001412
1413 if (!self->extra || self->extra->attrib == Py_None)
1414 value = default_value;
1415 else {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001416 value = PyDict_GetItemWithError(self->extra->attrib, key);
1417 if (!value) {
1418 if (PyErr_Occurred()) {
1419 return NULL;
1420 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001421 value = default_value;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001422 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001423 }
1424
1425 Py_INCREF(value);
1426 return value;
1427}
1428
Serhiy Storchakacb985562015-05-04 15:32:48 +03001429/*[clinic input]
1430_elementtree.Element.getchildren
1431
1432[clinic start generated code]*/
1433
1434static PyObject *
1435_elementtree_Element_getchildren_impl(ElementObject *self)
1436/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001437{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001438 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001439 PyObject* list;
1440
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001441 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1442 "This method will be removed in future versions. "
1443 "Use 'list(elem)' or iteration over elem instead.",
1444 1) < 0) {
1445 return NULL;
1446 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001447
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001448 if (!self->extra)
1449 return PyList_New(0);
1450
1451 list = PyList_New(self->extra->length);
1452 if (!list)
1453 return NULL;
1454
1455 for (i = 0; i < self->extra->length; i++) {
1456 PyObject* item = self->extra->children[i];
1457 Py_INCREF(item);
1458 PyList_SET_ITEM(list, i, item);
1459 }
1460
1461 return list;
1462}
1463
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001464
Eli Bendersky64d11e62012-06-15 07:42:50 +03001465static PyObject *
1466create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1467
1468
Serhiy Storchakacb985562015-05-04 15:32:48 +03001469/*[clinic input]
1470_elementtree.Element.iter
1471
1472 tag: object = None
1473
1474[clinic start generated code]*/
1475
Eli Bendersky64d11e62012-06-15 07:42:50 +03001476static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001477_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1478/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001479{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001480 if (PyUnicode_Check(tag)) {
1481 if (PyUnicode_READY(tag) < 0)
1482 return NULL;
1483 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1484 tag = Py_None;
1485 }
1486 else if (PyBytes_Check(tag)) {
1487 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1488 tag = Py_None;
1489 }
1490
Eli Bendersky64d11e62012-06-15 07:42:50 +03001491 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001492}
1493
1494
Serhiy Storchakacb985562015-05-04 15:32:48 +03001495/*[clinic input]
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001496_elementtree.Element.getiterator
1497
1498 tag: object = None
1499
1500[clinic start generated code]*/
1501
1502static PyObject *
1503_elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1504/*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1505{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03001506 if (PyErr_WarnEx(PyExc_DeprecationWarning,
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001507 "This method will be removed in future versions. "
1508 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1509 1) < 0) {
1510 return NULL;
1511 }
1512 return _elementtree_Element_iter_impl(self, tag);
1513}
1514
1515
1516/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03001517_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001518
Serhiy Storchakacb985562015-05-04 15:32:48 +03001519[clinic start generated code]*/
1520
1521static PyObject *
1522_elementtree_Element_itertext_impl(ElementObject *self)
1523/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1524{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001525 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001526}
1527
Eli Bendersky64d11e62012-06-15 07:42:50 +03001528
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001529static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001530element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001531{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001532 ElementObject* self = (ElementObject*) self_;
1533
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001534 if (!self->extra || index < 0 || index >= self->extra->length) {
1535 PyErr_SetString(
1536 PyExc_IndexError,
1537 "child index out of range"
1538 );
1539 return NULL;
1540 }
1541
1542 Py_INCREF(self->extra->children[index]);
1543 return self->extra->children[index];
1544}
1545
Serhiy Storchakacb985562015-05-04 15:32:48 +03001546/*[clinic input]
1547_elementtree.Element.insert
1548
1549 index: Py_ssize_t
1550 subelement: object(subclass_of='&Element_Type')
1551 /
1552
1553[clinic start generated code]*/
1554
1555static PyObject *
1556_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1557 PyObject *subelement)
1558/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001559{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001560 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001561
Victor Stinner5f0af232013-07-11 23:01:36 +02001562 if (!self->extra) {
1563 if (create_extra(self, NULL) < 0)
1564 return NULL;
1565 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001566
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001567 if (index < 0) {
1568 index += self->extra->length;
1569 if (index < 0)
1570 index = 0;
1571 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001572 if (index > self->extra->length)
1573 index = self->extra->length;
1574
1575 if (element_resize(self, 1) < 0)
1576 return NULL;
1577
1578 for (i = self->extra->length; i > index; i--)
1579 self->extra->children[i] = self->extra->children[i-1];
1580
Serhiy Storchakacb985562015-05-04 15:32:48 +03001581 Py_INCREF(subelement);
1582 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001583
1584 self->extra->length++;
1585
1586 Py_RETURN_NONE;
1587}
1588
Serhiy Storchakacb985562015-05-04 15:32:48 +03001589/*[clinic input]
1590_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001591
Serhiy Storchakacb985562015-05-04 15:32:48 +03001592[clinic start generated code]*/
1593
1594static PyObject *
1595_elementtree_Element_items_impl(ElementObject *self)
1596/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1597{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001598 if (!self->extra || self->extra->attrib == Py_None)
1599 return PyList_New(0);
1600
1601 return PyDict_Items(self->extra->attrib);
1602}
1603
Serhiy Storchakacb985562015-05-04 15:32:48 +03001604/*[clinic input]
1605_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001606
Serhiy Storchakacb985562015-05-04 15:32:48 +03001607[clinic start generated code]*/
1608
1609static PyObject *
1610_elementtree_Element_keys_impl(ElementObject *self)
1611/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1612{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001613 if (!self->extra || self->extra->attrib == Py_None)
1614 return PyList_New(0);
1615
1616 return PyDict_Keys(self->extra->attrib);
1617}
1618
Martin v. Löwis18e16552006-02-15 17:27:45 +00001619static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001620element_length(ElementObject* self)
1621{
1622 if (!self->extra)
1623 return 0;
1624
1625 return self->extra->length;
1626}
1627
Serhiy Storchakacb985562015-05-04 15:32:48 +03001628/*[clinic input]
1629_elementtree.Element.makeelement
1630
1631 tag: object
1632 attrib: object
1633 /
1634
1635[clinic start generated code]*/
1636
1637static PyObject *
1638_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1639 PyObject *attrib)
1640/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001641{
1642 PyObject* elem;
1643
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001644 attrib = PyDict_Copy(attrib);
1645 if (!attrib)
1646 return NULL;
1647
Eli Bendersky092af1f2012-03-04 07:14:03 +02001648 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001649
1650 Py_DECREF(attrib);
1651
1652 return elem;
1653}
1654
Serhiy Storchakacb985562015-05-04 15:32:48 +03001655/*[clinic input]
1656_elementtree.Element.remove
1657
1658 subelement: object(subclass_of='&Element_Type')
1659 /
1660
1661[clinic start generated code]*/
1662
1663static PyObject *
1664_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1665/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001666{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001667 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001668 int rc;
1669 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001670
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001671 if (!self->extra) {
1672 /* element has no children, so raise exception */
1673 PyErr_SetString(
1674 PyExc_ValueError,
1675 "list.remove(x): x not in list"
1676 );
1677 return NULL;
1678 }
1679
1680 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001681 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001682 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001683 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001684 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001685 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001686 if (rc < 0)
1687 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001688 }
1689
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001690 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001691 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001692 PyErr_SetString(
1693 PyExc_ValueError,
1694 "list.remove(x): x not in list"
1695 );
1696 return NULL;
1697 }
1698
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001699 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001700
1701 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001702 for (; i < self->extra->length; i++)
1703 self->extra->children[i] = self->extra->children[i+1];
1704
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001705 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001706 Py_RETURN_NONE;
1707}
1708
1709static PyObject*
1710element_repr(ElementObject* self)
1711{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001712 int status;
1713
1714 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001715 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001716
1717 status = Py_ReprEnter((PyObject *)self);
1718 if (status == 0) {
1719 PyObject *res;
1720 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1721 Py_ReprLeave((PyObject *)self);
1722 return res;
1723 }
1724 if (status > 0)
1725 PyErr_Format(PyExc_RuntimeError,
1726 "reentrant call inside %s.__repr__",
1727 Py_TYPE(self)->tp_name);
1728 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001729}
1730
Serhiy Storchakacb985562015-05-04 15:32:48 +03001731/*[clinic input]
1732_elementtree.Element.set
1733
1734 key: object
1735 value: object
1736 /
1737
1738[clinic start generated code]*/
1739
1740static PyObject *
1741_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1742 PyObject *value)
1743/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001744{
1745 PyObject* attrib;
1746
Victor Stinner5f0af232013-07-11 23:01:36 +02001747 if (!self->extra) {
1748 if (create_extra(self, NULL) < 0)
1749 return NULL;
1750 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001751
1752 attrib = element_get_attrib(self);
1753 if (!attrib)
1754 return NULL;
1755
1756 if (PyDict_SetItem(attrib, key, value) < 0)
1757 return NULL;
1758
1759 Py_RETURN_NONE;
1760}
1761
1762static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001763element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001764{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001765 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001766 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001767 PyObject* old;
1768
1769 if (!self->extra || index < 0 || index >= self->extra->length) {
1770 PyErr_SetString(
1771 PyExc_IndexError,
1772 "child assignment index out of range");
1773 return -1;
1774 }
1775
1776 old = self->extra->children[index];
1777
1778 if (item) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001779 if (!Element_Check(item)) {
1780 raise_type_error(item);
1781 return -1;
1782 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001783 Py_INCREF(item);
1784 self->extra->children[index] = item;
1785 } else {
1786 self->extra->length--;
1787 for (i = index; i < self->extra->length; i++)
1788 self->extra->children[i] = self->extra->children[i+1];
1789 }
1790
1791 Py_DECREF(old);
1792
1793 return 0;
1794}
1795
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001796static PyObject*
1797element_subscr(PyObject* self_, PyObject* item)
1798{
1799 ElementObject* self = (ElementObject*) self_;
1800
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001801 if (PyIndex_Check(item)) {
1802 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001803
1804 if (i == -1 && PyErr_Occurred()) {
1805 return NULL;
1806 }
1807 if (i < 0 && self->extra)
1808 i += self->extra->length;
1809 return element_getitem(self_, i);
1810 }
1811 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001812 Py_ssize_t start, stop, step, slicelen, i;
1813 size_t cur;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001814 PyObject* list;
1815
1816 if (!self->extra)
1817 return PyList_New(0);
1818
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001819 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001820 return NULL;
1821 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001822 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1823 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001824
1825 if (slicelen <= 0)
1826 return PyList_New(0);
1827 else {
1828 list = PyList_New(slicelen);
1829 if (!list)
1830 return NULL;
1831
1832 for (cur = start, i = 0; i < slicelen;
1833 cur += step, i++) {
1834 PyObject* item = self->extra->children[cur];
1835 Py_INCREF(item);
1836 PyList_SET_ITEM(list, i, item);
1837 }
1838
1839 return list;
1840 }
1841 }
1842 else {
1843 PyErr_SetString(PyExc_TypeError,
1844 "element indices must be integers");
1845 return NULL;
1846 }
1847}
1848
1849static int
1850element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1851{
1852 ElementObject* self = (ElementObject*) self_;
1853
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001854 if (PyIndex_Check(item)) {
1855 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001856
1857 if (i == -1 && PyErr_Occurred()) {
1858 return -1;
1859 }
1860 if (i < 0 && self->extra)
1861 i += self->extra->length;
1862 return element_setitem(self_, i, value);
1863 }
1864 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001865 Py_ssize_t start, stop, step, slicelen, newlen, i;
1866 size_t cur;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001867
1868 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001869 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001870
Victor Stinner5f0af232013-07-11 23:01:36 +02001871 if (!self->extra) {
1872 if (create_extra(self, NULL) < 0)
1873 return -1;
1874 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001875
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001876 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001877 return -1;
1878 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001879 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1880 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001881
Eli Bendersky865756a2012-03-09 13:38:15 +02001882 if (value == NULL) {
1883 /* Delete slice */
1884 size_t cur;
1885 Py_ssize_t i;
1886
1887 if (slicelen <= 0)
1888 return 0;
1889
1890 /* Since we're deleting, the direction of the range doesn't matter,
1891 * so for simplicity make it always ascending.
1892 */
1893 if (step < 0) {
1894 stop = start + 1;
1895 start = stop + step * (slicelen - 1) - 1;
1896 step = -step;
1897 }
1898
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001899 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001900
1901 /* recycle is a list that will contain all the children
1902 * scheduled for removal.
1903 */
1904 if (!(recycle = PyList_New(slicelen))) {
Eli Bendersky865756a2012-03-09 13:38:15 +02001905 return -1;
1906 }
1907
1908 /* This loop walks over all the children that have to be deleted,
1909 * with cur pointing at them. num_moved is the amount of children
1910 * until the next deleted child that have to be "shifted down" to
1911 * occupy the deleted's places.
1912 * Note that in the ith iteration, shifting is done i+i places down
1913 * because i children were already removed.
1914 */
1915 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1916 /* Compute how many children have to be moved, clipping at the
1917 * list end.
1918 */
1919 Py_ssize_t num_moved = step - 1;
1920 if (cur + step >= (size_t)self->extra->length) {
1921 num_moved = self->extra->length - cur - 1;
1922 }
1923
1924 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1925
1926 memmove(
1927 self->extra->children + cur - i,
1928 self->extra->children + cur + 1,
1929 num_moved * sizeof(PyObject *));
1930 }
1931
1932 /* Leftover "tail" after the last removed child */
1933 cur = start + (size_t)slicelen * step;
1934 if (cur < (size_t)self->extra->length) {
1935 memmove(
1936 self->extra->children + cur - slicelen,
1937 self->extra->children + cur,
1938 (self->extra->length - cur) * sizeof(PyObject *));
1939 }
1940
1941 self->extra->length -= slicelen;
1942
1943 /* Discard the recycle list with all the deleted sub-elements */
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -06001944 Py_DECREF(recycle);
Eli Bendersky865756a2012-03-09 13:38:15 +02001945 return 0;
1946 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001947
1948 /* A new slice is actually being assigned */
1949 seq = PySequence_Fast(value, "");
1950 if (!seq) {
1951 PyErr_Format(
1952 PyExc_TypeError,
1953 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1954 );
1955 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001956 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001957 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001958
1959 if (step != 1 && newlen != slicelen)
1960 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001961 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001962 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001963 "attempt to assign sequence of size %zd "
1964 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001965 newlen, slicelen
1966 );
1967 return -1;
1968 }
1969
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001970 /* Resize before creating the recycle bin, to prevent refleaks. */
1971 if (newlen > slicelen) {
1972 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001973 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001974 return -1;
1975 }
1976 }
1977
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001978 for (i = 0; i < newlen; i++) {
1979 PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1980 if (!Element_Check(element)) {
1981 raise_type_error(element);
1982 Py_DECREF(seq);
1983 return -1;
1984 }
1985 }
1986
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001987 if (slicelen > 0) {
1988 /* to avoid recursive calls to this method (via decref), move
1989 old items to the recycle bin here, and get rid of them when
1990 we're done modifying the element */
1991 recycle = PyList_New(slicelen);
1992 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001993 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001994 return -1;
1995 }
1996 for (cur = start, i = 0; i < slicelen;
1997 cur += step, i++)
1998 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1999 }
2000
2001 if (newlen < slicelen) {
2002 /* delete slice */
2003 for (i = stop; i < self->extra->length; i++)
2004 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
2005 } else if (newlen > slicelen) {
2006 /* insert slice */
2007 for (i = self->extra->length-1; i >= stop; i--)
2008 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
2009 }
2010
2011 /* replace the slice */
2012 for (cur = start, i = 0; i < newlen;
2013 cur += step, i++) {
2014 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
2015 Py_INCREF(element);
2016 self->extra->children[cur] = element;
2017 }
2018
2019 self->extra->length += newlen - slicelen;
2020
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02002021 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002022
2023 /* discard the recycle bin, and everything in it */
2024 Py_XDECREF(recycle);
2025
2026 return 0;
2027 }
2028 else {
2029 PyErr_SetString(PyExc_TypeError,
2030 "element indices must be integers");
2031 return -1;
2032 }
2033}
2034
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002035static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02002036element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002037{
Serhiy Storchakadde08152015-11-25 15:28:13 +02002038 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002039 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002040 return res;
2041}
2042
Serhiy Storchakadde08152015-11-25 15:28:13 +02002043static PyObject*
2044element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002045{
Serhiy Storchakadde08152015-11-25 15:28:13 +02002046 PyObject *res = element_get_text(self);
2047 Py_XINCREF(res);
2048 return res;
2049}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002050
Serhiy Storchakadde08152015-11-25 15:28:13 +02002051static PyObject*
2052element_tail_getter(ElementObject *self, void *closure)
2053{
2054 PyObject *res = element_get_tail(self);
2055 Py_XINCREF(res);
2056 return res;
2057}
2058
2059static PyObject*
2060element_attrib_getter(ElementObject *self, void *closure)
2061{
2062 PyObject *res;
2063 if (!self->extra) {
2064 if (create_extra(self, NULL) < 0)
2065 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002066 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02002067 res = element_get_attrib(self);
2068 Py_XINCREF(res);
2069 return res;
2070}
Victor Stinner4d463432013-07-11 23:05:03 +02002071
/* macro for setter validation
 *
 * A NULL value means the attribute is being deleted, which is not allowed:
 * AttributeError is set and the *enclosing function* returns -1.
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and stays safe inside an unbraced if/else. Use it
 * followed by a semicolon.
 */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2080
Serhiy Storchakadde08152015-11-25 15:28:13 +02002081static int
2082element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2083{
2084 _VALIDATE_ATTR_VALUE(value);
2085 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002086 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002087 return 0;
2088}
2089
2090static int
2091element_text_setter(ElementObject *self, PyObject *value, void *closure)
2092{
2093 _VALIDATE_ATTR_VALUE(value);
2094 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002095 _set_joined_ptr(&self->text, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002096 return 0;
2097}
2098
2099static int
2100element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2101{
2102 _VALIDATE_ATTR_VALUE(value);
2103 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002104 _set_joined_ptr(&self->tail, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002105 return 0;
2106}
2107
2108static int
2109element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2110{
2111 _VALIDATE_ATTR_VALUE(value);
2112 if (!self->extra) {
2113 if (create_extra(self, NULL) < 0)
2114 return -1;
2115 }
2116 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002117 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002118 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002119}
2120
2121static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002122 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002123 0, /* sq_concat */
2124 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002125 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002126 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002127 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002128 0,
2129};
2130
Eli Bendersky64d11e62012-06-15 07:42:50 +03002131/******************************* Element iterator ****************************/
2132
/* ElementIterObject represents the iteration state over an XML element in
 * pre-order traversal. To keep track of which sub-element should be returned
 * next, a stack of parents is maintained. This is a standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
 * Each stack item contains the saved parent to which we should return after
 * the current one is exhausted, and the next child to examine in that parent.
 */
typedef struct ParentLocator_t {
    /* element whose children are being walked; the stack entry holds a
       reference to it */
    ElementObject *parent;
    /* index of the next child of parent to visit */
    Py_ssize_t child_index;
} ParentLocator;
2145
/* Iterator object shared by element iteration and text iteration; see
   elementiter_next() for how the fields are used. */
typedef struct {
    PyObject_HEAD
    /* heap-allocated array used as the stack of parents */
    ParentLocator *parent_stack;
    /* number of entries of parent_stack currently in use */
    Py_ssize_t parent_stack_used;
    /* number of entries allocated for parent_stack */
    Py_ssize_t parent_stack_size;
    /* element the iteration starts from; set to NULL once it has been taken
       by elementiter_next() */
    ElementObject *root_element;
    /* tag that yielded elements must compare equal to; Py_None matches every
       element */
    PyObject *sought_tag;
    /* nonzero: yield text and tail strings instead of elements */
    int gettext;
} ElementIterObject;
2155
2156
2157static void
2158elementiter_dealloc(ElementIterObject *it)
2159{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002160 Py_ssize_t i = it->parent_stack_used;
2161 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002162 /* bpo-31095: UnTrack is needed before calling any callbacks */
2163 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002164 while (i--)
2165 Py_XDECREF(it->parent_stack[i].parent);
2166 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002167
2168 Py_XDECREF(it->sought_tag);
2169 Py_XDECREF(it->root_element);
2170
Eli Bendersky64d11e62012-06-15 07:42:50 +03002171 PyObject_GC_Del(it);
2172}
2173
2174static int
2175elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2176{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002177 Py_ssize_t i = it->parent_stack_used;
2178 while (i--)
2179 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002180
2181 Py_VISIT(it->root_element);
2182 Py_VISIT(it->sought_tag);
2183 return 0;
2184}
2185
2186/* Helper function for elementiter_next. Add a new parent to the parent stack.
2187 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002188static int
2189parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002190{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002191 ParentLocator *item;
2192
2193 if (it->parent_stack_used >= it->parent_stack_size) {
2194 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2195 ParentLocator *parent_stack = it->parent_stack;
2196 PyMem_Resize(parent_stack, ParentLocator, new_size);
2197 if (parent_stack == NULL)
2198 return -1;
2199 it->parent_stack = parent_stack;
2200 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002201 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002202 item = it->parent_stack + it->parent_stack_used++;
2203 Py_INCREF(parent);
2204 item->parent = parent;
2205 item->child_index = 0;
2206 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002207}
2208
static PyObject *
elementiter_next(ElementIterObject *it)
{
    /* Sub-element iterator.
     *
     * Returns the next item of the pre-order traversal as a new reference,
     * or NULL with an exception set (StopIteration when exhausted).
     *
     * A short note on gettext: this function serves both the iter() and
     * itertext() methods to avoid code duplication. However, there are a few
     * small differences in the way these iterations work. Namely:
     *   - itertext() only yields text from nodes that have it, and continues
     *     iterating when a node doesn't have text (so it doesn't return any
     *     node like iter())
     *   - itertext() also has to handle tail, after finishing with all the
     *     children of a node.
     *
     * Reference ownership: on every path below the local `elem` holds an
     * owned reference that must either be returned or released.
     */
    int rc;
    ElementObject *elem;
    PyObject *text;

    while (1) {
        /* Handle the case reached in the beginning and end of iteration, where
         * the parent stack is empty. If root_element is NULL and we're here, the
         * iterator is exhausted.
         */
        if (!it->parent_stack_used) {
            if (!it->root_element) {
                PyErr_SetNone(PyExc_StopIteration);
                return NULL;
            }

            elem = it->root_element;  /* steals a reference */
            it->root_element = NULL;
        }
        else {
            /* See if there are children left to traverse in the current parent. If
             * yes, visit the next child. If not, pop the stack and try again.
             */
            ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
            Py_ssize_t child_index = item->child_index;
            ElementObjectExtra *extra;
            elem = item->parent;
            extra = elem->extra;
            if (!extra || child_index >= extra->length) {
                /* Pop: the reference held by the stack entry is now owned by
                   the local `elem`. */
                it->parent_stack_used--;
                /* Note that extra condition on it->parent_stack_used here;
                 * this is because itertext() is supposed to only return *inner*
                 * text, not text following the element it began iteration with.
                 */
                if (it->gettext && it->parent_stack_used) {
                    text = element_get_tail(elem);
                    goto gettext;
                }
                Py_DECREF(elem);
                continue;
            }

            assert(Element_Check(extra->children[child_index]));
            elem = (ElementObject *)extra->children[child_index];
            item->child_index++;
            Py_INCREF(elem);
        }

        /* elem is about to be visited; push it so that its own children are
           walked next. The stack takes its own reference. */
        if (parent_stack_push_new(it, elem) < 0) {
            Py_DECREF(elem);
            PyErr_NoMemory();
            return NULL;
        }
        if (it->gettext) {
            text = element_get_text(elem);
            goto gettext;
        }

        /* Element iteration: Py_None as sought_tag matches everything. */
        if (it->sought_tag == Py_None)
            return (PyObject *)elem;

        rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
        if (rc > 0)
            return (PyObject *)elem;

        /* No match (rc == 0) or comparison error (rc < 0): drop elem. */
        Py_DECREF(elem);
        if (rc < 0)
            return NULL;
        continue;

gettext:
        /* Text iteration. `text` is used as a borrowed reference here; NULL
           is treated as failure (presumably with an exception already set by
           the helper -- confirm against element_get_text/element_get_tail). */
        if (!text) {
            Py_DECREF(elem);
            return NULL;
        }
        if (text == Py_None) {
            Py_DECREF(elem);
        }
        else {
            /* Take our own reference before releasing elem. */
            Py_INCREF(text);
            Py_DECREF(elem);
            /* Only truthy text is yielded; otherwise keep iterating. */
            rc = PyObject_IsTrue(text);
            if (rc > 0)
                return text;
            Py_DECREF(text);
            if (rc < 0)
                return NULL;
        }
    }

    return NULL;
}
2314
2315
2316static PyTypeObject ElementIter_Type = {
2317 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002318 /* Using the module's name since the pure-Python implementation does not
2319 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002320 "_elementtree._element_iterator", /* tp_name */
2321 sizeof(ElementIterObject), /* tp_basicsize */
2322 0, /* tp_itemsize */
2323 /* methods */
2324 (destructor)elementiter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002325 0, /* tp_vectorcall_offset */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002326 0, /* tp_getattr */
2327 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002328 0, /* tp_as_async */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002329 0, /* tp_repr */
2330 0, /* tp_as_number */
2331 0, /* tp_as_sequence */
2332 0, /* tp_as_mapping */
2333 0, /* tp_hash */
2334 0, /* tp_call */
2335 0, /* tp_str */
2336 0, /* tp_getattro */
2337 0, /* tp_setattro */
2338 0, /* tp_as_buffer */
2339 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2340 0, /* tp_doc */
2341 (traverseproc)elementiter_traverse, /* tp_traverse */
2342 0, /* tp_clear */
2343 0, /* tp_richcompare */
2344 0, /* tp_weaklistoffset */
2345 PyObject_SelfIter, /* tp_iter */
2346 (iternextfunc)elementiter_next, /* tp_iternext */
2347 0, /* tp_methods */
2348 0, /* tp_members */
2349 0, /* tp_getset */
2350 0, /* tp_base */
2351 0, /* tp_dict */
2352 0, /* tp_descr_get */
2353 0, /* tp_descr_set */
2354 0, /* tp_dictoffset */
2355 0, /* tp_init */
2356 0, /* tp_alloc */
2357 0, /* tp_new */
2358};
2359
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002360#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002361
2362static PyObject *
2363create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2364{
2365 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002366
2367 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2368 if (!it)
2369 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002370
Victor Stinner4d463432013-07-11 23:05:03 +02002371 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002372 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002373 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002374 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002375 it->root_element = self;
2376
Eli Bendersky64d11e62012-06-15 07:42:50 +03002377 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002378
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002379 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002380 if (it->parent_stack == NULL) {
2381 Py_DECREF(it);
2382 PyErr_NoMemory();
2383 return NULL;
2384 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002385 it->parent_stack_used = 0;
2386 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002387
Eli Bendersky64d11e62012-06-15 07:42:50 +03002388 return (PyObject *)it;
2389}
2390
2391
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002392/* ==================================================================== */
2393/* the tree builder type */
2394
/* State of a tree builder instance. treebuilder_new() starts this and last
   as Py_None, stack as a 20-slot list, and every other field as NULL or 0. */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    /* factories; NULL until configured.
       NOTE(review): presumably callables used to create element, comment and
       processing-instruction nodes -- confirm against the code that uses
       them. */
    PyObject *element_factory;
    PyObject *comment_factory;
    PyObject *pi_factory;

    /* element tracing */
    PyObject *events_append; /* the append method of the list of events, or NULL */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
    PyObject *comment_event_obj;
    PyObject *pi_event_obj;

    /* flags, 0 by default.
       NOTE(review): presumably control whether comment / PI nodes are
       inserted into the tree -- confirm against the code that reads them. */
    char insert_comments;
    char insert_pis;
} TreeBuilderObject;

/* True when op is exactly a TreeBuilder instance (subclasses do not match). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002426
2427/* -------------------------------------------------------------------- */
2428/* constructor and destructor */
2429
Eli Bendersky58d548d2012-05-29 15:45:16 +03002430static PyObject *
2431treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002432{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002433 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2434 if (t != NULL) {
2435 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002436
Eli Bendersky58d548d2012-05-29 15:45:16 +03002437 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002438 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002439 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002440 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002441
Eli Bendersky58d548d2012-05-29 15:45:16 +03002442 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002443 t->element_factory = NULL;
Stefan Behnel43851a22019-05-01 21:20:38 +02002444 t->comment_factory = NULL;
2445 t->pi_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002446 t->stack = PyList_New(20);
2447 if (!t->stack) {
2448 Py_DECREF(t->this);
2449 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002450 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002451 return NULL;
2452 }
2453 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002454
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002455 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002456 t->start_event_obj = t->end_event_obj = NULL;
2457 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
Stefan Behnel43851a22019-05-01 21:20:38 +02002458 t->comment_event_obj = t->pi_event_obj = NULL;
2459 t->insert_comments = t->insert_pis = 0;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002460 }
2461 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002462}
2463
Serhiy Storchakacb985562015-05-04 15:32:48 +03002464/*[clinic input]
2465_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002466
Serhiy Storchakacb985562015-05-04 15:32:48 +03002467 element_factory: object = NULL
Stefan Behnel43851a22019-05-01 21:20:38 +02002468 *
2469 comment_factory: object = NULL
2470 pi_factory: object = NULL
2471 insert_comments: bool = False
2472 insert_pis: bool = False
Serhiy Storchakacb985562015-05-04 15:32:48 +03002473
2474[clinic start generated code]*/
2475
2476static int
2477_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
Stefan Behnel43851a22019-05-01 21:20:38 +02002478 PyObject *element_factory,
2479 PyObject *comment_factory,
2480 PyObject *pi_factory,
2481 int insert_comments, int insert_pis)
2482/*[clinic end generated code: output=8571d4dcadfdf952 input=1f967b5c245e0a71]*/
Serhiy Storchakacb985562015-05-04 15:32:48 +03002483{
Stefan Behnel43851a22019-05-01 21:20:38 +02002484 if (element_factory && element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002485 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002486 Py_XSETREF(self->element_factory, element_factory);
Stefan Behnel43851a22019-05-01 21:20:38 +02002487 } else {
2488 Py_CLEAR(self->element_factory);
2489 }
2490
2491 if (!comment_factory || comment_factory == Py_None) {
2492 elementtreestate *st = ET_STATE_GLOBAL;
2493 comment_factory = st->comment_factory;
2494 }
2495 if (comment_factory) {
2496 Py_INCREF(comment_factory);
2497 Py_XSETREF(self->comment_factory, comment_factory);
2498 self->insert_comments = insert_comments;
2499 } else {
2500 Py_CLEAR(self->comment_factory);
2501 self->insert_comments = 0;
2502 }
2503
2504 if (!pi_factory || pi_factory == Py_None) {
2505 elementtreestate *st = ET_STATE_GLOBAL;
2506 pi_factory = st->pi_factory;
2507 }
2508 if (pi_factory) {
2509 Py_INCREF(pi_factory);
2510 Py_XSETREF(self->pi_factory, pi_factory);
2511 self->insert_pis = insert_pis;
2512 } else {
2513 Py_CLEAR(self->pi_factory);
2514 self->insert_pis = 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002515 }
2516
Eli Bendersky58d548d2012-05-29 15:45:16 +03002517 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002518}
2519
Eli Bendersky48d358b2012-05-30 17:57:50 +03002520static int
2521treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2522{
Stefan Behnel43851a22019-05-01 21:20:38 +02002523 Py_VISIT(self->pi_event_obj);
2524 Py_VISIT(self->comment_event_obj);
Serhiy Storchakad2a75c62018-12-18 22:29:14 +02002525 Py_VISIT(self->end_ns_event_obj);
2526 Py_VISIT(self->start_ns_event_obj);
2527 Py_VISIT(self->end_event_obj);
2528 Py_VISIT(self->start_event_obj);
2529 Py_VISIT(self->events_append);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002530 Py_VISIT(self->root);
2531 Py_VISIT(self->this);
2532 Py_VISIT(self->last);
2533 Py_VISIT(self->data);
2534 Py_VISIT(self->stack);
Stefan Behnel43851a22019-05-01 21:20:38 +02002535 Py_VISIT(self->pi_factory);
2536 Py_VISIT(self->comment_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002537 Py_VISIT(self->element_factory);
2538 return 0;
2539}
2540
2541static int
2542treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002543{
Stefan Behnel43851a22019-05-01 21:20:38 +02002544 Py_CLEAR(self->pi_event_obj);
2545 Py_CLEAR(self->comment_event_obj);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002546 Py_CLEAR(self->end_ns_event_obj);
2547 Py_CLEAR(self->start_ns_event_obj);
2548 Py_CLEAR(self->end_event_obj);
2549 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002550 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002551 Py_CLEAR(self->stack);
2552 Py_CLEAR(self->data);
2553 Py_CLEAR(self->last);
2554 Py_CLEAR(self->this);
Stefan Behnel43851a22019-05-01 21:20:38 +02002555 Py_CLEAR(self->pi_factory);
2556 Py_CLEAR(self->comment_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002557 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002558 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002559 return 0;
2560}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002561
Eli Bendersky48d358b2012-05-30 17:57:50 +03002562static void
2563treebuilder_dealloc(TreeBuilderObject *self)
2564{
2565 PyObject_GC_UnTrack(self);
2566 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002567 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002568}
2569
2570/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002571/* helpers for handling of arbitrary element-like objects */
2572
Stefan Behnel43851a22019-05-01 21:20:38 +02002573/*[clinic input]
2574_elementtree._set_factories
2575
2576 comment_factory: object
2577 pi_factory: object
2578 /
2579
2580Change the factories used to create comments and processing instructions.
2581
2582For internal use only.
2583[clinic start generated code]*/
2584
2585static PyObject *
2586_elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
2587 PyObject *pi_factory)
2588/*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/
2589{
2590 elementtreestate *st = ET_STATE_GLOBAL;
2591 PyObject *old;
2592
2593 if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) {
2594 PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s",
2595 Py_TYPE(comment_factory)->tp_name);
2596 return NULL;
2597 }
2598 if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) {
2599 PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s",
2600 Py_TYPE(pi_factory)->tp_name);
2601 return NULL;
2602 }
2603
2604 old = PyTuple_Pack(2,
2605 st->comment_factory ? st->comment_factory : Py_None,
2606 st->pi_factory ? st->pi_factory : Py_None);
2607
2608 if (comment_factory == Py_None) {
2609 Py_CLEAR(st->comment_factory);
2610 } else {
2611 Py_INCREF(comment_factory);
2612 Py_XSETREF(st->comment_factory, comment_factory);
2613 }
2614 if (pi_factory == Py_None) {
2615 Py_CLEAR(st->pi_factory);
2616 } else {
2617 Py_INCREF(pi_factory);
2618 Py_XSETREF(st->pi_factory, pi_factory);
2619 }
2620
2621 return old;
2622}
2623
Antoine Pitrouee329312012-10-04 19:53:29 +02002624static int
Serhiy Storchaka576def02017-03-30 09:47:31 +03002625treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
Antoine Pitrouee329312012-10-04 19:53:29 +02002626 PyObject **dest, _Py_Identifier *name)
2627{
2628 if (Element_CheckExact(element)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002629 PyObject *tmp = JOIN_OBJ(*dest);
2630 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2631 *data = NULL;
2632 Py_DECREF(tmp);
Antoine Pitrouee329312012-10-04 19:53:29 +02002633 return 0;
2634 }
2635 else {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002636 PyObject *joined = list_join(*data);
Antoine Pitrouee329312012-10-04 19:53:29 +02002637 int r;
2638 if (joined == NULL)
2639 return -1;
2640 r = _PyObject_SetAttrId(element, name, joined);
2641 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002642 if (r < 0)
2643 return -1;
2644 Py_CLEAR(*data);
2645 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002646 }
2647}
2648
Serhiy Storchaka576def02017-03-30 09:47:31 +03002649LOCAL(int)
2650treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002651{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002652 PyObject *element = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002653
Serhiy Storchaka576def02017-03-30 09:47:31 +03002654 if (!self->data) {
2655 return 0;
2656 }
2657
2658 if (self->this == element) {
2659 _Py_IDENTIFIER(text);
2660 return treebuilder_set_element_text_or_tail(
2661 element, &self->data,
2662 &((ElementObject *) element)->text, &PyId_text);
2663 }
2664 else {
2665 _Py_IDENTIFIER(tail);
2666 return treebuilder_set_element_text_or_tail(
2667 element, &self->data,
2668 &((ElementObject *) element)->tail, &PyId_tail);
2669 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002670}
2671
2672static int
2673treebuilder_add_subelement(PyObject *element, PyObject *child)
2674{
2675 _Py_IDENTIFIER(append);
2676 if (Element_CheckExact(element)) {
2677 ElementObject *elem = (ElementObject *) element;
2678 return element_add_subelement(elem, child);
2679 }
2680 else {
2681 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002682 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002683 if (res == NULL)
2684 return -1;
2685 Py_DECREF(res);
2686 return 0;
2687 }
2688}
2689
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002690LOCAL(int)
2691treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2692 PyObject *node)
2693{
2694 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002695 PyObject *res;
2696 PyObject *event = PyTuple_Pack(2, action, node);
2697 if (event == NULL)
2698 return -1;
Stefan Behnel43851a22019-05-01 21:20:38 +02002699 res = _PyObject_FastCall(self->events_append, &event, 1);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002700 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002701 if (res == NULL)
2702 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002703 Py_DECREF(res);
2704 }
2705 return 0;
2706}
2707
Antoine Pitrouee329312012-10-04 19:53:29 +02002708/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002709/* handlers */
2710
2711LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002712treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2713 PyObject* attrib)
2714{
2715 PyObject* node;
2716 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002717 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002718
Serhiy Storchaka576def02017-03-30 09:47:31 +03002719 if (treebuilder_flush_data(self) < 0) {
2720 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002721 }
2722
Stefan Behnel43851a22019-05-01 21:20:38 +02002723 if (!self->element_factory) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002724 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002725 } else if (attrib == Py_None) {
2726 attrib = PyDict_New();
2727 if (!attrib)
2728 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002729 node = PyObject_CallFunctionObjArgs(self->element_factory,
2730 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002731 Py_DECREF(attrib);
2732 }
2733 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002734 node = PyObject_CallFunctionObjArgs(self->element_factory,
2735 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002736 }
2737 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002738 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002739 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002740
Antoine Pitrouee329312012-10-04 19:53:29 +02002741 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002742
2743 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002744 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002745 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002746 } else {
2747 if (self->root) {
2748 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002749 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002750 "multiple elements on top level"
2751 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002752 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002753 }
2754 Py_INCREF(node);
2755 self->root = node;
2756 }
2757
2758 if (self->index < PyList_GET_SIZE(self->stack)) {
2759 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002760 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002761 Py_INCREF(this);
2762 } else {
2763 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002764 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002765 }
2766 self->index++;
2767
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002768 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002769 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002770 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002771 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002772
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002773 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2774 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002775
2776 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002777
2778 error:
2779 Py_DECREF(node);
2780 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002781}
2782
2783LOCAL(PyObject*)
2784treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2785{
2786 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002787 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002788 /* ignore calls to data before the first call to start */
2789 Py_RETURN_NONE;
2790 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002791 /* store the first item as is */
2792 Py_INCREF(data); self->data = data;
2793 } else {
2794 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002795 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2796 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002797 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002798 /* expat often generates single character data sections; handle
2799 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002800 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2801 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002802 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002803 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002804 } else if (PyList_CheckExact(self->data)) {
2805 if (PyList_Append(self->data, data) < 0)
2806 return NULL;
2807 } else {
2808 PyObject* list = PyList_New(2);
2809 if (!list)
2810 return NULL;
2811 PyList_SET_ITEM(list, 0, self->data);
2812 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2813 self->data = list;
2814 }
2815 }
2816
2817 Py_RETURN_NONE;
2818}
2819
2820LOCAL(PyObject*)
2821treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2822{
2823 PyObject* item;
2824
Serhiy Storchaka576def02017-03-30 09:47:31 +03002825 if (treebuilder_flush_data(self) < 0) {
2826 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002827 }
2828
2829 if (self->index == 0) {
2830 PyErr_SetString(
2831 PyExc_IndexError,
2832 "pop from empty stack"
2833 );
2834 return NULL;
2835 }
2836
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002837 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002838 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002839 self->index--;
2840 self->this = PyList_GET_ITEM(self->stack, self->index);
2841 Py_INCREF(self->this);
2842 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002843
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002844 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2845 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002846
2847 Py_INCREF(self->last);
2848 return (PyObject*) self->last;
2849}
2850
Stefan Behnel43851a22019-05-01 21:20:38 +02002851LOCAL(PyObject*)
2852treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
2853{
2854 PyObject* comment = NULL;
2855 PyObject* this;
2856
2857 if (treebuilder_flush_data(self) < 0) {
2858 return NULL;
2859 }
2860
2861 if (self->comment_factory) {
2862 comment = _PyObject_FastCall(self->comment_factory, &text, 1);
2863 if (!comment)
2864 return NULL;
2865
2866 this = self->this;
2867 if (self->insert_comments && this != Py_None) {
2868 if (treebuilder_add_subelement(this, comment) < 0)
2869 goto error;
2870 }
2871 } else {
2872 Py_INCREF(text);
2873 comment = text;
2874 }
2875
2876 if (self->events_append && self->comment_event_obj) {
2877 if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0)
2878 goto error;
2879 }
2880
2881 return comment;
2882
2883 error:
2884 Py_DECREF(comment);
2885 return NULL;
2886}
2887
2888LOCAL(PyObject*)
2889treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
2890{
2891 PyObject* pi = NULL;
2892 PyObject* this;
2893 PyObject* stack[2] = {target, text};
2894
2895 if (treebuilder_flush_data(self) < 0) {
2896 return NULL;
2897 }
2898
2899 if (self->pi_factory) {
2900 pi = _PyObject_FastCall(self->pi_factory, stack, 2);
2901 if (!pi) {
2902 return NULL;
2903 }
2904
2905 this = self->this;
2906 if (self->insert_pis && this != Py_None) {
2907 if (treebuilder_add_subelement(this, pi) < 0)
2908 goto error;
2909 }
2910 } else {
2911 pi = PyTuple_Pack(2, target, text);
2912 if (!pi) {
2913 return NULL;
2914 }
2915 }
2916
2917 if (self->events_append && self->pi_event_obj) {
2918 if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0)
2919 goto error;
2920 }
2921
2922 return pi;
2923
2924 error:
2925 Py_DECREF(pi);
2926 return NULL;
2927}
2928
Stefan Behneldde3eeb2019-05-01 21:49:58 +02002929LOCAL(PyObject*)
2930treebuilder_handle_start_ns(TreeBuilderObject* self, PyObject* prefix, PyObject* uri)
2931{
2932 PyObject* parcel;
2933
2934 if (self->events_append && self->start_ns_event_obj) {
2935 parcel = PyTuple_Pack(2, prefix, uri);
2936 if (!parcel) {
2937 return NULL;
2938 }
2939
2940 if (treebuilder_append_event(self, self->start_ns_event_obj, parcel) < 0) {
2941 Py_DECREF(parcel);
2942 return NULL;
2943 }
2944 Py_DECREF(parcel);
2945 }
2946
2947 Py_RETURN_NONE;
2948}
2949
2950LOCAL(PyObject*)
2951treebuilder_handle_end_ns(TreeBuilderObject* self, PyObject* prefix)
2952{
2953 if (self->events_append && self->end_ns_event_obj) {
2954 if (treebuilder_append_event(self, self->end_ns_event_obj, prefix) < 0) {
2955 return NULL;
2956 }
2957 }
2958
2959 Py_RETURN_NONE;
2960}
2961
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002962/* -------------------------------------------------------------------- */
2963/* methods (in alphabetical order) */
2964
Serhiy Storchakacb985562015-05-04 15:32:48 +03002965/*[clinic input]
2966_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002967
Serhiy Storchakacb985562015-05-04 15:32:48 +03002968 data: object
2969 /
2970
2971[clinic start generated code]*/
2972
2973static PyObject *
2974_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2975/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2976{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002977 return treebuilder_handle_data(self, data);
2978}
2979
Serhiy Storchakacb985562015-05-04 15:32:48 +03002980/*[clinic input]
2981_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002982
Serhiy Storchakacb985562015-05-04 15:32:48 +03002983 tag: object
2984 /
2985
2986[clinic start generated code]*/
2987
2988static PyObject *
2989_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2990/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2991{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002992 return treebuilder_handle_end(self, tag);
2993}
2994
Stefan Behnel43851a22019-05-01 21:20:38 +02002995/*[clinic input]
2996_elementtree.TreeBuilder.comment
2997
2998 text: object
2999 /
3000
3001[clinic start generated code]*/
3002
3003static PyObject *
3004_elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text)
3005/*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/
3006{
3007 return treebuilder_handle_comment(self, text);
3008}
3009
3010/*[clinic input]
3011_elementtree.TreeBuilder.pi
3012
3013 target: object
3014 text: object = None
3015 /
3016
3017[clinic start generated code]*/
3018
3019static PyObject *
3020_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target,
3021 PyObject *text)
3022/*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/
3023{
3024 return treebuilder_handle_pi(self, target, text);
3025}
3026
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003027LOCAL(PyObject*)
3028treebuilder_done(TreeBuilderObject* self)
3029{
3030 PyObject* res;
3031
3032 /* FIXME: check stack size? */
3033
3034 if (self->root)
3035 res = self->root;
3036 else
3037 res = Py_None;
3038
3039 Py_INCREF(res);
3040 return res;
3041}
3042
Serhiy Storchakacb985562015-05-04 15:32:48 +03003043/*[clinic input]
3044_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003045
Serhiy Storchakacb985562015-05-04 15:32:48 +03003046[clinic start generated code]*/
3047
3048static PyObject *
3049_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
3050/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
3051{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003052 return treebuilder_done(self);
3053}
3054
Serhiy Storchakacb985562015-05-04 15:32:48 +03003055/*[clinic input]
3056_elementtree.TreeBuilder.start
3057
3058 tag: object
3059 attrs: object = None
3060 /
3061
3062[clinic start generated code]*/
3063
3064static PyObject *
3065_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
3066 PyObject *attrs)
3067/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003068{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003069 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003070}
3071
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003072/* ==================================================================== */
3073/* the expat interface */
3074
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003075#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003076#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07003077
3078/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
3079 * cached globally without being in per-module state.
3080 */
Eli Bendersky20d41742012-06-01 09:48:37 +03003081static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003082#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003083
Eli Bendersky52467b12012-06-01 07:13:08 +03003084static XML_Memory_Handling_Suite ExpatMemoryHandler = {
3085 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
3086
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003087typedef struct {
3088 PyObject_HEAD
3089
3090 XML_Parser parser;
3091
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003092 PyObject *target;
3093 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003094
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003095 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003096
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003097 PyObject *handle_start_ns;
3098 PyObject *handle_end_ns;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003099 PyObject *handle_start;
3100 PyObject *handle_data;
3101 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003102
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003103 PyObject *handle_comment;
3104 PyObject *handle_pi;
3105 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003106
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003107 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003108
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003109} XMLParserObject;
3110
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003111/* helpers */
3112
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003113LOCAL(PyObject*)
3114makeuniversal(XMLParserObject* self, const char* string)
3115{
3116 /* convert a UTF-8 tag/attribute name from the expat parser
3117 to a universal name string */
3118
Antoine Pitrouc1948842012-10-01 23:40:37 +02003119 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003120 PyObject* key;
3121 PyObject* value;
3122
3123 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00003124 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003125 if (!key)
3126 return NULL;
3127
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003128 value = PyDict_GetItemWithError(self->names, key);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003129
3130 if (value) {
3131 Py_INCREF(value);
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003132 }
3133 else if (!PyErr_Occurred()) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003134 /* new name. convert to universal name, and decode as
3135 necessary */
3136
3137 PyObject* tag;
3138 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02003139 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003140
3141 /* look for namespace separator */
3142 for (i = 0; i < size; i++)
3143 if (string[i] == '}')
3144 break;
3145 if (i != size) {
3146 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00003147 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02003148 if (tag == NULL) {
3149 Py_DECREF(key);
3150 return NULL;
3151 }
Christian Heimes72b710a2008-05-26 13:28:38 +00003152 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003153 p[0] = '{';
3154 memcpy(p+1, string, size);
3155 size++;
3156 } else {
3157 /* plain name; use key as tag */
3158 Py_INCREF(key);
3159 tag = key;
3160 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003161
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003162 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00003163 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00003164 value = PyUnicode_DecodeUTF8(p, size, "strict");
3165 Py_DECREF(tag);
3166 if (!value) {
3167 Py_DECREF(key);
3168 return NULL;
3169 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003170
3171 /* add to names dictionary */
3172 if (PyDict_SetItem(self->names, key, value) < 0) {
3173 Py_DECREF(key);
3174 Py_DECREF(value);
3175 return NULL;
3176 }
3177 }
3178
3179 Py_DECREF(key);
3180 return value;
3181}
3182
Eli Bendersky5b77d812012-03-16 08:20:05 +02003183/* Set the ParseError exception with the given parameters.
3184 * If message is not NULL, it's used as the error string. Otherwise, the
3185 * message string is the default for the given error_code.
3186*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003187static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003188expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
3189 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003190{
Eli Bendersky5b77d812012-03-16 08:20:05 +02003191 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003192 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003193
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003194 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02003195 message ? message : EXPAT(ErrorString)(error_code),
3196 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01003197 if (errmsg == NULL)
3198 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003199
Stefan Behnel43851a22019-05-01 21:20:38 +02003200 error = _PyObject_FastCall(st->parseerror_obj, &errmsg, 1);
Victor Stinner499dfcf2011-03-21 13:26:24 +01003201 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003202 if (!error)
3203 return;
3204
Eli Bendersky5b77d812012-03-16 08:20:05 +02003205 /* Add code and position attributes */
3206 code = PyLong_FromLong((long)error_code);
3207 if (!code) {
3208 Py_DECREF(error);
3209 return;
3210 }
3211 if (PyObject_SetAttrString(error, "code", code) == -1) {
3212 Py_DECREF(error);
3213 Py_DECREF(code);
3214 return;
3215 }
3216 Py_DECREF(code);
3217
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003218 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003219 if (!position) {
3220 Py_DECREF(error);
3221 return;
3222 }
3223 if (PyObject_SetAttrString(error, "position", position) == -1) {
3224 Py_DECREF(error);
3225 Py_DECREF(position);
3226 return;
3227 }
3228 Py_DECREF(position);
3229
Eli Bendersky532d03e2013-08-10 08:00:39 -07003230 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003231 Py_DECREF(error);
3232}
3233
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003234/* -------------------------------------------------------------------- */
3235/* handlers */
3236
3237static void
3238expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
3239 int data_len)
3240{
3241 PyObject* key;
3242 PyObject* value;
3243 PyObject* res;
3244
3245 if (data_len < 2 || data_in[0] != '&')
3246 return;
3247
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003248 if (PyErr_Occurred())
3249 return;
3250
Neal Norwitz0269b912007-08-08 06:56:02 +00003251 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003252 if (!key)
3253 return;
3254
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003255 value = PyDict_GetItemWithError(self->entity, key);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003256
3257 if (value) {
3258 if (TreeBuilder_CheckExact(self->target))
3259 res = treebuilder_handle_data(
3260 (TreeBuilderObject*) self->target, value
3261 );
3262 else if (self->handle_data)
Stefan Behnel43851a22019-05-01 21:20:38 +02003263 res = _PyObject_FastCall(self->handle_data, &value, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003264 else
3265 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003266 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003267 } else if (!PyErr_Occurred()) {
3268 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00003269 char message[128] = "undefined entity ";
3270 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003271 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003272 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003273 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003274 EXPAT(GetErrorColumnNumber)(self->parser),
3275 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003276 );
3277 }
3278
3279 Py_DECREF(key);
3280}
3281
3282static void
3283expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
3284 const XML_Char **attrib_in)
3285{
3286 PyObject* res;
3287 PyObject* tag;
3288 PyObject* attrib;
3289 int ok;
3290
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003291 if (PyErr_Occurred())
3292 return;
3293
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003294 /* tag name */
3295 tag = makeuniversal(self, tag_in);
3296 if (!tag)
3297 return; /* parser will look for errors */
3298
3299 /* attributes */
3300 if (attrib_in[0]) {
3301 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003302 if (!attrib) {
3303 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003304 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003305 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003306 while (attrib_in[0] && attrib_in[1]) {
3307 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00003308 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003309 if (!key || !value) {
3310 Py_XDECREF(value);
3311 Py_XDECREF(key);
3312 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003313 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003314 return;
3315 }
3316 ok = PyDict_SetItem(attrib, key, value);
3317 Py_DECREF(value);
3318 Py_DECREF(key);
3319 if (ok < 0) {
3320 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003321 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003322 return;
3323 }
3324 attrib_in += 2;
3325 }
3326 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003327 Py_INCREF(Py_None);
3328 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03003329 }
3330
3331 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003332 /* shortcut */
3333 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3334 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003335 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003336 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003337 if (attrib == Py_None) {
3338 Py_DECREF(attrib);
3339 attrib = PyDict_New();
3340 if (!attrib) {
3341 Py_DECREF(tag);
3342 return;
3343 }
3344 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003345 res = PyObject_CallFunctionObjArgs(self->handle_start,
3346 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003347 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003348 res = NULL;
3349
3350 Py_DECREF(tag);
3351 Py_DECREF(attrib);
3352
3353 Py_XDECREF(res);
3354}
3355
3356static void
3357expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3358 int data_len)
3359{
3360 PyObject* data;
3361 PyObject* res;
3362
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003363 if (PyErr_Occurred())
3364 return;
3365
Neal Norwitz0269b912007-08-08 06:56:02 +00003366 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003367 if (!data)
3368 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003369
3370 if (TreeBuilder_CheckExact(self->target))
3371 /* shortcut */
3372 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3373 else if (self->handle_data)
Stefan Behnel43851a22019-05-01 21:20:38 +02003374 res = _PyObject_FastCall(self->handle_data, &data, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003375 else
3376 res = NULL;
3377
3378 Py_DECREF(data);
3379
3380 Py_XDECREF(res);
3381}
3382
3383static void
3384expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3385{
3386 PyObject* tag;
3387 PyObject* res = NULL;
3388
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003389 if (PyErr_Occurred())
3390 return;
3391
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003392 if (TreeBuilder_CheckExact(self->target))
3393 /* shortcut */
3394 /* the standard tree builder doesn't look at the end tag */
3395 res = treebuilder_handle_end(
3396 (TreeBuilderObject*) self->target, Py_None
3397 );
3398 else if (self->handle_end) {
3399 tag = makeuniversal(self, tag_in);
3400 if (tag) {
Stefan Behnel43851a22019-05-01 21:20:38 +02003401 res = _PyObject_FastCall(self->handle_end, &tag, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003402 Py_DECREF(tag);
3403 }
3404 }
3405
3406 Py_XDECREF(res);
3407}
3408
3409static void
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003410expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix_in,
3411 const XML_Char *uri_in)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003412{
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003413 PyObject* res = NULL;
3414 PyObject* uri;
3415 PyObject* prefix;
3416 PyObject* stack[2];
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003417
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003418 if (PyErr_Occurred())
3419 return;
3420
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003421 if (!uri_in)
3422 uri_in = "";
3423 if (!prefix_in)
3424 prefix_in = "";
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003425
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003426 if (TreeBuilder_CheckExact(self->target)) {
3427 /* shortcut - TreeBuilder does not actually implement .start_ns() */
3428 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003429
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003430 if (target->events_append && target->start_ns_event_obj) {
3431 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3432 if (!prefix)
3433 return;
3434 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3435 if (!uri) {
3436 Py_DECREF(prefix);
3437 return;
3438 }
3439
3440 res = treebuilder_handle_start_ns(target, prefix, uri);
3441 Py_DECREF(uri);
3442 Py_DECREF(prefix);
3443 }
3444 } else if (self->handle_start_ns) {
3445 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3446 if (!prefix)
3447 return;
3448 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3449 if (!uri) {
3450 Py_DECREF(prefix);
3451 return;
3452 }
3453
3454 stack[0] = prefix;
3455 stack[1] = uri;
3456 res = _PyObject_FastCall(self->handle_start_ns, stack, 2);
3457 Py_DECREF(uri);
3458 Py_DECREF(prefix);
3459 }
3460
3461 Py_XDECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003462}
3463
3464static void
3465expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3466{
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003467 PyObject *res = NULL;
3468 PyObject* prefix;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003469
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003470 if (PyErr_Occurred())
3471 return;
3472
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003473 if (!prefix_in)
3474 prefix_in = "";
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003475
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003476 if (TreeBuilder_CheckExact(self->target)) {
3477 /* shortcut - TreeBuilder does not actually implement .end_ns() */
3478 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3479
3480 if (target->events_append && target->end_ns_event_obj) {
3481 res = treebuilder_handle_end_ns(target, Py_None);
3482 }
3483 } else if (self->handle_end_ns) {
3484 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3485 if (!prefix)
3486 return;
3487
3488 res = _PyObject_FastCall(self->handle_end_ns, &prefix, 1);
3489 Py_DECREF(prefix);
3490 }
3491
3492 Py_XDECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003493}
3494
3495static void
3496expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3497{
Stefan Behnel43851a22019-05-01 21:20:38 +02003498 PyObject* comment = NULL;
3499 PyObject* res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003500
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003501 if (PyErr_Occurred())
3502 return;
3503
Stefan Behnel43851a22019-05-01 21:20:38 +02003504 if (TreeBuilder_CheckExact(self->target)) {
3505 /* shortcut */
3506 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3507
Neal Norwitz0269b912007-08-08 06:56:02 +00003508 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Stefan Behnel43851a22019-05-01 21:20:38 +02003509 if (!comment)
3510 return; /* parser will look for errors */
3511
3512 res = treebuilder_handle_comment(target, comment);
3513 } else if (self->handle_comment) {
3514 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3515 if (!comment)
3516 return;
3517
3518 res = _PyObject_FastCall(self->handle_comment, &comment, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003519 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003520
3521 Py_XDECREF(res);
3522 Py_DECREF(comment);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003523}
3524
Eli Bendersky45839902013-01-13 05:14:47 -08003525static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003526expat_start_doctype_handler(XMLParserObject *self,
3527 const XML_Char *doctype_name,
3528 const XML_Char *sysid,
3529 const XML_Char *pubid,
3530 int has_internal_subset)
3531{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003532 _Py_IDENTIFIER(doctype);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003533 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003534 PyObject *res;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003535
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003536 if (PyErr_Occurred())
3537 return;
3538
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003539 doctype_name_obj = makeuniversal(self, doctype_name);
3540 if (!doctype_name_obj)
3541 return;
3542
3543 if (sysid) {
3544 sysid_obj = makeuniversal(self, sysid);
3545 if (!sysid_obj) {
3546 Py_DECREF(doctype_name_obj);
3547 return;
3548 }
3549 } else {
3550 Py_INCREF(Py_None);
3551 sysid_obj = Py_None;
3552 }
3553
3554 if (pubid) {
3555 pubid_obj = makeuniversal(self, pubid);
3556 if (!pubid_obj) {
3557 Py_DECREF(doctype_name_obj);
3558 Py_DECREF(sysid_obj);
3559 return;
3560 }
3561 } else {
3562 Py_INCREF(Py_None);
3563 pubid_obj = Py_None;
3564 }
3565
3566 /* If the target has a handler for doctype, call it. */
3567 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003568 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3569 doctype_name_obj, pubid_obj,
3570 sysid_obj, NULL);
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003571 Py_XDECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003572 }
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003573 else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3574 (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3575 "The doctype() method of XMLParser is ignored. "
3576 "Define doctype() method on the TreeBuilder target.",
3577 1);
Serhiy Storchakaee98e7b2018-07-25 14:52:45 +03003578 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003579 }
3580
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003581 Py_DECREF(doctype_name_obj);
3582 Py_DECREF(pubid_obj);
3583 Py_DECREF(sysid_obj);
3584}
3585
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003586static void
3587expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3588 const XML_Char* data_in)
3589{
Stefan Behnel43851a22019-05-01 21:20:38 +02003590 PyObject* pi_target = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003591 PyObject* data;
3592 PyObject* res;
Stefan Behnel43851a22019-05-01 21:20:38 +02003593 PyObject* stack[2];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003594
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003595 if (PyErr_Occurred())
3596 return;
3597
Stefan Behnel43851a22019-05-01 21:20:38 +02003598 if (TreeBuilder_CheckExact(self->target)) {
3599 /* shortcut */
3600 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3601
3602 if (target->events_append && target->pi_event_obj) {
3603 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3604 if (!pi_target)
3605 goto error;
3606 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3607 if (!data)
3608 goto error;
3609 res = treebuilder_handle_pi(target, pi_target, data);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003610 Py_XDECREF(res);
3611 Py_DECREF(data);
Stefan Behnel43851a22019-05-01 21:20:38 +02003612 Py_DECREF(pi_target);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003613 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003614 } else if (self->handle_pi) {
3615 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3616 if (!pi_target)
3617 goto error;
3618 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3619 if (!data)
3620 goto error;
3621
3622 stack[0] = pi_target;
3623 stack[1] = data;
3624 res = _PyObject_FastCall(self->handle_pi, stack, 2);
3625 Py_XDECREF(res);
3626 Py_DECREF(data);
3627 Py_DECREF(pi_target);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003628 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003629
3630 return;
3631
3632 error:
3633 Py_XDECREF(pi_target);
3634 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003635}
3636
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003637/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003638
Eli Bendersky52467b12012-06-01 07:13:08 +03003639static PyObject *
3640xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003641{
Eli Bendersky52467b12012-06-01 07:13:08 +03003642 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3643 if (self) {
3644 self->parser = NULL;
3645 self->target = self->entity = self->names = NULL;
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003646 self->handle_start_ns = self->handle_end_ns = NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003647 self->handle_start = self->handle_data = self->handle_end = NULL;
3648 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003649 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003650 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003651 return (PyObject *)self;
3652}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003653
scoderc8d8e152017-09-14 22:00:03 +02003654static int
3655ignore_attribute_error(PyObject *value)
3656{
3657 if (value == NULL) {
3658 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3659 return -1;
3660 }
3661 PyErr_Clear();
3662 }
3663 return 0;
3664}
3665
Serhiy Storchakacb985562015-05-04 15:32:48 +03003666/*[clinic input]
3667_elementtree.XMLParser.__init__
3668
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003669 *
Serhiy Storchakacb985562015-05-04 15:32:48 +03003670 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003671 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003672
3673[clinic start generated code]*/
3674
Eli Bendersky52467b12012-06-01 07:13:08 +03003675static int
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003676_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3677 const char *encoding)
3678/*[clinic end generated code: output=3ae45ec6cdf344e4 input=96288fcba916cfce]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003679{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003680 self->entity = PyDict_New();
3681 if (!self->entity)
3682 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003683
Serhiy Storchakacb985562015-05-04 15:32:48 +03003684 self->names = PyDict_New();
3685 if (!self->names) {
3686 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003687 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003688 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003689
Serhiy Storchakacb985562015-05-04 15:32:48 +03003690 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3691 if (!self->parser) {
3692 Py_CLEAR(self->entity);
3693 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003694 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003695 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003696 }
Christian Heimescb5778f2018-09-18 14:38:58 +02003697 /* expat < 2.1.0 has no XML_SetHashSalt() */
3698 if (EXPAT(SetHashSalt) != NULL) {
3699 EXPAT(SetHashSalt)(self->parser,
3700 (unsigned long)_Py_HashSecret.expat.hashsalt);
3701 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003702
Eli Bendersky52467b12012-06-01 07:13:08 +03003703 if (target) {
3704 Py_INCREF(target);
3705 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003706 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003707 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003708 Py_CLEAR(self->entity);
3709 Py_CLEAR(self->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003710 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003711 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003712 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003713 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003714
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003715 self->handle_start_ns = PyObject_GetAttrString(target, "start_ns");
3716 if (ignore_attribute_error(self->handle_start_ns)) {
3717 return -1;
3718 }
3719 self->handle_end_ns = PyObject_GetAttrString(target, "end_ns");
3720 if (ignore_attribute_error(self->handle_end_ns)) {
3721 return -1;
3722 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003723 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003724 if (ignore_attribute_error(self->handle_start)) {
3725 return -1;
3726 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003727 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003728 if (ignore_attribute_error(self->handle_data)) {
3729 return -1;
3730 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003731 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003732 if (ignore_attribute_error(self->handle_end)) {
3733 return -1;
3734 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003735 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003736 if (ignore_attribute_error(self->handle_comment)) {
3737 return -1;
3738 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003739 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003740 if (ignore_attribute_error(self->handle_pi)) {
3741 return -1;
3742 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003743 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003744 if (ignore_attribute_error(self->handle_close)) {
3745 return -1;
3746 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003747 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003748 if (ignore_attribute_error(self->handle_doctype)) {
3749 return -1;
3750 }
Eli Bendersky45839902013-01-13 05:14:47 -08003751
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003752 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003753 EXPAT(SetUserData)(self->parser, self);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003754 if (self->handle_start_ns || self->handle_end_ns)
3755 EXPAT(SetNamespaceDeclHandler)(
3756 self->parser,
3757 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3758 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3759 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003760 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003761 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003762 (XML_StartElementHandler) expat_start_handler,
3763 (XML_EndElementHandler) expat_end_handler
3764 );
3765 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003766 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003767 (XML_DefaultHandler) expat_default_handler
3768 );
3769 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003770 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003771 (XML_CharacterDataHandler) expat_data_handler
3772 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003773 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003774 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003775 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003776 (XML_CommentHandler) expat_comment_handler
3777 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003778 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003779 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003780 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003781 (XML_ProcessingInstructionHandler) expat_pi_handler
3782 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003783 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003784 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003785 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3786 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003787 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003788 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003789 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003790 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003791
Eli Bendersky52467b12012-06-01 07:13:08 +03003792 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003793}
3794
Eli Bendersky52467b12012-06-01 07:13:08 +03003795static int
3796xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3797{
3798 Py_VISIT(self->handle_close);
3799 Py_VISIT(self->handle_pi);
3800 Py_VISIT(self->handle_comment);
3801 Py_VISIT(self->handle_end);
3802 Py_VISIT(self->handle_data);
3803 Py_VISIT(self->handle_start);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003804 Py_VISIT(self->handle_start_ns);
3805 Py_VISIT(self->handle_end_ns);
3806 Py_VISIT(self->handle_doctype);
Eli Bendersky52467b12012-06-01 07:13:08 +03003807
3808 Py_VISIT(self->target);
3809 Py_VISIT(self->entity);
3810 Py_VISIT(self->names);
3811
3812 return 0;
3813}
3814
3815static int
3816xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003817{
Victor Stinnere727d412017-09-18 05:29:37 -07003818 if (self->parser != NULL) {
3819 XML_Parser parser = self->parser;
3820 self->parser = NULL;
3821 EXPAT(ParserFree)(parser);
3822 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003823
Antoine Pitrouc1948842012-10-01 23:40:37 +02003824 Py_CLEAR(self->handle_close);
3825 Py_CLEAR(self->handle_pi);
3826 Py_CLEAR(self->handle_comment);
3827 Py_CLEAR(self->handle_end);
3828 Py_CLEAR(self->handle_data);
3829 Py_CLEAR(self->handle_start);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003830 Py_CLEAR(self->handle_start_ns);
3831 Py_CLEAR(self->handle_end_ns);
Antoine Pitrouc1948842012-10-01 23:40:37 +02003832 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003833
Antoine Pitrouc1948842012-10-01 23:40:37 +02003834 Py_CLEAR(self->target);
3835 Py_CLEAR(self->entity);
3836 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003837
Eli Bendersky52467b12012-06-01 07:13:08 +03003838 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003839}
3840
Eli Bendersky52467b12012-06-01 07:13:08 +03003841static void
3842xmlparser_dealloc(XMLParserObject* self)
3843{
3844 PyObject_GC_UnTrack(self);
3845 xmlparser_gc_clear(self);
3846 Py_TYPE(self)->tp_free((PyObject *)self);
3847}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003848
3849LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003850expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003851{
3852 int ok;
3853
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003854 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003855 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3856
3857 if (PyErr_Occurred())
3858 return NULL;
3859
3860 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003861 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003862 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003863 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003864 EXPAT(GetErrorColumnNumber)(self->parser),
3865 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003866 );
3867 return NULL;
3868 }
3869
3870 Py_RETURN_NONE;
3871}
3872
Serhiy Storchakacb985562015-05-04 15:32:48 +03003873/*[clinic input]
3874_elementtree.XMLParser.close
3875
3876[clinic start generated code]*/
3877
3878static PyObject *
3879_elementtree_XMLParser_close_impl(XMLParserObject *self)
3880/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003881{
3882 /* end feeding data to parser */
3883
3884 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003885 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003886 if (!res)
3887 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003888
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003889 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003890 Py_DECREF(res);
3891 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003892 }
3893 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003894 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003895 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003896 }
3897 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003898 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003899 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003900}
3901
Serhiy Storchakacb985562015-05-04 15:32:48 +03003902/*[clinic input]
3903_elementtree.XMLParser.feed
3904
3905 data: object
3906 /
3907
3908[clinic start generated code]*/
3909
3910static PyObject *
3911_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3912/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003913{
3914 /* feed data to parser */
3915
Serhiy Storchakacb985562015-05-04 15:32:48 +03003916 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003917 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003918 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3919 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003920 return NULL;
3921 if (data_len > INT_MAX) {
3922 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3923 return NULL;
3924 }
3925 /* Explicitly set UTF-8 encoding. Return code ignored. */
3926 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003927 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003928 }
3929 else {
3930 Py_buffer view;
3931 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003932 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003933 return NULL;
3934 if (view.len > INT_MAX) {
3935 PyBuffer_Release(&view);
3936 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3937 return NULL;
3938 }
3939 res = expat_parse(self, view.buf, (int)view.len, 0);
3940 PyBuffer_Release(&view);
3941 return res;
3942 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003943}
3944
Serhiy Storchakacb985562015-05-04 15:32:48 +03003945/*[clinic input]
3946_elementtree.XMLParser._parse_whole
3947
3948 file: object
3949 /
3950
3951[clinic start generated code]*/
3952
3953static PyObject *
3954_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3955/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003956{
Eli Benderskya3699232013-05-19 18:47:23 -07003957 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003958 PyObject* reader;
3959 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003960 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003961 PyObject* res;
3962
Serhiy Storchakacb985562015-05-04 15:32:48 +03003963 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003964 if (!reader)
3965 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003966
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003967 /* read from open file object */
3968 for (;;) {
3969
3970 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3971
3972 if (!buffer) {
3973 /* read failed (e.g. due to KeyboardInterrupt) */
3974 Py_DECREF(reader);
3975 return NULL;
3976 }
3977
Eli Benderskyf996e772012-03-16 05:53:30 +02003978 if (PyUnicode_CheckExact(buffer)) {
3979 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003980 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003981 Py_DECREF(buffer);
3982 break;
3983 }
3984 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003985 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003986 if (!temp) {
3987 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003988 Py_DECREF(reader);
3989 return NULL;
3990 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003991 buffer = temp;
3992 }
3993 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003994 Py_DECREF(buffer);
3995 break;
3996 }
3997
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003998 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3999 Py_DECREF(buffer);
4000 Py_DECREF(reader);
4001 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
4002 return NULL;
4003 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004004 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02004005 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004006 );
4007
4008 Py_DECREF(buffer);
4009
4010 if (!res) {
4011 Py_DECREF(reader);
4012 return NULL;
4013 }
4014 Py_DECREF(res);
4015
4016 }
4017
4018 Py_DECREF(reader);
4019
4020 res = expat_parse(self, "", 0, 1);
4021
4022 if (res && TreeBuilder_CheckExact(self->target)) {
4023 Py_DECREF(res);
4024 return treebuilder_done((TreeBuilderObject*) self->target);
4025 }
4026
4027 return res;
4028}
4029
Serhiy Storchakacb985562015-05-04 15:32:48 +03004030/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03004031_elementtree.XMLParser._setevents
4032
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004033 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03004034 events_to_report: object = None
4035 /
4036
4037[clinic start generated code]*/
4038
4039static PyObject *
4040_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
4041 PyObject *events_queue,
4042 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004043/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004044{
4045 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004046 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004047 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004048 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004049
4050 if (!TreeBuilder_CheckExact(self->target)) {
4051 PyErr_SetString(
4052 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004053 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004054 "targets"
4055 );
4056 return NULL;
4057 }
4058
4059 target = (TreeBuilderObject*) self->target;
4060
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004061 events_append = PyObject_GetAttrString(events_queue, "append");
4062 if (events_append == NULL)
4063 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03004064 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004065
4066 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02004067 Py_CLEAR(target->start_event_obj);
4068 Py_CLEAR(target->end_event_obj);
4069 Py_CLEAR(target->start_ns_event_obj);
4070 Py_CLEAR(target->end_ns_event_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +02004071 Py_CLEAR(target->comment_event_obj);
4072 Py_CLEAR(target->pi_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004073
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004074 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004075 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004076 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004077 Py_RETURN_NONE;
4078 }
4079
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004080 if (!(events_seq = PySequence_Fast(events_to_report,
4081 "events must be a sequence"))) {
4082 return NULL;
4083 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004084
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03004085 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004086 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02004087 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004088 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004089 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004090 } else if (PyBytes_Check(event_name_obj)) {
4091 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004092 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004093 if (event_name == NULL) {
4094 Py_DECREF(events_seq);
4095 PyErr_Format(PyExc_ValueError, "invalid events sequence");
4096 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004097 }
4098
4099 Py_INCREF(event_name_obj);
4100 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004101 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004102 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004103 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004104 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004105 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004106 EXPAT(SetNamespaceDeclHandler)(
4107 self->parser,
4108 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4109 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4110 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004111 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004112 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004113 EXPAT(SetNamespaceDeclHandler)(
4114 self->parser,
4115 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4116 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4117 );
Stefan Behnel43851a22019-05-01 21:20:38 +02004118 } else if (strcmp(event_name, "comment") == 0) {
4119 Py_XSETREF(target->comment_event_obj, event_name_obj);
4120 EXPAT(SetCommentHandler)(
4121 self->parser,
4122 (XML_CommentHandler) expat_comment_handler
4123 );
4124 } else if (strcmp(event_name, "pi") == 0) {
4125 Py_XSETREF(target->pi_event_obj, event_name_obj);
4126 EXPAT(SetProcessingInstructionHandler)(
4127 self->parser,
4128 (XML_ProcessingInstructionHandler) expat_pi_handler
4129 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004130 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004131 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004132 Py_DECREF(events_seq);
4133 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004134 return NULL;
4135 }
4136 }
4137
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004138 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004139 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004140}
4141
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004142static PyMemberDef xmlparser_members[] = {
4143 {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
4144 {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
4145 {NULL}
4146};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004147
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004148static PyObject*
4149xmlparser_version_getter(XMLParserObject *self, void *closure)
4150{
4151 return PyUnicode_FromFormat(
4152 "Expat %d.%d.%d", XML_MAJOR_VERSION,
4153 XML_MINOR_VERSION, XML_MICRO_VERSION);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004154}
4155
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004156static PyGetSetDef xmlparser_getsetlist[] = {
4157 {"version", (getter)xmlparser_version_getter, NULL, NULL},
4158 {NULL},
4159};
4160
Serhiy Storchakacb985562015-05-04 15:32:48 +03004161#include "clinic/_elementtree.c.h"
4162
4163static PyMethodDef element_methods[] = {
4164
4165 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
4166
4167 _ELEMENTTREE_ELEMENT_GET_METHODDEF
4168 _ELEMENTTREE_ELEMENT_SET_METHODDEF
4169
4170 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
4171 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
4172 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
4173
4174 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
4175 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
4176 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
4177 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
4178
4179 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
4180 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
4181 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
4182
Serhiy Storchaka762ec972017-03-30 18:12:06 +03004183 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004184 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
4185
4186 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
4187 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
4188
4189 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
4190
4191 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
4192 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
4193 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
4194 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
4195 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
4196
4197 {NULL, NULL}
4198};
4199
4200static PyMappingMethods element_as_mapping = {
4201 (lenfunc) element_length,
4202 (binaryfunc) element_subscr,
4203 (objobjargproc) element_ass_subscr,
4204};
4205
Serhiy Storchakadde08152015-11-25 15:28:13 +02004206static PyGetSetDef element_getsetlist[] = {
4207 {"tag",
4208 (getter)element_tag_getter,
4209 (setter)element_tag_setter,
4210 "A string identifying what kind of data this element represents"},
4211 {"text",
4212 (getter)element_text_getter,
4213 (setter)element_text_setter,
4214 "A string of text directly after the start tag, or None"},
4215 {"tail",
4216 (getter)element_tail_getter,
4217 (setter)element_tail_setter,
4218 "A string of text directly after the end tag, or None"},
4219 {"attrib",
4220 (getter)element_attrib_getter,
4221 (setter)element_attrib_setter,
4222 "A dictionary containing the element's attributes"},
4223 {NULL},
4224};
4225
Serhiy Storchakacb985562015-05-04 15:32:48 +03004226static PyTypeObject Element_Type = {
4227 PyVarObject_HEAD_INIT(NULL, 0)
4228 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
4229 /* methods */
4230 (destructor)element_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004231 0, /* tp_vectorcall_offset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004232 0, /* tp_getattr */
4233 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004234 0, /* tp_as_async */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004235 (reprfunc)element_repr, /* tp_repr */
4236 0, /* tp_as_number */
4237 &element_as_sequence, /* tp_as_sequence */
4238 &element_as_mapping, /* tp_as_mapping */
4239 0, /* tp_hash */
4240 0, /* tp_call */
4241 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02004242 PyObject_GenericGetAttr, /* tp_getattro */
4243 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004244 0, /* tp_as_buffer */
4245 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4246 /* tp_flags */
4247 0, /* tp_doc */
4248 (traverseproc)element_gc_traverse, /* tp_traverse */
4249 (inquiry)element_gc_clear, /* tp_clear */
4250 0, /* tp_richcompare */
4251 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
4252 0, /* tp_iter */
4253 0, /* tp_iternext */
4254 element_methods, /* tp_methods */
4255 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02004256 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004257 0, /* tp_base */
4258 0, /* tp_dict */
4259 0, /* tp_descr_get */
4260 0, /* tp_descr_set */
4261 0, /* tp_dictoffset */
4262 (initproc)element_init, /* tp_init */
4263 PyType_GenericAlloc, /* tp_alloc */
4264 element_new, /* tp_new */
4265 0, /* tp_free */
4266};
4267
4268static PyMethodDef treebuilder_methods[] = {
4269 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
4270 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
4271 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
Stefan Behnel43851a22019-05-01 21:20:38 +02004272 _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF
4273 _ELEMENTTREE_TREEBUILDER_PI_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004274 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
4275 {NULL, NULL}
4276};
4277
4278static PyTypeObject TreeBuilder_Type = {
4279 PyVarObject_HEAD_INIT(NULL, 0)
4280 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
4281 /* methods */
4282 (destructor)treebuilder_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004283 0, /* tp_vectorcall_offset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004284 0, /* tp_getattr */
4285 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004286 0, /* tp_as_async */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004287 0, /* tp_repr */
4288 0, /* tp_as_number */
4289 0, /* tp_as_sequence */
4290 0, /* tp_as_mapping */
4291 0, /* tp_hash */
4292 0, /* tp_call */
4293 0, /* tp_str */
4294 0, /* tp_getattro */
4295 0, /* tp_setattro */
4296 0, /* tp_as_buffer */
4297 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4298 /* tp_flags */
4299 0, /* tp_doc */
4300 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
4301 (inquiry)treebuilder_gc_clear, /* tp_clear */
4302 0, /* tp_richcompare */
4303 0, /* tp_weaklistoffset */
4304 0, /* tp_iter */
4305 0, /* tp_iternext */
4306 treebuilder_methods, /* tp_methods */
4307 0, /* tp_members */
4308 0, /* tp_getset */
4309 0, /* tp_base */
4310 0, /* tp_dict */
4311 0, /* tp_descr_get */
4312 0, /* tp_descr_set */
4313 0, /* tp_dictoffset */
4314 _elementtree_TreeBuilder___init__, /* tp_init */
4315 PyType_GenericAlloc, /* tp_alloc */
4316 treebuilder_new, /* tp_new */
4317 0, /* tp_free */
4318};
4319
4320static PyMethodDef xmlparser_methods[] = {
4321 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
4322 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
4323 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
4324 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004325 {NULL, NULL}
4326};
4327
Neal Norwitz227b5332006-03-22 09:28:35 +00004328static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004329 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08004330 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004331 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03004332 (destructor)xmlparser_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004333 0, /* tp_vectorcall_offset */
Eli Bendersky52467b12012-06-01 07:13:08 +03004334 0, /* tp_getattr */
4335 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004336 0, /* tp_as_async */
Eli Bendersky52467b12012-06-01 07:13:08 +03004337 0, /* tp_repr */
4338 0, /* tp_as_number */
4339 0, /* tp_as_sequence */
4340 0, /* tp_as_mapping */
4341 0, /* tp_hash */
4342 0, /* tp_call */
4343 0, /* tp_str */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004344 0, /* tp_getattro */
Eli Bendersky52467b12012-06-01 07:13:08 +03004345 0, /* tp_setattro */
4346 0, /* tp_as_buffer */
4347 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4348 /* tp_flags */
4349 0, /* tp_doc */
4350 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
4351 (inquiry)xmlparser_gc_clear, /* tp_clear */
4352 0, /* tp_richcompare */
4353 0, /* tp_weaklistoffset */
4354 0, /* tp_iter */
4355 0, /* tp_iternext */
4356 xmlparser_methods, /* tp_methods */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004357 xmlparser_members, /* tp_members */
4358 xmlparser_getsetlist, /* tp_getset */
Eli Bendersky52467b12012-06-01 07:13:08 +03004359 0, /* tp_base */
4360 0, /* tp_dict */
4361 0, /* tp_descr_get */
4362 0, /* tp_descr_set */
4363 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004364 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03004365 PyType_GenericAlloc, /* tp_alloc */
4366 xmlparser_new, /* tp_new */
4367 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004368};
4369
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004370/* ==================================================================== */
4371/* python module interface */
4372
4373static PyMethodDef _functions[] = {
Serhiy Storchaka62be7422018-11-27 13:27:31 +02004374 {"SubElement", (PyCFunction)(void(*)(void)) subelement, METH_VARARGS | METH_KEYWORDS},
Stefan Behnel43851a22019-05-01 21:20:38 +02004375 _ELEMENTTREE__SET_FACTORIES_METHODDEF
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004376 {NULL, NULL}
4377};
4378
Martin v. Löwis1a214512008-06-11 05:26:20 +00004379
Eli Bendersky532d03e2013-08-10 08:00:39 -07004380static struct PyModuleDef elementtreemodule = {
4381 PyModuleDef_HEAD_INIT,
4382 "_elementtree",
4383 NULL,
4384 sizeof(elementtreestate),
4385 _functions,
4386 NULL,
4387 elementtree_traverse,
4388 elementtree_clear,
4389 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00004390};
4391
Neal Norwitzf6657e62006-12-28 04:47:50 +00004392PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00004393PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004394{
Eli Bendersky64d11e62012-06-15 07:42:50 +03004395 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004396 elementtreestate *st;
4397
4398 m = PyState_FindModule(&elementtreemodule);
4399 if (m) {
4400 Py_INCREF(m);
4401 return m;
4402 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004403
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004404 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02004405 if (PyType_Ready(&ElementIter_Type) < 0)
4406 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004407 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004408 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004409 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004410 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004411 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004412 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004413
Eli Bendersky532d03e2013-08-10 08:00:39 -07004414 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00004415 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00004416 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004417 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00004418
Eli Bendersky828efde2012-04-05 05:40:58 +03004419 if (!(temp = PyImport_ImportModule("copy")))
4420 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004421 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03004422 Py_XDECREF(temp);
4423
Victor Stinnerb136f112017-07-10 22:28:02 +02004424 if (st->deepcopy_obj == NULL) {
4425 return NULL;
4426 }
4427
4428 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004429 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004430 return NULL;
4431
Eli Bendersky20d41742012-06-01 09:48:37 +03004432 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004433 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4434 if (expat_capi) {
4435 /* check that it's usable */
4436 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004437 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004438 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4439 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004440 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004441 PyErr_SetString(PyExc_ImportError,
4442 "pyexpat version is incompatible");
4443 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004444 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004445 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004446 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004447 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004448
Eli Bendersky532d03e2013-08-10 08:00:39 -07004449 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004450 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004451 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004452 Py_INCREF(st->parseerror_obj);
4453 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004454
Eli Bendersky092af1f2012-03-04 07:14:03 +02004455 Py_INCREF((PyObject *)&Element_Type);
4456 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4457
Eli Bendersky58d548d2012-05-29 15:45:16 +03004458 Py_INCREF((PyObject *)&TreeBuilder_Type);
4459 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4460
Eli Bendersky52467b12012-06-01 07:13:08 +03004461 Py_INCREF((PyObject *)&XMLParser_Type);
4462 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004463
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004464 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004465}