blob: e9a0ea21b292fae49f417166774a8055d89af1a7 [file] [log] [blame]
/*--------------------------------------------------------------------
 * Licensed to PSF under a Contributor Agreement.
 * See http://www.python.org/psf/license for licensing details.
 *
 * _elementtree - C accelerator for xml.etree.ElementTree
 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 *--------------------------------------------------------------------
 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
/* -------------------------------------------------------------------- */
/* configuration */

/* Number of child slots embedded directly in an element's extra block;
   an element holding at most this many children needs no additional
   memory allocation for its child array. */
#define STATIC_CHILDREN 4

/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */

/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  Change `#if 0` to `#if 1` to maintain a
   running byte counter that is printed on every ALLOC/RELEASE; in the
   default configuration both macros expand to nothing.

   The tracing variants parenthesize their arguments and convert the
   size explicitly, so that expressions such as `a + b` or a size_t
   `sizeof` value behave as expected inside the expansion. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += (int)(size); printf("%8d - %s\n", memory, (comment)); } while (0)
#define RELEASE(size, comment)\
do { memory -= (int)(size); printf("%8d - %s\n", memory, (comment)); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: file-local helpers are declared through LOCAL().
   Under MSVC the helper is additionally marked __inline and uses the
   __fastcall calling convention; elsewhere it is a plain static. */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55
/* Macros used to store a 'join' flag in the lowest bit of a string
   object pointer.  Note that every use of text and tail as object
   pointers must be wrapped in JOIN_OBJ.  See the comments in the
   ElementObject definition for more info.

   JOIN_OBJ(p)       - p with the flag bit masked off, as a PyObject*
   JOIN_GET(p)       - the flag bit of p (0 or 1)
   JOIN_SET(p, flag) - JOIN_OBJ(p) with `flag` OR-ed into the low bit */
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
Ronald Oussoren138d0802013-07-19 11:11:25 +020083/* Types defined by this extension */
84static PyTypeObject Element_Type;
85static PyTypeObject ElementIter_Type;
86static PyTypeObject TreeBuilder_Type;
87static PyTypeObject XMLParser_Type;
88
89
Eli Bendersky532d03e2013-08-10 08:00:39 -070090/* Per-module state; PEP 3121 */
91typedef struct {
92 PyObject *parseerror_obj;
93 PyObject *deepcopy_obj;
94 PyObject *elementpath_obj;
Stefan Behnel43851a22019-05-01 21:20:38 +020095 PyObject *comment_factory;
96 PyObject *pi_factory;
Eli Bendersky532d03e2013-08-10 08:00:39 -070097} elementtreestate;
98
99static struct PyModuleDef elementtreemodule;
100
101/* Given a module object (assumed to be _elementtree), get its per-module
102 * state.
103 */
104#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
105
106/* Find the module instance imported in the currently running sub-interpreter
107 * and get its state.
108 */
109#define ET_STATE_GLOBAL \
110 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
111
112static int
113elementtree_clear(PyObject *m)
114{
115 elementtreestate *st = ET_STATE(m);
116 Py_CLEAR(st->parseerror_obj);
117 Py_CLEAR(st->deepcopy_obj);
118 Py_CLEAR(st->elementpath_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +0200119 Py_CLEAR(st->comment_factory);
120 Py_CLEAR(st->pi_factory);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700121 return 0;
122}
123
124static int
125elementtree_traverse(PyObject *m, visitproc visit, void *arg)
126{
127 elementtreestate *st = ET_STATE(m);
128 Py_VISIT(st->parseerror_obj);
129 Py_VISIT(st->deepcopy_obj);
130 Py_VISIT(st->elementpath_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +0200131 Py_VISIT(st->comment_factory);
132 Py_VISIT(st->pi_factory);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700133 return 0;
134}
135
136static void
137elementtree_free(void *m)
138{
139 elementtree_clear((PyObject *)m);
140}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000141
142/* helpers */
143
144LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145list_join(PyObject* list)
146{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300147 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000148 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 PyObject* result;
150
Antoine Pitrouc1948842012-10-01 23:40:37 +0200151 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000152 if (!joiner)
153 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200154 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000155 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000156 return result;
157}
158
Eli Bendersky48d358b2012-05-30 17:57:50 +0300159/* Is the given object an empty dictionary?
160*/
161static int
162is_empty_dict(PyObject *obj)
163{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200164 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300165}
166
167
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000168/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200169/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000170
171typedef struct {
172
173 /* attributes (a dictionary object), or None if no attributes */
174 PyObject* attrib;
175
176 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200177 Py_ssize_t length; /* actual number of items */
178 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179
180 /* this either points to _children or to a malloced buffer */
181 PyObject* *children;
182
183 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100184
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000185} ElementObjectExtra;
186
187typedef struct {
188 PyObject_HEAD
189
190 /* element tag (a string). */
191 PyObject* tag;
192
193 /* text before first child. note that this is a tagged pointer;
194 use JOIN_OBJ to get the object pointer. the join flag is used
195 to distinguish lists created by the tree builder from lists
196 assigned to the attribute by application code; the former
197 should be joined before being returned to the user, the latter
198 should be left intact. */
199 PyObject* text;
200
201 /* text after this element, in parent. note that this is a tagged
202 pointer; use JOIN_OBJ to get the object pointer. */
203 PyObject* tail;
204
205 ElementObjectExtra* extra;
206
Eli Benderskyebf37a22012-04-03 22:02:37 +0300207 PyObject *weakreflist; /* For tp_weaklistoffset */
208
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209} ElementObject;
210
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000211
/* Element_CheckExact: true only when the type of op is Element_Type itself.
   Element_Check:      true for Element_Type and for its subtypes. */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
214
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215
216/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200217/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000218
219LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200220create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000221{
222 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200223 if (!self->extra) {
224 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000225 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200226 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000227
228 if (!attrib)
229 attrib = Py_None;
230
231 Py_INCREF(attrib);
232 self->extra->attrib = attrib;
233
234 self->extra->length = 0;
235 self->extra->allocated = STATIC_CHILDREN;
236 self->extra->children = self->extra->_children;
237
238 return 0;
239}
240
241LOCAL(void)
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300242dealloc_extra(ElementObjectExtra *extra)
243{
244 Py_ssize_t i;
245
246 if (!extra)
247 return;
248
249 Py_DECREF(extra->attrib);
250
251 for (i = 0; i < extra->length; i++)
252 Py_DECREF(extra->children[i]);
253
254 if (extra->children != extra->_children)
255 PyObject_Free(extra->children);
256
257 PyObject_Free(extra);
258}
259
260LOCAL(void)
261clear_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000262{
Eli Bendersky08b85292012-04-04 15:55:07 +0300263 ElementObjectExtra *myextra;
Eli Bendersky08b85292012-04-04 15:55:07 +0300264
Eli Benderskyebf37a22012-04-03 22:02:37 +0300265 if (!self->extra)
266 return;
267
268 /* Avoid DECREFs calling into this code again (cycles, etc.)
269 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300270 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300271 self->extra = NULL;
272
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300273 dealloc_extra(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000274}
275
Eli Bendersky092af1f2012-03-04 07:14:03 +0200276/* Convenience internal function to create new Element objects with the given
277 * tag and attributes.
278*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000279LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200280create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000281{
282 ElementObject* self;
283
Eli Bendersky0192ba32012-03-30 16:38:33 +0300284 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000285 if (self == NULL)
286 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000287 self->extra = NULL;
288
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000289 Py_INCREF(tag);
290 self->tag = tag;
291
292 Py_INCREF(Py_None);
293 self->text = Py_None;
294
295 Py_INCREF(Py_None);
296 self->tail = Py_None;
297
Eli Benderskyebf37a22012-04-03 22:02:37 +0300298 self->weakreflist = NULL;
299
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200300 ALLOC(sizeof(ElementObject), "create element");
301 PyObject_GC_Track(self);
302
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200303 if (attrib != Py_None && !is_empty_dict(attrib)) {
304 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200305 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200306 return NULL;
307 }
308 }
309
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000310 return (PyObject*) self;
311}
312
Eli Bendersky092af1f2012-03-04 07:14:03 +0200313static PyObject *
314element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
315{
316 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
317 if (e != NULL) {
318 Py_INCREF(Py_None);
319 e->tag = Py_None;
320
321 Py_INCREF(Py_None);
322 e->text = Py_None;
323
324 Py_INCREF(Py_None);
325 e->tail = Py_None;
326
327 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300328 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200329 }
330 return (PyObject *)e;
331}
332
Eli Bendersky737b1732012-05-29 06:02:56 +0300333/* Helper function for extracting the attrib dictionary from a keywords dict.
334 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800335 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300336 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700337 *
338 * Return a dictionary with the content of kwds merged into the content of
339 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300340 */
341static PyObject*
342get_attrib_from_keywords(PyObject *kwds)
343{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700344 PyObject *attrib_str = PyUnicode_FromString("attrib");
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600345 if (attrib_str == NULL) {
346 return NULL;
347 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200348 PyObject *attrib = PyDict_GetItemWithError(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300349
350 if (attrib) {
351 /* If attrib was found in kwds, copy its value and remove it from
352 * kwds
353 */
354 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700355 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300356 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
357 Py_TYPE(attrib)->tp_name);
358 return NULL;
359 }
360 attrib = PyDict_Copy(attrib);
Serhiy Storchaka8905fcc2018-12-11 08:38:03 +0200361 if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
362 Py_DECREF(attrib);
363 attrib = NULL;
364 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200365 }
366 else if (!PyErr_Occurred()) {
Eli Bendersky737b1732012-05-29 06:02:56 +0300367 attrib = PyDict_New();
368 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700369
370 Py_DECREF(attrib_str);
371
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600372 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
373 Py_DECREF(attrib);
374 return NULL;
375 }
Eli Bendersky737b1732012-05-29 06:02:56 +0300376 return attrib;
377}
378
Serhiy Storchakacb985562015-05-04 15:32:48 +0300379/*[clinic input]
380module _elementtree
381class _elementtree.Element "ElementObject *" "&Element_Type"
382class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
383class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
384[clinic start generated code]*/
385/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
386
Eli Bendersky092af1f2012-03-04 07:14:03 +0200387static int
388element_init(PyObject *self, PyObject *args, PyObject *kwds)
389{
390 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 PyObject *attrib = NULL;
392 ElementObject *self_elem;
393
394 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
395 return -1;
396
Eli Bendersky737b1732012-05-29 06:02:56 +0300397 if (attrib) {
398 /* attrib passed as positional arg */
399 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200400 if (!attrib)
401 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300402 if (kwds) {
403 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300405 return -1;
406 }
407 }
408 } else if (kwds) {
409 /* have keywords args */
410 attrib = get_attrib_from_keywords(kwds);
411 if (!attrib)
412 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413 }
414
415 self_elem = (ElementObject *)self;
416
Antoine Pitrouc1948842012-10-01 23:40:37 +0200417 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200418 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200419 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200420 return -1;
421 }
422 }
423
Eli Bendersky48d358b2012-05-30 17:57:50 +0300424 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200425 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426
427 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200428 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300429 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200430
Eli Bendersky092af1f2012-03-04 07:14:03 +0200431 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300432 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200433
Eli Bendersky092af1f2012-03-04 07:14:03 +0200434 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300435 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200436
437 return 0;
438}
439
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000440LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200441element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000442{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200443 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000444 PyObject* *children;
445
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300446 assert(extra >= 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447 /* make sure self->children can hold the given number of extra
448 elements. set an exception and return -1 if allocation failed */
449
Victor Stinner5f0af232013-07-11 23:01:36 +0200450 if (!self->extra) {
451 if (create_extra(self, NULL) < 0)
452 return -1;
453 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000454
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200455 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000456
457 if (size > self->extra->allocated) {
458 /* use Python 2.4's list growth strategy */
459 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000460 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100461 * which needs at least 4 bytes.
462 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000463 * be safe.
464 */
465 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200466 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
467 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000468 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000469 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100470 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000471 * false alarm always assume at least one child to be safe.
472 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000473 children = PyObject_Realloc(self->extra->children,
474 size * sizeof(PyObject*));
475 if (!children)
476 goto nomemory;
477 } else {
478 children = PyObject_Malloc(size * sizeof(PyObject*));
479 if (!children)
480 goto nomemory;
481 /* copy existing children from static area to malloc buffer */
482 memcpy(children, self->extra->children,
483 self->extra->length * sizeof(PyObject*));
484 }
485 self->extra->children = children;
486 self->extra->allocated = size;
487 }
488
489 return 0;
490
491 nomemory:
492 PyErr_NoMemory();
493 return -1;
494}
495
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300496LOCAL(void)
497raise_type_error(PyObject *element)
498{
499 PyErr_Format(PyExc_TypeError,
500 "expected an Element, not \"%.200s\"",
501 Py_TYPE(element)->tp_name);
502}
503
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000504LOCAL(int)
505element_add_subelement(ElementObject* self, PyObject* element)
506{
507 /* add a child element to a parent */
508
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300509 if (!Element_Check(element)) {
510 raise_type_error(element);
511 return -1;
512 }
513
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000514 if (element_resize(self, 1) < 0)
515 return -1;
516
517 Py_INCREF(element);
518 self->extra->children[self->extra->length] = element;
519
520 self->extra->length++;
521
522 return 0;
523}
524
525LOCAL(PyObject*)
526element_get_attrib(ElementObject* self)
527{
528 /* return borrowed reference to attrib dictionary */
529 /* note: this function assumes that the extra section exists */
530
531 PyObject* res = self->extra->attrib;
532
533 if (res == Py_None) {
534 /* create missing dictionary */
535 res = PyDict_New();
536 if (!res)
537 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200538 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000539 self->extra->attrib = res;
540 }
541
542 return res;
543}
544
545LOCAL(PyObject*)
546element_get_text(ElementObject* self)
547{
548 /* return borrowed reference to text attribute */
549
Serhiy Storchaka576def02017-03-30 09:47:31 +0300550 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000551
552 if (JOIN_GET(res)) {
553 res = JOIN_OBJ(res);
554 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300555 PyObject *tmp = list_join(res);
556 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000557 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300558 self->text = tmp;
559 Py_DECREF(res);
560 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561 }
562 }
563
564 return res;
565}
566
567LOCAL(PyObject*)
568element_get_tail(ElementObject* self)
569{
570 /* return borrowed reference to text attribute */
571
Serhiy Storchaka576def02017-03-30 09:47:31 +0300572 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573
574 if (JOIN_GET(res)) {
575 res = JOIN_OBJ(res);
576 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300577 PyObject *tmp = list_join(res);
578 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000579 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300580 self->tail = tmp;
581 Py_DECREF(res);
582 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000583 }
584 }
585
586 return res;
587}
588
589static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300590subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000591{
592 PyObject* elem;
593
594 ElementObject* parent;
595 PyObject* tag;
596 PyObject* attrib = NULL;
597 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
598 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800599 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000600 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800601 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000602
Eli Bendersky737b1732012-05-29 06:02:56 +0300603 if (attrib) {
604 /* attrib passed as positional arg */
605 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000606 if (!attrib)
607 return NULL;
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600608 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
609 Py_DECREF(attrib);
610 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300611 }
612 } else if (kwds) {
613 /* have keyword args */
614 attrib = get_attrib_from_keywords(kwds);
615 if (!attrib)
616 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000617 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300618 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000619 Py_INCREF(Py_None);
620 attrib = Py_None;
621 }
622
Eli Bendersky092af1f2012-03-04 07:14:03 +0200623 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000624 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200625 if (elem == NULL)
626 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000627
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000628 if (element_add_subelement(parent, elem) < 0) {
629 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000630 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000631 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000632
633 return elem;
634}
635
Eli Bendersky0192ba32012-03-30 16:38:33 +0300636static int
637element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
638{
639 Py_VISIT(self->tag);
640 Py_VISIT(JOIN_OBJ(self->text));
641 Py_VISIT(JOIN_OBJ(self->tail));
642
643 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200644 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300645 Py_VISIT(self->extra->attrib);
646
647 for (i = 0; i < self->extra->length; ++i)
648 Py_VISIT(self->extra->children[i]);
649 }
650 return 0;
651}
652
653static int
654element_gc_clear(ElementObject *self)
655{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300656 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700657 _clear_joined_ptr(&self->text);
658 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300659
660 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300661 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300662 */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300663 clear_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300664 return 0;
665}
666
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000667static void
668element_dealloc(ElementObject* self)
669{
INADA Naokia6296d32017-08-24 14:55:17 +0900670 /* bpo-31095: UnTrack is needed before calling any callbacks */
Eli Bendersky0192ba32012-03-30 16:38:33 +0300671 PyObject_GC_UnTrack(self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200672 Py_TRASHCAN_SAFE_BEGIN(self)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300673
674 if (self->weakreflist != NULL)
675 PyObject_ClearWeakRefs((PyObject *) self);
676
Eli Bendersky0192ba32012-03-30 16:38:33 +0300677 /* element_gc_clear clears all references and deallocates extra
678 */
679 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000680
681 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200682 Py_TYPE(self)->tp_free((PyObject *)self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200683 Py_TRASHCAN_SAFE_END(self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000684}
685
686/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000687
Serhiy Storchakacb985562015-05-04 15:32:48 +0300688/*[clinic input]
689_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000690
Serhiy Storchakacb985562015-05-04 15:32:48 +0300691 subelement: object(subclass_of='&Element_Type')
692 /
693
694[clinic start generated code]*/
695
696static PyObject *
697_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
698/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
699{
700 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000701 return NULL;
702
703 Py_RETURN_NONE;
704}
705
Serhiy Storchakacb985562015-05-04 15:32:48 +0300706/*[clinic input]
707_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000708
Serhiy Storchakacb985562015-05-04 15:32:48 +0300709[clinic start generated code]*/
710
711static PyObject *
712_elementtree_Element_clear_impl(ElementObject *self)
713/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
714{
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300715 clear_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716
717 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300718 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719
720 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300721 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000722
723 Py_RETURN_NONE;
724}
725
Serhiy Storchakacb985562015-05-04 15:32:48 +0300726/*[clinic input]
727_elementtree.Element.__copy__
728
729[clinic start generated code]*/
730
731static PyObject *
732_elementtree_Element___copy___impl(ElementObject *self)
733/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000734{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200735 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000736 ElementObject* element;
737
Eli Bendersky092af1f2012-03-04 07:14:03 +0200738 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800739 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000740 if (!element)
741 return NULL;
742
Oren Milman39ecb9c2017-10-10 23:26:24 +0300743 Py_INCREF(JOIN_OBJ(self->text));
744 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000745
Oren Milman39ecb9c2017-10-10 23:26:24 +0300746 Py_INCREF(JOIN_OBJ(self->tail));
747 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300749 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000750 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000751 if (element_resize(element, self->extra->length) < 0) {
752 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000753 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000754 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000755
756 for (i = 0; i < self->extra->length; i++) {
757 Py_INCREF(self->extra->children[i]);
758 element->extra->children[i] = self->extra->children[i];
759 }
760
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300761 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000763 }
764
765 return (PyObject*) element;
766}
767
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200768/* Helper for a deep copy. */
769LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
770
Serhiy Storchakacb985562015-05-04 15:32:48 +0300771/*[clinic input]
772_elementtree.Element.__deepcopy__
773
Oren Milmand0568182017-09-12 17:39:15 +0300774 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300775 /
776
777[clinic start generated code]*/
778
779static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300780_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
781/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000782{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200783 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000784 ElementObject* element;
785 PyObject* tag;
786 PyObject* attrib;
787 PyObject* text;
788 PyObject* tail;
789 PyObject* id;
790
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000791 tag = deepcopy(self->tag, memo);
792 if (!tag)
793 return NULL;
794
795 if (self->extra) {
796 attrib = deepcopy(self->extra->attrib, memo);
797 if (!attrib) {
798 Py_DECREF(tag);
799 return NULL;
800 }
801 } else {
802 Py_INCREF(Py_None);
803 attrib = Py_None;
804 }
805
Eli Bendersky092af1f2012-03-04 07:14:03 +0200806 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807
808 Py_DECREF(tag);
809 Py_DECREF(attrib);
810
811 if (!element)
812 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100813
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000814 text = deepcopy(JOIN_OBJ(self->text), memo);
815 if (!text)
816 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300817 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000818
819 tail = deepcopy(JOIN_OBJ(self->tail), memo);
820 if (!tail)
821 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300822 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000823
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300824 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000825 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000826 if (element_resize(element, self->extra->length) < 0)
827 goto error;
828
829 for (i = 0; i < self->extra->length; i++) {
830 PyObject* child = deepcopy(self->extra->children[i], memo);
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300831 if (!child || !Element_Check(child)) {
832 if (child) {
833 raise_type_error(child);
834 Py_DECREF(child);
835 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000836 element->extra->length = i;
837 goto error;
838 }
839 element->extra->children[i] = child;
840 }
841
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300842 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000843 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000844 }
845
846 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700847 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000848 if (!id)
849 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000850
851 i = PyDict_SetItem(memo, id, (PyObject*) element);
852
853 Py_DECREF(id);
854
855 if (i < 0)
856 goto error;
857
858 return (PyObject*) element;
859
860 error:
861 Py_DECREF(element);
862 return NULL;
863}
864
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200865LOCAL(PyObject *)
866deepcopy(PyObject *object, PyObject *memo)
867{
868 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200869 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200870 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200871
872 /* Fast paths */
873 if (object == Py_None || PyUnicode_CheckExact(object)) {
874 Py_INCREF(object);
875 return object;
876 }
877
878 if (Py_REFCNT(object) == 1) {
879 if (PyDict_CheckExact(object)) {
880 PyObject *key, *value;
881 Py_ssize_t pos = 0;
882 int simple = 1;
883 while (PyDict_Next(object, &pos, &key, &value)) {
884 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
885 simple = 0;
886 break;
887 }
888 }
889 if (simple)
890 return PyDict_Copy(object);
891 /* Fall through to general case */
892 }
893 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300894 return _elementtree_Element___deepcopy___impl(
895 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200896 }
897 }
898
899 /* General case */
900 st = ET_STATE_GLOBAL;
901 if (!st->deepcopy_obj) {
902 PyErr_SetString(PyExc_RuntimeError,
903 "deepcopy helper not found");
904 return NULL;
905 }
906
Victor Stinner7fbac452016-08-20 01:34:44 +0200907 stack[0] = object;
908 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200909 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200910}
911
912
Serhiy Storchakacb985562015-05-04 15:32:48 +0300913/*[clinic input]
914_elementtree.Element.__sizeof__ -> Py_ssize_t
915
916[clinic start generated code]*/
917
918static Py_ssize_t
919_elementtree_Element___sizeof___impl(ElementObject *self)
920/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200921{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200922 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200923 if (self->extra) {
924 result += sizeof(ElementObjectExtra);
925 if (self->extra->children != self->extra->_children)
926 result += sizeof(PyObject*) * self->extra->allocated;
927 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300928 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200929}
930
Eli Bendersky698bdb22013-01-10 06:01:06 -0800931/* dict keys for getstate/setstate. */
932#define PICKLED_TAG "tag"
933#define PICKLED_CHILDREN "_children"
934#define PICKLED_ATTRIB "attrib"
935#define PICKLED_TAIL "tail"
936#define PICKLED_TEXT "text"
937
938/* __getstate__ returns a fabricated instance dict as in the pure-Python
939 * Element implementation, for interoperability/interchangeability. This
940 * makes the pure-Python implementation details an API, but (a) there aren't
941 * any unnecessary structures there; and (b) it buys compatibility with 3.2
942 * pickles. See issue #16076.
943 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300944/*[clinic input]
945_elementtree.Element.__getstate__
946
947[clinic start generated code]*/
948
Eli Bendersky698bdb22013-01-10 06:01:06 -0800949static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300950_elementtree_Element___getstate___impl(ElementObject *self)
951/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800952{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200953 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800954 PyObject *instancedict = NULL, *children;
955
956 /* Build a list of children. */
957 children = PyList_New(self->extra ? self->extra->length : 0);
958 if (!children)
959 return NULL;
960 for (i = 0; i < PyList_GET_SIZE(children); i++) {
961 PyObject *child = self->extra->children[i];
962 Py_INCREF(child);
963 PyList_SET_ITEM(children, i, child);
964 }
965
966 /* Construct the state object. */
967 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
968 if (noattrib)
969 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
970 PICKLED_TAG, self->tag,
971 PICKLED_CHILDREN, children,
972 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700973 PICKLED_TEXT, JOIN_OBJ(self->text),
974 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800975 else
976 instancedict = Py_BuildValue("{sOsOsOsOsO}",
977 PICKLED_TAG, self->tag,
978 PICKLED_CHILDREN, children,
979 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700980 PICKLED_TEXT, JOIN_OBJ(self->text),
981 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800982 if (instancedict) {
983 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800984 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800985 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800986 else {
987 for (i = 0; i < PyList_GET_SIZE(children); i++)
988 Py_DECREF(PyList_GET_ITEM(children, i));
989 Py_DECREF(children);
990
991 return NULL;
992 }
993}
994
995static PyObject *
996element_setstate_from_attributes(ElementObject *self,
997 PyObject *tag,
998 PyObject *attrib,
999 PyObject *text,
1000 PyObject *tail,
1001 PyObject *children)
1002{
1003 Py_ssize_t i, nchildren;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001004 ElementObjectExtra *oldextra = NULL;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001005
1006 if (!tag) {
1007 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
1008 return NULL;
1009 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001010
Serhiy Storchaka191321d2015-12-27 15:41:34 +02001011 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001012 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001013
Oren Milman39ecb9c2017-10-10 23:26:24 +03001014 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
1015 Py_INCREF(JOIN_OBJ(text));
1016 _set_joined_ptr(&self->text, text);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001017
Oren Milman39ecb9c2017-10-10 23:26:24 +03001018 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
1019 Py_INCREF(JOIN_OBJ(tail));
1020 _set_joined_ptr(&self->tail, tail);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001021
1022 /* Handle ATTRIB and CHILDREN. */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001023 if (!children && !attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001024 Py_RETURN_NONE;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001025 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001026
1027 /* Compute 'nchildren'. */
1028 if (children) {
1029 if (!PyList_Check(children)) {
1030 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1031 return NULL;
1032 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001033 nchildren = PyList_GET_SIZE(children);
1034
1035 /* (Re-)allocate 'extra'.
1036 Avoid DECREFs calling into this code again (cycles, etc.)
1037 */
1038 oldextra = self->extra;
1039 self->extra = NULL;
1040 if (element_resize(self, nchildren)) {
1041 assert(!self->extra || !self->extra->length);
1042 clear_extra(self);
1043 self->extra = oldextra;
1044 return NULL;
1045 }
1046 assert(self->extra);
1047 assert(self->extra->allocated >= nchildren);
1048 if (oldextra) {
1049 assert(self->extra->attrib == Py_None);
1050 self->extra->attrib = oldextra->attrib;
1051 oldextra->attrib = Py_None;
1052 }
1053
1054 /* Copy children */
1055 for (i = 0; i < nchildren; i++) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001056 PyObject *child = PyList_GET_ITEM(children, i);
1057 if (!Element_Check(child)) {
1058 raise_type_error(child);
1059 self->extra->length = i;
1060 dealloc_extra(oldextra);
1061 return NULL;
1062 }
1063 Py_INCREF(child);
1064 self->extra->children[i] = child;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001065 }
1066
1067 assert(!self->extra->length);
1068 self->extra->length = nchildren;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001069 }
1070 else {
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001071 if (element_resize(self, 0)) {
1072 return NULL;
1073 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001074 }
1075
Eli Bendersky698bdb22013-01-10 06:01:06 -08001076 /* Stash attrib. */
1077 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001078 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001079 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001080 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001081 dealloc_extra(oldextra);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001082
1083 Py_RETURN_NONE;
1084}
1085
1086/* __setstate__ for Element instance from the Python implementation.
1087 * 'state' should be the instance dict.
1088 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001089
Eli Bendersky698bdb22013-01-10 06:01:06 -08001090static PyObject *
1091element_setstate_from_Python(ElementObject *self, PyObject *state)
1092{
1093 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1094 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1095 PyObject *args;
1096 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001097 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001098
Eli Bendersky698bdb22013-01-10 06:01:06 -08001099 tag = attrib = text = tail = children = NULL;
1100 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001101 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001102 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001103
1104 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1105 &attrib, &text, &tail, &children))
1106 retval = element_setstate_from_attributes(self, tag, attrib, text,
1107 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001108 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001109 retval = NULL;
1110
1111 Py_DECREF(args);
1112 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001113}
1114
Serhiy Storchakacb985562015-05-04 15:32:48 +03001115/*[clinic input]
1116_elementtree.Element.__setstate__
1117
1118 state: object
1119 /
1120
1121[clinic start generated code]*/
1122
Eli Bendersky698bdb22013-01-10 06:01:06 -08001123static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001124_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1125/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001126{
1127 if (!PyDict_CheckExact(state)) {
1128 PyErr_Format(PyExc_TypeError,
1129 "Don't know how to unpickle \"%.200R\" as an Element",
1130 state);
1131 return NULL;
1132 }
1133 else
1134 return element_setstate_from_Python(self, state);
1135}
1136
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001137LOCAL(int)
1138checkpath(PyObject* tag)
1139{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001140 Py_ssize_t i;
1141 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001142
1143 /* check if a tag contains an xpath character */
1144
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001145#define PATHCHAR(ch) \
1146 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001147
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001148 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001149 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1150 void *data = PyUnicode_DATA(tag);
1151 unsigned int kind = PyUnicode_KIND(tag);
Stefan Behnel47541682019-05-03 20:58:16 +02001152 if (len >= 3 && PyUnicode_READ(kind, data, 0) == '{' && (
1153 PyUnicode_READ(kind, data, 1) == '}' || (
1154 PyUnicode_READ(kind, data, 1) == '*' &&
1155 PyUnicode_READ(kind, data, 2) == '}'))) {
1156 /* wildcard: '{}tag' or '{*}tag' */
1157 return 1;
1158 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001159 for (i = 0; i < len; i++) {
1160 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1161 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001162 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001163 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001164 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001165 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001166 return 1;
1167 }
1168 return 0;
1169 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001170 if (PyBytes_Check(tag)) {
1171 char *p = PyBytes_AS_STRING(tag);
Stefan Behnel47541682019-05-03 20:58:16 +02001172 const Py_ssize_t len = PyBytes_GET_SIZE(tag);
1173 if (len >= 3 && p[0] == '{' && (
Stefan Behnel6b951492019-05-06 17:36:35 +02001174 p[1] == '}' || (p[1] == '*' && p[2] == '}'))) {
Stefan Behnel47541682019-05-03 20:58:16 +02001175 /* wildcard: '{}tag' or '{*}tag' */
1176 return 1;
1177 }
1178 for (i = 0; i < len; i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001179 if (p[i] == '{')
1180 check = 0;
1181 else if (p[i] == '}')
1182 check = 1;
1183 else if (check && PATHCHAR(p[i]))
1184 return 1;
1185 }
1186 return 0;
1187 }
1188
1189 return 1; /* unknown type; might be path expression */
1190}
1191
Serhiy Storchakacb985562015-05-04 15:32:48 +03001192/*[clinic input]
1193_elementtree.Element.extend
1194
1195 elements: object
1196 /
1197
1198[clinic start generated code]*/
1199
1200static PyObject *
1201_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1202/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001203{
1204 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001205 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001206
Serhiy Storchakacb985562015-05-04 15:32:48 +03001207 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001208 if (!seq) {
1209 PyErr_Format(
1210 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001211 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001212 );
1213 return NULL;
1214 }
1215
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001216 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001217 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001218 Py_INCREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001219 if (element_add_subelement(self, element) < 0) {
1220 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001221 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001222 return NULL;
1223 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001224 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001225 }
1226
1227 Py_DECREF(seq);
1228
1229 Py_RETURN_NONE;
1230}
1231
Serhiy Storchakacb985562015-05-04 15:32:48 +03001232/*[clinic input]
1233_elementtree.Element.find
1234
1235 path: object
1236 namespaces: object = None
1237
1238[clinic start generated code]*/
1239
1240static PyObject *
1241_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1242 PyObject *namespaces)
1243/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001244{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001245 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001246 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001247
Serhiy Storchakacb985562015-05-04 15:32:48 +03001248 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001249 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001250 return _PyObject_CallMethodIdObjArgs(
1251 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001252 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001253 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001254
1255 if (!self->extra)
1256 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001257
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001258 for (i = 0; i < self->extra->length; i++) {
1259 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001260 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001261 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001262 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001263 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001264 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001265 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001266 Py_DECREF(item);
1267 if (rc < 0)
1268 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001269 }
1270
1271 Py_RETURN_NONE;
1272}
1273
Serhiy Storchakacb985562015-05-04 15:32:48 +03001274/*[clinic input]
1275_elementtree.Element.findtext
1276
1277 path: object
1278 default: object = None
1279 namespaces: object = None
1280
1281[clinic start generated code]*/
1282
1283static PyObject *
1284_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1285 PyObject *default_value,
1286 PyObject *namespaces)
1287/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001288{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001289 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001290 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001291 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001292
Serhiy Storchakacb985562015-05-04 15:32:48 +03001293 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001294 return _PyObject_CallMethodIdObjArgs(
1295 st->elementpath_obj, &PyId_findtext,
1296 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001297 );
1298
1299 if (!self->extra) {
1300 Py_INCREF(default_value);
1301 return default_value;
1302 }
1303
1304 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001305 PyObject *item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001306 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001307 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001308 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001309 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001310 if (rc > 0) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001311 PyObject* text = element_get_text((ElementObject*)item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001312 if (text == Py_None) {
1313 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001314 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001315 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001316 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001317 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001318 return text;
1319 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001320 Py_DECREF(item);
1321 if (rc < 0)
1322 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001323 }
1324
1325 Py_INCREF(default_value);
1326 return default_value;
1327}
1328
Serhiy Storchakacb985562015-05-04 15:32:48 +03001329/*[clinic input]
1330_elementtree.Element.findall
1331
1332 path: object
1333 namespaces: object = None
1334
1335[clinic start generated code]*/
1336
1337static PyObject *
1338_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1339 PyObject *namespaces)
1340/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001341{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001342 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001343 PyObject* out;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001344 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001345
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001346 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001347 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001348 return _PyObject_CallMethodIdObjArgs(
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001349 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001350 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001351 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001352
1353 out = PyList_New(0);
1354 if (!out)
1355 return NULL;
1356
1357 if (!self->extra)
1358 return out;
1359
1360 for (i = 0; i < self->extra->length; i++) {
1361 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001362 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001363 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001364 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001365 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001366 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1367 Py_DECREF(item);
1368 Py_DECREF(out);
1369 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001370 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001371 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001372 }
1373
1374 return out;
1375}
1376
Serhiy Storchakacb985562015-05-04 15:32:48 +03001377/*[clinic input]
1378_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001379
Serhiy Storchakacb985562015-05-04 15:32:48 +03001380 path: object
1381 namespaces: object = None
1382
1383[clinic start generated code]*/
1384
1385static PyObject *
1386_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1387 PyObject *namespaces)
1388/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1389{
1390 PyObject* tag = path;
1391 _Py_IDENTIFIER(iterfind);
1392 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001393
Victor Stinnerf5616342016-12-09 15:26:00 +01001394 return _PyObject_CallMethodIdObjArgs(
1395 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001396}
1397
Serhiy Storchakacb985562015-05-04 15:32:48 +03001398/*[clinic input]
1399_elementtree.Element.get
1400
1401 key: object
1402 default: object = None
1403
1404[clinic start generated code]*/
1405
1406static PyObject *
1407_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1408 PyObject *default_value)
1409/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001410{
1411 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001412
1413 if (!self->extra || self->extra->attrib == Py_None)
1414 value = default_value;
1415 else {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001416 value = PyDict_GetItemWithError(self->extra->attrib, key);
1417 if (!value) {
1418 if (PyErr_Occurred()) {
1419 return NULL;
1420 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001421 value = default_value;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001422 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001423 }
1424
1425 Py_INCREF(value);
1426 return value;
1427}
1428
Serhiy Storchakacb985562015-05-04 15:32:48 +03001429/*[clinic input]
1430_elementtree.Element.getchildren
1431
1432[clinic start generated code]*/
1433
1434static PyObject *
1435_elementtree_Element_getchildren_impl(ElementObject *self)
1436/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001437{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001438 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001439 PyObject* list;
1440
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001441 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1442 "This method will be removed in future versions. "
1443 "Use 'list(elem)' or iteration over elem instead.",
1444 1) < 0) {
1445 return NULL;
1446 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001447
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001448 if (!self->extra)
1449 return PyList_New(0);
1450
1451 list = PyList_New(self->extra->length);
1452 if (!list)
1453 return NULL;
1454
1455 for (i = 0; i < self->extra->length; i++) {
1456 PyObject* item = self->extra->children[i];
1457 Py_INCREF(item);
1458 PyList_SET_ITEM(list, i, item);
1459 }
1460
1461 return list;
1462}
1463
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001464
Eli Bendersky64d11e62012-06-15 07:42:50 +03001465static PyObject *
1466create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1467
1468
Serhiy Storchakacb985562015-05-04 15:32:48 +03001469/*[clinic input]
1470_elementtree.Element.iter
1471
1472 tag: object = None
1473
1474[clinic start generated code]*/
1475
Eli Bendersky64d11e62012-06-15 07:42:50 +03001476static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001477_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1478/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001479{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001480 if (PyUnicode_Check(tag)) {
1481 if (PyUnicode_READY(tag) < 0)
1482 return NULL;
1483 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1484 tag = Py_None;
1485 }
1486 else if (PyBytes_Check(tag)) {
1487 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1488 tag = Py_None;
1489 }
1490
Eli Bendersky64d11e62012-06-15 07:42:50 +03001491 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001492}
1493
1494
Serhiy Storchakacb985562015-05-04 15:32:48 +03001495/*[clinic input]
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001496_elementtree.Element.getiterator
1497
1498 tag: object = None
1499
1500[clinic start generated code]*/
1501
1502static PyObject *
1503_elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1504/*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1505{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03001506 if (PyErr_WarnEx(PyExc_DeprecationWarning,
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001507 "This method will be removed in future versions. "
1508 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1509 1) < 0) {
1510 return NULL;
1511 }
1512 return _elementtree_Element_iter_impl(self, tag);
1513}
1514
1515
1516/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03001517_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001518
Serhiy Storchakacb985562015-05-04 15:32:48 +03001519[clinic start generated code]*/
1520
1521static PyObject *
1522_elementtree_Element_itertext_impl(ElementObject *self)
1523/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1524{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001525 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001526}
1527
Eli Bendersky64d11e62012-06-15 07:42:50 +03001528
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001529static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001530element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001531{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001532 ElementObject* self = (ElementObject*) self_;
1533
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001534 if (!self->extra || index < 0 || index >= self->extra->length) {
1535 PyErr_SetString(
1536 PyExc_IndexError,
1537 "child index out of range"
1538 );
1539 return NULL;
1540 }
1541
1542 Py_INCREF(self->extra->children[index]);
1543 return self->extra->children[index];
1544}
1545
Serhiy Storchakacb985562015-05-04 15:32:48 +03001546/*[clinic input]
1547_elementtree.Element.insert
1548
1549 index: Py_ssize_t
1550 subelement: object(subclass_of='&Element_Type')
1551 /
1552
1553[clinic start generated code]*/
1554
1555static PyObject *
1556_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1557 PyObject *subelement)
1558/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001559{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001560 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001561
Victor Stinner5f0af232013-07-11 23:01:36 +02001562 if (!self->extra) {
1563 if (create_extra(self, NULL) < 0)
1564 return NULL;
1565 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001566
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001567 if (index < 0) {
1568 index += self->extra->length;
1569 if (index < 0)
1570 index = 0;
1571 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001572 if (index > self->extra->length)
1573 index = self->extra->length;
1574
1575 if (element_resize(self, 1) < 0)
1576 return NULL;
1577
1578 for (i = self->extra->length; i > index; i--)
1579 self->extra->children[i] = self->extra->children[i-1];
1580
Serhiy Storchakacb985562015-05-04 15:32:48 +03001581 Py_INCREF(subelement);
1582 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001583
1584 self->extra->length++;
1585
1586 Py_RETURN_NONE;
1587}
1588
Serhiy Storchakacb985562015-05-04 15:32:48 +03001589/*[clinic input]
1590_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001591
Serhiy Storchakacb985562015-05-04 15:32:48 +03001592[clinic start generated code]*/
1593
1594static PyObject *
1595_elementtree_Element_items_impl(ElementObject *self)
1596/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1597{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001598 if (!self->extra || self->extra->attrib == Py_None)
1599 return PyList_New(0);
1600
1601 return PyDict_Items(self->extra->attrib);
1602}
1603
Serhiy Storchakacb985562015-05-04 15:32:48 +03001604/*[clinic input]
1605_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001606
Serhiy Storchakacb985562015-05-04 15:32:48 +03001607[clinic start generated code]*/
1608
1609static PyObject *
1610_elementtree_Element_keys_impl(ElementObject *self)
1611/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1612{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001613 if (!self->extra || self->extra->attrib == Py_None)
1614 return PyList_New(0);
1615
1616 return PyDict_Keys(self->extra->attrib);
1617}
1618
Martin v. Löwis18e16552006-02-15 17:27:45 +00001619static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001620element_length(ElementObject* self)
1621{
1622 if (!self->extra)
1623 return 0;
1624
1625 return self->extra->length;
1626}
1627
Serhiy Storchakacb985562015-05-04 15:32:48 +03001628/*[clinic input]
1629_elementtree.Element.makeelement
1630
1631 tag: object
1632 attrib: object
1633 /
1634
1635[clinic start generated code]*/
1636
1637static PyObject *
1638_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1639 PyObject *attrib)
1640/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001641{
1642 PyObject* elem;
1643
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001644 attrib = PyDict_Copy(attrib);
1645 if (!attrib)
1646 return NULL;
1647
Eli Bendersky092af1f2012-03-04 07:14:03 +02001648 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001649
1650 Py_DECREF(attrib);
1651
1652 return elem;
1653}
1654
Serhiy Storchakacb985562015-05-04 15:32:48 +03001655/*[clinic input]
1656_elementtree.Element.remove
1657
1658 subelement: object(subclass_of='&Element_Type')
1659 /
1660
1661[clinic start generated code]*/
1662
1663static PyObject *
1664_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1665/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001666{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001667 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001668 int rc;
1669 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001670
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001671 if (!self->extra) {
1672 /* element has no children, so raise exception */
1673 PyErr_SetString(
1674 PyExc_ValueError,
1675 "list.remove(x): x not in list"
1676 );
1677 return NULL;
1678 }
1679
1680 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001681 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001682 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001683 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001684 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001685 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001686 if (rc < 0)
1687 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001688 }
1689
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001690 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001691 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001692 PyErr_SetString(
1693 PyExc_ValueError,
1694 "list.remove(x): x not in list"
1695 );
1696 return NULL;
1697 }
1698
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001699 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001700
1701 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001702 for (; i < self->extra->length; i++)
1703 self->extra->children[i] = self->extra->children[i+1];
1704
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001705 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001706 Py_RETURN_NONE;
1707}
1708
1709static PyObject*
1710element_repr(ElementObject* self)
1711{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001712 int status;
1713
1714 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001715 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001716
1717 status = Py_ReprEnter((PyObject *)self);
1718 if (status == 0) {
1719 PyObject *res;
1720 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1721 Py_ReprLeave((PyObject *)self);
1722 return res;
1723 }
1724 if (status > 0)
1725 PyErr_Format(PyExc_RuntimeError,
1726 "reentrant call inside %s.__repr__",
1727 Py_TYPE(self)->tp_name);
1728 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001729}
1730
Serhiy Storchakacb985562015-05-04 15:32:48 +03001731/*[clinic input]
1732_elementtree.Element.set
1733
1734 key: object
1735 value: object
1736 /
1737
1738[clinic start generated code]*/
1739
1740static PyObject *
1741_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1742 PyObject *value)
1743/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001744{
1745 PyObject* attrib;
1746
Victor Stinner5f0af232013-07-11 23:01:36 +02001747 if (!self->extra) {
1748 if (create_extra(self, NULL) < 0)
1749 return NULL;
1750 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001751
1752 attrib = element_get_attrib(self);
1753 if (!attrib)
1754 return NULL;
1755
1756 if (PyDict_SetItem(attrib, key, value) < 0)
1757 return NULL;
1758
1759 Py_RETURN_NONE;
1760}
1761
1762static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001763element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001764{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001765 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001766 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001767 PyObject* old;
1768
1769 if (!self->extra || index < 0 || index >= self->extra->length) {
1770 PyErr_SetString(
1771 PyExc_IndexError,
1772 "child assignment index out of range");
1773 return -1;
1774 }
1775
1776 old = self->extra->children[index];
1777
1778 if (item) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001779 if (!Element_Check(item)) {
1780 raise_type_error(item);
1781 return -1;
1782 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001783 Py_INCREF(item);
1784 self->extra->children[index] = item;
1785 } else {
1786 self->extra->length--;
1787 for (i = index; i < self->extra->length; i++)
1788 self->extra->children[i] = self->extra->children[i+1];
1789 }
1790
1791 Py_DECREF(old);
1792
1793 return 0;
1794}
1795
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001796static PyObject*
1797element_subscr(PyObject* self_, PyObject* item)
1798{
1799 ElementObject* self = (ElementObject*) self_;
1800
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001801 if (PyIndex_Check(item)) {
1802 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001803
1804 if (i == -1 && PyErr_Occurred()) {
1805 return NULL;
1806 }
1807 if (i < 0 && self->extra)
1808 i += self->extra->length;
1809 return element_getitem(self_, i);
1810 }
1811 else if (PySlice_Check(item)) {
1812 Py_ssize_t start, stop, step, slicelen, cur, i;
1813 PyObject* list;
1814
1815 if (!self->extra)
1816 return PyList_New(0);
1817
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001818 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001819 return NULL;
1820 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001821 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1822 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001823
1824 if (slicelen <= 0)
1825 return PyList_New(0);
1826 else {
1827 list = PyList_New(slicelen);
1828 if (!list)
1829 return NULL;
1830
1831 for (cur = start, i = 0; i < slicelen;
1832 cur += step, i++) {
1833 PyObject* item = self->extra->children[cur];
1834 Py_INCREF(item);
1835 PyList_SET_ITEM(list, i, item);
1836 }
1837
1838 return list;
1839 }
1840 }
1841 else {
1842 PyErr_SetString(PyExc_TypeError,
1843 "element indices must be integers");
1844 return NULL;
1845 }
1846}
1847
1848static int
1849element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1850{
1851 ElementObject* self = (ElementObject*) self_;
1852
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001853 if (PyIndex_Check(item)) {
1854 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001855
1856 if (i == -1 && PyErr_Occurred()) {
1857 return -1;
1858 }
1859 if (i < 0 && self->extra)
1860 i += self->extra->length;
1861 return element_setitem(self_, i, value);
1862 }
1863 else if (PySlice_Check(item)) {
1864 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1865
1866 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001867 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001868
Victor Stinner5f0af232013-07-11 23:01:36 +02001869 if (!self->extra) {
1870 if (create_extra(self, NULL) < 0)
1871 return -1;
1872 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001873
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001874 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001875 return -1;
1876 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001877 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1878 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001879
Eli Bendersky865756a2012-03-09 13:38:15 +02001880 if (value == NULL) {
1881 /* Delete slice */
1882 size_t cur;
1883 Py_ssize_t i;
1884
1885 if (slicelen <= 0)
1886 return 0;
1887
1888 /* Since we're deleting, the direction of the range doesn't matter,
1889 * so for simplicity make it always ascending.
1890 */
1891 if (step < 0) {
1892 stop = start + 1;
1893 start = stop + step * (slicelen - 1) - 1;
1894 step = -step;
1895 }
1896
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001897 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001898
1899 /* recycle is a list that will contain all the children
1900 * scheduled for removal.
1901 */
1902 if (!(recycle = PyList_New(slicelen))) {
Eli Bendersky865756a2012-03-09 13:38:15 +02001903 return -1;
1904 }
1905
1906 /* This loop walks over all the children that have to be deleted,
1907 * with cur pointing at them. num_moved is the amount of children
1908 * until the next deleted child that have to be "shifted down" to
1909 * occupy the deleted's places.
1910 * Note that in the ith iteration, shifting is done i+i places down
1911 * because i children were already removed.
1912 */
1913 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1914 /* Compute how many children have to be moved, clipping at the
1915 * list end.
1916 */
1917 Py_ssize_t num_moved = step - 1;
1918 if (cur + step >= (size_t)self->extra->length) {
1919 num_moved = self->extra->length - cur - 1;
1920 }
1921
1922 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1923
1924 memmove(
1925 self->extra->children + cur - i,
1926 self->extra->children + cur + 1,
1927 num_moved * sizeof(PyObject *));
1928 }
1929
1930 /* Leftover "tail" after the last removed child */
1931 cur = start + (size_t)slicelen * step;
1932 if (cur < (size_t)self->extra->length) {
1933 memmove(
1934 self->extra->children + cur - slicelen,
1935 self->extra->children + cur,
1936 (self->extra->length - cur) * sizeof(PyObject *));
1937 }
1938
1939 self->extra->length -= slicelen;
1940
1941 /* Discard the recycle list with all the deleted sub-elements */
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -06001942 Py_DECREF(recycle);
Eli Bendersky865756a2012-03-09 13:38:15 +02001943 return 0;
1944 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001945
1946 /* A new slice is actually being assigned */
1947 seq = PySequence_Fast(value, "");
1948 if (!seq) {
1949 PyErr_Format(
1950 PyExc_TypeError,
1951 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1952 );
1953 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001954 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001955 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001956
1957 if (step != 1 && newlen != slicelen)
1958 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001959 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001960 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001961 "attempt to assign sequence of size %zd "
1962 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001963 newlen, slicelen
1964 );
1965 return -1;
1966 }
1967
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001968 /* Resize before creating the recycle bin, to prevent refleaks. */
1969 if (newlen > slicelen) {
1970 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001971 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001972 return -1;
1973 }
1974 }
1975
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001976 for (i = 0; i < newlen; i++) {
1977 PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1978 if (!Element_Check(element)) {
1979 raise_type_error(element);
1980 Py_DECREF(seq);
1981 return -1;
1982 }
1983 }
1984
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001985 if (slicelen > 0) {
1986 /* to avoid recursive calls to this method (via decref), move
1987 old items to the recycle bin here, and get rid of them when
1988 we're done modifying the element */
1989 recycle = PyList_New(slicelen);
1990 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001991 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001992 return -1;
1993 }
1994 for (cur = start, i = 0; i < slicelen;
1995 cur += step, i++)
1996 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1997 }
1998
1999 if (newlen < slicelen) {
2000 /* delete slice */
2001 for (i = stop; i < self->extra->length; i++)
2002 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
2003 } else if (newlen > slicelen) {
2004 /* insert slice */
2005 for (i = self->extra->length-1; i >= stop; i--)
2006 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
2007 }
2008
2009 /* replace the slice */
2010 for (cur = start, i = 0; i < newlen;
2011 cur += step, i++) {
2012 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
2013 Py_INCREF(element);
2014 self->extra->children[cur] = element;
2015 }
2016
2017 self->extra->length += newlen - slicelen;
2018
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02002019 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002020
2021 /* discard the recycle bin, and everything in it */
2022 Py_XDECREF(recycle);
2023
2024 return 0;
2025 }
2026 else {
2027 PyErr_SetString(PyExc_TypeError,
2028 "element indices must be integers");
2029 return -1;
2030 }
2031}
2032
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002033static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02002034element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002035{
Serhiy Storchakadde08152015-11-25 15:28:13 +02002036 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002037 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002038 return res;
2039}
2040
Serhiy Storchakadde08152015-11-25 15:28:13 +02002041static PyObject*
2042element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002043{
Serhiy Storchakadde08152015-11-25 15:28:13 +02002044 PyObject *res = element_get_text(self);
2045 Py_XINCREF(res);
2046 return res;
2047}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002048
Serhiy Storchakadde08152015-11-25 15:28:13 +02002049static PyObject*
2050element_tail_getter(ElementObject *self, void *closure)
2051{
2052 PyObject *res = element_get_tail(self);
2053 Py_XINCREF(res);
2054 return res;
2055}
2056
2057static PyObject*
2058element_attrib_getter(ElementObject *self, void *closure)
2059{
2060 PyObject *res;
2061 if (!self->extra) {
2062 if (create_extra(self, NULL) < 0)
2063 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002064 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02002065 res = element_get_attrib(self);
2066 Py_XINCREF(res);
2067 return res;
2068}
Victor Stinner4d463432013-07-11 23:05:03 +02002069
/* Setter guard: a NULL value means the attribute is being deleted, which
   is refused with AttributeError by returning -1 from the enclosing
   setter.  Wrapped in do { } while (0) so that the macro followed by its
   semicolon is exactly one statement and cannot capture a following
   `else`. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2078
Serhiy Storchakadde08152015-11-25 15:28:13 +02002079static int
2080element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2081{
2082 _VALIDATE_ATTR_VALUE(value);
2083 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002084 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002085 return 0;
2086}
2087
2088static int
2089element_text_setter(ElementObject *self, PyObject *value, void *closure)
2090{
2091 _VALIDATE_ATTR_VALUE(value);
2092 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002093 _set_joined_ptr(&self->text, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002094 return 0;
2095}
2096
2097static int
2098element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2099{
2100 _VALIDATE_ATTR_VALUE(value);
2101 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002102 _set_joined_ptr(&self->tail, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002103 return 0;
2104}
2105
2106static int
2107element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2108{
2109 _VALIDATE_ATTR_VALUE(value);
2110 if (!self->extra) {
2111 if (create_extra(self, NULL) < 0)
2112 return -1;
2113 }
2114 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002115 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002116 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002117}
2118
2119static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002120 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002121 0, /* sq_concat */
2122 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002123 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002124 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002125 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002126 0,
2127};
2128
Eli Bendersky64d11e62012-06-15 07:42:50 +03002129/******************************* Element iterator ****************************/
2130
2131/* ElementIterObject represents the iteration state over an XML element in
2132 * pre-order traversal. To keep track of which sub-element should be returned
2133 * next, a stack of parents is maintained. This is a standard stack-based
2134 * iterative pre-order traversal of a tree.
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002135 * The stack is managed using a continuous array.
2136 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002137 * the current one is exhausted, and the next child to examine in that parent.
2138 */
2139typedef struct ParentLocator_t {
2140 ElementObject *parent;
2141 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002142} ParentLocator;
2143
2144typedef struct {
2145 PyObject_HEAD
2146 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002147 Py_ssize_t parent_stack_used;
2148 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002149 ElementObject *root_element;
2150 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002151 int gettext;
2152} ElementIterObject;
2153
2154
2155static void
2156elementiter_dealloc(ElementIterObject *it)
2157{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002158 Py_ssize_t i = it->parent_stack_used;
2159 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002160 /* bpo-31095: UnTrack is needed before calling any callbacks */
2161 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002162 while (i--)
2163 Py_XDECREF(it->parent_stack[i].parent);
2164 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002165
2166 Py_XDECREF(it->sought_tag);
2167 Py_XDECREF(it->root_element);
2168
Eli Bendersky64d11e62012-06-15 07:42:50 +03002169 PyObject_GC_Del(it);
2170}
2171
2172static int
2173elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2174{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002175 Py_ssize_t i = it->parent_stack_used;
2176 while (i--)
2177 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002178
2179 Py_VISIT(it->root_element);
2180 Py_VISIT(it->sought_tag);
2181 return 0;
2182}
2183
2184/* Helper function for elementiter_next. Add a new parent to the parent stack.
2185 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002186static int
2187parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002188{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002189 ParentLocator *item;
2190
2191 if (it->parent_stack_used >= it->parent_stack_size) {
2192 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2193 ParentLocator *parent_stack = it->parent_stack;
2194 PyMem_Resize(parent_stack, ParentLocator, new_size);
2195 if (parent_stack == NULL)
2196 return -1;
2197 it->parent_stack = parent_stack;
2198 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002199 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002200 item = it->parent_stack + it->parent_stack_used++;
2201 Py_INCREF(parent);
2202 item->parent = parent;
2203 item->child_index = 0;
2204 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002205}
2206
2207static PyObject *
2208elementiter_next(ElementIterObject *it)
2209{
2210 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002211 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002212 * A short note on gettext: this function serves both the iter() and
2213 * itertext() methods to avoid code duplication. However, there are a few
2214 * small differences in the way these iterations work. Namely:
2215 * - itertext() only yields text from nodes that have it, and continues
2216 * iterating when a node doesn't have text (so it doesn't return any
2217 * node like iter())
2218 * - itertext() also has to handle tail, after finishing with all the
2219 * children of a node.
2220 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002221 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002222 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002223 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002224
2225 while (1) {
2226 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002227 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002228 * iterator is exhausted.
2229 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002230 if (!it->parent_stack_used) {
2231 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002232 PyErr_SetNone(PyExc_StopIteration);
2233 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002234 }
2235
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002236 elem = it->root_element; /* steals a reference */
2237 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002238 }
2239 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002240 /* See if there are children left to traverse in the current parent. If
2241 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002242 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002243 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2244 Py_ssize_t child_index = item->child_index;
2245 ElementObjectExtra *extra;
2246 elem = item->parent;
2247 extra = elem->extra;
2248 if (!extra || child_index >= extra->length) {
2249 it->parent_stack_used--;
2250 /* Note that extra condition on it->parent_stack_used here;
2251 * this is because itertext() is supposed to only return *inner*
2252 * text, not text following the element it began iteration with.
2253 */
2254 if (it->gettext && it->parent_stack_used) {
2255 text = element_get_tail(elem);
2256 goto gettext;
2257 }
2258 Py_DECREF(elem);
2259 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002260 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002261
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03002262 assert(Element_Check(extra->children[child_index]));
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002263 elem = (ElementObject *)extra->children[child_index];
2264 item->child_index++;
2265 Py_INCREF(elem);
2266 }
2267
2268 if (parent_stack_push_new(it, elem) < 0) {
2269 Py_DECREF(elem);
2270 PyErr_NoMemory();
2271 return NULL;
2272 }
2273 if (it->gettext) {
2274 text = element_get_text(elem);
2275 goto gettext;
2276 }
2277
2278 if (it->sought_tag == Py_None)
2279 return (PyObject *)elem;
2280
2281 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2282 if (rc > 0)
2283 return (PyObject *)elem;
2284
2285 Py_DECREF(elem);
2286 if (rc < 0)
2287 return NULL;
2288 continue;
2289
2290gettext:
2291 if (!text) {
2292 Py_DECREF(elem);
2293 return NULL;
2294 }
2295 if (text == Py_None) {
2296 Py_DECREF(elem);
2297 }
2298 else {
2299 Py_INCREF(text);
2300 Py_DECREF(elem);
2301 rc = PyObject_IsTrue(text);
2302 if (rc > 0)
2303 return text;
2304 Py_DECREF(text);
2305 if (rc < 0)
2306 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002307 }
2308 }
2309
2310 return NULL;
2311}
2312
2313
2314static PyTypeObject ElementIter_Type = {
2315 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002316 /* Using the module's name since the pure-Python implementation does not
2317 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002318 "_elementtree._element_iterator", /* tp_name */
2319 sizeof(ElementIterObject), /* tp_basicsize */
2320 0, /* tp_itemsize */
2321 /* methods */
2322 (destructor)elementiter_dealloc, /* tp_dealloc */
2323 0, /* tp_print */
2324 0, /* tp_getattr */
2325 0, /* tp_setattr */
2326 0, /* tp_reserved */
2327 0, /* tp_repr */
2328 0, /* tp_as_number */
2329 0, /* tp_as_sequence */
2330 0, /* tp_as_mapping */
2331 0, /* tp_hash */
2332 0, /* tp_call */
2333 0, /* tp_str */
2334 0, /* tp_getattro */
2335 0, /* tp_setattro */
2336 0, /* tp_as_buffer */
2337 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2338 0, /* tp_doc */
2339 (traverseproc)elementiter_traverse, /* tp_traverse */
2340 0, /* tp_clear */
2341 0, /* tp_richcompare */
2342 0, /* tp_weaklistoffset */
2343 PyObject_SelfIter, /* tp_iter */
2344 (iternextfunc)elementiter_next, /* tp_iternext */
2345 0, /* tp_methods */
2346 0, /* tp_members */
2347 0, /* tp_getset */
2348 0, /* tp_base */
2349 0, /* tp_dict */
2350 0, /* tp_descr_get */
2351 0, /* tp_descr_set */
2352 0, /* tp_dictoffset */
2353 0, /* tp_init */
2354 0, /* tp_alloc */
2355 0, /* tp_new */
2356};
2357
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002358#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002359
2360static PyObject *
2361create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2362{
2363 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002364
2365 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2366 if (!it)
2367 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002368
Victor Stinner4d463432013-07-11 23:05:03 +02002369 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002370 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002371 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002372 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002373 it->root_element = self;
2374
Eli Bendersky64d11e62012-06-15 07:42:50 +03002375 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002376
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002377 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002378 if (it->parent_stack == NULL) {
2379 Py_DECREF(it);
2380 PyErr_NoMemory();
2381 return NULL;
2382 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002383 it->parent_stack_used = 0;
2384 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002385
Eli Bendersky64d11e62012-06-15 07:42:50 +03002386 return (PyObject *)it;
2387}
2388
2389
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002390/* ==================================================================== */
2391/* the tree builder type */
2392
2393typedef struct {
2394 PyObject_HEAD
2395
Eli Bendersky58d548d2012-05-29 15:45:16 +03002396 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002397
Antoine Pitrouee329312012-10-04 19:53:29 +02002398 PyObject *this; /* current node */
2399 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002400
Eli Bendersky58d548d2012-05-29 15:45:16 +03002401 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002402
Eli Bendersky58d548d2012-05-29 15:45:16 +03002403 PyObject *stack; /* element stack */
2404 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002405
Eli Bendersky48d358b2012-05-30 17:57:50 +03002406 PyObject *element_factory;
Stefan Behnel43851a22019-05-01 21:20:38 +02002407 PyObject *comment_factory;
2408 PyObject *pi_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002409
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002410 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002411 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002412 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2413 PyObject *end_event_obj;
2414 PyObject *start_ns_event_obj;
2415 PyObject *end_ns_event_obj;
Stefan Behnel43851a22019-05-01 21:20:38 +02002416 PyObject *comment_event_obj;
2417 PyObject *pi_event_obj;
2418
2419 char insert_comments;
2420 char insert_pis;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002421} TreeBuilderObject;
2422
Christian Heimes90aa7642007-12-19 02:45:37 +00002423#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002424
2425/* -------------------------------------------------------------------- */
2426/* constructor and destructor */
2427
Eli Bendersky58d548d2012-05-29 15:45:16 +03002428static PyObject *
2429treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002430{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002431 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2432 if (t != NULL) {
2433 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002434
Eli Bendersky58d548d2012-05-29 15:45:16 +03002435 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002436 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002437 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002438 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002439
Eli Bendersky58d548d2012-05-29 15:45:16 +03002440 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002441 t->element_factory = NULL;
Stefan Behnel43851a22019-05-01 21:20:38 +02002442 t->comment_factory = NULL;
2443 t->pi_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002444 t->stack = PyList_New(20);
2445 if (!t->stack) {
2446 Py_DECREF(t->this);
2447 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002448 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002449 return NULL;
2450 }
2451 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002452
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002453 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002454 t->start_event_obj = t->end_event_obj = NULL;
2455 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
Stefan Behnel43851a22019-05-01 21:20:38 +02002456 t->comment_event_obj = t->pi_event_obj = NULL;
2457 t->insert_comments = t->insert_pis = 0;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002458 }
2459 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002460}
2461
Serhiy Storchakacb985562015-05-04 15:32:48 +03002462/*[clinic input]
2463_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002464
Serhiy Storchakacb985562015-05-04 15:32:48 +03002465 element_factory: object = NULL
Stefan Behnel43851a22019-05-01 21:20:38 +02002466 *
2467 comment_factory: object = NULL
2468 pi_factory: object = NULL
2469 insert_comments: bool = False
2470 insert_pis: bool = False
Serhiy Storchakacb985562015-05-04 15:32:48 +03002471
2472[clinic start generated code]*/
2473
2474static int
2475_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
Stefan Behnel43851a22019-05-01 21:20:38 +02002476 PyObject *element_factory,
2477 PyObject *comment_factory,
2478 PyObject *pi_factory,
2479 int insert_comments, int insert_pis)
2480/*[clinic end generated code: output=8571d4dcadfdf952 input=1f967b5c245e0a71]*/
Serhiy Storchakacb985562015-05-04 15:32:48 +03002481{
Stefan Behnel43851a22019-05-01 21:20:38 +02002482 if (element_factory && element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002483 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002484 Py_XSETREF(self->element_factory, element_factory);
Stefan Behnel43851a22019-05-01 21:20:38 +02002485 } else {
2486 Py_CLEAR(self->element_factory);
2487 }
2488
2489 if (!comment_factory || comment_factory == Py_None) {
2490 elementtreestate *st = ET_STATE_GLOBAL;
2491 comment_factory = st->comment_factory;
2492 }
2493 if (comment_factory) {
2494 Py_INCREF(comment_factory);
2495 Py_XSETREF(self->comment_factory, comment_factory);
2496 self->insert_comments = insert_comments;
2497 } else {
2498 Py_CLEAR(self->comment_factory);
2499 self->insert_comments = 0;
2500 }
2501
2502 if (!pi_factory || pi_factory == Py_None) {
2503 elementtreestate *st = ET_STATE_GLOBAL;
2504 pi_factory = st->pi_factory;
2505 }
2506 if (pi_factory) {
2507 Py_INCREF(pi_factory);
2508 Py_XSETREF(self->pi_factory, pi_factory);
2509 self->insert_pis = insert_pis;
2510 } else {
2511 Py_CLEAR(self->pi_factory);
2512 self->insert_pis = 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002513 }
2514
Eli Bendersky58d548d2012-05-29 15:45:16 +03002515 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002516}
2517
Eli Bendersky48d358b2012-05-30 17:57:50 +03002518static int
2519treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2520{
Stefan Behnel43851a22019-05-01 21:20:38 +02002521 Py_VISIT(self->pi_event_obj);
2522 Py_VISIT(self->comment_event_obj);
Serhiy Storchakad2a75c62018-12-18 22:29:14 +02002523 Py_VISIT(self->end_ns_event_obj);
2524 Py_VISIT(self->start_ns_event_obj);
2525 Py_VISIT(self->end_event_obj);
2526 Py_VISIT(self->start_event_obj);
2527 Py_VISIT(self->events_append);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002528 Py_VISIT(self->root);
2529 Py_VISIT(self->this);
2530 Py_VISIT(self->last);
2531 Py_VISIT(self->data);
2532 Py_VISIT(self->stack);
Stefan Behnel43851a22019-05-01 21:20:38 +02002533 Py_VISIT(self->pi_factory);
2534 Py_VISIT(self->comment_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002535 Py_VISIT(self->element_factory);
2536 return 0;
2537}
2538
2539static int
2540treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002541{
Stefan Behnel43851a22019-05-01 21:20:38 +02002542 Py_CLEAR(self->pi_event_obj);
2543 Py_CLEAR(self->comment_event_obj);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002544 Py_CLEAR(self->end_ns_event_obj);
2545 Py_CLEAR(self->start_ns_event_obj);
2546 Py_CLEAR(self->end_event_obj);
2547 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002548 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002549 Py_CLEAR(self->stack);
2550 Py_CLEAR(self->data);
2551 Py_CLEAR(self->last);
2552 Py_CLEAR(self->this);
Stefan Behnel43851a22019-05-01 21:20:38 +02002553 Py_CLEAR(self->pi_factory);
2554 Py_CLEAR(self->comment_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002555 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002556 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002557 return 0;
2558}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002559
Eli Bendersky48d358b2012-05-30 17:57:50 +03002560static void
2561treebuilder_dealloc(TreeBuilderObject *self)
2562{
2563 PyObject_GC_UnTrack(self);
2564 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002565 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002566}
2567
2568/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002569/* helpers for handling of arbitrary element-like objects */
2570
Stefan Behnel43851a22019-05-01 21:20:38 +02002571/*[clinic input]
2572_elementtree._set_factories
2573
2574 comment_factory: object
2575 pi_factory: object
2576 /
2577
2578Change the factories used to create comments and processing instructions.
2579
2580For internal use only.
2581[clinic start generated code]*/
2582
2583static PyObject *
2584_elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
2585 PyObject *pi_factory)
2586/*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/
2587{
2588 elementtreestate *st = ET_STATE_GLOBAL;
2589 PyObject *old;
2590
2591 if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) {
2592 PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s",
2593 Py_TYPE(comment_factory)->tp_name);
2594 return NULL;
2595 }
2596 if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) {
2597 PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s",
2598 Py_TYPE(pi_factory)->tp_name);
2599 return NULL;
2600 }
2601
2602 old = PyTuple_Pack(2,
2603 st->comment_factory ? st->comment_factory : Py_None,
2604 st->pi_factory ? st->pi_factory : Py_None);
2605
2606 if (comment_factory == Py_None) {
2607 Py_CLEAR(st->comment_factory);
2608 } else {
2609 Py_INCREF(comment_factory);
2610 Py_XSETREF(st->comment_factory, comment_factory);
2611 }
2612 if (pi_factory == Py_None) {
2613 Py_CLEAR(st->pi_factory);
2614 } else {
2615 Py_INCREF(pi_factory);
2616 Py_XSETREF(st->pi_factory, pi_factory);
2617 }
2618
2619 return old;
2620}
2621
Antoine Pitrouee329312012-10-04 19:53:29 +02002622static int
Serhiy Storchaka576def02017-03-30 09:47:31 +03002623treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
Antoine Pitrouee329312012-10-04 19:53:29 +02002624 PyObject **dest, _Py_Identifier *name)
2625{
2626 if (Element_CheckExact(element)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002627 PyObject *tmp = JOIN_OBJ(*dest);
2628 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2629 *data = NULL;
2630 Py_DECREF(tmp);
Antoine Pitrouee329312012-10-04 19:53:29 +02002631 return 0;
2632 }
2633 else {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002634 PyObject *joined = list_join(*data);
Antoine Pitrouee329312012-10-04 19:53:29 +02002635 int r;
2636 if (joined == NULL)
2637 return -1;
2638 r = _PyObject_SetAttrId(element, name, joined);
2639 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002640 if (r < 0)
2641 return -1;
2642 Py_CLEAR(*data);
2643 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002644 }
2645}
2646
Serhiy Storchaka576def02017-03-30 09:47:31 +03002647LOCAL(int)
2648treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002649{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002650 PyObject *element = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002651
Serhiy Storchaka576def02017-03-30 09:47:31 +03002652 if (!self->data) {
2653 return 0;
2654 }
2655
2656 if (self->this == element) {
2657 _Py_IDENTIFIER(text);
2658 return treebuilder_set_element_text_or_tail(
2659 element, &self->data,
2660 &((ElementObject *) element)->text, &PyId_text);
2661 }
2662 else {
2663 _Py_IDENTIFIER(tail);
2664 return treebuilder_set_element_text_or_tail(
2665 element, &self->data,
2666 &((ElementObject *) element)->tail, &PyId_tail);
2667 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002668}
2669
2670static int
2671treebuilder_add_subelement(PyObject *element, PyObject *child)
2672{
2673 _Py_IDENTIFIER(append);
2674 if (Element_CheckExact(element)) {
2675 ElementObject *elem = (ElementObject *) element;
2676 return element_add_subelement(elem, child);
2677 }
2678 else {
2679 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002680 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002681 if (res == NULL)
2682 return -1;
2683 Py_DECREF(res);
2684 return 0;
2685 }
2686}
2687
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002688LOCAL(int)
2689treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2690 PyObject *node)
2691{
2692 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002693 PyObject *res;
2694 PyObject *event = PyTuple_Pack(2, action, node);
2695 if (event == NULL)
2696 return -1;
Stefan Behnel43851a22019-05-01 21:20:38 +02002697 res = _PyObject_FastCall(self->events_append, &event, 1);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002698 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002699 if (res == NULL)
2700 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002701 Py_DECREF(res);
2702 }
2703 return 0;
2704}
2705
Antoine Pitrouee329312012-10-04 19:53:29 +02002706/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002707/* handlers */
2708
2709LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002710treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2711 PyObject* attrib)
2712{
2713 PyObject* node;
2714 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002715 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002716
Serhiy Storchaka576def02017-03-30 09:47:31 +03002717 if (treebuilder_flush_data(self) < 0) {
2718 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002719 }
2720
Stefan Behnel43851a22019-05-01 21:20:38 +02002721 if (!self->element_factory) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002722 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002723 } else if (attrib == Py_None) {
2724 attrib = PyDict_New();
2725 if (!attrib)
2726 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002727 node = PyObject_CallFunctionObjArgs(self->element_factory,
2728 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002729 Py_DECREF(attrib);
2730 }
2731 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002732 node = PyObject_CallFunctionObjArgs(self->element_factory,
2733 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002734 }
2735 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002736 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002737 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002738
Antoine Pitrouee329312012-10-04 19:53:29 +02002739 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002740
2741 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002742 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002743 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002744 } else {
2745 if (self->root) {
2746 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002747 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002748 "multiple elements on top level"
2749 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002750 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002751 }
2752 Py_INCREF(node);
2753 self->root = node;
2754 }
2755
2756 if (self->index < PyList_GET_SIZE(self->stack)) {
2757 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002758 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002759 Py_INCREF(this);
2760 } else {
2761 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002762 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002763 }
2764 self->index++;
2765
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002766 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002767 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002768 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002769 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002770
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002771 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2772 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002773
2774 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002775
2776 error:
2777 Py_DECREF(node);
2778 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002779}
2780
2781LOCAL(PyObject*)
2782treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2783{
2784 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002785 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002786 /* ignore calls to data before the first call to start */
2787 Py_RETURN_NONE;
2788 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002789 /* store the first item as is */
2790 Py_INCREF(data); self->data = data;
2791 } else {
2792 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002793 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2794 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002795 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002796 /* expat often generates single character data sections; handle
2797 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002798 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2799 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002800 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002801 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002802 } else if (PyList_CheckExact(self->data)) {
2803 if (PyList_Append(self->data, data) < 0)
2804 return NULL;
2805 } else {
2806 PyObject* list = PyList_New(2);
2807 if (!list)
2808 return NULL;
2809 PyList_SET_ITEM(list, 0, self->data);
2810 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2811 self->data = list;
2812 }
2813 }
2814
2815 Py_RETURN_NONE;
2816}
2817
2818LOCAL(PyObject*)
2819treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2820{
2821 PyObject* item;
2822
Serhiy Storchaka576def02017-03-30 09:47:31 +03002823 if (treebuilder_flush_data(self) < 0) {
2824 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002825 }
2826
2827 if (self->index == 0) {
2828 PyErr_SetString(
2829 PyExc_IndexError,
2830 "pop from empty stack"
2831 );
2832 return NULL;
2833 }
2834
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002835 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002836 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002837 self->index--;
2838 self->this = PyList_GET_ITEM(self->stack, self->index);
2839 Py_INCREF(self->this);
2840 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002841
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002842 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2843 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002844
2845 Py_INCREF(self->last);
2846 return (PyObject*) self->last;
2847}
2848
Stefan Behnel43851a22019-05-01 21:20:38 +02002849LOCAL(PyObject*)
2850treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
2851{
2852 PyObject* comment = NULL;
2853 PyObject* this;
2854
2855 if (treebuilder_flush_data(self) < 0) {
2856 return NULL;
2857 }
2858
2859 if (self->comment_factory) {
2860 comment = _PyObject_FastCall(self->comment_factory, &text, 1);
2861 if (!comment)
2862 return NULL;
2863
2864 this = self->this;
2865 if (self->insert_comments && this != Py_None) {
2866 if (treebuilder_add_subelement(this, comment) < 0)
2867 goto error;
2868 }
2869 } else {
2870 Py_INCREF(text);
2871 comment = text;
2872 }
2873
2874 if (self->events_append && self->comment_event_obj) {
2875 if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0)
2876 goto error;
2877 }
2878
2879 return comment;
2880
2881 error:
2882 Py_DECREF(comment);
2883 return NULL;
2884}
2885
2886LOCAL(PyObject*)
2887treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
2888{
2889 PyObject* pi = NULL;
2890 PyObject* this;
2891 PyObject* stack[2] = {target, text};
2892
2893 if (treebuilder_flush_data(self) < 0) {
2894 return NULL;
2895 }
2896
2897 if (self->pi_factory) {
2898 pi = _PyObject_FastCall(self->pi_factory, stack, 2);
2899 if (!pi) {
2900 return NULL;
2901 }
2902
2903 this = self->this;
2904 if (self->insert_pis && this != Py_None) {
2905 if (treebuilder_add_subelement(this, pi) < 0)
2906 goto error;
2907 }
2908 } else {
2909 pi = PyTuple_Pack(2, target, text);
2910 if (!pi) {
2911 return NULL;
2912 }
2913 }
2914
2915 if (self->events_append && self->pi_event_obj) {
2916 if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0)
2917 goto error;
2918 }
2919
2920 return pi;
2921
2922 error:
2923 Py_DECREF(pi);
2924 return NULL;
2925}
2926
Stefan Behneldde3eeb2019-05-01 21:49:58 +02002927LOCAL(PyObject*)
2928treebuilder_handle_start_ns(TreeBuilderObject* self, PyObject* prefix, PyObject* uri)
2929{
2930 PyObject* parcel;
2931
2932 if (self->events_append && self->start_ns_event_obj) {
2933 parcel = PyTuple_Pack(2, prefix, uri);
2934 if (!parcel) {
2935 return NULL;
2936 }
2937
2938 if (treebuilder_append_event(self, self->start_ns_event_obj, parcel) < 0) {
2939 Py_DECREF(parcel);
2940 return NULL;
2941 }
2942 Py_DECREF(parcel);
2943 }
2944
2945 Py_RETURN_NONE;
2946}
2947
2948LOCAL(PyObject*)
2949treebuilder_handle_end_ns(TreeBuilderObject* self, PyObject* prefix)
2950{
2951 if (self->events_append && self->end_ns_event_obj) {
2952 if (treebuilder_append_event(self, self->end_ns_event_obj, prefix) < 0) {
2953 return NULL;
2954 }
2955 }
2956
2957 Py_RETURN_NONE;
2958}
2959
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002960/* -------------------------------------------------------------------- */
2961/* methods (in alphabetical order) */
2962
Serhiy Storchakacb985562015-05-04 15:32:48 +03002963/*[clinic input]
2964_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002965
Serhiy Storchakacb985562015-05-04 15:32:48 +03002966 data: object
2967 /
2968
2969[clinic start generated code]*/
2970
2971static PyObject *
2972_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2973/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2974{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002975 return treebuilder_handle_data(self, data);
2976}
2977
Serhiy Storchakacb985562015-05-04 15:32:48 +03002978/*[clinic input]
2979_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002980
Serhiy Storchakacb985562015-05-04 15:32:48 +03002981 tag: object
2982 /
2983
2984[clinic start generated code]*/
2985
2986static PyObject *
2987_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2988/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2989{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002990 return treebuilder_handle_end(self, tag);
2991}
2992
Stefan Behnel43851a22019-05-01 21:20:38 +02002993/*[clinic input]
2994_elementtree.TreeBuilder.comment
2995
2996 text: object
2997 /
2998
2999[clinic start generated code]*/
3000
3001static PyObject *
3002_elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text)
3003/*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/
3004{
3005 return treebuilder_handle_comment(self, text);
3006}
3007
3008/*[clinic input]
3009_elementtree.TreeBuilder.pi
3010
3011 target: object
3012 text: object = None
3013 /
3014
3015[clinic start generated code]*/
3016
3017static PyObject *
3018_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target,
3019 PyObject *text)
3020/*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/
3021{
3022 return treebuilder_handle_pi(self, target, text);
3023}
3024
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003025LOCAL(PyObject*)
3026treebuilder_done(TreeBuilderObject* self)
3027{
3028 PyObject* res;
3029
3030 /* FIXME: check stack size? */
3031
3032 if (self->root)
3033 res = self->root;
3034 else
3035 res = Py_None;
3036
3037 Py_INCREF(res);
3038 return res;
3039}
3040
Serhiy Storchakacb985562015-05-04 15:32:48 +03003041/*[clinic input]
3042_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003043
Serhiy Storchakacb985562015-05-04 15:32:48 +03003044[clinic start generated code]*/
3045
3046static PyObject *
3047_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
3048/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
3049{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003050 return treebuilder_done(self);
3051}
3052
Serhiy Storchakacb985562015-05-04 15:32:48 +03003053/*[clinic input]
3054_elementtree.TreeBuilder.start
3055
3056 tag: object
3057 attrs: object = None
3058 /
3059
3060[clinic start generated code]*/
3061
3062static PyObject *
3063_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
3064 PyObject *attrs)
3065/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003066{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003067 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003068}
3069
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003070/* ==================================================================== */
3071/* the expat interface */
3072
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003073#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003074#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07003075
3076/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
3077 * cached globally without being in per-module state.
3078 */
Eli Bendersky20d41742012-06-01 09:48:37 +03003079static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003080#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003081
Eli Bendersky52467b12012-06-01 07:13:08 +03003082static XML_Memory_Handling_Suite ExpatMemoryHandler = {
3083 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
3084
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003085typedef struct {
3086 PyObject_HEAD
3087
3088 XML_Parser parser;
3089
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003090 PyObject *target;
3091 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003092
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003093 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003094
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003095 PyObject *handle_start_ns;
3096 PyObject *handle_end_ns;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003097 PyObject *handle_start;
3098 PyObject *handle_data;
3099 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003100
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003101 PyObject *handle_comment;
3102 PyObject *handle_pi;
3103 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003104
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003105 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003106
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003107} XMLParserObject;
3108
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003109/* helpers */
3110
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003111LOCAL(PyObject*)
3112makeuniversal(XMLParserObject* self, const char* string)
3113{
3114 /* convert a UTF-8 tag/attribute name from the expat parser
3115 to a universal name string */
3116
Antoine Pitrouc1948842012-10-01 23:40:37 +02003117 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003118 PyObject* key;
3119 PyObject* value;
3120
3121 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00003122 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003123 if (!key)
3124 return NULL;
3125
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003126 value = PyDict_GetItemWithError(self->names, key);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003127
3128 if (value) {
3129 Py_INCREF(value);
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003130 }
3131 else if (!PyErr_Occurred()) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003132 /* new name. convert to universal name, and decode as
3133 necessary */
3134
3135 PyObject* tag;
3136 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02003137 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003138
3139 /* look for namespace separator */
3140 for (i = 0; i < size; i++)
3141 if (string[i] == '}')
3142 break;
3143 if (i != size) {
3144 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00003145 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02003146 if (tag == NULL) {
3147 Py_DECREF(key);
3148 return NULL;
3149 }
Christian Heimes72b710a2008-05-26 13:28:38 +00003150 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003151 p[0] = '{';
3152 memcpy(p+1, string, size);
3153 size++;
3154 } else {
3155 /* plain name; use key as tag */
3156 Py_INCREF(key);
3157 tag = key;
3158 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003159
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003160 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00003161 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00003162 value = PyUnicode_DecodeUTF8(p, size, "strict");
3163 Py_DECREF(tag);
3164 if (!value) {
3165 Py_DECREF(key);
3166 return NULL;
3167 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003168
3169 /* add to names dictionary */
3170 if (PyDict_SetItem(self->names, key, value) < 0) {
3171 Py_DECREF(key);
3172 Py_DECREF(value);
3173 return NULL;
3174 }
3175 }
3176
3177 Py_DECREF(key);
3178 return value;
3179}
3180
Eli Bendersky5b77d812012-03-16 08:20:05 +02003181/* Set the ParseError exception with the given parameters.
3182 * If message is not NULL, it's used as the error string. Otherwise, the
3183 * message string is the default for the given error_code.
3184*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003185static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003186expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
3187 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003188{
Eli Bendersky5b77d812012-03-16 08:20:05 +02003189 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003190 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003191
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003192 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02003193 message ? message : EXPAT(ErrorString)(error_code),
3194 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01003195 if (errmsg == NULL)
3196 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003197
Stefan Behnel43851a22019-05-01 21:20:38 +02003198 error = _PyObject_FastCall(st->parseerror_obj, &errmsg, 1);
Victor Stinner499dfcf2011-03-21 13:26:24 +01003199 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003200 if (!error)
3201 return;
3202
Eli Bendersky5b77d812012-03-16 08:20:05 +02003203 /* Add code and position attributes */
3204 code = PyLong_FromLong((long)error_code);
3205 if (!code) {
3206 Py_DECREF(error);
3207 return;
3208 }
3209 if (PyObject_SetAttrString(error, "code", code) == -1) {
3210 Py_DECREF(error);
3211 Py_DECREF(code);
3212 return;
3213 }
3214 Py_DECREF(code);
3215
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003216 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003217 if (!position) {
3218 Py_DECREF(error);
3219 return;
3220 }
3221 if (PyObject_SetAttrString(error, "position", position) == -1) {
3222 Py_DECREF(error);
3223 Py_DECREF(position);
3224 return;
3225 }
3226 Py_DECREF(position);
3227
Eli Bendersky532d03e2013-08-10 08:00:39 -07003228 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003229 Py_DECREF(error);
3230}
3231
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003232/* -------------------------------------------------------------------- */
3233/* handlers */
3234
3235static void
3236expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
3237 int data_len)
3238{
3239 PyObject* key;
3240 PyObject* value;
3241 PyObject* res;
3242
3243 if (data_len < 2 || data_in[0] != '&')
3244 return;
3245
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003246 if (PyErr_Occurred())
3247 return;
3248
Neal Norwitz0269b912007-08-08 06:56:02 +00003249 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003250 if (!key)
3251 return;
3252
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003253 value = PyDict_GetItemWithError(self->entity, key);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003254
3255 if (value) {
3256 if (TreeBuilder_CheckExact(self->target))
3257 res = treebuilder_handle_data(
3258 (TreeBuilderObject*) self->target, value
3259 );
3260 else if (self->handle_data)
Stefan Behnel43851a22019-05-01 21:20:38 +02003261 res = _PyObject_FastCall(self->handle_data, &value, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003262 else
3263 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003264 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003265 } else if (!PyErr_Occurred()) {
3266 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00003267 char message[128] = "undefined entity ";
3268 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003269 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003270 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003271 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003272 EXPAT(GetErrorColumnNumber)(self->parser),
3273 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003274 );
3275 }
3276
3277 Py_DECREF(key);
3278}
3279
3280static void
3281expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
3282 const XML_Char **attrib_in)
3283{
3284 PyObject* res;
3285 PyObject* tag;
3286 PyObject* attrib;
3287 int ok;
3288
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003289 if (PyErr_Occurred())
3290 return;
3291
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003292 /* tag name */
3293 tag = makeuniversal(self, tag_in);
3294 if (!tag)
3295 return; /* parser will look for errors */
3296
3297 /* attributes */
3298 if (attrib_in[0]) {
3299 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003300 if (!attrib) {
3301 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003302 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003303 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003304 while (attrib_in[0] && attrib_in[1]) {
3305 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00003306 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003307 if (!key || !value) {
3308 Py_XDECREF(value);
3309 Py_XDECREF(key);
3310 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003311 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003312 return;
3313 }
3314 ok = PyDict_SetItem(attrib, key, value);
3315 Py_DECREF(value);
3316 Py_DECREF(key);
3317 if (ok < 0) {
3318 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003319 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003320 return;
3321 }
3322 attrib_in += 2;
3323 }
3324 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003325 Py_INCREF(Py_None);
3326 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03003327 }
3328
3329 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003330 /* shortcut */
3331 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3332 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003333 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003334 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003335 if (attrib == Py_None) {
3336 Py_DECREF(attrib);
3337 attrib = PyDict_New();
3338 if (!attrib) {
3339 Py_DECREF(tag);
3340 return;
3341 }
3342 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003343 res = PyObject_CallFunctionObjArgs(self->handle_start,
3344 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003345 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003346 res = NULL;
3347
3348 Py_DECREF(tag);
3349 Py_DECREF(attrib);
3350
3351 Py_XDECREF(res);
3352}
3353
3354static void
3355expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3356 int data_len)
3357{
3358 PyObject* data;
3359 PyObject* res;
3360
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003361 if (PyErr_Occurred())
3362 return;
3363
Neal Norwitz0269b912007-08-08 06:56:02 +00003364 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003365 if (!data)
3366 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003367
3368 if (TreeBuilder_CheckExact(self->target))
3369 /* shortcut */
3370 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3371 else if (self->handle_data)
Stefan Behnel43851a22019-05-01 21:20:38 +02003372 res = _PyObject_FastCall(self->handle_data, &data, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003373 else
3374 res = NULL;
3375
3376 Py_DECREF(data);
3377
3378 Py_XDECREF(res);
3379}
3380
3381static void
3382expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3383{
3384 PyObject* tag;
3385 PyObject* res = NULL;
3386
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003387 if (PyErr_Occurred())
3388 return;
3389
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003390 if (TreeBuilder_CheckExact(self->target))
3391 /* shortcut */
3392 /* the standard tree builder doesn't look at the end tag */
3393 res = treebuilder_handle_end(
3394 (TreeBuilderObject*) self->target, Py_None
3395 );
3396 else if (self->handle_end) {
3397 tag = makeuniversal(self, tag_in);
3398 if (tag) {
Stefan Behnel43851a22019-05-01 21:20:38 +02003399 res = _PyObject_FastCall(self->handle_end, &tag, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003400 Py_DECREF(tag);
3401 }
3402 }
3403
3404 Py_XDECREF(res);
3405}
3406
3407static void
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003408expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix_in,
3409 const XML_Char *uri_in)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003410{
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003411 PyObject* res = NULL;
3412 PyObject* uri;
3413 PyObject* prefix;
3414 PyObject* stack[2];
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003415
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003416 if (PyErr_Occurred())
3417 return;
3418
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003419 if (!uri_in)
3420 uri_in = "";
3421 if (!prefix_in)
3422 prefix_in = "";
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003423
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003424 if (TreeBuilder_CheckExact(self->target)) {
3425 /* shortcut - TreeBuilder does not actually implement .start_ns() */
3426 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003427
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003428 if (target->events_append && target->start_ns_event_obj) {
3429 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3430 if (!prefix)
3431 return;
3432 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3433 if (!uri) {
3434 Py_DECREF(prefix);
3435 return;
3436 }
3437
3438 res = treebuilder_handle_start_ns(target, prefix, uri);
3439 Py_DECREF(uri);
3440 Py_DECREF(prefix);
3441 }
3442 } else if (self->handle_start_ns) {
3443 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3444 if (!prefix)
3445 return;
3446 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3447 if (!uri) {
3448 Py_DECREF(prefix);
3449 return;
3450 }
3451
3452 stack[0] = prefix;
3453 stack[1] = uri;
3454 res = _PyObject_FastCall(self->handle_start_ns, stack, 2);
3455 Py_DECREF(uri);
3456 Py_DECREF(prefix);
3457 }
3458
3459 Py_XDECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003460}
3461
3462static void
3463expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3464{
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003465 PyObject *res = NULL;
3466 PyObject* prefix;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003467
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003468 if (PyErr_Occurred())
3469 return;
3470
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003471 if (!prefix_in)
3472 prefix_in = "";
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003473
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003474 if (TreeBuilder_CheckExact(self->target)) {
3475 /* shortcut - TreeBuilder does not actually implement .end_ns() */
3476 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3477
3478 if (target->events_append && target->end_ns_event_obj) {
3479 res = treebuilder_handle_end_ns(target, Py_None);
3480 }
3481 } else if (self->handle_end_ns) {
3482 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3483 if (!prefix)
3484 return;
3485
3486 res = _PyObject_FastCall(self->handle_end_ns, &prefix, 1);
3487 Py_DECREF(prefix);
3488 }
3489
3490 Py_XDECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003491}
3492
3493static void
3494expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3495{
Stefan Behnel43851a22019-05-01 21:20:38 +02003496 PyObject* comment = NULL;
3497 PyObject* res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003498
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003499 if (PyErr_Occurred())
3500 return;
3501
Stefan Behnel43851a22019-05-01 21:20:38 +02003502 if (TreeBuilder_CheckExact(self->target)) {
3503 /* shortcut */
3504 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3505
Neal Norwitz0269b912007-08-08 06:56:02 +00003506 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Stefan Behnel43851a22019-05-01 21:20:38 +02003507 if (!comment)
3508 return; /* parser will look for errors */
3509
3510 res = treebuilder_handle_comment(target, comment);
3511 } else if (self->handle_comment) {
3512 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3513 if (!comment)
3514 return;
3515
3516 res = _PyObject_FastCall(self->handle_comment, &comment, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003517 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003518
3519 Py_XDECREF(res);
3520 Py_DECREF(comment);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003521}
3522
Eli Bendersky45839902013-01-13 05:14:47 -08003523static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003524expat_start_doctype_handler(XMLParserObject *self,
3525 const XML_Char *doctype_name,
3526 const XML_Char *sysid,
3527 const XML_Char *pubid,
3528 int has_internal_subset)
3529{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003530 _Py_IDENTIFIER(doctype);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003531 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003532 PyObject *res;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003533
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003534 if (PyErr_Occurred())
3535 return;
3536
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003537 doctype_name_obj = makeuniversal(self, doctype_name);
3538 if (!doctype_name_obj)
3539 return;
3540
3541 if (sysid) {
3542 sysid_obj = makeuniversal(self, sysid);
3543 if (!sysid_obj) {
3544 Py_DECREF(doctype_name_obj);
3545 return;
3546 }
3547 } else {
3548 Py_INCREF(Py_None);
3549 sysid_obj = Py_None;
3550 }
3551
3552 if (pubid) {
3553 pubid_obj = makeuniversal(self, pubid);
3554 if (!pubid_obj) {
3555 Py_DECREF(doctype_name_obj);
3556 Py_DECREF(sysid_obj);
3557 return;
3558 }
3559 } else {
3560 Py_INCREF(Py_None);
3561 pubid_obj = Py_None;
3562 }
3563
3564 /* If the target has a handler for doctype, call it. */
3565 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003566 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3567 doctype_name_obj, pubid_obj,
3568 sysid_obj, NULL);
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003569 Py_XDECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003570 }
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003571 else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3572 (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3573 "The doctype() method of XMLParser is ignored. "
3574 "Define doctype() method on the TreeBuilder target.",
3575 1);
Serhiy Storchakaee98e7b2018-07-25 14:52:45 +03003576 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003577 }
3578
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003579 Py_DECREF(doctype_name_obj);
3580 Py_DECREF(pubid_obj);
3581 Py_DECREF(sysid_obj);
3582}
3583
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003584static void
3585expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3586 const XML_Char* data_in)
3587{
Stefan Behnel43851a22019-05-01 21:20:38 +02003588 PyObject* pi_target = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003589 PyObject* data;
3590 PyObject* res;
Stefan Behnel43851a22019-05-01 21:20:38 +02003591 PyObject* stack[2];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003592
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003593 if (PyErr_Occurred())
3594 return;
3595
Stefan Behnel43851a22019-05-01 21:20:38 +02003596 if (TreeBuilder_CheckExact(self->target)) {
3597 /* shortcut */
3598 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3599
3600 if (target->events_append && target->pi_event_obj) {
3601 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3602 if (!pi_target)
3603 goto error;
3604 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3605 if (!data)
3606 goto error;
3607 res = treebuilder_handle_pi(target, pi_target, data);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003608 Py_XDECREF(res);
3609 Py_DECREF(data);
Stefan Behnel43851a22019-05-01 21:20:38 +02003610 Py_DECREF(pi_target);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003611 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003612 } else if (self->handle_pi) {
3613 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3614 if (!pi_target)
3615 goto error;
3616 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3617 if (!data)
3618 goto error;
3619
3620 stack[0] = pi_target;
3621 stack[1] = data;
3622 res = _PyObject_FastCall(self->handle_pi, stack, 2);
3623 Py_XDECREF(res);
3624 Py_DECREF(data);
3625 Py_DECREF(pi_target);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003626 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003627
3628 return;
3629
3630 error:
3631 Py_XDECREF(pi_target);
3632 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003633}
3634
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003635/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003636
Eli Bendersky52467b12012-06-01 07:13:08 +03003637static PyObject *
3638xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003639{
Eli Bendersky52467b12012-06-01 07:13:08 +03003640 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3641 if (self) {
3642 self->parser = NULL;
3643 self->target = self->entity = self->names = NULL;
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003644 self->handle_start_ns = self->handle_end_ns = NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003645 self->handle_start = self->handle_data = self->handle_end = NULL;
3646 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003647 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003648 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003649 return (PyObject *)self;
3650}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003651
scoderc8d8e152017-09-14 22:00:03 +02003652static int
3653ignore_attribute_error(PyObject *value)
3654{
3655 if (value == NULL) {
3656 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3657 return -1;
3658 }
3659 PyErr_Clear();
3660 }
3661 return 0;
3662}
3663
Serhiy Storchakacb985562015-05-04 15:32:48 +03003664/*[clinic input]
3665_elementtree.XMLParser.__init__
3666
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003667 *
Serhiy Storchakacb985562015-05-04 15:32:48 +03003668 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003669 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003670
3671[clinic start generated code]*/
3672
Eli Bendersky52467b12012-06-01 07:13:08 +03003673static int
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003674_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3675 const char *encoding)
3676/*[clinic end generated code: output=3ae45ec6cdf344e4 input=96288fcba916cfce]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003677{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003678 self->entity = PyDict_New();
3679 if (!self->entity)
3680 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003681
Serhiy Storchakacb985562015-05-04 15:32:48 +03003682 self->names = PyDict_New();
3683 if (!self->names) {
3684 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003685 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003686 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003687
Serhiy Storchakacb985562015-05-04 15:32:48 +03003688 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3689 if (!self->parser) {
3690 Py_CLEAR(self->entity);
3691 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003692 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003693 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003694 }
Christian Heimescb5778f2018-09-18 14:38:58 +02003695 /* expat < 2.1.0 has no XML_SetHashSalt() */
3696 if (EXPAT(SetHashSalt) != NULL) {
3697 EXPAT(SetHashSalt)(self->parser,
3698 (unsigned long)_Py_HashSecret.expat.hashsalt);
3699 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003700
Eli Bendersky52467b12012-06-01 07:13:08 +03003701 if (target) {
3702 Py_INCREF(target);
3703 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003704 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003705 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003706 Py_CLEAR(self->entity);
3707 Py_CLEAR(self->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003708 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003709 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003710 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003711 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003712
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003713 self->handle_start_ns = PyObject_GetAttrString(target, "start_ns");
3714 if (ignore_attribute_error(self->handle_start_ns)) {
3715 return -1;
3716 }
3717 self->handle_end_ns = PyObject_GetAttrString(target, "end_ns");
3718 if (ignore_attribute_error(self->handle_end_ns)) {
3719 return -1;
3720 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003721 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003722 if (ignore_attribute_error(self->handle_start)) {
3723 return -1;
3724 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003725 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003726 if (ignore_attribute_error(self->handle_data)) {
3727 return -1;
3728 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003729 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003730 if (ignore_attribute_error(self->handle_end)) {
3731 return -1;
3732 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003733 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003734 if (ignore_attribute_error(self->handle_comment)) {
3735 return -1;
3736 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003737 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003738 if (ignore_attribute_error(self->handle_pi)) {
3739 return -1;
3740 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003741 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003742 if (ignore_attribute_error(self->handle_close)) {
3743 return -1;
3744 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003745 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003746 if (ignore_attribute_error(self->handle_doctype)) {
3747 return -1;
3748 }
Eli Bendersky45839902013-01-13 05:14:47 -08003749
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003750 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003751 EXPAT(SetUserData)(self->parser, self);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003752 if (self->handle_start_ns || self->handle_end_ns)
3753 EXPAT(SetNamespaceDeclHandler)(
3754 self->parser,
3755 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3756 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3757 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003758 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003759 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003760 (XML_StartElementHandler) expat_start_handler,
3761 (XML_EndElementHandler) expat_end_handler
3762 );
3763 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003764 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003765 (XML_DefaultHandler) expat_default_handler
3766 );
3767 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003768 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003769 (XML_CharacterDataHandler) expat_data_handler
3770 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003771 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003772 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003773 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003774 (XML_CommentHandler) expat_comment_handler
3775 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003776 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003777 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003778 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003779 (XML_ProcessingInstructionHandler) expat_pi_handler
3780 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003781 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003782 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003783 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3784 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003785 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003786 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003787 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003788 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003789
Eli Bendersky52467b12012-06-01 07:13:08 +03003790 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003791}
3792
Eli Bendersky52467b12012-06-01 07:13:08 +03003793static int
3794xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3795{
3796 Py_VISIT(self->handle_close);
3797 Py_VISIT(self->handle_pi);
3798 Py_VISIT(self->handle_comment);
3799 Py_VISIT(self->handle_end);
3800 Py_VISIT(self->handle_data);
3801 Py_VISIT(self->handle_start);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003802 Py_VISIT(self->handle_start_ns);
3803 Py_VISIT(self->handle_end_ns);
3804 Py_VISIT(self->handle_doctype);
Eli Bendersky52467b12012-06-01 07:13:08 +03003805
3806 Py_VISIT(self->target);
3807 Py_VISIT(self->entity);
3808 Py_VISIT(self->names);
3809
3810 return 0;
3811}
3812
3813static int
3814xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003815{
Victor Stinnere727d412017-09-18 05:29:37 -07003816 if (self->parser != NULL) {
3817 XML_Parser parser = self->parser;
3818 self->parser = NULL;
3819 EXPAT(ParserFree)(parser);
3820 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003821
Antoine Pitrouc1948842012-10-01 23:40:37 +02003822 Py_CLEAR(self->handle_close);
3823 Py_CLEAR(self->handle_pi);
3824 Py_CLEAR(self->handle_comment);
3825 Py_CLEAR(self->handle_end);
3826 Py_CLEAR(self->handle_data);
3827 Py_CLEAR(self->handle_start);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003828 Py_CLEAR(self->handle_start_ns);
3829 Py_CLEAR(self->handle_end_ns);
Antoine Pitrouc1948842012-10-01 23:40:37 +02003830 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003831
Antoine Pitrouc1948842012-10-01 23:40:37 +02003832 Py_CLEAR(self->target);
3833 Py_CLEAR(self->entity);
3834 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003835
Eli Bendersky52467b12012-06-01 07:13:08 +03003836 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003837}
3838
Eli Bendersky52467b12012-06-01 07:13:08 +03003839static void
3840xmlparser_dealloc(XMLParserObject* self)
3841{
3842 PyObject_GC_UnTrack(self);
3843 xmlparser_gc_clear(self);
3844 Py_TYPE(self)->tp_free((PyObject *)self);
3845}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003846
3847LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003848expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003849{
3850 int ok;
3851
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003852 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003853 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3854
3855 if (PyErr_Occurred())
3856 return NULL;
3857
3858 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003859 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003860 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003861 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003862 EXPAT(GetErrorColumnNumber)(self->parser),
3863 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003864 );
3865 return NULL;
3866 }
3867
3868 Py_RETURN_NONE;
3869}
3870
Serhiy Storchakacb985562015-05-04 15:32:48 +03003871/*[clinic input]
3872_elementtree.XMLParser.close
3873
3874[clinic start generated code]*/
3875
3876static PyObject *
3877_elementtree_XMLParser_close_impl(XMLParserObject *self)
3878/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003879{
3880 /* end feeding data to parser */
3881
3882 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003883 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003884 if (!res)
3885 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003886
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003887 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003888 Py_DECREF(res);
3889 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003890 }
3891 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003892 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003893 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003894 }
3895 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003896 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003897 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003898}
3899
Serhiy Storchakacb985562015-05-04 15:32:48 +03003900/*[clinic input]
3901_elementtree.XMLParser.feed
3902
3903 data: object
3904 /
3905
3906[clinic start generated code]*/
3907
3908static PyObject *
3909_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3910/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003911{
3912 /* feed data to parser */
3913
Serhiy Storchakacb985562015-05-04 15:32:48 +03003914 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003915 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003916 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3917 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003918 return NULL;
3919 if (data_len > INT_MAX) {
3920 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3921 return NULL;
3922 }
3923 /* Explicitly set UTF-8 encoding. Return code ignored. */
3924 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003925 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003926 }
3927 else {
3928 Py_buffer view;
3929 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003930 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003931 return NULL;
3932 if (view.len > INT_MAX) {
3933 PyBuffer_Release(&view);
3934 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3935 return NULL;
3936 }
3937 res = expat_parse(self, view.buf, (int)view.len, 0);
3938 PyBuffer_Release(&view);
3939 return res;
3940 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003941}
3942
Serhiy Storchakacb985562015-05-04 15:32:48 +03003943/*[clinic input]
3944_elementtree.XMLParser._parse_whole
3945
3946 file: object
3947 /
3948
3949[clinic start generated code]*/
3950
3951static PyObject *
3952_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3953/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003954{
Eli Benderskya3699232013-05-19 18:47:23 -07003955 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003956 PyObject* reader;
3957 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003958 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003959 PyObject* res;
3960
Serhiy Storchakacb985562015-05-04 15:32:48 +03003961 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003962 if (!reader)
3963 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003964
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003965 /* read from open file object */
3966 for (;;) {
3967
3968 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3969
3970 if (!buffer) {
3971 /* read failed (e.g. due to KeyboardInterrupt) */
3972 Py_DECREF(reader);
3973 return NULL;
3974 }
3975
Eli Benderskyf996e772012-03-16 05:53:30 +02003976 if (PyUnicode_CheckExact(buffer)) {
3977 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003978 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003979 Py_DECREF(buffer);
3980 break;
3981 }
3982 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003983 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003984 if (!temp) {
3985 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003986 Py_DECREF(reader);
3987 return NULL;
3988 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003989 buffer = temp;
3990 }
3991 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003992 Py_DECREF(buffer);
3993 break;
3994 }
3995
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003996 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3997 Py_DECREF(buffer);
3998 Py_DECREF(reader);
3999 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
4000 return NULL;
4001 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004002 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02004003 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004004 );
4005
4006 Py_DECREF(buffer);
4007
4008 if (!res) {
4009 Py_DECREF(reader);
4010 return NULL;
4011 }
4012 Py_DECREF(res);
4013
4014 }
4015
4016 Py_DECREF(reader);
4017
4018 res = expat_parse(self, "", 0, 1);
4019
4020 if (res && TreeBuilder_CheckExact(self->target)) {
4021 Py_DECREF(res);
4022 return treebuilder_done((TreeBuilderObject*) self->target);
4023 }
4024
4025 return res;
4026}
4027
Serhiy Storchakacb985562015-05-04 15:32:48 +03004028/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03004029_elementtree.XMLParser._setevents
4030
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004031 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03004032 events_to_report: object = None
4033 /
4034
4035[clinic start generated code]*/
4036
4037static PyObject *
4038_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
4039 PyObject *events_queue,
4040 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004041/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004042{
4043 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004044 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004045 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004046 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004047
4048 if (!TreeBuilder_CheckExact(self->target)) {
4049 PyErr_SetString(
4050 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004051 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004052 "targets"
4053 );
4054 return NULL;
4055 }
4056
4057 target = (TreeBuilderObject*) self->target;
4058
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004059 events_append = PyObject_GetAttrString(events_queue, "append");
4060 if (events_append == NULL)
4061 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03004062 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004063
4064 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02004065 Py_CLEAR(target->start_event_obj);
4066 Py_CLEAR(target->end_event_obj);
4067 Py_CLEAR(target->start_ns_event_obj);
4068 Py_CLEAR(target->end_ns_event_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +02004069 Py_CLEAR(target->comment_event_obj);
4070 Py_CLEAR(target->pi_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004071
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004072 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004073 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004074 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004075 Py_RETURN_NONE;
4076 }
4077
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004078 if (!(events_seq = PySequence_Fast(events_to_report,
4079 "events must be a sequence"))) {
4080 return NULL;
4081 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004082
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03004083 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004084 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02004085 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004086 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004087 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004088 } else if (PyBytes_Check(event_name_obj)) {
4089 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004090 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004091 if (event_name == NULL) {
4092 Py_DECREF(events_seq);
4093 PyErr_Format(PyExc_ValueError, "invalid events sequence");
4094 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004095 }
4096
4097 Py_INCREF(event_name_obj);
4098 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004099 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004100 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004101 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004102 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004103 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004104 EXPAT(SetNamespaceDeclHandler)(
4105 self->parser,
4106 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4107 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4108 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004109 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004110 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004111 EXPAT(SetNamespaceDeclHandler)(
4112 self->parser,
4113 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4114 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4115 );
Stefan Behnel43851a22019-05-01 21:20:38 +02004116 } else if (strcmp(event_name, "comment") == 0) {
4117 Py_XSETREF(target->comment_event_obj, event_name_obj);
4118 EXPAT(SetCommentHandler)(
4119 self->parser,
4120 (XML_CommentHandler) expat_comment_handler
4121 );
4122 } else if (strcmp(event_name, "pi") == 0) {
4123 Py_XSETREF(target->pi_event_obj, event_name_obj);
4124 EXPAT(SetProcessingInstructionHandler)(
4125 self->parser,
4126 (XML_ProcessingInstructionHandler) expat_pi_handler
4127 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004128 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004129 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004130 Py_DECREF(events_seq);
4131 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004132 return NULL;
4133 }
4134 }
4135
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004136 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004137 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004138}
4139
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004140static PyMemberDef xmlparser_members[] = {
4141 {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
4142 {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
4143 {NULL}
4144};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004145
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004146static PyObject*
4147xmlparser_version_getter(XMLParserObject *self, void *closure)
4148{
4149 return PyUnicode_FromFormat(
4150 "Expat %d.%d.%d", XML_MAJOR_VERSION,
4151 XML_MINOR_VERSION, XML_MICRO_VERSION);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004152}
4153
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004154static PyGetSetDef xmlparser_getsetlist[] = {
4155 {"version", (getter)xmlparser_version_getter, NULL, NULL},
4156 {NULL},
4157};
4158
Serhiy Storchakacb985562015-05-04 15:32:48 +03004159#include "clinic/_elementtree.c.h"
4160
4161static PyMethodDef element_methods[] = {
4162
4163 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
4164
4165 _ELEMENTTREE_ELEMENT_GET_METHODDEF
4166 _ELEMENTTREE_ELEMENT_SET_METHODDEF
4167
4168 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
4169 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
4170 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
4171
4172 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
4173 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
4174 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
4175 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
4176
4177 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
4178 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
4179 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
4180
Serhiy Storchaka762ec972017-03-30 18:12:06 +03004181 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004182 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
4183
4184 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
4185 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
4186
4187 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
4188
4189 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
4190 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
4191 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
4192 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
4193 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
4194
4195 {NULL, NULL}
4196};
4197
4198static PyMappingMethods element_as_mapping = {
4199 (lenfunc) element_length,
4200 (binaryfunc) element_subscr,
4201 (objobjargproc) element_ass_subscr,
4202};
4203
Serhiy Storchakadde08152015-11-25 15:28:13 +02004204static PyGetSetDef element_getsetlist[] = {
4205 {"tag",
4206 (getter)element_tag_getter,
4207 (setter)element_tag_setter,
4208 "A string identifying what kind of data this element represents"},
4209 {"text",
4210 (getter)element_text_getter,
4211 (setter)element_text_setter,
4212 "A string of text directly after the start tag, or None"},
4213 {"tail",
4214 (getter)element_tail_getter,
4215 (setter)element_tail_setter,
4216 "A string of text directly after the end tag, or None"},
4217 {"attrib",
4218 (getter)element_attrib_getter,
4219 (setter)element_attrib_setter,
4220 "A dictionary containing the element's attributes"},
4221 {NULL},
4222};
4223
Serhiy Storchakacb985562015-05-04 15:32:48 +03004224static PyTypeObject Element_Type = {
4225 PyVarObject_HEAD_INIT(NULL, 0)
4226 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
4227 /* methods */
4228 (destructor)element_dealloc, /* tp_dealloc */
4229 0, /* tp_print */
4230 0, /* tp_getattr */
4231 0, /* tp_setattr */
4232 0, /* tp_reserved */
4233 (reprfunc)element_repr, /* tp_repr */
4234 0, /* tp_as_number */
4235 &element_as_sequence, /* tp_as_sequence */
4236 &element_as_mapping, /* tp_as_mapping */
4237 0, /* tp_hash */
4238 0, /* tp_call */
4239 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02004240 PyObject_GenericGetAttr, /* tp_getattro */
4241 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004242 0, /* tp_as_buffer */
4243 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4244 /* tp_flags */
4245 0, /* tp_doc */
4246 (traverseproc)element_gc_traverse, /* tp_traverse */
4247 (inquiry)element_gc_clear, /* tp_clear */
4248 0, /* tp_richcompare */
4249 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
4250 0, /* tp_iter */
4251 0, /* tp_iternext */
4252 element_methods, /* tp_methods */
4253 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02004254 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004255 0, /* tp_base */
4256 0, /* tp_dict */
4257 0, /* tp_descr_get */
4258 0, /* tp_descr_set */
4259 0, /* tp_dictoffset */
4260 (initproc)element_init, /* tp_init */
4261 PyType_GenericAlloc, /* tp_alloc */
4262 element_new, /* tp_new */
4263 0, /* tp_free */
4264};
4265
4266static PyMethodDef treebuilder_methods[] = {
4267 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
4268 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
4269 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
Stefan Behnel43851a22019-05-01 21:20:38 +02004270 _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF
4271 _ELEMENTTREE_TREEBUILDER_PI_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004272 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
4273 {NULL, NULL}
4274};
4275
4276static PyTypeObject TreeBuilder_Type = {
4277 PyVarObject_HEAD_INIT(NULL, 0)
4278 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
4279 /* methods */
4280 (destructor)treebuilder_dealloc, /* tp_dealloc */
4281 0, /* tp_print */
4282 0, /* tp_getattr */
4283 0, /* tp_setattr */
4284 0, /* tp_reserved */
4285 0, /* tp_repr */
4286 0, /* tp_as_number */
4287 0, /* tp_as_sequence */
4288 0, /* tp_as_mapping */
4289 0, /* tp_hash */
4290 0, /* tp_call */
4291 0, /* tp_str */
4292 0, /* tp_getattro */
4293 0, /* tp_setattro */
4294 0, /* tp_as_buffer */
4295 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4296 /* tp_flags */
4297 0, /* tp_doc */
4298 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
4299 (inquiry)treebuilder_gc_clear, /* tp_clear */
4300 0, /* tp_richcompare */
4301 0, /* tp_weaklistoffset */
4302 0, /* tp_iter */
4303 0, /* tp_iternext */
4304 treebuilder_methods, /* tp_methods */
4305 0, /* tp_members */
4306 0, /* tp_getset */
4307 0, /* tp_base */
4308 0, /* tp_dict */
4309 0, /* tp_descr_get */
4310 0, /* tp_descr_set */
4311 0, /* tp_dictoffset */
4312 _elementtree_TreeBuilder___init__, /* tp_init */
4313 PyType_GenericAlloc, /* tp_alloc */
4314 treebuilder_new, /* tp_new */
4315 0, /* tp_free */
4316};
4317
4318static PyMethodDef xmlparser_methods[] = {
4319 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
4320 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
4321 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
4322 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004323 {NULL, NULL}
4324};
4325
Neal Norwitz227b5332006-03-22 09:28:35 +00004326static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004327 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08004328 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004329 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03004330 (destructor)xmlparser_dealloc, /* tp_dealloc */
4331 0, /* tp_print */
4332 0, /* tp_getattr */
4333 0, /* tp_setattr */
4334 0, /* tp_reserved */
4335 0, /* tp_repr */
4336 0, /* tp_as_number */
4337 0, /* tp_as_sequence */
4338 0, /* tp_as_mapping */
4339 0, /* tp_hash */
4340 0, /* tp_call */
4341 0, /* tp_str */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004342 0, /* tp_getattro */
Eli Bendersky52467b12012-06-01 07:13:08 +03004343 0, /* tp_setattro */
4344 0, /* tp_as_buffer */
4345 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4346 /* tp_flags */
4347 0, /* tp_doc */
4348 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
4349 (inquiry)xmlparser_gc_clear, /* tp_clear */
4350 0, /* tp_richcompare */
4351 0, /* tp_weaklistoffset */
4352 0, /* tp_iter */
4353 0, /* tp_iternext */
4354 xmlparser_methods, /* tp_methods */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004355 xmlparser_members, /* tp_members */
4356 xmlparser_getsetlist, /* tp_getset */
Eli Bendersky52467b12012-06-01 07:13:08 +03004357 0, /* tp_base */
4358 0, /* tp_dict */
4359 0, /* tp_descr_get */
4360 0, /* tp_descr_set */
4361 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004362 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03004363 PyType_GenericAlloc, /* tp_alloc */
4364 xmlparser_new, /* tp_new */
4365 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004366};
4367
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004368/* ==================================================================== */
4369/* python module interface */
4370
4371static PyMethodDef _functions[] = {
Serhiy Storchaka62be7422018-11-27 13:27:31 +02004372 {"SubElement", (PyCFunction)(void(*)(void)) subelement, METH_VARARGS | METH_KEYWORDS},
Stefan Behnel43851a22019-05-01 21:20:38 +02004373 _ELEMENTTREE__SET_FACTORIES_METHODDEF
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004374 {NULL, NULL}
4375};
4376
Martin v. Löwis1a214512008-06-11 05:26:20 +00004377
Eli Bendersky532d03e2013-08-10 08:00:39 -07004378static struct PyModuleDef elementtreemodule = {
4379 PyModuleDef_HEAD_INIT,
4380 "_elementtree",
4381 NULL,
4382 sizeof(elementtreestate),
4383 _functions,
4384 NULL,
4385 elementtree_traverse,
4386 elementtree_clear,
4387 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00004388};
4389
Neal Norwitzf6657e62006-12-28 04:47:50 +00004390PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00004391PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004392{
Eli Bendersky64d11e62012-06-15 07:42:50 +03004393 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004394 elementtreestate *st;
4395
4396 m = PyState_FindModule(&elementtreemodule);
4397 if (m) {
4398 Py_INCREF(m);
4399 return m;
4400 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004401
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004402 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02004403 if (PyType_Ready(&ElementIter_Type) < 0)
4404 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004405 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004406 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004407 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004408 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004409 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004410 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004411
Eli Bendersky532d03e2013-08-10 08:00:39 -07004412 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00004413 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00004414 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004415 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00004416
Eli Bendersky828efde2012-04-05 05:40:58 +03004417 if (!(temp = PyImport_ImportModule("copy")))
4418 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004419 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03004420 Py_XDECREF(temp);
4421
Victor Stinnerb136f112017-07-10 22:28:02 +02004422 if (st->deepcopy_obj == NULL) {
4423 return NULL;
4424 }
4425
4426 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004427 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004428 return NULL;
4429
Eli Bendersky20d41742012-06-01 09:48:37 +03004430 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004431 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4432 if (expat_capi) {
4433 /* check that it's usable */
4434 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004435 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004436 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4437 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004438 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004439 PyErr_SetString(PyExc_ImportError,
4440 "pyexpat version is incompatible");
4441 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004442 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004443 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004444 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004445 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004446
Eli Bendersky532d03e2013-08-10 08:00:39 -07004447 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004448 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004449 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004450 Py_INCREF(st->parseerror_obj);
4451 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004452
Eli Bendersky092af1f2012-03-04 07:14:03 +02004453 Py_INCREF((PyObject *)&Element_Type);
4454 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4455
Eli Bendersky58d548d2012-05-29 15:45:16 +03004456 Py_INCREF((PyObject *)&TreeBuilder_Type);
4457 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4458
Eli Bendersky52467b12012-06-01 07:13:08 +03004459 Py_INCREF((PyObject *)&XMLParser_Type);
4460 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004461
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004462 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004463}