/*--------------------------------------------------------------------
 * Licensed to PSF under a Contributor Agreement.
 * See http://www.python.org/psf/license for licensing details.
 *
 * _elementtree - C accelerator for xml.etree.ElementTree
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 *--------------------------------------------------------------------
 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* An element can hold this many children without extra memory
   allocations.  The value sizes the inline '_children' array of
   ElementObjectExtra and is the initial 'allocated' count set by
   create_extra(). */
#define STATIC_CHILDREN 4

/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  When the "#if 0" branch is enabled,
   ALLOC/RELEASE keep a running total in 'memory' and print it together
   with the given comment.  In the default configuration (the #else
   branch) both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) introduces a file-local (static) helper
   returning 'type'.  Under MSVC it additionally requests __inline and
   the __fastcall calling convention. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55
/* macros used to store 'join' flags in string object pointers.  note
   that all use of text and tail as object pointers must be wrapped in
   JOIN_OBJ.  see comments in the ElementObject definition for more
   info.

   JOIN_GET(p)        yields the lowest bit of the pointer value (the flag).
   JOIN_SET(p, flag)  clears the lowest bit of p and ORs 'flag' into it.
   JOIN_OBJ(p)        yields p with the lowest bit cleared, as a PyObject*. */
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
/* Types defined by this extension.  They are declared up front so that
   the code below can refer to the type objects by address. */
static PyTypeObject Element_Type;
static PyTypeObject ElementIter_Type;
static PyTypeObject TreeBuilder_Type;
static PyTypeObject XMLParser_Type;
88
89
/* Per-module state; PEP 3121.
 *
 * Every member is an object reference: elementtree_clear() releases all
 * of them and elementtree_traverse() reports all of them to the garbage
 * collector.  The code that fills them in is outside this section.
 */
typedef struct {
    PyObject *parseerror_obj;
    PyObject *deepcopy_obj;
    PyObject *elementpath_obj;
    PyObject *comment_factory;
    PyObject *pi_factory;
} elementtreestate;
98
/* Forward declaration of the module definition; ET_STATE_GLOBAL needs its
 * address.
 */
static struct PyModuleDef elementtreemodule;

/* Given a module object (assumed to be _elementtree), get its per-module
 * state.
 */
#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))

/* Find the module instance imported in the currently running sub-interpreter
 * and get its state.  The result of PyState_FindModule() is passed straight
 * to PyModule_GetState() without a NULL check.
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
111
112static int
113elementtree_clear(PyObject *m)
114{
115 elementtreestate *st = ET_STATE(m);
116 Py_CLEAR(st->parseerror_obj);
117 Py_CLEAR(st->deepcopy_obj);
118 Py_CLEAR(st->elementpath_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +0200119 Py_CLEAR(st->comment_factory);
120 Py_CLEAR(st->pi_factory);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700121 return 0;
122}
123
124static int
125elementtree_traverse(PyObject *m, visitproc visit, void *arg)
126{
127 elementtreestate *st = ET_STATE(m);
128 Py_VISIT(st->parseerror_obj);
129 Py_VISIT(st->deepcopy_obj);
130 Py_VISIT(st->elementpath_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +0200131 Py_VISIT(st->comment_factory);
132 Py_VISIT(st->pi_factory);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700133 return 0;
134}
135
136static void
137elementtree_free(void *m)
138{
139 elementtree_clear((PyObject *)m);
140}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000141
142/* helpers */
143
144LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145list_join(PyObject* list)
146{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300147 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000148 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 PyObject* result;
150
Antoine Pitrouc1948842012-10-01 23:40:37 +0200151 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000152 if (!joiner)
153 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200154 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000155 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000156 return result;
157}
158
Eli Bendersky48d358b2012-05-30 17:57:50 +0300159/* Is the given object an empty dictionary?
160*/
161static int
162is_empty_dict(PyObject *obj)
163{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200164 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300165}
166
167
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000168/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200169/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000170
/* Optional per-element storage for attributes and children.  It is
   allocated by create_extra() and released by dealloc_extra(); an element
   whose 'extra' pointer is NULL has neither. */
typedef struct {

    /* attributes (a dictionary object), or None if no attributes */
    PyObject* attrib;

    /* child elements */
    Py_ssize_t length; /* actual number of items */
    Py_ssize_t allocated; /* allocated items */

    /* this either points to _children or to a malloced buffer */
    PyObject* *children;

    /* inline storage used until more than STATIC_CHILDREN slots are needed */
    PyObject* _children[STATIC_CHILDREN];

} ElementObjectExtra;
186
/* Instance layout of the Element type. */
typedef struct {
    PyObject_HEAD

    /* element tag (a string). */
    PyObject* tag;

    /* text before first child.  note that this is a tagged pointer;
       use JOIN_OBJ to get the object pointer.  the join flag is used
       to distinguish lists created by the tree builder from lists
       assigned to the attribute by application code; the former
       should be joined before being returned to the user, the latter
       should be left intact. */
    PyObject* text;

    /* text after this element, in parent.  note that this is a tagged
       pointer; use JOIN_OBJ to get the object pointer. */
    PyObject* tail;

    /* attributes and children; NULL until create_extra() allocates it */
    ElementObjectExtra* extra;

    PyObject *weakreflist; /* For tp_weaklistoffset */

} ElementObject;
210
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000211
/* Element_CheckExact(op): true only when op's type is exactly Element_Type.
   Element_Check(op): type test delegated to PyObject_TypeCheck(). */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
214
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215
216/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200217/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000218
219LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200220create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000221{
222 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200223 if (!self->extra) {
224 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000225 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200226 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000227
228 if (!attrib)
229 attrib = Py_None;
230
231 Py_INCREF(attrib);
232 self->extra->attrib = attrib;
233
234 self->extra->length = 0;
235 self->extra->allocated = STATIC_CHILDREN;
236 self->extra->children = self->extra->_children;
237
238 return 0;
239}
240
241LOCAL(void)
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300242dealloc_extra(ElementObjectExtra *extra)
243{
244 Py_ssize_t i;
245
246 if (!extra)
247 return;
248
249 Py_DECREF(extra->attrib);
250
251 for (i = 0; i < extra->length; i++)
252 Py_DECREF(extra->children[i]);
253
254 if (extra->children != extra->_children)
255 PyObject_Free(extra->children);
256
257 PyObject_Free(extra);
258}
259
260LOCAL(void)
261clear_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000262{
Eli Bendersky08b85292012-04-04 15:55:07 +0300263 ElementObjectExtra *myextra;
Eli Bendersky08b85292012-04-04 15:55:07 +0300264
Eli Benderskyebf37a22012-04-03 22:02:37 +0300265 if (!self->extra)
266 return;
267
268 /* Avoid DECREFs calling into this code again (cycles, etc.)
269 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300270 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300271 self->extra = NULL;
272
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300273 dealloc_extra(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000274}
275
Eli Bendersky092af1f2012-03-04 07:14:03 +0200276/* Convenience internal function to create new Element objects with the given
277 * tag and attributes.
278*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000279LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200280create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000281{
282 ElementObject* self;
283
Eli Bendersky0192ba32012-03-30 16:38:33 +0300284 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000285 if (self == NULL)
286 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000287 self->extra = NULL;
288
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000289 Py_INCREF(tag);
290 self->tag = tag;
291
292 Py_INCREF(Py_None);
293 self->text = Py_None;
294
295 Py_INCREF(Py_None);
296 self->tail = Py_None;
297
Eli Benderskyebf37a22012-04-03 22:02:37 +0300298 self->weakreflist = NULL;
299
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200300 ALLOC(sizeof(ElementObject), "create element");
301 PyObject_GC_Track(self);
302
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200303 if (attrib != Py_None && !is_empty_dict(attrib)) {
304 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200305 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200306 return NULL;
307 }
308 }
309
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000310 return (PyObject*) self;
311}
312
Eli Bendersky092af1f2012-03-04 07:14:03 +0200313static PyObject *
314element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
315{
316 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
317 if (e != NULL) {
318 Py_INCREF(Py_None);
319 e->tag = Py_None;
320
321 Py_INCREF(Py_None);
322 e->text = Py_None;
323
324 Py_INCREF(Py_None);
325 e->tail = Py_None;
326
327 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300328 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200329 }
330 return (PyObject *)e;
331}
332
Eli Bendersky737b1732012-05-29 06:02:56 +0300333/* Helper function for extracting the attrib dictionary from a keywords dict.
334 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800335 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300336 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700337 *
338 * Return a dictionary with the content of kwds merged into the content of
339 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300340 */
341static PyObject*
342get_attrib_from_keywords(PyObject *kwds)
343{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700344 PyObject *attrib_str = PyUnicode_FromString("attrib");
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600345 if (attrib_str == NULL) {
346 return NULL;
347 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200348 PyObject *attrib = PyDict_GetItemWithError(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300349
350 if (attrib) {
351 /* If attrib was found in kwds, copy its value and remove it from
352 * kwds
353 */
354 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700355 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300356 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
357 Py_TYPE(attrib)->tp_name);
358 return NULL;
359 }
360 attrib = PyDict_Copy(attrib);
Serhiy Storchaka8905fcc2018-12-11 08:38:03 +0200361 if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
362 Py_DECREF(attrib);
363 attrib = NULL;
364 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200365 }
366 else if (!PyErr_Occurred()) {
Eli Bendersky737b1732012-05-29 06:02:56 +0300367 attrib = PyDict_New();
368 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700369
370 Py_DECREF(attrib_str);
371
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600372 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
373 Py_DECREF(attrib);
374 return NULL;
375 }
Eli Bendersky737b1732012-05-29 06:02:56 +0300376 return attrib;
377}
378
Serhiy Storchakacb985562015-05-04 15:32:48 +0300379/*[clinic input]
380module _elementtree
381class _elementtree.Element "ElementObject *" "&Element_Type"
382class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
383class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
384[clinic start generated code]*/
385/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
386
Eli Bendersky092af1f2012-03-04 07:14:03 +0200387static int
388element_init(PyObject *self, PyObject *args, PyObject *kwds)
389{
390 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 PyObject *attrib = NULL;
392 ElementObject *self_elem;
393
394 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
395 return -1;
396
Eli Bendersky737b1732012-05-29 06:02:56 +0300397 if (attrib) {
398 /* attrib passed as positional arg */
399 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200400 if (!attrib)
401 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300402 if (kwds) {
403 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300405 return -1;
406 }
407 }
408 } else if (kwds) {
409 /* have keywords args */
410 attrib = get_attrib_from_keywords(kwds);
411 if (!attrib)
412 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413 }
414
415 self_elem = (ElementObject *)self;
416
Antoine Pitrouc1948842012-10-01 23:40:37 +0200417 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200418 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200419 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200420 return -1;
421 }
422 }
423
Eli Bendersky48d358b2012-05-30 17:57:50 +0300424 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200425 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426
427 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200428 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300429 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200430
Eli Bendersky092af1f2012-03-04 07:14:03 +0200431 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300432 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200433
Eli Bendersky092af1f2012-03-04 07:14:03 +0200434 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300435 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200436
437 return 0;
438}
439
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000440LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200441element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000442{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200443 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000444 PyObject* *children;
445
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300446 assert(extra >= 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447 /* make sure self->children can hold the given number of extra
448 elements. set an exception and return -1 if allocation failed */
449
Victor Stinner5f0af232013-07-11 23:01:36 +0200450 if (!self->extra) {
451 if (create_extra(self, NULL) < 0)
452 return -1;
453 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000454
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200455 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000456
457 if (size > self->extra->allocated) {
458 /* use Python 2.4's list growth strategy */
459 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000460 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100461 * which needs at least 4 bytes.
462 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000463 * be safe.
464 */
465 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200466 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
467 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000468 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000469 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100470 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000471 * false alarm always assume at least one child to be safe.
472 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000473 children = PyObject_Realloc(self->extra->children,
474 size * sizeof(PyObject*));
475 if (!children)
476 goto nomemory;
477 } else {
478 children = PyObject_Malloc(size * sizeof(PyObject*));
479 if (!children)
480 goto nomemory;
481 /* copy existing children from static area to malloc buffer */
482 memcpy(children, self->extra->children,
483 self->extra->length * sizeof(PyObject*));
484 }
485 self->extra->children = children;
486 self->extra->allocated = size;
487 }
488
489 return 0;
490
491 nomemory:
492 PyErr_NoMemory();
493 return -1;
494}
495
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300496LOCAL(void)
497raise_type_error(PyObject *element)
498{
499 PyErr_Format(PyExc_TypeError,
500 "expected an Element, not \"%.200s\"",
501 Py_TYPE(element)->tp_name);
502}
503
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000504LOCAL(int)
505element_add_subelement(ElementObject* self, PyObject* element)
506{
507 /* add a child element to a parent */
508
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300509 if (!Element_Check(element)) {
510 raise_type_error(element);
511 return -1;
512 }
513
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000514 if (element_resize(self, 1) < 0)
515 return -1;
516
517 Py_INCREF(element);
518 self->extra->children[self->extra->length] = element;
519
520 self->extra->length++;
521
522 return 0;
523}
524
525LOCAL(PyObject*)
526element_get_attrib(ElementObject* self)
527{
528 /* return borrowed reference to attrib dictionary */
529 /* note: this function assumes that the extra section exists */
530
531 PyObject* res = self->extra->attrib;
532
533 if (res == Py_None) {
534 /* create missing dictionary */
535 res = PyDict_New();
536 if (!res)
537 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200538 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000539 self->extra->attrib = res;
540 }
541
542 return res;
543}
544
545LOCAL(PyObject*)
546element_get_text(ElementObject* self)
547{
548 /* return borrowed reference to text attribute */
549
Serhiy Storchaka576def02017-03-30 09:47:31 +0300550 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000551
552 if (JOIN_GET(res)) {
553 res = JOIN_OBJ(res);
554 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300555 PyObject *tmp = list_join(res);
556 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000557 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300558 self->text = tmp;
559 Py_DECREF(res);
560 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561 }
562 }
563
564 return res;
565}
566
567LOCAL(PyObject*)
568element_get_tail(ElementObject* self)
569{
570 /* return borrowed reference to text attribute */
571
Serhiy Storchaka576def02017-03-30 09:47:31 +0300572 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573
574 if (JOIN_GET(res)) {
575 res = JOIN_OBJ(res);
576 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300577 PyObject *tmp = list_join(res);
578 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000579 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300580 self->tail = tmp;
581 Py_DECREF(res);
582 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000583 }
584 }
585
586 return res;
587}
588
589static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300590subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000591{
592 PyObject* elem;
593
594 ElementObject* parent;
595 PyObject* tag;
596 PyObject* attrib = NULL;
597 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
598 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800599 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000600 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800601 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000602
Eli Bendersky737b1732012-05-29 06:02:56 +0300603 if (attrib) {
604 /* attrib passed as positional arg */
605 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000606 if (!attrib)
607 return NULL;
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600608 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
609 Py_DECREF(attrib);
610 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300611 }
612 } else if (kwds) {
613 /* have keyword args */
614 attrib = get_attrib_from_keywords(kwds);
615 if (!attrib)
616 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000617 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300618 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000619 Py_INCREF(Py_None);
620 attrib = Py_None;
621 }
622
Eli Bendersky092af1f2012-03-04 07:14:03 +0200623 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000624 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200625 if (elem == NULL)
626 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000627
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000628 if (element_add_subelement(parent, elem) < 0) {
629 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000630 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000631 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000632
633 return elem;
634}
635
Eli Bendersky0192ba32012-03-30 16:38:33 +0300636static int
637element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
638{
639 Py_VISIT(self->tag);
640 Py_VISIT(JOIN_OBJ(self->text));
641 Py_VISIT(JOIN_OBJ(self->tail));
642
643 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200644 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300645 Py_VISIT(self->extra->attrib);
646
647 for (i = 0; i < self->extra->length; ++i)
648 Py_VISIT(self->extra->children[i]);
649 }
650 return 0;
651}
652
653static int
654element_gc_clear(ElementObject *self)
655{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300656 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700657 _clear_joined_ptr(&self->text);
658 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300659
660 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300661 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300662 */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300663 clear_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300664 return 0;
665}
666
/* tp_dealloc for Element.  The object is untracked from the GC first, then
 * inside the trashcan section weak references are cleared, all owned
 * references and the extra block are released, and the memory is returned
 * through the type's tp_free.
 */
static void
element_dealloc(ElementObject* self)
{
    /* bpo-31095: UnTrack is needed before calling any callbacks */
    PyObject_GC_UnTrack(self);
    Py_TRASHCAN_SAFE_BEGIN(self)

    if (self->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *) self);

    /* element_gc_clear clears all references and deallocates extra
    */
    element_gc_clear(self);

    /* expands to nothing unless allocation tracing is compiled in */
    RELEASE(sizeof(ElementObject), "destroy element");
    Py_TYPE(self)->tp_free((PyObject *)self);
    Py_TRASHCAN_SAFE_END(self)
}
685
686/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000687
Serhiy Storchakacb985562015-05-04 15:32:48 +0300688/*[clinic input]
689_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000690
Serhiy Storchakacb985562015-05-04 15:32:48 +0300691 subelement: object(subclass_of='&Element_Type')
692 /
693
694[clinic start generated code]*/
695
696static PyObject *
697_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
698/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
699{
700 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000701 return NULL;
702
703 Py_RETURN_NONE;
704}
705
Serhiy Storchakacb985562015-05-04 15:32:48 +0300706/*[clinic input]
707_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000708
Serhiy Storchakacb985562015-05-04 15:32:48 +0300709[clinic start generated code]*/
710
711static PyObject *
712_elementtree_Element_clear_impl(ElementObject *self)
713/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
714{
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300715 clear_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716
717 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300718 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719
720 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300721 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000722
723 Py_RETURN_NONE;
724}
725
Serhiy Storchakacb985562015-05-04 15:32:48 +0300726/*[clinic input]
727_elementtree.Element.__copy__
728
729[clinic start generated code]*/
730
731static PyObject *
732_elementtree_Element___copy___impl(ElementObject *self)
733/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000734{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200735 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000736 ElementObject* element;
737
Eli Bendersky092af1f2012-03-04 07:14:03 +0200738 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800739 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000740 if (!element)
741 return NULL;
742
Oren Milman39ecb9c2017-10-10 23:26:24 +0300743 Py_INCREF(JOIN_OBJ(self->text));
744 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000745
Oren Milman39ecb9c2017-10-10 23:26:24 +0300746 Py_INCREF(JOIN_OBJ(self->tail));
747 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300749 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000750 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000751 if (element_resize(element, self->extra->length) < 0) {
752 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000753 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000754 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000755
756 for (i = 0; i < self->extra->length; i++) {
757 Py_INCREF(self->extra->children[i]);
758 element->extra->children[i] = self->extra->children[i];
759 }
760
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300761 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000763 }
764
765 return (PyObject*) element;
766}
767
/* Helper for a deep copy.  Forward declaration only; it is called as
   deepcopy(object, memo) and a NULL result is treated as an error by the
   caller below. */
LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
770
Serhiy Storchakacb985562015-05-04 15:32:48 +0300771/*[clinic input]
772_elementtree.Element.__deepcopy__
773
Oren Milmand0568182017-09-12 17:39:15 +0300774 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300775 /
776
777[clinic start generated code]*/
778
779static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300780_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
781/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000782{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200783 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000784 ElementObject* element;
785 PyObject* tag;
786 PyObject* attrib;
787 PyObject* text;
788 PyObject* tail;
789 PyObject* id;
790
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000791 tag = deepcopy(self->tag, memo);
792 if (!tag)
793 return NULL;
794
795 if (self->extra) {
796 attrib = deepcopy(self->extra->attrib, memo);
797 if (!attrib) {
798 Py_DECREF(tag);
799 return NULL;
800 }
801 } else {
802 Py_INCREF(Py_None);
803 attrib = Py_None;
804 }
805
Eli Bendersky092af1f2012-03-04 07:14:03 +0200806 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807
808 Py_DECREF(tag);
809 Py_DECREF(attrib);
810
811 if (!element)
812 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100813
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000814 text = deepcopy(JOIN_OBJ(self->text), memo);
815 if (!text)
816 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300817 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000818
819 tail = deepcopy(JOIN_OBJ(self->tail), memo);
820 if (!tail)
821 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300822 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000823
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300824 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000825 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000826 if (element_resize(element, self->extra->length) < 0)
827 goto error;
828
829 for (i = 0; i < self->extra->length; i++) {
830 PyObject* child = deepcopy(self->extra->children[i], memo);
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300831 if (!child || !Element_Check(child)) {
832 if (child) {
833 raise_type_error(child);
834 Py_DECREF(child);
835 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000836 element->extra->length = i;
837 goto error;
838 }
839 element->extra->children[i] = child;
840 }
841
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300842 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000843 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000844 }
845
846 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700847 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000848 if (!id)
849 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000850
851 i = PyDict_SetItem(memo, id, (PyObject*) element);
852
853 Py_DECREF(id);
854
855 if (i < 0)
856 goto error;
857
858 return (PyObject*) element;
859
860 error:
861 Py_DECREF(element);
862 return NULL;
863}
864
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200865LOCAL(PyObject *)
866deepcopy(PyObject *object, PyObject *memo)
867{
868 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200869 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200870 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200871
872 /* Fast paths */
873 if (object == Py_None || PyUnicode_CheckExact(object)) {
874 Py_INCREF(object);
875 return object;
876 }
877
878 if (Py_REFCNT(object) == 1) {
879 if (PyDict_CheckExact(object)) {
880 PyObject *key, *value;
881 Py_ssize_t pos = 0;
882 int simple = 1;
883 while (PyDict_Next(object, &pos, &key, &value)) {
884 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
885 simple = 0;
886 break;
887 }
888 }
889 if (simple)
890 return PyDict_Copy(object);
891 /* Fall through to general case */
892 }
893 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300894 return _elementtree_Element___deepcopy___impl(
895 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200896 }
897 }
898
899 /* General case */
900 st = ET_STATE_GLOBAL;
901 if (!st->deepcopy_obj) {
902 PyErr_SetString(PyExc_RuntimeError,
903 "deepcopy helper not found");
904 return NULL;
905 }
906
Victor Stinner7fbac452016-08-20 01:34:44 +0200907 stack[0] = object;
908 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200909 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200910}
911
912
Serhiy Storchakacb985562015-05-04 15:32:48 +0300913/*[clinic input]
914_elementtree.Element.__sizeof__ -> Py_ssize_t
915
916[clinic start generated code]*/
917
918static Py_ssize_t
919_elementtree_Element___sizeof___impl(ElementObject *self)
920/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200921{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200922 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200923 if (self->extra) {
924 result += sizeof(ElementObjectExtra);
925 if (self->extra->children != self->extra->_children)
926 result += sizeof(PyObject*) * self->extra->allocated;
927 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300928 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200929}
930
Eli Bendersky698bdb22013-01-10 06:01:06 -0800931/* dict keys for getstate/setstate. */
932#define PICKLED_TAG "tag"
933#define PICKLED_CHILDREN "_children"
934#define PICKLED_ATTRIB "attrib"
935#define PICKLED_TAIL "tail"
936#define PICKLED_TEXT "text"
937
938/* __getstate__ returns a fabricated instance dict as in the pure-Python
939 * Element implementation, for interoperability/interchangeability. This
940 * makes the pure-Python implementation details an API, but (a) there aren't
941 * any unnecessary structures there; and (b) it buys compatibility with 3.2
942 * pickles. See issue #16076.
943 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300944/*[clinic input]
945_elementtree.Element.__getstate__
946
947[clinic start generated code]*/
948
Eli Bendersky698bdb22013-01-10 06:01:06 -0800949static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300950_elementtree_Element___getstate___impl(ElementObject *self)
951/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800952{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200953 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800954 PyObject *instancedict = NULL, *children;
955
956 /* Build a list of children. */
957 children = PyList_New(self->extra ? self->extra->length : 0);
958 if (!children)
959 return NULL;
960 for (i = 0; i < PyList_GET_SIZE(children); i++) {
961 PyObject *child = self->extra->children[i];
962 Py_INCREF(child);
963 PyList_SET_ITEM(children, i, child);
964 }
965
966 /* Construct the state object. */
967 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
968 if (noattrib)
969 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
970 PICKLED_TAG, self->tag,
971 PICKLED_CHILDREN, children,
972 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700973 PICKLED_TEXT, JOIN_OBJ(self->text),
974 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800975 else
976 instancedict = Py_BuildValue("{sOsOsOsOsO}",
977 PICKLED_TAG, self->tag,
978 PICKLED_CHILDREN, children,
979 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700980 PICKLED_TEXT, JOIN_OBJ(self->text),
981 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800982 if (instancedict) {
983 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800984 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800985 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800986 else {
987 for (i = 0; i < PyList_GET_SIZE(children); i++)
988 Py_DECREF(PyList_GET_ITEM(children, i));
989 Py_DECREF(children);
990
991 return NULL;
992 }
993}
994
995static PyObject *
996element_setstate_from_attributes(ElementObject *self,
997 PyObject *tag,
998 PyObject *attrib,
999 PyObject *text,
1000 PyObject *tail,
1001 PyObject *children)
1002{
1003 Py_ssize_t i, nchildren;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001004 ElementObjectExtra *oldextra = NULL;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001005
1006 if (!tag) {
1007 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
1008 return NULL;
1009 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001010
Serhiy Storchaka191321d2015-12-27 15:41:34 +02001011 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001012 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001013
Oren Milman39ecb9c2017-10-10 23:26:24 +03001014 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
1015 Py_INCREF(JOIN_OBJ(text));
1016 _set_joined_ptr(&self->text, text);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001017
Oren Milman39ecb9c2017-10-10 23:26:24 +03001018 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
1019 Py_INCREF(JOIN_OBJ(tail));
1020 _set_joined_ptr(&self->tail, tail);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001021
1022 /* Handle ATTRIB and CHILDREN. */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001023 if (!children && !attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001024 Py_RETURN_NONE;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001025 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001026
1027 /* Compute 'nchildren'. */
1028 if (children) {
1029 if (!PyList_Check(children)) {
1030 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1031 return NULL;
1032 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001033 nchildren = PyList_GET_SIZE(children);
1034
1035 /* (Re-)allocate 'extra'.
1036 Avoid DECREFs calling into this code again (cycles, etc.)
1037 */
1038 oldextra = self->extra;
1039 self->extra = NULL;
1040 if (element_resize(self, nchildren)) {
1041 assert(!self->extra || !self->extra->length);
1042 clear_extra(self);
1043 self->extra = oldextra;
1044 return NULL;
1045 }
1046 assert(self->extra);
1047 assert(self->extra->allocated >= nchildren);
1048 if (oldextra) {
1049 assert(self->extra->attrib == Py_None);
1050 self->extra->attrib = oldextra->attrib;
1051 oldextra->attrib = Py_None;
1052 }
1053
1054 /* Copy children */
1055 for (i = 0; i < nchildren; i++) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001056 PyObject *child = PyList_GET_ITEM(children, i);
1057 if (!Element_Check(child)) {
1058 raise_type_error(child);
1059 self->extra->length = i;
1060 dealloc_extra(oldextra);
1061 return NULL;
1062 }
1063 Py_INCREF(child);
1064 self->extra->children[i] = child;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001065 }
1066
1067 assert(!self->extra->length);
1068 self->extra->length = nchildren;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001069 }
1070 else {
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001071 if (element_resize(self, 0)) {
1072 return NULL;
1073 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001074 }
1075
Eli Bendersky698bdb22013-01-10 06:01:06 -08001076 /* Stash attrib. */
1077 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001078 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001079 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001080 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001081 dealloc_extra(oldextra);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001082
1083 Py_RETURN_NONE;
1084}
1085
1086/* __setstate__ for Element instance from the Python implementation.
1087 * 'state' should be the instance dict.
1088 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001089
Eli Bendersky698bdb22013-01-10 06:01:06 -08001090static PyObject *
1091element_setstate_from_Python(ElementObject *self, PyObject *state)
1092{
1093 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1094 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1095 PyObject *args;
1096 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001097 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001098
Eli Bendersky698bdb22013-01-10 06:01:06 -08001099 tag = attrib = text = tail = children = NULL;
1100 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001101 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001102 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001103
1104 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1105 &attrib, &text, &tail, &children))
1106 retval = element_setstate_from_attributes(self, tag, attrib, text,
1107 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001108 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001109 retval = NULL;
1110
1111 Py_DECREF(args);
1112 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001113}
1114
Serhiy Storchakacb985562015-05-04 15:32:48 +03001115/*[clinic input]
1116_elementtree.Element.__setstate__
1117
1118 state: object
1119 /
1120
1121[clinic start generated code]*/
1122
Eli Bendersky698bdb22013-01-10 06:01:06 -08001123static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001124_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1125/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001126{
1127 if (!PyDict_CheckExact(state)) {
1128 PyErr_Format(PyExc_TypeError,
1129 "Don't know how to unpickle \"%.200R\" as an Element",
1130 state);
1131 return NULL;
1132 }
1133 else
1134 return element_setstate_from_Python(self, state);
1135}
1136
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001137LOCAL(int)
1138checkpath(PyObject* tag)
1139{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001140 Py_ssize_t i;
1141 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001142
1143 /* check if a tag contains an xpath character */
1144
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001145#define PATHCHAR(ch) \
1146 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001147
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001148 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001149 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1150 void *data = PyUnicode_DATA(tag);
1151 unsigned int kind = PyUnicode_KIND(tag);
1152 for (i = 0; i < len; i++) {
1153 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1154 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001155 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001156 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001157 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001158 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001159 return 1;
1160 }
1161 return 0;
1162 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001163 if (PyBytes_Check(tag)) {
1164 char *p = PyBytes_AS_STRING(tag);
1165 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001166 if (p[i] == '{')
1167 check = 0;
1168 else if (p[i] == '}')
1169 check = 1;
1170 else if (check && PATHCHAR(p[i]))
1171 return 1;
1172 }
1173 return 0;
1174 }
1175
1176 return 1; /* unknown type; might be path expression */
1177}
1178
Serhiy Storchakacb985562015-05-04 15:32:48 +03001179/*[clinic input]
1180_elementtree.Element.extend
1181
1182 elements: object
1183 /
1184
1185[clinic start generated code]*/
1186
1187static PyObject *
1188_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1189/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001190{
1191 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001192 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001193
Serhiy Storchakacb985562015-05-04 15:32:48 +03001194 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001195 if (!seq) {
1196 PyErr_Format(
1197 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001198 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001199 );
1200 return NULL;
1201 }
1202
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001203 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001204 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001205 Py_INCREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001206 if (element_add_subelement(self, element) < 0) {
1207 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001208 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001209 return NULL;
1210 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001211 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001212 }
1213
1214 Py_DECREF(seq);
1215
1216 Py_RETURN_NONE;
1217}
1218
Serhiy Storchakacb985562015-05-04 15:32:48 +03001219/*[clinic input]
1220_elementtree.Element.find
1221
1222 path: object
1223 namespaces: object = None
1224
1225[clinic start generated code]*/
1226
1227static PyObject *
1228_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1229 PyObject *namespaces)
1230/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001231{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001232 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001233 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001234
Serhiy Storchakacb985562015-05-04 15:32:48 +03001235 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001236 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001237 return _PyObject_CallMethodIdObjArgs(
1238 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001239 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001240 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001241
1242 if (!self->extra)
1243 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001244
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001245 for (i = 0; i < self->extra->length; i++) {
1246 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001247 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001248 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001249 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001250 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001251 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001252 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001253 Py_DECREF(item);
1254 if (rc < 0)
1255 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001256 }
1257
1258 Py_RETURN_NONE;
1259}
1260
Serhiy Storchakacb985562015-05-04 15:32:48 +03001261/*[clinic input]
1262_elementtree.Element.findtext
1263
1264 path: object
1265 default: object = None
1266 namespaces: object = None
1267
1268[clinic start generated code]*/
1269
1270static PyObject *
1271_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1272 PyObject *default_value,
1273 PyObject *namespaces)
1274/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001275{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001276 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001277 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001278 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001279
Serhiy Storchakacb985562015-05-04 15:32:48 +03001280 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001281 return _PyObject_CallMethodIdObjArgs(
1282 st->elementpath_obj, &PyId_findtext,
1283 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001284 );
1285
1286 if (!self->extra) {
1287 Py_INCREF(default_value);
1288 return default_value;
1289 }
1290
1291 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001292 PyObject *item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001293 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001294 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001295 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001296 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001297 if (rc > 0) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001298 PyObject* text = element_get_text((ElementObject*)item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001299 if (text == Py_None) {
1300 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001301 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001302 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001303 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001304 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001305 return text;
1306 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001307 Py_DECREF(item);
1308 if (rc < 0)
1309 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001310 }
1311
1312 Py_INCREF(default_value);
1313 return default_value;
1314}
1315
Serhiy Storchakacb985562015-05-04 15:32:48 +03001316/*[clinic input]
1317_elementtree.Element.findall
1318
1319 path: object
1320 namespaces: object = None
1321
1322[clinic start generated code]*/
1323
1324static PyObject *
1325_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1326 PyObject *namespaces)
1327/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001328{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001329 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001330 PyObject* out;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001331 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001332
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001333 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001334 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001335 return _PyObject_CallMethodIdObjArgs(
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001336 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001337 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001338 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001339
1340 out = PyList_New(0);
1341 if (!out)
1342 return NULL;
1343
1344 if (!self->extra)
1345 return out;
1346
1347 for (i = 0; i < self->extra->length; i++) {
1348 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001349 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001350 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001351 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001352 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001353 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1354 Py_DECREF(item);
1355 Py_DECREF(out);
1356 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001357 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001358 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001359 }
1360
1361 return out;
1362}
1363
Serhiy Storchakacb985562015-05-04 15:32:48 +03001364/*[clinic input]
1365_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001366
Serhiy Storchakacb985562015-05-04 15:32:48 +03001367 path: object
1368 namespaces: object = None
1369
1370[clinic start generated code]*/
1371
1372static PyObject *
1373_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1374 PyObject *namespaces)
1375/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1376{
1377 PyObject* tag = path;
1378 _Py_IDENTIFIER(iterfind);
1379 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001380
Victor Stinnerf5616342016-12-09 15:26:00 +01001381 return _PyObject_CallMethodIdObjArgs(
1382 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001383}
1384
Serhiy Storchakacb985562015-05-04 15:32:48 +03001385/*[clinic input]
1386_elementtree.Element.get
1387
1388 key: object
1389 default: object = None
1390
1391[clinic start generated code]*/
1392
1393static PyObject *
1394_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1395 PyObject *default_value)
1396/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001397{
1398 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001399
1400 if (!self->extra || self->extra->attrib == Py_None)
1401 value = default_value;
1402 else {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001403 value = PyDict_GetItemWithError(self->extra->attrib, key);
1404 if (!value) {
1405 if (PyErr_Occurred()) {
1406 return NULL;
1407 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001408 value = default_value;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001409 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001410 }
1411
1412 Py_INCREF(value);
1413 return value;
1414}
1415
Serhiy Storchakacb985562015-05-04 15:32:48 +03001416/*[clinic input]
1417_elementtree.Element.getchildren
1418
1419[clinic start generated code]*/
1420
1421static PyObject *
1422_elementtree_Element_getchildren_impl(ElementObject *self)
1423/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001424{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001425 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001426 PyObject* list;
1427
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001428 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1429 "This method will be removed in future versions. "
1430 "Use 'list(elem)' or iteration over elem instead.",
1431 1) < 0) {
1432 return NULL;
1433 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001434
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001435 if (!self->extra)
1436 return PyList_New(0);
1437
1438 list = PyList_New(self->extra->length);
1439 if (!list)
1440 return NULL;
1441
1442 for (i = 0; i < self->extra->length; i++) {
1443 PyObject* item = self->extra->children[i];
1444 Py_INCREF(item);
1445 PyList_SET_ITEM(list, i, item);
1446 }
1447
1448 return list;
1449}
1450
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001451
Eli Bendersky64d11e62012-06-15 07:42:50 +03001452static PyObject *
1453create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1454
1455
Serhiy Storchakacb985562015-05-04 15:32:48 +03001456/*[clinic input]
1457_elementtree.Element.iter
1458
1459 tag: object = None
1460
1461[clinic start generated code]*/
1462
Eli Bendersky64d11e62012-06-15 07:42:50 +03001463static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001464_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1465/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001466{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001467 if (PyUnicode_Check(tag)) {
1468 if (PyUnicode_READY(tag) < 0)
1469 return NULL;
1470 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1471 tag = Py_None;
1472 }
1473 else if (PyBytes_Check(tag)) {
1474 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1475 tag = Py_None;
1476 }
1477
Eli Bendersky64d11e62012-06-15 07:42:50 +03001478 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001479}
1480
1481
Serhiy Storchakacb985562015-05-04 15:32:48 +03001482/*[clinic input]
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001483_elementtree.Element.getiterator
1484
1485 tag: object = None
1486
1487[clinic start generated code]*/
1488
1489static PyObject *
1490_elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1491/*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1492{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03001493 if (PyErr_WarnEx(PyExc_DeprecationWarning,
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001494 "This method will be removed in future versions. "
1495 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1496 1) < 0) {
1497 return NULL;
1498 }
1499 return _elementtree_Element_iter_impl(self, tag);
1500}
1501
1502
1503/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03001504_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001505
Serhiy Storchakacb985562015-05-04 15:32:48 +03001506[clinic start generated code]*/
1507
1508static PyObject *
1509_elementtree_Element_itertext_impl(ElementObject *self)
1510/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1511{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001512 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001513}
1514
Eli Bendersky64d11e62012-06-15 07:42:50 +03001515
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001516static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001517element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001518{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001519 ElementObject* self = (ElementObject*) self_;
1520
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001521 if (!self->extra || index < 0 || index >= self->extra->length) {
1522 PyErr_SetString(
1523 PyExc_IndexError,
1524 "child index out of range"
1525 );
1526 return NULL;
1527 }
1528
1529 Py_INCREF(self->extra->children[index]);
1530 return self->extra->children[index];
1531}
1532
Serhiy Storchakacb985562015-05-04 15:32:48 +03001533/*[clinic input]
1534_elementtree.Element.insert
1535
1536 index: Py_ssize_t
1537 subelement: object(subclass_of='&Element_Type')
1538 /
1539
1540[clinic start generated code]*/
1541
1542static PyObject *
1543_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1544 PyObject *subelement)
1545/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001546{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001547 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001548
Victor Stinner5f0af232013-07-11 23:01:36 +02001549 if (!self->extra) {
1550 if (create_extra(self, NULL) < 0)
1551 return NULL;
1552 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001553
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001554 if (index < 0) {
1555 index += self->extra->length;
1556 if (index < 0)
1557 index = 0;
1558 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001559 if (index > self->extra->length)
1560 index = self->extra->length;
1561
1562 if (element_resize(self, 1) < 0)
1563 return NULL;
1564
1565 for (i = self->extra->length; i > index; i--)
1566 self->extra->children[i] = self->extra->children[i-1];
1567
Serhiy Storchakacb985562015-05-04 15:32:48 +03001568 Py_INCREF(subelement);
1569 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001570
1571 self->extra->length++;
1572
1573 Py_RETURN_NONE;
1574}
1575
Serhiy Storchakacb985562015-05-04 15:32:48 +03001576/*[clinic input]
1577_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001578
Serhiy Storchakacb985562015-05-04 15:32:48 +03001579[clinic start generated code]*/
1580
1581static PyObject *
1582_elementtree_Element_items_impl(ElementObject *self)
1583/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1584{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001585 if (!self->extra || self->extra->attrib == Py_None)
1586 return PyList_New(0);
1587
1588 return PyDict_Items(self->extra->attrib);
1589}
1590
Serhiy Storchakacb985562015-05-04 15:32:48 +03001591/*[clinic input]
1592_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001593
Serhiy Storchakacb985562015-05-04 15:32:48 +03001594[clinic start generated code]*/
1595
1596static PyObject *
1597_elementtree_Element_keys_impl(ElementObject *self)
1598/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1599{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001600 if (!self->extra || self->extra->attrib == Py_None)
1601 return PyList_New(0);
1602
1603 return PyDict_Keys(self->extra->attrib);
1604}
1605
Martin v. Löwis18e16552006-02-15 17:27:45 +00001606static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001607element_length(ElementObject* self)
1608{
1609 if (!self->extra)
1610 return 0;
1611
1612 return self->extra->length;
1613}
1614
Serhiy Storchakacb985562015-05-04 15:32:48 +03001615/*[clinic input]
1616_elementtree.Element.makeelement
1617
1618 tag: object
1619 attrib: object
1620 /
1621
1622[clinic start generated code]*/
1623
1624static PyObject *
1625_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1626 PyObject *attrib)
1627/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001628{
1629 PyObject* elem;
1630
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001631 attrib = PyDict_Copy(attrib);
1632 if (!attrib)
1633 return NULL;
1634
Eli Bendersky092af1f2012-03-04 07:14:03 +02001635 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001636
1637 Py_DECREF(attrib);
1638
1639 return elem;
1640}
1641
Serhiy Storchakacb985562015-05-04 15:32:48 +03001642/*[clinic input]
1643_elementtree.Element.remove
1644
1645 subelement: object(subclass_of='&Element_Type')
1646 /
1647
1648[clinic start generated code]*/
1649
1650static PyObject *
1651_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1652/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001653{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001654 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001655 int rc;
1656 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001657
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001658 if (!self->extra) {
1659 /* element has no children, so raise exception */
1660 PyErr_SetString(
1661 PyExc_ValueError,
1662 "list.remove(x): x not in list"
1663 );
1664 return NULL;
1665 }
1666
1667 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001668 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001669 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001670 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001671 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001672 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001673 if (rc < 0)
1674 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001675 }
1676
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001677 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001678 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001679 PyErr_SetString(
1680 PyExc_ValueError,
1681 "list.remove(x): x not in list"
1682 );
1683 return NULL;
1684 }
1685
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001686 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001687
1688 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001689 for (; i < self->extra->length; i++)
1690 self->extra->children[i] = self->extra->children[i+1];
1691
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001692 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001693 Py_RETURN_NONE;
1694}
1695
1696static PyObject*
1697element_repr(ElementObject* self)
1698{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001699 int status;
1700
1701 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001702 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001703
1704 status = Py_ReprEnter((PyObject *)self);
1705 if (status == 0) {
1706 PyObject *res;
1707 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1708 Py_ReprLeave((PyObject *)self);
1709 return res;
1710 }
1711 if (status > 0)
1712 PyErr_Format(PyExc_RuntimeError,
1713 "reentrant call inside %s.__repr__",
1714 Py_TYPE(self)->tp_name);
1715 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001716}
1717
Serhiy Storchakacb985562015-05-04 15:32:48 +03001718/*[clinic input]
1719_elementtree.Element.set
1720
1721 key: object
1722 value: object
1723 /
1724
1725[clinic start generated code]*/
1726
1727static PyObject *
1728_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1729 PyObject *value)
1730/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001731{
1732 PyObject* attrib;
1733
Victor Stinner5f0af232013-07-11 23:01:36 +02001734 if (!self->extra) {
1735 if (create_extra(self, NULL) < 0)
1736 return NULL;
1737 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001738
1739 attrib = element_get_attrib(self);
1740 if (!attrib)
1741 return NULL;
1742
1743 if (PyDict_SetItem(attrib, key, value) < 0)
1744 return NULL;
1745
1746 Py_RETURN_NONE;
1747}
1748
1749static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001750element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001751{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001752 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001753 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001754 PyObject* old;
1755
1756 if (!self->extra || index < 0 || index >= self->extra->length) {
1757 PyErr_SetString(
1758 PyExc_IndexError,
1759 "child assignment index out of range");
1760 return -1;
1761 }
1762
1763 old = self->extra->children[index];
1764
1765 if (item) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001766 if (!Element_Check(item)) {
1767 raise_type_error(item);
1768 return -1;
1769 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001770 Py_INCREF(item);
1771 self->extra->children[index] = item;
1772 } else {
1773 self->extra->length--;
1774 for (i = index; i < self->extra->length; i++)
1775 self->extra->children[i] = self->extra->children[i+1];
1776 }
1777
1778 Py_DECREF(old);
1779
1780 return 0;
1781}
1782
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001783static PyObject*
1784element_subscr(PyObject* self_, PyObject* item)
1785{
1786 ElementObject* self = (ElementObject*) self_;
1787
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001788 if (PyIndex_Check(item)) {
1789 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001790
1791 if (i == -1 && PyErr_Occurred()) {
1792 return NULL;
1793 }
1794 if (i < 0 && self->extra)
1795 i += self->extra->length;
1796 return element_getitem(self_, i);
1797 }
1798 else if (PySlice_Check(item)) {
1799 Py_ssize_t start, stop, step, slicelen, cur, i;
1800 PyObject* list;
1801
1802 if (!self->extra)
1803 return PyList_New(0);
1804
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001805 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001806 return NULL;
1807 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001808 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1809 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001810
1811 if (slicelen <= 0)
1812 return PyList_New(0);
1813 else {
1814 list = PyList_New(slicelen);
1815 if (!list)
1816 return NULL;
1817
1818 for (cur = start, i = 0; i < slicelen;
1819 cur += step, i++) {
1820 PyObject* item = self->extra->children[cur];
1821 Py_INCREF(item);
1822 PyList_SET_ITEM(list, i, item);
1823 }
1824
1825 return list;
1826 }
1827 }
1828 else {
1829 PyErr_SetString(PyExc_TypeError,
1830 "element indices must be integers");
1831 return NULL;
1832 }
1833}
1834
1835static int
1836element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1837{
1838 ElementObject* self = (ElementObject*) self_;
1839
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001840 if (PyIndex_Check(item)) {
1841 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001842
1843 if (i == -1 && PyErr_Occurred()) {
1844 return -1;
1845 }
1846 if (i < 0 && self->extra)
1847 i += self->extra->length;
1848 return element_setitem(self_, i, value);
1849 }
1850 else if (PySlice_Check(item)) {
1851 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1852
1853 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001854 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001855
Victor Stinner5f0af232013-07-11 23:01:36 +02001856 if (!self->extra) {
1857 if (create_extra(self, NULL) < 0)
1858 return -1;
1859 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001860
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001861 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001862 return -1;
1863 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001864 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1865 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001866
Eli Bendersky865756a2012-03-09 13:38:15 +02001867 if (value == NULL) {
1868 /* Delete slice */
1869 size_t cur;
1870 Py_ssize_t i;
1871
1872 if (slicelen <= 0)
1873 return 0;
1874
1875 /* Since we're deleting, the direction of the range doesn't matter,
1876 * so for simplicity make it always ascending.
1877 */
1878 if (step < 0) {
1879 stop = start + 1;
1880 start = stop + step * (slicelen - 1) - 1;
1881 step = -step;
1882 }
1883
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001884 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001885
1886 /* recycle is a list that will contain all the children
1887 * scheduled for removal.
1888 */
1889 if (!(recycle = PyList_New(slicelen))) {
Eli Bendersky865756a2012-03-09 13:38:15 +02001890 return -1;
1891 }
1892
1893 /* This loop walks over all the children that have to be deleted,
1894 * with cur pointing at them. num_moved is the amount of children
1895 * until the next deleted child that have to be "shifted down" to
1896 * occupy the deleted's places.
1897 * Note that in the ith iteration, shifting is done i+i places down
1898 * because i children were already removed.
1899 */
1900 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1901 /* Compute how many children have to be moved, clipping at the
1902 * list end.
1903 */
1904 Py_ssize_t num_moved = step - 1;
1905 if (cur + step >= (size_t)self->extra->length) {
1906 num_moved = self->extra->length - cur - 1;
1907 }
1908
1909 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1910
1911 memmove(
1912 self->extra->children + cur - i,
1913 self->extra->children + cur + 1,
1914 num_moved * sizeof(PyObject *));
1915 }
1916
1917 /* Leftover "tail" after the last removed child */
1918 cur = start + (size_t)slicelen * step;
1919 if (cur < (size_t)self->extra->length) {
1920 memmove(
1921 self->extra->children + cur - slicelen,
1922 self->extra->children + cur,
1923 (self->extra->length - cur) * sizeof(PyObject *));
1924 }
1925
1926 self->extra->length -= slicelen;
1927
1928 /* Discard the recycle list with all the deleted sub-elements */
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -06001929 Py_DECREF(recycle);
Eli Bendersky865756a2012-03-09 13:38:15 +02001930 return 0;
1931 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001932
1933 /* A new slice is actually being assigned */
1934 seq = PySequence_Fast(value, "");
1935 if (!seq) {
1936 PyErr_Format(
1937 PyExc_TypeError,
1938 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1939 );
1940 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001941 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001942 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001943
1944 if (step != 1 && newlen != slicelen)
1945 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001946 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001947 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001948 "attempt to assign sequence of size %zd "
1949 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001950 newlen, slicelen
1951 );
1952 return -1;
1953 }
1954
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001955 /* Resize before creating the recycle bin, to prevent refleaks. */
1956 if (newlen > slicelen) {
1957 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001958 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001959 return -1;
1960 }
1961 }
1962
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001963 for (i = 0; i < newlen; i++) {
1964 PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1965 if (!Element_Check(element)) {
1966 raise_type_error(element);
1967 Py_DECREF(seq);
1968 return -1;
1969 }
1970 }
1971
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001972 if (slicelen > 0) {
1973 /* to avoid recursive calls to this method (via decref), move
1974 old items to the recycle bin here, and get rid of them when
1975 we're done modifying the element */
1976 recycle = PyList_New(slicelen);
1977 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001978 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001979 return -1;
1980 }
1981 for (cur = start, i = 0; i < slicelen;
1982 cur += step, i++)
1983 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1984 }
1985
1986 if (newlen < slicelen) {
1987 /* delete slice */
1988 for (i = stop; i < self->extra->length; i++)
1989 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1990 } else if (newlen > slicelen) {
1991 /* insert slice */
1992 for (i = self->extra->length-1; i >= stop; i--)
1993 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1994 }
1995
1996 /* replace the slice */
1997 for (cur = start, i = 0; i < newlen;
1998 cur += step, i++) {
1999 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
2000 Py_INCREF(element);
2001 self->extra->children[cur] = element;
2002 }
2003
2004 self->extra->length += newlen - slicelen;
2005
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02002006 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002007
2008 /* discard the recycle bin, and everything in it */
2009 Py_XDECREF(recycle);
2010
2011 return 0;
2012 }
2013 else {
2014 PyErr_SetString(PyExc_TypeError,
2015 "element indices must be integers");
2016 return -1;
2017 }
2018}
2019
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002020static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02002021element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002022{
Serhiy Storchakadde08152015-11-25 15:28:13 +02002023 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002024 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002025 return res;
2026}
2027
Serhiy Storchakadde08152015-11-25 15:28:13 +02002028static PyObject*
2029element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002030{
Serhiy Storchakadde08152015-11-25 15:28:13 +02002031 PyObject *res = element_get_text(self);
2032 Py_XINCREF(res);
2033 return res;
2034}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002035
Serhiy Storchakadde08152015-11-25 15:28:13 +02002036static PyObject*
2037element_tail_getter(ElementObject *self, void *closure)
2038{
2039 PyObject *res = element_get_tail(self);
2040 Py_XINCREF(res);
2041 return res;
2042}
2043
2044static PyObject*
2045element_attrib_getter(ElementObject *self, void *closure)
2046{
2047 PyObject *res;
2048 if (!self->extra) {
2049 if (create_extra(self, NULL) < 0)
2050 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002051 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02002052 res = element_get_attrib(self);
2053 Py_XINCREF(res);
2054 return res;
2055}
Victor Stinner4d463432013-07-11 23:05:03 +02002056
/* macro for setter validation: a setter called with V == NULL is an
   attribute deletion, which is refused with AttributeError by returning -1
   from the enclosing function.  Wrapped in do { } while (0) so that it
   behaves as a single statement (safe inside an unbraced if/else). */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2065
Serhiy Storchakadde08152015-11-25 15:28:13 +02002066static int
2067element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2068{
2069 _VALIDATE_ATTR_VALUE(value);
2070 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002071 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002072 return 0;
2073}
2074
2075static int
2076element_text_setter(ElementObject *self, PyObject *value, void *closure)
2077{
2078 _VALIDATE_ATTR_VALUE(value);
2079 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002080 _set_joined_ptr(&self->text, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002081 return 0;
2082}
2083
2084static int
2085element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2086{
2087 _VALIDATE_ATTR_VALUE(value);
2088 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002089 _set_joined_ptr(&self->tail, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002090 return 0;
2091}
2092
2093static int
2094element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2095{
2096 _VALIDATE_ATTR_VALUE(value);
2097 if (!self->extra) {
2098 if (create_extra(self, NULL) < 0)
2099 return -1;
2100 }
2101 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002102 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002103 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002104}
2105
/* Sequence protocol table for Element objects; entries are positional, in
   PySequenceMethods declaration order. */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length,           /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem,                    /* sq_item */
    0,                                  /* was_sq_slice (unused) */
    element_setitem,                    /* sq_ass_item */
    0,                                  /* was_sq_ass_slice (unused) */
};
2115
Eli Bendersky64d11e62012-06-15 07:42:50 +03002116/******************************* Element iterator ****************************/
2117
/* ElementIterObject represents the iteration state over an XML element in
 * pre-order traversal. To keep track of which sub-element should be returned
 * next, a stack of parents is maintained. This is a standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
 * Each stack item contains the saved parent to which we should return after
 * the current one is exhausted, and the next child to examine in that parent.
 */
typedef struct ParentLocator_t {
    ElementObject *parent;      /* strong reference, held by the stack */
    Py_ssize_t child_index;     /* index of the next child to visit */
} ParentLocator;
2130
/* Iterator object shared by element iteration and text iteration. */
typedef struct {
    PyObject_HEAD
    ParentLocator *parent_stack;    /* PyMem-allocated array of stack items */
    Py_ssize_t parent_stack_used;   /* number of items currently on the stack */
    Py_ssize_t parent_stack_size;   /* allocated capacity of parent_stack */
    ElementObject *root_element;    /* element to start from; set to NULL once
                                       iteration has begun */
    PyObject *sought_tag;           /* tag to match against; Py_None matches
                                       every element */
    int gettext;                    /* non-zero: yield text/tail strings
                                       instead of elements */
} ElementIterObject;
2140
2141
2142static void
2143elementiter_dealloc(ElementIterObject *it)
2144{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002145 Py_ssize_t i = it->parent_stack_used;
2146 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002147 /* bpo-31095: UnTrack is needed before calling any callbacks */
2148 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002149 while (i--)
2150 Py_XDECREF(it->parent_stack[i].parent);
2151 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002152
2153 Py_XDECREF(it->sought_tag);
2154 Py_XDECREF(it->root_element);
2155
Eli Bendersky64d11e62012-06-15 07:42:50 +03002156 PyObject_GC_Del(it);
2157}
2158
2159static int
2160elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2161{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002162 Py_ssize_t i = it->parent_stack_used;
2163 while (i--)
2164 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002165
2166 Py_VISIT(it->root_element);
2167 Py_VISIT(it->sought_tag);
2168 return 0;
2169}
2170
2171/* Helper function for elementiter_next. Add a new parent to the parent stack.
2172 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002173static int
2174parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002175{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002176 ParentLocator *item;
2177
2178 if (it->parent_stack_used >= it->parent_stack_size) {
2179 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2180 ParentLocator *parent_stack = it->parent_stack;
2181 PyMem_Resize(parent_stack, ParentLocator, new_size);
2182 if (parent_stack == NULL)
2183 return -1;
2184 it->parent_stack = parent_stack;
2185 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002186 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002187 item = it->parent_stack + it->parent_stack_used++;
2188 Py_INCREF(parent);
2189 item->parent = parent;
2190 item->child_index = 0;
2191 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002192}
2193
2194static PyObject *
2195elementiter_next(ElementIterObject *it)
2196{
2197 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002198 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002199 * A short note on gettext: this function serves both the iter() and
2200 * itertext() methods to avoid code duplication. However, there are a few
2201 * small differences in the way these iterations work. Namely:
2202 * - itertext() only yields text from nodes that have it, and continues
2203 * iterating when a node doesn't have text (so it doesn't return any
2204 * node like iter())
2205 * - itertext() also has to handle tail, after finishing with all the
2206 * children of a node.
2207 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002208 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002209 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002210 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002211
2212 while (1) {
2213 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002214 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002215 * iterator is exhausted.
2216 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002217 if (!it->parent_stack_used) {
2218 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002219 PyErr_SetNone(PyExc_StopIteration);
2220 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002221 }
2222
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002223 elem = it->root_element; /* steals a reference */
2224 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002225 }
2226 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002227 /* See if there are children left to traverse in the current parent. If
2228 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002229 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002230 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2231 Py_ssize_t child_index = item->child_index;
2232 ElementObjectExtra *extra;
2233 elem = item->parent;
2234 extra = elem->extra;
2235 if (!extra || child_index >= extra->length) {
2236 it->parent_stack_used--;
2237 /* Note that extra condition on it->parent_stack_used here;
2238 * this is because itertext() is supposed to only return *inner*
2239 * text, not text following the element it began iteration with.
2240 */
2241 if (it->gettext && it->parent_stack_used) {
2242 text = element_get_tail(elem);
2243 goto gettext;
2244 }
2245 Py_DECREF(elem);
2246 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002247 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002248
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03002249 assert(Element_Check(extra->children[child_index]));
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002250 elem = (ElementObject *)extra->children[child_index];
2251 item->child_index++;
2252 Py_INCREF(elem);
2253 }
2254
2255 if (parent_stack_push_new(it, elem) < 0) {
2256 Py_DECREF(elem);
2257 PyErr_NoMemory();
2258 return NULL;
2259 }
2260 if (it->gettext) {
2261 text = element_get_text(elem);
2262 goto gettext;
2263 }
2264
2265 if (it->sought_tag == Py_None)
2266 return (PyObject *)elem;
2267
2268 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2269 if (rc > 0)
2270 return (PyObject *)elem;
2271
2272 Py_DECREF(elem);
2273 if (rc < 0)
2274 return NULL;
2275 continue;
2276
2277gettext:
2278 if (!text) {
2279 Py_DECREF(elem);
2280 return NULL;
2281 }
2282 if (text == Py_None) {
2283 Py_DECREF(elem);
2284 }
2285 else {
2286 Py_INCREF(text);
2287 Py_DECREF(elem);
2288 rc = PyObject_IsTrue(text);
2289 if (rc > 0)
2290 return text;
2291 Py_DECREF(text);
2292 if (rc < 0)
2293 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002294 }
2295 }
2296
2297 return NULL;
2298}
2299
2300
/* Type object for ElementIterObject: a GC-aware iterator whose only
   behaviour slots are dealloc, traverse, iter and iternext. */
static PyTypeObject ElementIter_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* Using the module's name since the pure-Python implementation does not
       have such a type. */
    "_elementtree._element_iterator",           /* tp_name */
    sizeof(ElementIterObject),                  /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)elementiter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)elementiter_traverse,         /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)elementiter_next,             /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    0,                                          /* tp_new */
};
2344
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002345#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002346
2347static PyObject *
2348create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2349{
2350 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002351
2352 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2353 if (!it)
2354 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002355
Victor Stinner4d463432013-07-11 23:05:03 +02002356 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002357 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002358 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002359 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002360 it->root_element = self;
2361
Eli Bendersky64d11e62012-06-15 07:42:50 +03002362 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002363
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002364 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002365 if (it->parent_stack == NULL) {
2366 Py_DECREF(it);
2367 PyErr_NoMemory();
2368 return NULL;
2369 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002370 it->parent_stack_used = 0;
2371 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002372
Eli Bendersky64d11e62012-06-15 07:42:50 +03002373 return (PyObject *)it;
2374}
2375
2376
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002377/* ==================================================================== */
2378/* the tree builder type */
2379
/* State of a TreeBuilder instance. */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    /* optional factories; NULL until set by __init__ */
    PyObject *element_factory;
    PyObject *comment_factory;
    PyObject *pi_factory;

    /* element tracing */
    PyObject *events_append; /* the append method of the list of events, or NULL */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
    PyObject *comment_event_obj;
    PyObject *pi_event_obj;

    /* boolean flags, 0 by default */
    char insert_comments;
    char insert_pis;
} TreeBuilderObject;
2409
/* True only for objects whose type is exactly TreeBuilder_Type (subclasses
   do not match). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002411
2412/* -------------------------------------------------------------------- */
2413/* constructor and destructor */
2414
Eli Bendersky58d548d2012-05-29 15:45:16 +03002415static PyObject *
2416treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002417{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002418 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2419 if (t != NULL) {
2420 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002421
Eli Bendersky58d548d2012-05-29 15:45:16 +03002422 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002423 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002424 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002425 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002426
Eli Bendersky58d548d2012-05-29 15:45:16 +03002427 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002428 t->element_factory = NULL;
Stefan Behnel43851a22019-05-01 21:20:38 +02002429 t->comment_factory = NULL;
2430 t->pi_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002431 t->stack = PyList_New(20);
2432 if (!t->stack) {
2433 Py_DECREF(t->this);
2434 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002435 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002436 return NULL;
2437 }
2438 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002439
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002440 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002441 t->start_event_obj = t->end_event_obj = NULL;
2442 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
Stefan Behnel43851a22019-05-01 21:20:38 +02002443 t->comment_event_obj = t->pi_event_obj = NULL;
2444 t->insert_comments = t->insert_pis = 0;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002445 }
2446 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002447}
2448
Serhiy Storchakacb985562015-05-04 15:32:48 +03002449/*[clinic input]
2450_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002451
Serhiy Storchakacb985562015-05-04 15:32:48 +03002452 element_factory: object = NULL
Stefan Behnel43851a22019-05-01 21:20:38 +02002453 *
2454 comment_factory: object = NULL
2455 pi_factory: object = NULL
2456 insert_comments: bool = False
2457 insert_pis: bool = False
Serhiy Storchakacb985562015-05-04 15:32:48 +03002458
2459[clinic start generated code]*/
2460
2461static int
2462_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
Stefan Behnel43851a22019-05-01 21:20:38 +02002463 PyObject *element_factory,
2464 PyObject *comment_factory,
2465 PyObject *pi_factory,
2466 int insert_comments, int insert_pis)
2467/*[clinic end generated code: output=8571d4dcadfdf952 input=1f967b5c245e0a71]*/
Serhiy Storchakacb985562015-05-04 15:32:48 +03002468{
Stefan Behnel43851a22019-05-01 21:20:38 +02002469 if (element_factory && element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002470 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002471 Py_XSETREF(self->element_factory, element_factory);
Stefan Behnel43851a22019-05-01 21:20:38 +02002472 } else {
2473 Py_CLEAR(self->element_factory);
2474 }
2475
2476 if (!comment_factory || comment_factory == Py_None) {
2477 elementtreestate *st = ET_STATE_GLOBAL;
2478 comment_factory = st->comment_factory;
2479 }
2480 if (comment_factory) {
2481 Py_INCREF(comment_factory);
2482 Py_XSETREF(self->comment_factory, comment_factory);
2483 self->insert_comments = insert_comments;
2484 } else {
2485 Py_CLEAR(self->comment_factory);
2486 self->insert_comments = 0;
2487 }
2488
2489 if (!pi_factory || pi_factory == Py_None) {
2490 elementtreestate *st = ET_STATE_GLOBAL;
2491 pi_factory = st->pi_factory;
2492 }
2493 if (pi_factory) {
2494 Py_INCREF(pi_factory);
2495 Py_XSETREF(self->pi_factory, pi_factory);
2496 self->insert_pis = insert_pis;
2497 } else {
2498 Py_CLEAR(self->pi_factory);
2499 self->insert_pis = 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002500 }
2501
Eli Bendersky58d548d2012-05-29 15:45:16 +03002502 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002503}
2504
Eli Bendersky48d358b2012-05-30 17:57:50 +03002505static int
2506treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2507{
Stefan Behnel43851a22019-05-01 21:20:38 +02002508 Py_VISIT(self->pi_event_obj);
2509 Py_VISIT(self->comment_event_obj);
Serhiy Storchakad2a75c62018-12-18 22:29:14 +02002510 Py_VISIT(self->end_ns_event_obj);
2511 Py_VISIT(self->start_ns_event_obj);
2512 Py_VISIT(self->end_event_obj);
2513 Py_VISIT(self->start_event_obj);
2514 Py_VISIT(self->events_append);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002515 Py_VISIT(self->root);
2516 Py_VISIT(self->this);
2517 Py_VISIT(self->last);
2518 Py_VISIT(self->data);
2519 Py_VISIT(self->stack);
Stefan Behnel43851a22019-05-01 21:20:38 +02002520 Py_VISIT(self->pi_factory);
2521 Py_VISIT(self->comment_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002522 Py_VISIT(self->element_factory);
2523 return 0;
2524}
2525
2526static int
2527treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002528{
Stefan Behnel43851a22019-05-01 21:20:38 +02002529 Py_CLEAR(self->pi_event_obj);
2530 Py_CLEAR(self->comment_event_obj);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002531 Py_CLEAR(self->end_ns_event_obj);
2532 Py_CLEAR(self->start_ns_event_obj);
2533 Py_CLEAR(self->end_event_obj);
2534 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002535 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002536 Py_CLEAR(self->stack);
2537 Py_CLEAR(self->data);
2538 Py_CLEAR(self->last);
2539 Py_CLEAR(self->this);
Stefan Behnel43851a22019-05-01 21:20:38 +02002540 Py_CLEAR(self->pi_factory);
2541 Py_CLEAR(self->comment_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002542 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002543 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002544 return 0;
2545}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002546
Eli Bendersky48d358b2012-05-30 17:57:50 +03002547static void
2548treebuilder_dealloc(TreeBuilderObject *self)
2549{
2550 PyObject_GC_UnTrack(self);
2551 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002552 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002553}
2554
2555/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002556/* helpers for handling of arbitrary element-like objects */
2557
Stefan Behnel43851a22019-05-01 21:20:38 +02002558/*[clinic input]
2559_elementtree._set_factories
2560
2561 comment_factory: object
2562 pi_factory: object
2563 /
2564
2565Change the factories used to create comments and processing instructions.
2566
2567For internal use only.
2568[clinic start generated code]*/
2569
2570static PyObject *
2571_elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
2572 PyObject *pi_factory)
2573/*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/
2574{
2575 elementtreestate *st = ET_STATE_GLOBAL;
2576 PyObject *old;
2577
2578 if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) {
2579 PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s",
2580 Py_TYPE(comment_factory)->tp_name);
2581 return NULL;
2582 }
2583 if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) {
2584 PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s",
2585 Py_TYPE(pi_factory)->tp_name);
2586 return NULL;
2587 }
2588
2589 old = PyTuple_Pack(2,
2590 st->comment_factory ? st->comment_factory : Py_None,
2591 st->pi_factory ? st->pi_factory : Py_None);
2592
2593 if (comment_factory == Py_None) {
2594 Py_CLEAR(st->comment_factory);
2595 } else {
2596 Py_INCREF(comment_factory);
2597 Py_XSETREF(st->comment_factory, comment_factory);
2598 }
2599 if (pi_factory == Py_None) {
2600 Py_CLEAR(st->pi_factory);
2601 } else {
2602 Py_INCREF(pi_factory);
2603 Py_XSETREF(st->pi_factory, pi_factory);
2604 }
2605
2606 return old;
2607}
2608
Antoine Pitrouee329312012-10-04 19:53:29 +02002609static int
Serhiy Storchaka576def02017-03-30 09:47:31 +03002610treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
Antoine Pitrouee329312012-10-04 19:53:29 +02002611 PyObject **dest, _Py_Identifier *name)
2612{
2613 if (Element_CheckExact(element)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002614 PyObject *tmp = JOIN_OBJ(*dest);
2615 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2616 *data = NULL;
2617 Py_DECREF(tmp);
Antoine Pitrouee329312012-10-04 19:53:29 +02002618 return 0;
2619 }
2620 else {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002621 PyObject *joined = list_join(*data);
Antoine Pitrouee329312012-10-04 19:53:29 +02002622 int r;
2623 if (joined == NULL)
2624 return -1;
2625 r = _PyObject_SetAttrId(element, name, joined);
2626 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002627 if (r < 0)
2628 return -1;
2629 Py_CLEAR(*data);
2630 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002631 }
2632}
2633
Serhiy Storchaka576def02017-03-30 09:47:31 +03002634LOCAL(int)
2635treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002636{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002637 PyObject *element = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002638
Serhiy Storchaka576def02017-03-30 09:47:31 +03002639 if (!self->data) {
2640 return 0;
2641 }
2642
2643 if (self->this == element) {
2644 _Py_IDENTIFIER(text);
2645 return treebuilder_set_element_text_or_tail(
2646 element, &self->data,
2647 &((ElementObject *) element)->text, &PyId_text);
2648 }
2649 else {
2650 _Py_IDENTIFIER(tail);
2651 return treebuilder_set_element_text_or_tail(
2652 element, &self->data,
2653 &((ElementObject *) element)->tail, &PyId_tail);
2654 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002655}
2656
2657static int
2658treebuilder_add_subelement(PyObject *element, PyObject *child)
2659{
2660 _Py_IDENTIFIER(append);
2661 if (Element_CheckExact(element)) {
2662 ElementObject *elem = (ElementObject *) element;
2663 return element_add_subelement(elem, child);
2664 }
2665 else {
2666 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002667 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002668 if (res == NULL)
2669 return -1;
2670 Py_DECREF(res);
2671 return 0;
2672 }
2673}
2674
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002675LOCAL(int)
2676treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2677 PyObject *node)
2678{
2679 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002680 PyObject *res;
2681 PyObject *event = PyTuple_Pack(2, action, node);
2682 if (event == NULL)
2683 return -1;
Stefan Behnel43851a22019-05-01 21:20:38 +02002684 res = _PyObject_FastCall(self->events_append, &event, 1);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002685 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002686 if (res == NULL)
2687 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002688 Py_DECREF(res);
2689 }
2690 return 0;
2691}
2692
Antoine Pitrouee329312012-10-04 19:53:29 +02002693/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002694/* handlers */
2695
2696LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002697treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2698 PyObject* attrib)
2699{
2700 PyObject* node;
2701 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002702 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002703
Serhiy Storchaka576def02017-03-30 09:47:31 +03002704 if (treebuilder_flush_data(self) < 0) {
2705 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002706 }
2707
Stefan Behnel43851a22019-05-01 21:20:38 +02002708 if (!self->element_factory) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002709 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002710 } else if (attrib == Py_None) {
2711 attrib = PyDict_New();
2712 if (!attrib)
2713 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002714 node = PyObject_CallFunctionObjArgs(self->element_factory,
2715 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002716 Py_DECREF(attrib);
2717 }
2718 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002719 node = PyObject_CallFunctionObjArgs(self->element_factory,
2720 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002721 }
2722 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002723 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002724 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002725
Antoine Pitrouee329312012-10-04 19:53:29 +02002726 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002727
2728 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002729 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002730 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002731 } else {
2732 if (self->root) {
2733 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002734 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002735 "multiple elements on top level"
2736 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002737 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002738 }
2739 Py_INCREF(node);
2740 self->root = node;
2741 }
2742
2743 if (self->index < PyList_GET_SIZE(self->stack)) {
2744 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002745 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002746 Py_INCREF(this);
2747 } else {
2748 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002749 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002750 }
2751 self->index++;
2752
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002753 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002754 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002755 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002756 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002757
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002758 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2759 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002760
2761 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002762
2763 error:
2764 Py_DECREF(node);
2765 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002766}
2767
2768LOCAL(PyObject*)
2769treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2770{
2771 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002772 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002773 /* ignore calls to data before the first call to start */
2774 Py_RETURN_NONE;
2775 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002776 /* store the first item as is */
2777 Py_INCREF(data); self->data = data;
2778 } else {
2779 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002780 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2781 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002782 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002783 /* expat often generates single character data sections; handle
2784 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002785 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2786 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002787 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002788 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002789 } else if (PyList_CheckExact(self->data)) {
2790 if (PyList_Append(self->data, data) < 0)
2791 return NULL;
2792 } else {
2793 PyObject* list = PyList_New(2);
2794 if (!list)
2795 return NULL;
2796 PyList_SET_ITEM(list, 0, self->data);
2797 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2798 self->data = list;
2799 }
2800 }
2801
2802 Py_RETURN_NONE;
2803}
2804
2805LOCAL(PyObject*)
2806treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2807{
2808 PyObject* item;
2809
Serhiy Storchaka576def02017-03-30 09:47:31 +03002810 if (treebuilder_flush_data(self) < 0) {
2811 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002812 }
2813
2814 if (self->index == 0) {
2815 PyErr_SetString(
2816 PyExc_IndexError,
2817 "pop from empty stack"
2818 );
2819 return NULL;
2820 }
2821
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002822 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002823 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002824 self->index--;
2825 self->this = PyList_GET_ITEM(self->stack, self->index);
2826 Py_INCREF(self->this);
2827 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002828
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002829 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2830 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002831
2832 Py_INCREF(self->last);
2833 return (PyObject*) self->last;
2834}
2835
Stefan Behnel43851a22019-05-01 21:20:38 +02002836LOCAL(PyObject*)
2837treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
2838{
2839 PyObject* comment = NULL;
2840 PyObject* this;
2841
2842 if (treebuilder_flush_data(self) < 0) {
2843 return NULL;
2844 }
2845
2846 if (self->comment_factory) {
2847 comment = _PyObject_FastCall(self->comment_factory, &text, 1);
2848 if (!comment)
2849 return NULL;
2850
2851 this = self->this;
2852 if (self->insert_comments && this != Py_None) {
2853 if (treebuilder_add_subelement(this, comment) < 0)
2854 goto error;
2855 }
2856 } else {
2857 Py_INCREF(text);
2858 comment = text;
2859 }
2860
2861 if (self->events_append && self->comment_event_obj) {
2862 if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0)
2863 goto error;
2864 }
2865
2866 return comment;
2867
2868 error:
2869 Py_DECREF(comment);
2870 return NULL;
2871}
2872
2873LOCAL(PyObject*)
2874treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
2875{
2876 PyObject* pi = NULL;
2877 PyObject* this;
2878 PyObject* stack[2] = {target, text};
2879
2880 if (treebuilder_flush_data(self) < 0) {
2881 return NULL;
2882 }
2883
2884 if (self->pi_factory) {
2885 pi = _PyObject_FastCall(self->pi_factory, stack, 2);
2886 if (!pi) {
2887 return NULL;
2888 }
2889
2890 this = self->this;
2891 if (self->insert_pis && this != Py_None) {
2892 if (treebuilder_add_subelement(this, pi) < 0)
2893 goto error;
2894 }
2895 } else {
2896 pi = PyTuple_Pack(2, target, text);
2897 if (!pi) {
2898 return NULL;
2899 }
2900 }
2901
2902 if (self->events_append && self->pi_event_obj) {
2903 if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0)
2904 goto error;
2905 }
2906
2907 return pi;
2908
2909 error:
2910 Py_DECREF(pi);
2911 return NULL;
2912}
2913
Stefan Behneldde3eeb2019-05-01 21:49:58 +02002914LOCAL(PyObject*)
2915treebuilder_handle_start_ns(TreeBuilderObject* self, PyObject* prefix, PyObject* uri)
2916{
2917 PyObject* parcel;
2918
2919 if (self->events_append && self->start_ns_event_obj) {
2920 parcel = PyTuple_Pack(2, prefix, uri);
2921 if (!parcel) {
2922 return NULL;
2923 }
2924
2925 if (treebuilder_append_event(self, self->start_ns_event_obj, parcel) < 0) {
2926 Py_DECREF(parcel);
2927 return NULL;
2928 }
2929 Py_DECREF(parcel);
2930 }
2931
2932 Py_RETURN_NONE;
2933}
2934
2935LOCAL(PyObject*)
2936treebuilder_handle_end_ns(TreeBuilderObject* self, PyObject* prefix)
2937{
2938 if (self->events_append && self->end_ns_event_obj) {
2939 if (treebuilder_append_event(self, self->end_ns_event_obj, prefix) < 0) {
2940 return NULL;
2941 }
2942 }
2943
2944 Py_RETURN_NONE;
2945}
2946
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002947/* -------------------------------------------------------------------- */
2948/* methods (in alphabetical order) */
2949
Serhiy Storchakacb985562015-05-04 15:32:48 +03002950/*[clinic input]
2951_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002952
Serhiy Storchakacb985562015-05-04 15:32:48 +03002953 data: object
2954 /
2955
2956[clinic start generated code]*/
2957
2958static PyObject *
2959_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2960/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2961{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002962 return treebuilder_handle_data(self, data);
2963}
2964
Serhiy Storchakacb985562015-05-04 15:32:48 +03002965/*[clinic input]
2966_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002967
Serhiy Storchakacb985562015-05-04 15:32:48 +03002968 tag: object
2969 /
2970
2971[clinic start generated code]*/
2972
2973static PyObject *
2974_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2975/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2976{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002977 return treebuilder_handle_end(self, tag);
2978}
2979
Stefan Behnel43851a22019-05-01 21:20:38 +02002980/*[clinic input]
2981_elementtree.TreeBuilder.comment
2982
2983 text: object
2984 /
2985
2986[clinic start generated code]*/
2987
2988static PyObject *
2989_elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text)
2990/*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/
2991{
2992 return treebuilder_handle_comment(self, text);
2993}
2994
2995/*[clinic input]
2996_elementtree.TreeBuilder.pi
2997
2998 target: object
2999 text: object = None
3000 /
3001
3002[clinic start generated code]*/
3003
3004static PyObject *
3005_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target,
3006 PyObject *text)
3007/*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/
3008{
3009 return treebuilder_handle_pi(self, target, text);
3010}
3011
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003012LOCAL(PyObject*)
3013treebuilder_done(TreeBuilderObject* self)
3014{
3015 PyObject* res;
3016
3017 /* FIXME: check stack size? */
3018
3019 if (self->root)
3020 res = self->root;
3021 else
3022 res = Py_None;
3023
3024 Py_INCREF(res);
3025 return res;
3026}
3027
Serhiy Storchakacb985562015-05-04 15:32:48 +03003028/*[clinic input]
3029_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003030
Serhiy Storchakacb985562015-05-04 15:32:48 +03003031[clinic start generated code]*/
3032
3033static PyObject *
3034_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
3035/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
3036{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003037 return treebuilder_done(self);
3038}
3039
Serhiy Storchakacb985562015-05-04 15:32:48 +03003040/*[clinic input]
3041_elementtree.TreeBuilder.start
3042
3043 tag: object
3044 attrs: object = None
3045 /
3046
3047[clinic start generated code]*/
3048
3049static PyObject *
3050_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
3051 PyObject *attrs)
3052/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003053{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003054 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003055}
3056
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003057/* ==================================================================== */
3058/* the expat interface */
3059
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003060#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003061#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07003062
/* The PyExpat_CAPI structure is an immutable dispatch table, so a single
 * pointer to it can be cached in a file-level variable instead of being
 * kept in per-module state.
 */
static struct PyExpat_CAPI *expat_capi;

/* Call-through helper: EXPAT(name) selects the entry `name` from the
   cached dispatch table. */
#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003068
Eli Bendersky52467b12012-06-01 07:13:08 +03003069static XML_Memory_Handling_Suite ExpatMemoryHandler = {
3070 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
3071
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003072typedef struct {
3073 PyObject_HEAD
3074
3075 XML_Parser parser;
3076
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003077 PyObject *target;
3078 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003079
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003080 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003081
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003082 PyObject *handle_start_ns;
3083 PyObject *handle_end_ns;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003084 PyObject *handle_start;
3085 PyObject *handle_data;
3086 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003087
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003088 PyObject *handle_comment;
3089 PyObject *handle_pi;
3090 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003091
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003092 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003093
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003094} XMLParserObject;
3095
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003096/* helpers */
3097
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003098LOCAL(PyObject*)
3099makeuniversal(XMLParserObject* self, const char* string)
3100{
3101 /* convert a UTF-8 tag/attribute name from the expat parser
3102 to a universal name string */
3103
Antoine Pitrouc1948842012-10-01 23:40:37 +02003104 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003105 PyObject* key;
3106 PyObject* value;
3107
3108 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00003109 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003110 if (!key)
3111 return NULL;
3112
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003113 value = PyDict_GetItemWithError(self->names, key);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003114
3115 if (value) {
3116 Py_INCREF(value);
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003117 }
3118 else if (!PyErr_Occurred()) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003119 /* new name. convert to universal name, and decode as
3120 necessary */
3121
3122 PyObject* tag;
3123 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02003124 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003125
3126 /* look for namespace separator */
3127 for (i = 0; i < size; i++)
3128 if (string[i] == '}')
3129 break;
3130 if (i != size) {
3131 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00003132 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02003133 if (tag == NULL) {
3134 Py_DECREF(key);
3135 return NULL;
3136 }
Christian Heimes72b710a2008-05-26 13:28:38 +00003137 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003138 p[0] = '{';
3139 memcpy(p+1, string, size);
3140 size++;
3141 } else {
3142 /* plain name; use key as tag */
3143 Py_INCREF(key);
3144 tag = key;
3145 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003146
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003147 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00003148 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00003149 value = PyUnicode_DecodeUTF8(p, size, "strict");
3150 Py_DECREF(tag);
3151 if (!value) {
3152 Py_DECREF(key);
3153 return NULL;
3154 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003155
3156 /* add to names dictionary */
3157 if (PyDict_SetItem(self->names, key, value) < 0) {
3158 Py_DECREF(key);
3159 Py_DECREF(value);
3160 return NULL;
3161 }
3162 }
3163
3164 Py_DECREF(key);
3165 return value;
3166}
3167
Eli Bendersky5b77d812012-03-16 08:20:05 +02003168/* Set the ParseError exception with the given parameters.
3169 * If message is not NULL, it's used as the error string. Otherwise, the
3170 * message string is the default for the given error_code.
3171*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003172static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003173expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
3174 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003175{
Eli Bendersky5b77d812012-03-16 08:20:05 +02003176 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003177 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003178
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003179 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02003180 message ? message : EXPAT(ErrorString)(error_code),
3181 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01003182 if (errmsg == NULL)
3183 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003184
Stefan Behnel43851a22019-05-01 21:20:38 +02003185 error = _PyObject_FastCall(st->parseerror_obj, &errmsg, 1);
Victor Stinner499dfcf2011-03-21 13:26:24 +01003186 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003187 if (!error)
3188 return;
3189
Eli Bendersky5b77d812012-03-16 08:20:05 +02003190 /* Add code and position attributes */
3191 code = PyLong_FromLong((long)error_code);
3192 if (!code) {
3193 Py_DECREF(error);
3194 return;
3195 }
3196 if (PyObject_SetAttrString(error, "code", code) == -1) {
3197 Py_DECREF(error);
3198 Py_DECREF(code);
3199 return;
3200 }
3201 Py_DECREF(code);
3202
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003203 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003204 if (!position) {
3205 Py_DECREF(error);
3206 return;
3207 }
3208 if (PyObject_SetAttrString(error, "position", position) == -1) {
3209 Py_DECREF(error);
3210 Py_DECREF(position);
3211 return;
3212 }
3213 Py_DECREF(position);
3214
Eli Bendersky532d03e2013-08-10 08:00:39 -07003215 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003216 Py_DECREF(error);
3217}
3218
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003219/* -------------------------------------------------------------------- */
3220/* handlers */
3221
3222static void
3223expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
3224 int data_len)
3225{
3226 PyObject* key;
3227 PyObject* value;
3228 PyObject* res;
3229
3230 if (data_len < 2 || data_in[0] != '&')
3231 return;
3232
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003233 if (PyErr_Occurred())
3234 return;
3235
Neal Norwitz0269b912007-08-08 06:56:02 +00003236 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003237 if (!key)
3238 return;
3239
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003240 value = PyDict_GetItemWithError(self->entity, key);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003241
3242 if (value) {
3243 if (TreeBuilder_CheckExact(self->target))
3244 res = treebuilder_handle_data(
3245 (TreeBuilderObject*) self->target, value
3246 );
3247 else if (self->handle_data)
Stefan Behnel43851a22019-05-01 21:20:38 +02003248 res = _PyObject_FastCall(self->handle_data, &value, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003249 else
3250 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003251 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003252 } else if (!PyErr_Occurred()) {
3253 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00003254 char message[128] = "undefined entity ";
3255 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003256 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003257 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003258 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003259 EXPAT(GetErrorColumnNumber)(self->parser),
3260 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003261 );
3262 }
3263
3264 Py_DECREF(key);
3265}
3266
3267static void
3268expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
3269 const XML_Char **attrib_in)
3270{
3271 PyObject* res;
3272 PyObject* tag;
3273 PyObject* attrib;
3274 int ok;
3275
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003276 if (PyErr_Occurred())
3277 return;
3278
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003279 /* tag name */
3280 tag = makeuniversal(self, tag_in);
3281 if (!tag)
3282 return; /* parser will look for errors */
3283
3284 /* attributes */
3285 if (attrib_in[0]) {
3286 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003287 if (!attrib) {
3288 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003289 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003290 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003291 while (attrib_in[0] && attrib_in[1]) {
3292 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00003293 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003294 if (!key || !value) {
3295 Py_XDECREF(value);
3296 Py_XDECREF(key);
3297 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003298 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003299 return;
3300 }
3301 ok = PyDict_SetItem(attrib, key, value);
3302 Py_DECREF(value);
3303 Py_DECREF(key);
3304 if (ok < 0) {
3305 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003306 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003307 return;
3308 }
3309 attrib_in += 2;
3310 }
3311 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003312 Py_INCREF(Py_None);
3313 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03003314 }
3315
3316 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003317 /* shortcut */
3318 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3319 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003320 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003321 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003322 if (attrib == Py_None) {
3323 Py_DECREF(attrib);
3324 attrib = PyDict_New();
3325 if (!attrib) {
3326 Py_DECREF(tag);
3327 return;
3328 }
3329 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003330 res = PyObject_CallFunctionObjArgs(self->handle_start,
3331 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003332 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003333 res = NULL;
3334
3335 Py_DECREF(tag);
3336 Py_DECREF(attrib);
3337
3338 Py_XDECREF(res);
3339}
3340
3341static void
3342expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3343 int data_len)
3344{
3345 PyObject* data;
3346 PyObject* res;
3347
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003348 if (PyErr_Occurred())
3349 return;
3350
Neal Norwitz0269b912007-08-08 06:56:02 +00003351 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003352 if (!data)
3353 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003354
3355 if (TreeBuilder_CheckExact(self->target))
3356 /* shortcut */
3357 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3358 else if (self->handle_data)
Stefan Behnel43851a22019-05-01 21:20:38 +02003359 res = _PyObject_FastCall(self->handle_data, &data, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003360 else
3361 res = NULL;
3362
3363 Py_DECREF(data);
3364
3365 Py_XDECREF(res);
3366}
3367
3368static void
3369expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3370{
3371 PyObject* tag;
3372 PyObject* res = NULL;
3373
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003374 if (PyErr_Occurred())
3375 return;
3376
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003377 if (TreeBuilder_CheckExact(self->target))
3378 /* shortcut */
3379 /* the standard tree builder doesn't look at the end tag */
3380 res = treebuilder_handle_end(
3381 (TreeBuilderObject*) self->target, Py_None
3382 );
3383 else if (self->handle_end) {
3384 tag = makeuniversal(self, tag_in);
3385 if (tag) {
Stefan Behnel43851a22019-05-01 21:20:38 +02003386 res = _PyObject_FastCall(self->handle_end, &tag, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003387 Py_DECREF(tag);
3388 }
3389 }
3390
3391 Py_XDECREF(res);
3392}
3393
3394static void
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003395expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix_in,
3396 const XML_Char *uri_in)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003397{
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003398 PyObject* res = NULL;
3399 PyObject* uri;
3400 PyObject* prefix;
3401 PyObject* stack[2];
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003402
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003403 if (PyErr_Occurred())
3404 return;
3405
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003406 if (!uri_in)
3407 uri_in = "";
3408 if (!prefix_in)
3409 prefix_in = "";
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003410
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003411 if (TreeBuilder_CheckExact(self->target)) {
3412 /* shortcut - TreeBuilder does not actually implement .start_ns() */
3413 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003414
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003415 if (target->events_append && target->start_ns_event_obj) {
3416 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3417 if (!prefix)
3418 return;
3419 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3420 if (!uri) {
3421 Py_DECREF(prefix);
3422 return;
3423 }
3424
3425 res = treebuilder_handle_start_ns(target, prefix, uri);
3426 Py_DECREF(uri);
3427 Py_DECREF(prefix);
3428 }
3429 } else if (self->handle_start_ns) {
3430 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3431 if (!prefix)
3432 return;
3433 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3434 if (!uri) {
3435 Py_DECREF(prefix);
3436 return;
3437 }
3438
3439 stack[0] = prefix;
3440 stack[1] = uri;
3441 res = _PyObject_FastCall(self->handle_start_ns, stack, 2);
3442 Py_DECREF(uri);
3443 Py_DECREF(prefix);
3444 }
3445
3446 Py_XDECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003447}
3448
3449static void
3450expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3451{
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003452 PyObject *res = NULL;
3453 PyObject* prefix;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003454
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003455 if (PyErr_Occurred())
3456 return;
3457
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003458 if (!prefix_in)
3459 prefix_in = "";
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003460
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003461 if (TreeBuilder_CheckExact(self->target)) {
3462 /* shortcut - TreeBuilder does not actually implement .end_ns() */
3463 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3464
3465 if (target->events_append && target->end_ns_event_obj) {
3466 res = treebuilder_handle_end_ns(target, Py_None);
3467 }
3468 } else if (self->handle_end_ns) {
3469 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3470 if (!prefix)
3471 return;
3472
3473 res = _PyObject_FastCall(self->handle_end_ns, &prefix, 1);
3474 Py_DECREF(prefix);
3475 }
3476
3477 Py_XDECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003478}
3479
3480static void
3481expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3482{
Stefan Behnel43851a22019-05-01 21:20:38 +02003483 PyObject* comment = NULL;
3484 PyObject* res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003485
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003486 if (PyErr_Occurred())
3487 return;
3488
Stefan Behnel43851a22019-05-01 21:20:38 +02003489 if (TreeBuilder_CheckExact(self->target)) {
3490 /* shortcut */
3491 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3492
Neal Norwitz0269b912007-08-08 06:56:02 +00003493 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Stefan Behnel43851a22019-05-01 21:20:38 +02003494 if (!comment)
3495 return; /* parser will look for errors */
3496
3497 res = treebuilder_handle_comment(target, comment);
3498 } else if (self->handle_comment) {
3499 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3500 if (!comment)
3501 return;
3502
3503 res = _PyObject_FastCall(self->handle_comment, &comment, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003504 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003505
3506 Py_XDECREF(res);
3507 Py_DECREF(comment);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003508}
3509
Eli Bendersky45839902013-01-13 05:14:47 -08003510static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003511expat_start_doctype_handler(XMLParserObject *self,
3512 const XML_Char *doctype_name,
3513 const XML_Char *sysid,
3514 const XML_Char *pubid,
3515 int has_internal_subset)
3516{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003517 _Py_IDENTIFIER(doctype);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003518 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003519 PyObject *res;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003520
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003521 if (PyErr_Occurred())
3522 return;
3523
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003524 doctype_name_obj = makeuniversal(self, doctype_name);
3525 if (!doctype_name_obj)
3526 return;
3527
3528 if (sysid) {
3529 sysid_obj = makeuniversal(self, sysid);
3530 if (!sysid_obj) {
3531 Py_DECREF(doctype_name_obj);
3532 return;
3533 }
3534 } else {
3535 Py_INCREF(Py_None);
3536 sysid_obj = Py_None;
3537 }
3538
3539 if (pubid) {
3540 pubid_obj = makeuniversal(self, pubid);
3541 if (!pubid_obj) {
3542 Py_DECREF(doctype_name_obj);
3543 Py_DECREF(sysid_obj);
3544 return;
3545 }
3546 } else {
3547 Py_INCREF(Py_None);
3548 pubid_obj = Py_None;
3549 }
3550
3551 /* If the target has a handler for doctype, call it. */
3552 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003553 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3554 doctype_name_obj, pubid_obj,
3555 sysid_obj, NULL);
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003556 Py_XDECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003557 }
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003558 else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3559 (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3560 "The doctype() method of XMLParser is ignored. "
3561 "Define doctype() method on the TreeBuilder target.",
3562 1);
Serhiy Storchakaee98e7b2018-07-25 14:52:45 +03003563 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003564 }
3565
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003566 Py_DECREF(doctype_name_obj);
3567 Py_DECREF(pubid_obj);
3568 Py_DECREF(sysid_obj);
3569}
3570
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003571static void
3572expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3573 const XML_Char* data_in)
3574{
Stefan Behnel43851a22019-05-01 21:20:38 +02003575 PyObject* pi_target = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003576 PyObject* data;
3577 PyObject* res;
Stefan Behnel43851a22019-05-01 21:20:38 +02003578 PyObject* stack[2];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003579
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003580 if (PyErr_Occurred())
3581 return;
3582
Stefan Behnel43851a22019-05-01 21:20:38 +02003583 if (TreeBuilder_CheckExact(self->target)) {
3584 /* shortcut */
3585 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3586
3587 if (target->events_append && target->pi_event_obj) {
3588 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3589 if (!pi_target)
3590 goto error;
3591 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3592 if (!data)
3593 goto error;
3594 res = treebuilder_handle_pi(target, pi_target, data);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003595 Py_XDECREF(res);
3596 Py_DECREF(data);
Stefan Behnel43851a22019-05-01 21:20:38 +02003597 Py_DECREF(pi_target);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003598 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003599 } else if (self->handle_pi) {
3600 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3601 if (!pi_target)
3602 goto error;
3603 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3604 if (!data)
3605 goto error;
3606
3607 stack[0] = pi_target;
3608 stack[1] = data;
3609 res = _PyObject_FastCall(self->handle_pi, stack, 2);
3610 Py_XDECREF(res);
3611 Py_DECREF(data);
3612 Py_DECREF(pi_target);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003613 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003614
3615 return;
3616
3617 error:
3618 Py_XDECREF(pi_target);
3619 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003620}
3621
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003622/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003623
Eli Bendersky52467b12012-06-01 07:13:08 +03003624static PyObject *
3625xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003626{
Eli Bendersky52467b12012-06-01 07:13:08 +03003627 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3628 if (self) {
3629 self->parser = NULL;
3630 self->target = self->entity = self->names = NULL;
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003631 self->handle_start_ns = self->handle_end_ns = NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003632 self->handle_start = self->handle_data = self->handle_end = NULL;
3633 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003634 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003635 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003636 return (PyObject *)self;
3637}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003638
scoderc8d8e152017-09-14 22:00:03 +02003639static int
3640ignore_attribute_error(PyObject *value)
3641{
3642 if (value == NULL) {
3643 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3644 return -1;
3645 }
3646 PyErr_Clear();
3647 }
3648 return 0;
3649}
3650
Serhiy Storchakacb985562015-05-04 15:32:48 +03003651/*[clinic input]
3652_elementtree.XMLParser.__init__
3653
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003654 *
Serhiy Storchakacb985562015-05-04 15:32:48 +03003655 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003656 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003657
3658[clinic start generated code]*/
3659
Eli Bendersky52467b12012-06-01 07:13:08 +03003660static int
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003661_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3662 const char *encoding)
3663/*[clinic end generated code: output=3ae45ec6cdf344e4 input=96288fcba916cfce]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003664{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003665 self->entity = PyDict_New();
3666 if (!self->entity)
3667 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003668
Serhiy Storchakacb985562015-05-04 15:32:48 +03003669 self->names = PyDict_New();
3670 if (!self->names) {
3671 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003672 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003673 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003674
Serhiy Storchakacb985562015-05-04 15:32:48 +03003675 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3676 if (!self->parser) {
3677 Py_CLEAR(self->entity);
3678 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003679 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003680 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003681 }
Christian Heimescb5778f2018-09-18 14:38:58 +02003682 /* expat < 2.1.0 has no XML_SetHashSalt() */
3683 if (EXPAT(SetHashSalt) != NULL) {
3684 EXPAT(SetHashSalt)(self->parser,
3685 (unsigned long)_Py_HashSecret.expat.hashsalt);
3686 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003687
Eli Bendersky52467b12012-06-01 07:13:08 +03003688 if (target) {
3689 Py_INCREF(target);
3690 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003691 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003692 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003693 Py_CLEAR(self->entity);
3694 Py_CLEAR(self->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003695 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003696 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003697 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003698 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003699
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003700 self->handle_start_ns = PyObject_GetAttrString(target, "start_ns");
3701 if (ignore_attribute_error(self->handle_start_ns)) {
3702 return -1;
3703 }
3704 self->handle_end_ns = PyObject_GetAttrString(target, "end_ns");
3705 if (ignore_attribute_error(self->handle_end_ns)) {
3706 return -1;
3707 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003708 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003709 if (ignore_attribute_error(self->handle_start)) {
3710 return -1;
3711 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003712 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003713 if (ignore_attribute_error(self->handle_data)) {
3714 return -1;
3715 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003716 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003717 if (ignore_attribute_error(self->handle_end)) {
3718 return -1;
3719 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003720 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003721 if (ignore_attribute_error(self->handle_comment)) {
3722 return -1;
3723 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003724 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003725 if (ignore_attribute_error(self->handle_pi)) {
3726 return -1;
3727 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003728 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003729 if (ignore_attribute_error(self->handle_close)) {
3730 return -1;
3731 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003732 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003733 if (ignore_attribute_error(self->handle_doctype)) {
3734 return -1;
3735 }
Eli Bendersky45839902013-01-13 05:14:47 -08003736
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003737 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003738 EXPAT(SetUserData)(self->parser, self);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003739 if (self->handle_start_ns || self->handle_end_ns)
3740 EXPAT(SetNamespaceDeclHandler)(
3741 self->parser,
3742 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3743 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3744 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003745 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003746 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003747 (XML_StartElementHandler) expat_start_handler,
3748 (XML_EndElementHandler) expat_end_handler
3749 );
3750 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003751 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003752 (XML_DefaultHandler) expat_default_handler
3753 );
3754 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003755 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003756 (XML_CharacterDataHandler) expat_data_handler
3757 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003758 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003759 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003760 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003761 (XML_CommentHandler) expat_comment_handler
3762 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003763 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003764 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003765 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003766 (XML_ProcessingInstructionHandler) expat_pi_handler
3767 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003768 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003769 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003770 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3771 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003772 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003773 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003774 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003775 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003776
Eli Bendersky52467b12012-06-01 07:13:08 +03003777 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003778}
3779
Eli Bendersky52467b12012-06-01 07:13:08 +03003780static int
3781xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3782{
3783 Py_VISIT(self->handle_close);
3784 Py_VISIT(self->handle_pi);
3785 Py_VISIT(self->handle_comment);
3786 Py_VISIT(self->handle_end);
3787 Py_VISIT(self->handle_data);
3788 Py_VISIT(self->handle_start);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003789 Py_VISIT(self->handle_start_ns);
3790 Py_VISIT(self->handle_end_ns);
3791 Py_VISIT(self->handle_doctype);
Eli Bendersky52467b12012-06-01 07:13:08 +03003792
3793 Py_VISIT(self->target);
3794 Py_VISIT(self->entity);
3795 Py_VISIT(self->names);
3796
3797 return 0;
3798}
3799
3800static int
3801xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003802{
Victor Stinnere727d412017-09-18 05:29:37 -07003803 if (self->parser != NULL) {
3804 XML_Parser parser = self->parser;
3805 self->parser = NULL;
3806 EXPAT(ParserFree)(parser);
3807 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003808
Antoine Pitrouc1948842012-10-01 23:40:37 +02003809 Py_CLEAR(self->handle_close);
3810 Py_CLEAR(self->handle_pi);
3811 Py_CLEAR(self->handle_comment);
3812 Py_CLEAR(self->handle_end);
3813 Py_CLEAR(self->handle_data);
3814 Py_CLEAR(self->handle_start);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003815 Py_CLEAR(self->handle_start_ns);
3816 Py_CLEAR(self->handle_end_ns);
Antoine Pitrouc1948842012-10-01 23:40:37 +02003817 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003818
Antoine Pitrouc1948842012-10-01 23:40:37 +02003819 Py_CLEAR(self->target);
3820 Py_CLEAR(self->entity);
3821 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003822
Eli Bendersky52467b12012-06-01 07:13:08 +03003823 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003824}
3825
Eli Bendersky52467b12012-06-01 07:13:08 +03003826static void
3827xmlparser_dealloc(XMLParserObject* self)
3828{
3829 PyObject_GC_UnTrack(self);
3830 xmlparser_gc_clear(self);
3831 Py_TYPE(self)->tp_free((PyObject *)self);
3832}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003833
3834LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003835expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003836{
3837 int ok;
3838
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003839 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003840 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3841
3842 if (PyErr_Occurred())
3843 return NULL;
3844
3845 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003846 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003847 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003848 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003849 EXPAT(GetErrorColumnNumber)(self->parser),
3850 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003851 );
3852 return NULL;
3853 }
3854
3855 Py_RETURN_NONE;
3856}
3857
Serhiy Storchakacb985562015-05-04 15:32:48 +03003858/*[clinic input]
3859_elementtree.XMLParser.close
3860
3861[clinic start generated code]*/
3862
3863static PyObject *
3864_elementtree_XMLParser_close_impl(XMLParserObject *self)
3865/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003866{
3867 /* end feeding data to parser */
3868
3869 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003870 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003871 if (!res)
3872 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003873
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003874 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003875 Py_DECREF(res);
3876 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003877 }
3878 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003879 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003880 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003881 }
3882 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003883 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003884 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003885}
3886
Serhiy Storchakacb985562015-05-04 15:32:48 +03003887/*[clinic input]
3888_elementtree.XMLParser.feed
3889
3890 data: object
3891 /
3892
3893[clinic start generated code]*/
3894
3895static PyObject *
3896_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3897/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003898{
3899 /* feed data to parser */
3900
Serhiy Storchakacb985562015-05-04 15:32:48 +03003901 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003902 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003903 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3904 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003905 return NULL;
3906 if (data_len > INT_MAX) {
3907 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3908 return NULL;
3909 }
3910 /* Explicitly set UTF-8 encoding. Return code ignored. */
3911 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003912 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003913 }
3914 else {
3915 Py_buffer view;
3916 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003917 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003918 return NULL;
3919 if (view.len > INT_MAX) {
3920 PyBuffer_Release(&view);
3921 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3922 return NULL;
3923 }
3924 res = expat_parse(self, view.buf, (int)view.len, 0);
3925 PyBuffer_Release(&view);
3926 return res;
3927 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003928}
3929
Serhiy Storchakacb985562015-05-04 15:32:48 +03003930/*[clinic input]
3931_elementtree.XMLParser._parse_whole
3932
3933 file: object
3934 /
3935
3936[clinic start generated code]*/
3937
3938static PyObject *
3939_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3940/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003941{
Eli Benderskya3699232013-05-19 18:47:23 -07003942 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003943 PyObject* reader;
3944 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003945 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003946 PyObject* res;
3947
Serhiy Storchakacb985562015-05-04 15:32:48 +03003948 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003949 if (!reader)
3950 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003951
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003952 /* read from open file object */
3953 for (;;) {
3954
3955 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3956
3957 if (!buffer) {
3958 /* read failed (e.g. due to KeyboardInterrupt) */
3959 Py_DECREF(reader);
3960 return NULL;
3961 }
3962
Eli Benderskyf996e772012-03-16 05:53:30 +02003963 if (PyUnicode_CheckExact(buffer)) {
3964 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003965 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003966 Py_DECREF(buffer);
3967 break;
3968 }
3969 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003970 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003971 if (!temp) {
3972 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003973 Py_DECREF(reader);
3974 return NULL;
3975 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003976 buffer = temp;
3977 }
3978 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003979 Py_DECREF(buffer);
3980 break;
3981 }
3982
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003983 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3984 Py_DECREF(buffer);
3985 Py_DECREF(reader);
3986 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3987 return NULL;
3988 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003989 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003990 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003991 );
3992
3993 Py_DECREF(buffer);
3994
3995 if (!res) {
3996 Py_DECREF(reader);
3997 return NULL;
3998 }
3999 Py_DECREF(res);
4000
4001 }
4002
4003 Py_DECREF(reader);
4004
4005 res = expat_parse(self, "", 0, 1);
4006
4007 if (res && TreeBuilder_CheckExact(self->target)) {
4008 Py_DECREF(res);
4009 return treebuilder_done((TreeBuilderObject*) self->target);
4010 }
4011
4012 return res;
4013}
4014
Serhiy Storchakacb985562015-05-04 15:32:48 +03004015/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03004016_elementtree.XMLParser._setevents
4017
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004018 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03004019 events_to_report: object = None
4020 /
4021
4022[clinic start generated code]*/
4023
4024static PyObject *
4025_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
4026 PyObject *events_queue,
4027 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004028/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004029{
4030 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004031 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004032 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004033 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004034
4035 if (!TreeBuilder_CheckExact(self->target)) {
4036 PyErr_SetString(
4037 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004038 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004039 "targets"
4040 );
4041 return NULL;
4042 }
4043
4044 target = (TreeBuilderObject*) self->target;
4045
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004046 events_append = PyObject_GetAttrString(events_queue, "append");
4047 if (events_append == NULL)
4048 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03004049 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004050
4051 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02004052 Py_CLEAR(target->start_event_obj);
4053 Py_CLEAR(target->end_event_obj);
4054 Py_CLEAR(target->start_ns_event_obj);
4055 Py_CLEAR(target->end_ns_event_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +02004056 Py_CLEAR(target->comment_event_obj);
4057 Py_CLEAR(target->pi_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004058
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004059 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004060 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004061 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004062 Py_RETURN_NONE;
4063 }
4064
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004065 if (!(events_seq = PySequence_Fast(events_to_report,
4066 "events must be a sequence"))) {
4067 return NULL;
4068 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004069
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03004070 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004071 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02004072 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004073 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004074 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004075 } else if (PyBytes_Check(event_name_obj)) {
4076 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004077 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004078 if (event_name == NULL) {
4079 Py_DECREF(events_seq);
4080 PyErr_Format(PyExc_ValueError, "invalid events sequence");
4081 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004082 }
4083
4084 Py_INCREF(event_name_obj);
4085 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004086 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004087 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004088 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004089 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004090 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004091 EXPAT(SetNamespaceDeclHandler)(
4092 self->parser,
4093 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4094 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4095 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004096 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004097 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004098 EXPAT(SetNamespaceDeclHandler)(
4099 self->parser,
4100 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4101 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4102 );
Stefan Behnel43851a22019-05-01 21:20:38 +02004103 } else if (strcmp(event_name, "comment") == 0) {
4104 Py_XSETREF(target->comment_event_obj, event_name_obj);
4105 EXPAT(SetCommentHandler)(
4106 self->parser,
4107 (XML_CommentHandler) expat_comment_handler
4108 );
4109 } else if (strcmp(event_name, "pi") == 0) {
4110 Py_XSETREF(target->pi_event_obj, event_name_obj);
4111 EXPAT(SetProcessingInstructionHandler)(
4112 self->parser,
4113 (XML_ProcessingInstructionHandler) expat_pi_handler
4114 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004115 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004116 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004117 Py_DECREF(events_seq);
4118 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004119 return NULL;
4120 }
4121 }
4122
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004123 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004124 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004125}
4126
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004127static PyMemberDef xmlparser_members[] = {
4128 {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
4129 {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
4130 {NULL}
4131};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004132
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004133static PyObject*
4134xmlparser_version_getter(XMLParserObject *self, void *closure)
4135{
4136 return PyUnicode_FromFormat(
4137 "Expat %d.%d.%d", XML_MAJOR_VERSION,
4138 XML_MINOR_VERSION, XML_MICRO_VERSION);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004139}
4140
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004141static PyGetSetDef xmlparser_getsetlist[] = {
4142 {"version", (getter)xmlparser_version_getter, NULL, NULL},
4143 {NULL},
4144};
4145
Serhiy Storchakacb985562015-05-04 15:32:48 +03004146#include "clinic/_elementtree.c.h"
4147
4148static PyMethodDef element_methods[] = {
4149
4150 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
4151
4152 _ELEMENTTREE_ELEMENT_GET_METHODDEF
4153 _ELEMENTTREE_ELEMENT_SET_METHODDEF
4154
4155 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
4156 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
4157 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
4158
4159 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
4160 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
4161 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
4162 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
4163
4164 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
4165 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
4166 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
4167
Serhiy Storchaka762ec972017-03-30 18:12:06 +03004168 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004169 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
4170
4171 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
4172 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
4173
4174 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
4175
4176 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
4177 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
4178 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
4179 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
4180 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
4181
4182 {NULL, NULL}
4183};
4184
4185static PyMappingMethods element_as_mapping = {
4186 (lenfunc) element_length,
4187 (binaryfunc) element_subscr,
4188 (objobjargproc) element_ass_subscr,
4189};
4190
Serhiy Storchakadde08152015-11-25 15:28:13 +02004191static PyGetSetDef element_getsetlist[] = {
4192 {"tag",
4193 (getter)element_tag_getter,
4194 (setter)element_tag_setter,
4195 "A string identifying what kind of data this element represents"},
4196 {"text",
4197 (getter)element_text_getter,
4198 (setter)element_text_setter,
4199 "A string of text directly after the start tag, or None"},
4200 {"tail",
4201 (getter)element_tail_getter,
4202 (setter)element_tail_setter,
4203 "A string of text directly after the end tag, or None"},
4204 {"attrib",
4205 (getter)element_attrib_getter,
4206 (setter)element_attrib_setter,
4207 "A dictionary containing the element's attributes"},
4208 {NULL},
4209};
4210
Serhiy Storchakacb985562015-05-04 15:32:48 +03004211static PyTypeObject Element_Type = {
4212 PyVarObject_HEAD_INIT(NULL, 0)
4213 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
4214 /* methods */
4215 (destructor)element_dealloc, /* tp_dealloc */
4216 0, /* tp_print */
4217 0, /* tp_getattr */
4218 0, /* tp_setattr */
4219 0, /* tp_reserved */
4220 (reprfunc)element_repr, /* tp_repr */
4221 0, /* tp_as_number */
4222 &element_as_sequence, /* tp_as_sequence */
4223 &element_as_mapping, /* tp_as_mapping */
4224 0, /* tp_hash */
4225 0, /* tp_call */
4226 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02004227 PyObject_GenericGetAttr, /* tp_getattro */
4228 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004229 0, /* tp_as_buffer */
4230 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4231 /* tp_flags */
4232 0, /* tp_doc */
4233 (traverseproc)element_gc_traverse, /* tp_traverse */
4234 (inquiry)element_gc_clear, /* tp_clear */
4235 0, /* tp_richcompare */
4236 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
4237 0, /* tp_iter */
4238 0, /* tp_iternext */
4239 element_methods, /* tp_methods */
4240 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02004241 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004242 0, /* tp_base */
4243 0, /* tp_dict */
4244 0, /* tp_descr_get */
4245 0, /* tp_descr_set */
4246 0, /* tp_dictoffset */
4247 (initproc)element_init, /* tp_init */
4248 PyType_GenericAlloc, /* tp_alloc */
4249 element_new, /* tp_new */
4250 0, /* tp_free */
4251};
4252
4253static PyMethodDef treebuilder_methods[] = {
4254 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
4255 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
4256 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
Stefan Behnel43851a22019-05-01 21:20:38 +02004257 _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF
4258 _ELEMENTTREE_TREEBUILDER_PI_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004259 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
4260 {NULL, NULL}
4261};
4262
4263static PyTypeObject TreeBuilder_Type = {
4264 PyVarObject_HEAD_INIT(NULL, 0)
4265 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
4266 /* methods */
4267 (destructor)treebuilder_dealloc, /* tp_dealloc */
4268 0, /* tp_print */
4269 0, /* tp_getattr */
4270 0, /* tp_setattr */
4271 0, /* tp_reserved */
4272 0, /* tp_repr */
4273 0, /* tp_as_number */
4274 0, /* tp_as_sequence */
4275 0, /* tp_as_mapping */
4276 0, /* tp_hash */
4277 0, /* tp_call */
4278 0, /* tp_str */
4279 0, /* tp_getattro */
4280 0, /* tp_setattro */
4281 0, /* tp_as_buffer */
4282 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4283 /* tp_flags */
4284 0, /* tp_doc */
4285 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
4286 (inquiry)treebuilder_gc_clear, /* tp_clear */
4287 0, /* tp_richcompare */
4288 0, /* tp_weaklistoffset */
4289 0, /* tp_iter */
4290 0, /* tp_iternext */
4291 treebuilder_methods, /* tp_methods */
4292 0, /* tp_members */
4293 0, /* tp_getset */
4294 0, /* tp_base */
4295 0, /* tp_dict */
4296 0, /* tp_descr_get */
4297 0, /* tp_descr_set */
4298 0, /* tp_dictoffset */
4299 _elementtree_TreeBuilder___init__, /* tp_init */
4300 PyType_GenericAlloc, /* tp_alloc */
4301 treebuilder_new, /* tp_new */
4302 0, /* tp_free */
4303};
4304
4305static PyMethodDef xmlparser_methods[] = {
4306 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
4307 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
4308 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
4309 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004310 {NULL, NULL}
4311};
4312
Neal Norwitz227b5332006-03-22 09:28:35 +00004313static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004314 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08004315 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004316 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03004317 (destructor)xmlparser_dealloc, /* tp_dealloc */
4318 0, /* tp_print */
4319 0, /* tp_getattr */
4320 0, /* tp_setattr */
4321 0, /* tp_reserved */
4322 0, /* tp_repr */
4323 0, /* tp_as_number */
4324 0, /* tp_as_sequence */
4325 0, /* tp_as_mapping */
4326 0, /* tp_hash */
4327 0, /* tp_call */
4328 0, /* tp_str */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004329 0, /* tp_getattro */
Eli Bendersky52467b12012-06-01 07:13:08 +03004330 0, /* tp_setattro */
4331 0, /* tp_as_buffer */
4332 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4333 /* tp_flags */
4334 0, /* tp_doc */
4335 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
4336 (inquiry)xmlparser_gc_clear, /* tp_clear */
4337 0, /* tp_richcompare */
4338 0, /* tp_weaklistoffset */
4339 0, /* tp_iter */
4340 0, /* tp_iternext */
4341 xmlparser_methods, /* tp_methods */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004342 xmlparser_members, /* tp_members */
4343 xmlparser_getsetlist, /* tp_getset */
Eli Bendersky52467b12012-06-01 07:13:08 +03004344 0, /* tp_base */
4345 0, /* tp_dict */
4346 0, /* tp_descr_get */
4347 0, /* tp_descr_set */
4348 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004349 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03004350 PyType_GenericAlloc, /* tp_alloc */
4351 xmlparser_new, /* tp_new */
4352 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004353};
4354
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004355/* ==================================================================== */
4356/* python module interface */
4357
4358static PyMethodDef _functions[] = {
Serhiy Storchaka62be7422018-11-27 13:27:31 +02004359 {"SubElement", (PyCFunction)(void(*)(void)) subelement, METH_VARARGS | METH_KEYWORDS},
Stefan Behnel43851a22019-05-01 21:20:38 +02004360 _ELEMENTTREE__SET_FACTORIES_METHODDEF
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004361 {NULL, NULL}
4362};
4363
Martin v. Löwis1a214512008-06-11 05:26:20 +00004364
Eli Bendersky532d03e2013-08-10 08:00:39 -07004365static struct PyModuleDef elementtreemodule = {
4366 PyModuleDef_HEAD_INIT,
4367 "_elementtree",
4368 NULL,
4369 sizeof(elementtreestate),
4370 _functions,
4371 NULL,
4372 elementtree_traverse,
4373 elementtree_clear,
4374 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00004375};
4376
Neal Norwitzf6657e62006-12-28 04:47:50 +00004377PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00004378PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004379{
Eli Bendersky64d11e62012-06-15 07:42:50 +03004380 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004381 elementtreestate *st;
4382
4383 m = PyState_FindModule(&elementtreemodule);
4384 if (m) {
4385 Py_INCREF(m);
4386 return m;
4387 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004388
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004389 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02004390 if (PyType_Ready(&ElementIter_Type) < 0)
4391 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004392 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004393 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004394 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004395 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004396 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004397 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004398
Eli Bendersky532d03e2013-08-10 08:00:39 -07004399 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00004400 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00004401 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004402 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00004403
Eli Bendersky828efde2012-04-05 05:40:58 +03004404 if (!(temp = PyImport_ImportModule("copy")))
4405 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004406 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03004407 Py_XDECREF(temp);
4408
Victor Stinnerb136f112017-07-10 22:28:02 +02004409 if (st->deepcopy_obj == NULL) {
4410 return NULL;
4411 }
4412
4413 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004414 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004415 return NULL;
4416
Eli Bendersky20d41742012-06-01 09:48:37 +03004417 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004418 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4419 if (expat_capi) {
4420 /* check that it's usable */
4421 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004422 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004423 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4424 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004425 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004426 PyErr_SetString(PyExc_ImportError,
4427 "pyexpat version is incompatible");
4428 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004429 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004430 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004431 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004432 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004433
Eli Bendersky532d03e2013-08-10 08:00:39 -07004434 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004435 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004436 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004437 Py_INCREF(st->parseerror_obj);
4438 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004439
Eli Bendersky092af1f2012-03-04 07:14:03 +02004440 Py_INCREF((PyObject *)&Element_Type);
4441 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4442
Eli Bendersky58d548d2012-05-29 15:45:16 +03004443 Py_INCREF((PyObject *)&TreeBuilder_Type);
4444 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4445
Eli Bendersky52467b12012-06-01 07:13:08 +03004446 Py_INCREF((PyObject *)&XMLParser_Type);
4447 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004448
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004449 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004450}