blob: b1a96299f24fe191433b30607d4454e094abd4c5 [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* Number of child slots stored inline in an element's extra block; an
   element with at most this many children needs no separate buffer. */
#define STATIC_CHILDREN 4

/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  With the condition below switched on,
   ALLOC/RELEASE keep a running byte count in `memory` and print it
   together with the given comment; in the default configuration both
   macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) declares a file-private helper returning
   `type`.  Under MSVC it additionally requests inlining and the
   __fastcall calling convention; elsewhere it is plain `static`. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55
/* Macros used to store a 'join' flag in the low bit of a string object
   pointer.  Note that all use of text and tail as object pointers must
   be wrapped in JOIN_OBJ.  See the comments in the ElementObject
   definition for more info.

   JOIN_GET(p)       -> the flag bit (0 or 1) carried by p
   JOIN_SET(p, flag) -> p with its flag bit cleared, then or-ed with flag
   JOIN_OBJ(p)       -> the real PyObject* with the flag bit masked off */
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
Ronald Oussoren138d0802013-07-19 11:11:25 +020083/* Types defined by this extension */
84static PyTypeObject Element_Type;
85static PyTypeObject ElementIter_Type;
86static PyTypeObject TreeBuilder_Type;
87static PyTypeObject XMLParser_Type;
88
89
Eli Bendersky532d03e2013-08-10 08:00:39 -070090/* Per-module state; PEP 3121 */
91typedef struct {
92 PyObject *parseerror_obj;
93 PyObject *deepcopy_obj;
94 PyObject *elementpath_obj;
95} elementtreestate;
96
97static struct PyModuleDef elementtreemodule;
98
99/* Given a module object (assumed to be _elementtree), get its per-module
100 * state.
101 */
102#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
103
104/* Find the module instance imported in the currently running sub-interpreter
105 * and get its state.
106 */
107#define ET_STATE_GLOBAL \
108 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
109
110static int
111elementtree_clear(PyObject *m)
112{
113 elementtreestate *st = ET_STATE(m);
114 Py_CLEAR(st->parseerror_obj);
115 Py_CLEAR(st->deepcopy_obj);
116 Py_CLEAR(st->elementpath_obj);
117 return 0;
118}
119
120static int
121elementtree_traverse(PyObject *m, visitproc visit, void *arg)
122{
123 elementtreestate *st = ET_STATE(m);
124 Py_VISIT(st->parseerror_obj);
125 Py_VISIT(st->deepcopy_obj);
126 Py_VISIT(st->elementpath_obj);
127 return 0;
128}
129
130static void
131elementtree_free(void *m)
132{
133 elementtree_clear((PyObject *)m);
134}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135
136/* helpers */
137
138LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139list_join(PyObject* list)
140{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300141 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000143 PyObject* result;
144
Antoine Pitrouc1948842012-10-01 23:40:37 +0200145 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000146 if (!joiner)
147 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200148 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000150 return result;
151}
152
Eli Bendersky48d358b2012-05-30 17:57:50 +0300153/* Is the given object an empty dictionary?
154*/
155static int
156is_empty_dict(PyObject *obj)
157{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200158 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300159}
160
161
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000162/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200163/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000164
165typedef struct {
166
167 /* attributes (a dictionary object), or None if no attributes */
168 PyObject* attrib;
169
170 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200171 Py_ssize_t length; /* actual number of items */
172 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000173
174 /* this either points to _children or to a malloced buffer */
175 PyObject* *children;
176
177 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179} ElementObjectExtra;
180
181typedef struct {
182 PyObject_HEAD
183
184 /* element tag (a string). */
185 PyObject* tag;
186
187 /* text before first child. note that this is a tagged pointer;
188 use JOIN_OBJ to get the object pointer. the join flag is used
189 to distinguish lists created by the tree builder from lists
190 assigned to the attribute by application code; the former
191 should be joined before being returned to the user, the latter
192 should be left intact. */
193 PyObject* text;
194
195 /* text after this element, in parent. note that this is a tagged
196 pointer; use JOIN_OBJ to get the object pointer. */
197 PyObject* tail;
198
199 ElementObjectExtra* extra;
200
Eli Benderskyebf37a22012-04-03 22:02:37 +0300201 PyObject *weakreflist; /* For tp_weaklistoffset */
202
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000203} ElementObject;
204
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
/* Element_CheckExact: true only when op's type is exactly Element_Type.
   Element_Check: also true for instances of Element_Type subtypes. */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
208
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209
210/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200211/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212
213LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200214create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215{
216 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200217 if (!self->extra) {
218 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000219 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200220 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000221
222 if (!attrib)
223 attrib = Py_None;
224
225 Py_INCREF(attrib);
226 self->extra->attrib = attrib;
227
228 self->extra->length = 0;
229 self->extra->allocated = STATIC_CHILDREN;
230 self->extra->children = self->extra->_children;
231
232 return 0;
233}
234
235LOCAL(void)
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300236dealloc_extra(ElementObjectExtra *extra)
237{
238 Py_ssize_t i;
239
240 if (!extra)
241 return;
242
243 Py_DECREF(extra->attrib);
244
245 for (i = 0; i < extra->length; i++)
246 Py_DECREF(extra->children[i]);
247
248 if (extra->children != extra->_children)
249 PyObject_Free(extra->children);
250
251 PyObject_Free(extra);
252}
253
254LOCAL(void)
255clear_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256{
Eli Bendersky08b85292012-04-04 15:55:07 +0300257 ElementObjectExtra *myextra;
Eli Bendersky08b85292012-04-04 15:55:07 +0300258
Eli Benderskyebf37a22012-04-03 22:02:37 +0300259 if (!self->extra)
260 return;
261
262 /* Avoid DECREFs calling into this code again (cycles, etc.)
263 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300264 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300265 self->extra = NULL;
266
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300267 dealloc_extra(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000268}
269
Eli Bendersky092af1f2012-03-04 07:14:03 +0200270/* Convenience internal function to create new Element objects with the given
271 * tag and attributes.
272*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200274create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000275{
276 ElementObject* self;
277
Eli Bendersky0192ba32012-03-30 16:38:33 +0300278 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000279 if (self == NULL)
280 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000281 self->extra = NULL;
282
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000283 Py_INCREF(tag);
284 self->tag = tag;
285
286 Py_INCREF(Py_None);
287 self->text = Py_None;
288
289 Py_INCREF(Py_None);
290 self->tail = Py_None;
291
Eli Benderskyebf37a22012-04-03 22:02:37 +0300292 self->weakreflist = NULL;
293
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200294 ALLOC(sizeof(ElementObject), "create element");
295 PyObject_GC_Track(self);
296
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200297 if (attrib != Py_None && !is_empty_dict(attrib)) {
298 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200299 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200300 return NULL;
301 }
302 }
303
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000304 return (PyObject*) self;
305}
306
Eli Bendersky092af1f2012-03-04 07:14:03 +0200307static PyObject *
308element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
309{
310 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
311 if (e != NULL) {
312 Py_INCREF(Py_None);
313 e->tag = Py_None;
314
315 Py_INCREF(Py_None);
316 e->text = Py_None;
317
318 Py_INCREF(Py_None);
319 e->tail = Py_None;
320
321 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300322 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200323 }
324 return (PyObject *)e;
325}
326
Eli Bendersky737b1732012-05-29 06:02:56 +0300327/* Helper function for extracting the attrib dictionary from a keywords dict.
328 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800329 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300330 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700331 *
332 * Return a dictionary with the content of kwds merged into the content of
333 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300334 */
335static PyObject*
336get_attrib_from_keywords(PyObject *kwds)
337{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700338 PyObject *attrib_str = PyUnicode_FromString("attrib");
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600339 if (attrib_str == NULL) {
340 return NULL;
341 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700342 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300343
344 if (attrib) {
345 /* If attrib was found in kwds, copy its value and remove it from
346 * kwds
347 */
348 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700349 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300350 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
351 Py_TYPE(attrib)->tp_name);
352 return NULL;
353 }
354 attrib = PyDict_Copy(attrib);
Serhiy Storchaka8905fcc2018-12-11 08:38:03 +0200355 if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
356 Py_DECREF(attrib);
357 attrib = NULL;
358 }
Eli Bendersky737b1732012-05-29 06:02:56 +0300359 } else {
360 attrib = PyDict_New();
361 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700362
363 Py_DECREF(attrib_str);
364
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600365 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
366 Py_DECREF(attrib);
367 return NULL;
368 }
Eli Bendersky737b1732012-05-29 06:02:56 +0300369 return attrib;
370}
371
Serhiy Storchakacb985562015-05-04 15:32:48 +0300372/*[clinic input]
373module _elementtree
374class _elementtree.Element "ElementObject *" "&Element_Type"
375class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
376class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
377[clinic start generated code]*/
378/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
379
Eli Bendersky092af1f2012-03-04 07:14:03 +0200380static int
381element_init(PyObject *self, PyObject *args, PyObject *kwds)
382{
383 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200384 PyObject *attrib = NULL;
385 ElementObject *self_elem;
386
387 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
388 return -1;
389
Eli Bendersky737b1732012-05-29 06:02:56 +0300390 if (attrib) {
391 /* attrib passed as positional arg */
392 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200393 if (!attrib)
394 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300395 if (kwds) {
396 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200397 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300398 return -1;
399 }
400 }
401 } else if (kwds) {
402 /* have keywords args */
403 attrib = get_attrib_from_keywords(kwds);
404 if (!attrib)
405 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200406 }
407
408 self_elem = (ElementObject *)self;
409
Antoine Pitrouc1948842012-10-01 23:40:37 +0200410 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200411 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200412 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413 return -1;
414 }
415 }
416
Eli Bendersky48d358b2012-05-30 17:57:50 +0300417 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200418 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200419
420 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300422 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200423
Eli Bendersky092af1f2012-03-04 07:14:03 +0200424 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300425 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426
Eli Bendersky092af1f2012-03-04 07:14:03 +0200427 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300428 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200429
430 return 0;
431}
432
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000433LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200434element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000435{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200436 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000437 PyObject* *children;
438
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300439 assert(extra >= 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000440 /* make sure self->children can hold the given number of extra
441 elements. set an exception and return -1 if allocation failed */
442
Victor Stinner5f0af232013-07-11 23:01:36 +0200443 if (!self->extra) {
444 if (create_extra(self, NULL) < 0)
445 return -1;
446 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200448 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000449
450 if (size > self->extra->allocated) {
451 /* use Python 2.4's list growth strategy */
452 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000453 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100454 * which needs at least 4 bytes.
455 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000456 * be safe.
457 */
458 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200459 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
460 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000461 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000462 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100463 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000464 * false alarm always assume at least one child to be safe.
465 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000466 children = PyObject_Realloc(self->extra->children,
467 size * sizeof(PyObject*));
468 if (!children)
469 goto nomemory;
470 } else {
471 children = PyObject_Malloc(size * sizeof(PyObject*));
472 if (!children)
473 goto nomemory;
474 /* copy existing children from static area to malloc buffer */
475 memcpy(children, self->extra->children,
476 self->extra->length * sizeof(PyObject*));
477 }
478 self->extra->children = children;
479 self->extra->allocated = size;
480 }
481
482 return 0;
483
484 nomemory:
485 PyErr_NoMemory();
486 return -1;
487}
488
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300489LOCAL(void)
490raise_type_error(PyObject *element)
491{
492 PyErr_Format(PyExc_TypeError,
493 "expected an Element, not \"%.200s\"",
494 Py_TYPE(element)->tp_name);
495}
496
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000497LOCAL(int)
498element_add_subelement(ElementObject* self, PyObject* element)
499{
500 /* add a child element to a parent */
501
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300502 if (!Element_Check(element)) {
503 raise_type_error(element);
504 return -1;
505 }
506
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000507 if (element_resize(self, 1) < 0)
508 return -1;
509
510 Py_INCREF(element);
511 self->extra->children[self->extra->length] = element;
512
513 self->extra->length++;
514
515 return 0;
516}
517
518LOCAL(PyObject*)
519element_get_attrib(ElementObject* self)
520{
521 /* return borrowed reference to attrib dictionary */
522 /* note: this function assumes that the extra section exists */
523
524 PyObject* res = self->extra->attrib;
525
526 if (res == Py_None) {
527 /* create missing dictionary */
528 res = PyDict_New();
529 if (!res)
530 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200531 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000532 self->extra->attrib = res;
533 }
534
535 return res;
536}
537
538LOCAL(PyObject*)
539element_get_text(ElementObject* self)
540{
541 /* return borrowed reference to text attribute */
542
Serhiy Storchaka576def02017-03-30 09:47:31 +0300543 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000544
545 if (JOIN_GET(res)) {
546 res = JOIN_OBJ(res);
547 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300548 PyObject *tmp = list_join(res);
549 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000550 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300551 self->text = tmp;
552 Py_DECREF(res);
553 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000554 }
555 }
556
557 return res;
558}
559
560LOCAL(PyObject*)
561element_get_tail(ElementObject* self)
562{
563 /* return borrowed reference to text attribute */
564
Serhiy Storchaka576def02017-03-30 09:47:31 +0300565 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000566
567 if (JOIN_GET(res)) {
568 res = JOIN_OBJ(res);
569 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300570 PyObject *tmp = list_join(res);
571 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000572 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300573 self->tail = tmp;
574 Py_DECREF(res);
575 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000576 }
577 }
578
579 return res;
580}
581
582static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300583subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000584{
585 PyObject* elem;
586
587 ElementObject* parent;
588 PyObject* tag;
589 PyObject* attrib = NULL;
590 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
591 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800592 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000593 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800594 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000595
Eli Bendersky737b1732012-05-29 06:02:56 +0300596 if (attrib) {
597 /* attrib passed as positional arg */
598 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000599 if (!attrib)
600 return NULL;
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600601 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
602 Py_DECREF(attrib);
603 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300604 }
605 } else if (kwds) {
606 /* have keyword args */
607 attrib = get_attrib_from_keywords(kwds);
608 if (!attrib)
609 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000610 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300611 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000612 Py_INCREF(Py_None);
613 attrib = Py_None;
614 }
615
Eli Bendersky092af1f2012-03-04 07:14:03 +0200616 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000617 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200618 if (elem == NULL)
619 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000620
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000621 if (element_add_subelement(parent, elem) < 0) {
622 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000623 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000624 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000625
626 return elem;
627}
628
Eli Bendersky0192ba32012-03-30 16:38:33 +0300629static int
630element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
631{
632 Py_VISIT(self->tag);
633 Py_VISIT(JOIN_OBJ(self->text));
634 Py_VISIT(JOIN_OBJ(self->tail));
635
636 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200637 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300638 Py_VISIT(self->extra->attrib);
639
640 for (i = 0; i < self->extra->length; ++i)
641 Py_VISIT(self->extra->children[i]);
642 }
643 return 0;
644}
645
646static int
647element_gc_clear(ElementObject *self)
648{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300649 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700650 _clear_joined_ptr(&self->text);
651 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300652
653 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300654 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300655 */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300656 clear_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300657 return 0;
658}
659
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000660static void
661element_dealloc(ElementObject* self)
662{
INADA Naokia6296d32017-08-24 14:55:17 +0900663 /* bpo-31095: UnTrack is needed before calling any callbacks */
Eli Bendersky0192ba32012-03-30 16:38:33 +0300664 PyObject_GC_UnTrack(self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200665 Py_TRASHCAN_SAFE_BEGIN(self)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300666
667 if (self->weakreflist != NULL)
668 PyObject_ClearWeakRefs((PyObject *) self);
669
Eli Bendersky0192ba32012-03-30 16:38:33 +0300670 /* element_gc_clear clears all references and deallocates extra
671 */
672 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000673
674 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200675 Py_TYPE(self)->tp_free((PyObject *)self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200676 Py_TRASHCAN_SAFE_END(self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000677}
678
679/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000680
Serhiy Storchakacb985562015-05-04 15:32:48 +0300681/*[clinic input]
682_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000683
Serhiy Storchakacb985562015-05-04 15:32:48 +0300684 subelement: object(subclass_of='&Element_Type')
685 /
686
687[clinic start generated code]*/
688
689static PyObject *
690_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
691/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
692{
693 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000694 return NULL;
695
696 Py_RETURN_NONE;
697}
698
Serhiy Storchakacb985562015-05-04 15:32:48 +0300699/*[clinic input]
700_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000701
Serhiy Storchakacb985562015-05-04 15:32:48 +0300702[clinic start generated code]*/
703
704static PyObject *
705_elementtree_Element_clear_impl(ElementObject *self)
706/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
707{
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300708 clear_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000709
710 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300711 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000712
713 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300714 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000715
716 Py_RETURN_NONE;
717}
718
Serhiy Storchakacb985562015-05-04 15:32:48 +0300719/*[clinic input]
720_elementtree.Element.__copy__
721
722[clinic start generated code]*/
723
724static PyObject *
725_elementtree_Element___copy___impl(ElementObject *self)
726/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000727{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200728 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000729 ElementObject* element;
730
Eli Bendersky092af1f2012-03-04 07:14:03 +0200731 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800732 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000733 if (!element)
734 return NULL;
735
Oren Milman39ecb9c2017-10-10 23:26:24 +0300736 Py_INCREF(JOIN_OBJ(self->text));
737 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000738
Oren Milman39ecb9c2017-10-10 23:26:24 +0300739 Py_INCREF(JOIN_OBJ(self->tail));
740 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000741
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300742 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000743 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000744 if (element_resize(element, self->extra->length) < 0) {
745 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000746 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000747 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748
749 for (i = 0; i < self->extra->length; i++) {
750 Py_INCREF(self->extra->children[i]);
751 element->extra->children[i] = self->extra->children[i];
752 }
753
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300754 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000755 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000756 }
757
758 return (PyObject*) element;
759}
760
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200761/* Helper for a deep copy. */
762LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
763
Serhiy Storchakacb985562015-05-04 15:32:48 +0300764/*[clinic input]
765_elementtree.Element.__deepcopy__
766
Oren Milmand0568182017-09-12 17:39:15 +0300767 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300768 /
769
770[clinic start generated code]*/
771
772static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300773_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
774/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000775{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200776 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000777 ElementObject* element;
778 PyObject* tag;
779 PyObject* attrib;
780 PyObject* text;
781 PyObject* tail;
782 PyObject* id;
783
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000784 tag = deepcopy(self->tag, memo);
785 if (!tag)
786 return NULL;
787
788 if (self->extra) {
789 attrib = deepcopy(self->extra->attrib, memo);
790 if (!attrib) {
791 Py_DECREF(tag);
792 return NULL;
793 }
794 } else {
795 Py_INCREF(Py_None);
796 attrib = Py_None;
797 }
798
Eli Bendersky092af1f2012-03-04 07:14:03 +0200799 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000800
801 Py_DECREF(tag);
802 Py_DECREF(attrib);
803
804 if (!element)
805 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100806
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807 text = deepcopy(JOIN_OBJ(self->text), memo);
808 if (!text)
809 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300810 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000811
812 tail = deepcopy(JOIN_OBJ(self->tail), memo);
813 if (!tail)
814 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300815 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000816
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300817 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000818 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000819 if (element_resize(element, self->extra->length) < 0)
820 goto error;
821
822 for (i = 0; i < self->extra->length; i++) {
823 PyObject* child = deepcopy(self->extra->children[i], memo);
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300824 if (!child || !Element_Check(child)) {
825 if (child) {
826 raise_type_error(child);
827 Py_DECREF(child);
828 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000829 element->extra->length = i;
830 goto error;
831 }
832 element->extra->children[i] = child;
833 }
834
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300835 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000836 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000837 }
838
839 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700840 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000841 if (!id)
842 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000843
844 i = PyDict_SetItem(memo, id, (PyObject*) element);
845
846 Py_DECREF(id);
847
848 if (i < 0)
849 goto error;
850
851 return (PyObject*) element;
852
853 error:
854 Py_DECREF(element);
855 return NULL;
856}
857
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200858LOCAL(PyObject *)
859deepcopy(PyObject *object, PyObject *memo)
860{
861 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200862 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200863 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200864
865 /* Fast paths */
866 if (object == Py_None || PyUnicode_CheckExact(object)) {
867 Py_INCREF(object);
868 return object;
869 }
870
871 if (Py_REFCNT(object) == 1) {
872 if (PyDict_CheckExact(object)) {
873 PyObject *key, *value;
874 Py_ssize_t pos = 0;
875 int simple = 1;
876 while (PyDict_Next(object, &pos, &key, &value)) {
877 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
878 simple = 0;
879 break;
880 }
881 }
882 if (simple)
883 return PyDict_Copy(object);
884 /* Fall through to general case */
885 }
886 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300887 return _elementtree_Element___deepcopy___impl(
888 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200889 }
890 }
891
892 /* General case */
893 st = ET_STATE_GLOBAL;
894 if (!st->deepcopy_obj) {
895 PyErr_SetString(PyExc_RuntimeError,
896 "deepcopy helper not found");
897 return NULL;
898 }
899
Victor Stinner7fbac452016-08-20 01:34:44 +0200900 stack[0] = object;
901 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200902 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200903}
904
905
Serhiy Storchakacb985562015-05-04 15:32:48 +0300906/*[clinic input]
907_elementtree.Element.__sizeof__ -> Py_ssize_t
908
909[clinic start generated code]*/
910
911static Py_ssize_t
912_elementtree_Element___sizeof___impl(ElementObject *self)
913/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200914{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200915 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200916 if (self->extra) {
917 result += sizeof(ElementObjectExtra);
918 if (self->extra->children != self->extra->_children)
919 result += sizeof(PyObject*) * self->extra->allocated;
920 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300921 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200922}
923
Eli Bendersky698bdb22013-01-10 06:01:06 -0800924/* dict keys for getstate/setstate. */
925#define PICKLED_TAG "tag"
926#define PICKLED_CHILDREN "_children"
927#define PICKLED_ATTRIB "attrib"
928#define PICKLED_TAIL "tail"
929#define PICKLED_TEXT "text"
930
931/* __getstate__ returns a fabricated instance dict as in the pure-Python
932 * Element implementation, for interoperability/interchangeability. This
933 * makes the pure-Python implementation details an API, but (a) there aren't
934 * any unnecessary structures there; and (b) it buys compatibility with 3.2
935 * pickles. See issue #16076.
936 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300937/*[clinic input]
938_elementtree.Element.__getstate__
939
940[clinic start generated code]*/
941
Eli Bendersky698bdb22013-01-10 06:01:06 -0800942static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300943_elementtree_Element___getstate___impl(ElementObject *self)
944/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800945{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200946 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800947 PyObject *instancedict = NULL, *children;
948
949 /* Build a list of children. */
950 children = PyList_New(self->extra ? self->extra->length : 0);
951 if (!children)
952 return NULL;
953 for (i = 0; i < PyList_GET_SIZE(children); i++) {
954 PyObject *child = self->extra->children[i];
955 Py_INCREF(child);
956 PyList_SET_ITEM(children, i, child);
957 }
958
959 /* Construct the state object. */
960 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
961 if (noattrib)
962 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
963 PICKLED_TAG, self->tag,
964 PICKLED_CHILDREN, children,
965 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700966 PICKLED_TEXT, JOIN_OBJ(self->text),
967 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800968 else
969 instancedict = Py_BuildValue("{sOsOsOsOsO}",
970 PICKLED_TAG, self->tag,
971 PICKLED_CHILDREN, children,
972 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700973 PICKLED_TEXT, JOIN_OBJ(self->text),
974 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800975 if (instancedict) {
976 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800977 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800978 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800979 else {
980 for (i = 0; i < PyList_GET_SIZE(children); i++)
981 Py_DECREF(PyList_GET_ITEM(children, i));
982 Py_DECREF(children);
983
984 return NULL;
985 }
986}
987
988static PyObject *
989element_setstate_from_attributes(ElementObject *self,
990 PyObject *tag,
991 PyObject *attrib,
992 PyObject *text,
993 PyObject *tail,
994 PyObject *children)
995{
996 Py_ssize_t i, nchildren;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300997 ElementObjectExtra *oldextra = NULL;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800998
999 if (!tag) {
1000 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
1001 return NULL;
1002 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001003
Serhiy Storchaka191321d2015-12-27 15:41:34 +02001004 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001005 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001006
Oren Milman39ecb9c2017-10-10 23:26:24 +03001007 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
1008 Py_INCREF(JOIN_OBJ(text));
1009 _set_joined_ptr(&self->text, text);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001010
Oren Milman39ecb9c2017-10-10 23:26:24 +03001011 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
1012 Py_INCREF(JOIN_OBJ(tail));
1013 _set_joined_ptr(&self->tail, tail);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001014
1015 /* Handle ATTRIB and CHILDREN. */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001016 if (!children && !attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001017 Py_RETURN_NONE;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001018 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001019
1020 /* Compute 'nchildren'. */
1021 if (children) {
1022 if (!PyList_Check(children)) {
1023 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1024 return NULL;
1025 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001026 nchildren = PyList_GET_SIZE(children);
1027
1028 /* (Re-)allocate 'extra'.
1029 Avoid DECREFs calling into this code again (cycles, etc.)
1030 */
1031 oldextra = self->extra;
1032 self->extra = NULL;
1033 if (element_resize(self, nchildren)) {
1034 assert(!self->extra || !self->extra->length);
1035 clear_extra(self);
1036 self->extra = oldextra;
1037 return NULL;
1038 }
1039 assert(self->extra);
1040 assert(self->extra->allocated >= nchildren);
1041 if (oldextra) {
1042 assert(self->extra->attrib == Py_None);
1043 self->extra->attrib = oldextra->attrib;
1044 oldextra->attrib = Py_None;
1045 }
1046
1047 /* Copy children */
1048 for (i = 0; i < nchildren; i++) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001049 PyObject *child = PyList_GET_ITEM(children, i);
1050 if (!Element_Check(child)) {
1051 raise_type_error(child);
1052 self->extra->length = i;
1053 dealloc_extra(oldextra);
1054 return NULL;
1055 }
1056 Py_INCREF(child);
1057 self->extra->children[i] = child;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001058 }
1059
1060 assert(!self->extra->length);
1061 self->extra->length = nchildren;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001062 }
1063 else {
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001064 if (element_resize(self, 0)) {
1065 return NULL;
1066 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001067 }
1068
Eli Bendersky698bdb22013-01-10 06:01:06 -08001069 /* Stash attrib. */
1070 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001071 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001072 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001073 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001074 dealloc_extra(oldextra);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001075
1076 Py_RETURN_NONE;
1077}
1078
1079/* __setstate__ for Element instance from the Python implementation.
1080 * 'state' should be the instance dict.
1081 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001082
Eli Bendersky698bdb22013-01-10 06:01:06 -08001083static PyObject *
1084element_setstate_from_Python(ElementObject *self, PyObject *state)
1085{
1086 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1087 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1088 PyObject *args;
1089 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001090 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001091
Eli Bendersky698bdb22013-01-10 06:01:06 -08001092 tag = attrib = text = tail = children = NULL;
1093 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001094 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001095 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001096
1097 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1098 &attrib, &text, &tail, &children))
1099 retval = element_setstate_from_attributes(self, tag, attrib, text,
1100 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001101 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001102 retval = NULL;
1103
1104 Py_DECREF(args);
1105 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001106}
1107
Serhiy Storchakacb985562015-05-04 15:32:48 +03001108/*[clinic input]
1109_elementtree.Element.__setstate__
1110
1111 state: object
1112 /
1113
1114[clinic start generated code]*/
1115
Eli Bendersky698bdb22013-01-10 06:01:06 -08001116static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001117_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1118/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001119{
1120 if (!PyDict_CheckExact(state)) {
1121 PyErr_Format(PyExc_TypeError,
1122 "Don't know how to unpickle \"%.200R\" as an Element",
1123 state);
1124 return NULL;
1125 }
1126 else
1127 return element_setstate_from_Python(self, state);
1128}
1129
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001130LOCAL(int)
1131checkpath(PyObject* tag)
1132{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001133 Py_ssize_t i;
1134 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001135
1136 /* check if a tag contains an xpath character */
1137
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001138#define PATHCHAR(ch) \
1139 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001140
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001141 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001142 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1143 void *data = PyUnicode_DATA(tag);
1144 unsigned int kind = PyUnicode_KIND(tag);
1145 for (i = 0; i < len; i++) {
1146 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1147 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001148 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001149 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001150 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001151 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001152 return 1;
1153 }
1154 return 0;
1155 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001156 if (PyBytes_Check(tag)) {
1157 char *p = PyBytes_AS_STRING(tag);
1158 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001159 if (p[i] == '{')
1160 check = 0;
1161 else if (p[i] == '}')
1162 check = 1;
1163 else if (check && PATHCHAR(p[i]))
1164 return 1;
1165 }
1166 return 0;
1167 }
1168
1169 return 1; /* unknown type; might be path expression */
1170}
1171
Serhiy Storchakacb985562015-05-04 15:32:48 +03001172/*[clinic input]
1173_elementtree.Element.extend
1174
1175 elements: object
1176 /
1177
1178[clinic start generated code]*/
1179
1180static PyObject *
1181_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1182/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001183{
1184 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001185 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001186
Serhiy Storchakacb985562015-05-04 15:32:48 +03001187 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001188 if (!seq) {
1189 PyErr_Format(
1190 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001191 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001192 );
1193 return NULL;
1194 }
1195
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001196 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001197 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001198 Py_INCREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001199 if (element_add_subelement(self, element) < 0) {
1200 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001201 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001202 return NULL;
1203 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001204 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001205 }
1206
1207 Py_DECREF(seq);
1208
1209 Py_RETURN_NONE;
1210}
1211
Serhiy Storchakacb985562015-05-04 15:32:48 +03001212/*[clinic input]
1213_elementtree.Element.find
1214
1215 path: object
1216 namespaces: object = None
1217
1218[clinic start generated code]*/
1219
1220static PyObject *
1221_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1222 PyObject *namespaces)
1223/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001224{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001225 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001226 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001227
Serhiy Storchakacb985562015-05-04 15:32:48 +03001228 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001229 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001230 return _PyObject_CallMethodIdObjArgs(
1231 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001232 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001233 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001234
1235 if (!self->extra)
1236 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001237
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001238 for (i = 0; i < self->extra->length; i++) {
1239 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001240 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001241 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001242 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001243 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001244 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001245 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001246 Py_DECREF(item);
1247 if (rc < 0)
1248 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001249 }
1250
1251 Py_RETURN_NONE;
1252}
1253
Serhiy Storchakacb985562015-05-04 15:32:48 +03001254/*[clinic input]
1255_elementtree.Element.findtext
1256
1257 path: object
1258 default: object = None
1259 namespaces: object = None
1260
1261[clinic start generated code]*/
1262
1263static PyObject *
1264_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1265 PyObject *default_value,
1266 PyObject *namespaces)
1267/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001268{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001269 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001270 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001271 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001272
Serhiy Storchakacb985562015-05-04 15:32:48 +03001273 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001274 return _PyObject_CallMethodIdObjArgs(
1275 st->elementpath_obj, &PyId_findtext,
1276 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001277 );
1278
1279 if (!self->extra) {
1280 Py_INCREF(default_value);
1281 return default_value;
1282 }
1283
1284 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001285 PyObject *item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001286 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001287 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001288 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001289 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001290 if (rc > 0) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001291 PyObject* text = element_get_text((ElementObject*)item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001292 if (text == Py_None) {
1293 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001294 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001295 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001296 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001297 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001298 return text;
1299 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001300 Py_DECREF(item);
1301 if (rc < 0)
1302 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001303 }
1304
1305 Py_INCREF(default_value);
1306 return default_value;
1307}
1308
Serhiy Storchakacb985562015-05-04 15:32:48 +03001309/*[clinic input]
1310_elementtree.Element.findall
1311
1312 path: object
1313 namespaces: object = None
1314
1315[clinic start generated code]*/
1316
1317static PyObject *
1318_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1319 PyObject *namespaces)
1320/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001321{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001322 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001323 PyObject* out;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001324 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001325
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001326 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001327 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001328 return _PyObject_CallMethodIdObjArgs(
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001329 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001330 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001331 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001332
1333 out = PyList_New(0);
1334 if (!out)
1335 return NULL;
1336
1337 if (!self->extra)
1338 return out;
1339
1340 for (i = 0; i < self->extra->length; i++) {
1341 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001342 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001343 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001344 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001345 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001346 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1347 Py_DECREF(item);
1348 Py_DECREF(out);
1349 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001350 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001351 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001352 }
1353
1354 return out;
1355}
1356
Serhiy Storchakacb985562015-05-04 15:32:48 +03001357/*[clinic input]
1358_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001359
Serhiy Storchakacb985562015-05-04 15:32:48 +03001360 path: object
1361 namespaces: object = None
1362
1363[clinic start generated code]*/
1364
1365static PyObject *
1366_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1367 PyObject *namespaces)
1368/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1369{
1370 PyObject* tag = path;
1371 _Py_IDENTIFIER(iterfind);
1372 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001373
Victor Stinnerf5616342016-12-09 15:26:00 +01001374 return _PyObject_CallMethodIdObjArgs(
1375 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001376}
1377
Serhiy Storchakacb985562015-05-04 15:32:48 +03001378/*[clinic input]
1379_elementtree.Element.get
1380
1381 key: object
1382 default: object = None
1383
1384[clinic start generated code]*/
1385
1386static PyObject *
1387_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1388 PyObject *default_value)
1389/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001390{
1391 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001392
1393 if (!self->extra || self->extra->attrib == Py_None)
1394 value = default_value;
1395 else {
1396 value = PyDict_GetItem(self->extra->attrib, key);
1397 if (!value)
1398 value = default_value;
1399 }
1400
1401 Py_INCREF(value);
1402 return value;
1403}
1404
Serhiy Storchakacb985562015-05-04 15:32:48 +03001405/*[clinic input]
1406_elementtree.Element.getchildren
1407
1408[clinic start generated code]*/
1409
1410static PyObject *
1411_elementtree_Element_getchildren_impl(ElementObject *self)
1412/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001413{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001414 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001415 PyObject* list;
1416
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001417 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1418 "This method will be removed in future versions. "
1419 "Use 'list(elem)' or iteration over elem instead.",
1420 1) < 0) {
1421 return NULL;
1422 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001423
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001424 if (!self->extra)
1425 return PyList_New(0);
1426
1427 list = PyList_New(self->extra->length);
1428 if (!list)
1429 return NULL;
1430
1431 for (i = 0; i < self->extra->length; i++) {
1432 PyObject* item = self->extra->children[i];
1433 Py_INCREF(item);
1434 PyList_SET_ITEM(list, i, item);
1435 }
1436
1437 return list;
1438}
1439
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001440
Eli Bendersky64d11e62012-06-15 07:42:50 +03001441static PyObject *
1442create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1443
1444
Serhiy Storchakacb985562015-05-04 15:32:48 +03001445/*[clinic input]
1446_elementtree.Element.iter
1447
1448 tag: object = None
1449
1450[clinic start generated code]*/
1451
Eli Bendersky64d11e62012-06-15 07:42:50 +03001452static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001453_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1454/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001455{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001456 if (PyUnicode_Check(tag)) {
1457 if (PyUnicode_READY(tag) < 0)
1458 return NULL;
1459 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1460 tag = Py_None;
1461 }
1462 else if (PyBytes_Check(tag)) {
1463 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1464 tag = Py_None;
1465 }
1466
Eli Bendersky64d11e62012-06-15 07:42:50 +03001467 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001468}
1469
1470
Serhiy Storchakacb985562015-05-04 15:32:48 +03001471/*[clinic input]
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001472_elementtree.Element.getiterator
1473
1474 tag: object = None
1475
1476[clinic start generated code]*/
1477
1478static PyObject *
1479_elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1480/*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1481{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03001482 if (PyErr_WarnEx(PyExc_DeprecationWarning,
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001483 "This method will be removed in future versions. "
1484 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1485 1) < 0) {
1486 return NULL;
1487 }
1488 return _elementtree_Element_iter_impl(self, tag);
1489}
1490
1491
1492/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03001493_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001494
Serhiy Storchakacb985562015-05-04 15:32:48 +03001495[clinic start generated code]*/
1496
1497static PyObject *
1498_elementtree_Element_itertext_impl(ElementObject *self)
1499/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1500{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001501 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001502}
1503
Eli Bendersky64d11e62012-06-15 07:42:50 +03001504
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001505static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001506element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001507{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001508 ElementObject* self = (ElementObject*) self_;
1509
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001510 if (!self->extra || index < 0 || index >= self->extra->length) {
1511 PyErr_SetString(
1512 PyExc_IndexError,
1513 "child index out of range"
1514 );
1515 return NULL;
1516 }
1517
1518 Py_INCREF(self->extra->children[index]);
1519 return self->extra->children[index];
1520}
1521
Serhiy Storchakacb985562015-05-04 15:32:48 +03001522/*[clinic input]
1523_elementtree.Element.insert
1524
1525 index: Py_ssize_t
1526 subelement: object(subclass_of='&Element_Type')
1527 /
1528
1529[clinic start generated code]*/
1530
1531static PyObject *
1532_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1533 PyObject *subelement)
1534/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001535{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001536 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001537
Victor Stinner5f0af232013-07-11 23:01:36 +02001538 if (!self->extra) {
1539 if (create_extra(self, NULL) < 0)
1540 return NULL;
1541 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001542
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001543 if (index < 0) {
1544 index += self->extra->length;
1545 if (index < 0)
1546 index = 0;
1547 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001548 if (index > self->extra->length)
1549 index = self->extra->length;
1550
1551 if (element_resize(self, 1) < 0)
1552 return NULL;
1553
1554 for (i = self->extra->length; i > index; i--)
1555 self->extra->children[i] = self->extra->children[i-1];
1556
Serhiy Storchakacb985562015-05-04 15:32:48 +03001557 Py_INCREF(subelement);
1558 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001559
1560 self->extra->length++;
1561
1562 Py_RETURN_NONE;
1563}
1564
Serhiy Storchakacb985562015-05-04 15:32:48 +03001565/*[clinic input]
1566_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001567
Serhiy Storchakacb985562015-05-04 15:32:48 +03001568[clinic start generated code]*/
1569
1570static PyObject *
1571_elementtree_Element_items_impl(ElementObject *self)
1572/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1573{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001574 if (!self->extra || self->extra->attrib == Py_None)
1575 return PyList_New(0);
1576
1577 return PyDict_Items(self->extra->attrib);
1578}
1579
Serhiy Storchakacb985562015-05-04 15:32:48 +03001580/*[clinic input]
1581_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001582
Serhiy Storchakacb985562015-05-04 15:32:48 +03001583[clinic start generated code]*/
1584
1585static PyObject *
1586_elementtree_Element_keys_impl(ElementObject *self)
1587/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1588{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001589 if (!self->extra || self->extra->attrib == Py_None)
1590 return PyList_New(0);
1591
1592 return PyDict_Keys(self->extra->attrib);
1593}
1594
Martin v. Löwis18e16552006-02-15 17:27:45 +00001595static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001596element_length(ElementObject* self)
1597{
1598 if (!self->extra)
1599 return 0;
1600
1601 return self->extra->length;
1602}
1603
Serhiy Storchakacb985562015-05-04 15:32:48 +03001604/*[clinic input]
1605_elementtree.Element.makeelement
1606
1607 tag: object
1608 attrib: object
1609 /
1610
1611[clinic start generated code]*/
1612
1613static PyObject *
1614_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1615 PyObject *attrib)
1616/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001617{
1618 PyObject* elem;
1619
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001620 attrib = PyDict_Copy(attrib);
1621 if (!attrib)
1622 return NULL;
1623
Eli Bendersky092af1f2012-03-04 07:14:03 +02001624 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001625
1626 Py_DECREF(attrib);
1627
1628 return elem;
1629}
1630
Serhiy Storchakacb985562015-05-04 15:32:48 +03001631/*[clinic input]
1632_elementtree.Element.remove
1633
1634 subelement: object(subclass_of='&Element_Type')
1635 /
1636
1637[clinic start generated code]*/
1638
1639static PyObject *
1640_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1641/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001642{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001643 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001644 int rc;
1645 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001646
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001647 if (!self->extra) {
1648 /* element has no children, so raise exception */
1649 PyErr_SetString(
1650 PyExc_ValueError,
1651 "list.remove(x): x not in list"
1652 );
1653 return NULL;
1654 }
1655
1656 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001657 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001658 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001659 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001660 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001661 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001662 if (rc < 0)
1663 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001664 }
1665
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001666 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001667 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001668 PyErr_SetString(
1669 PyExc_ValueError,
1670 "list.remove(x): x not in list"
1671 );
1672 return NULL;
1673 }
1674
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001675 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001676
1677 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001678 for (; i < self->extra->length; i++)
1679 self->extra->children[i] = self->extra->children[i+1];
1680
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001681 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001682 Py_RETURN_NONE;
1683}
1684
1685static PyObject*
1686element_repr(ElementObject* self)
1687{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001688 int status;
1689
1690 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001691 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001692
1693 status = Py_ReprEnter((PyObject *)self);
1694 if (status == 0) {
1695 PyObject *res;
1696 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1697 Py_ReprLeave((PyObject *)self);
1698 return res;
1699 }
1700 if (status > 0)
1701 PyErr_Format(PyExc_RuntimeError,
1702 "reentrant call inside %s.__repr__",
1703 Py_TYPE(self)->tp_name);
1704 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001705}
1706
Serhiy Storchakacb985562015-05-04 15:32:48 +03001707/*[clinic input]
1708_elementtree.Element.set
1709
1710 key: object
1711 value: object
1712 /
1713
1714[clinic start generated code]*/
1715
1716static PyObject *
1717_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1718 PyObject *value)
1719/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001720{
1721 PyObject* attrib;
1722
Victor Stinner5f0af232013-07-11 23:01:36 +02001723 if (!self->extra) {
1724 if (create_extra(self, NULL) < 0)
1725 return NULL;
1726 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001727
1728 attrib = element_get_attrib(self);
1729 if (!attrib)
1730 return NULL;
1731
1732 if (PyDict_SetItem(attrib, key, value) < 0)
1733 return NULL;
1734
1735 Py_RETURN_NONE;
1736}
1737
1738static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001739element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001740{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001741 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001742 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001743 PyObject* old;
1744
1745 if (!self->extra || index < 0 || index >= self->extra->length) {
1746 PyErr_SetString(
1747 PyExc_IndexError,
1748 "child assignment index out of range");
1749 return -1;
1750 }
1751
1752 old = self->extra->children[index];
1753
1754 if (item) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001755 if (!Element_Check(item)) {
1756 raise_type_error(item);
1757 return -1;
1758 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001759 Py_INCREF(item);
1760 self->extra->children[index] = item;
1761 } else {
1762 self->extra->length--;
1763 for (i = index; i < self->extra->length; i++)
1764 self->extra->children[i] = self->extra->children[i+1];
1765 }
1766
1767 Py_DECREF(old);
1768
1769 return 0;
1770}
1771
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001772static PyObject*
1773element_subscr(PyObject* self_, PyObject* item)
1774{
1775 ElementObject* self = (ElementObject*) self_;
1776
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001777 if (PyIndex_Check(item)) {
1778 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001779
1780 if (i == -1 && PyErr_Occurred()) {
1781 return NULL;
1782 }
1783 if (i < 0 && self->extra)
1784 i += self->extra->length;
1785 return element_getitem(self_, i);
1786 }
1787 else if (PySlice_Check(item)) {
1788 Py_ssize_t start, stop, step, slicelen, cur, i;
1789 PyObject* list;
1790
1791 if (!self->extra)
1792 return PyList_New(0);
1793
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001794 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001795 return NULL;
1796 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001797 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1798 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001799
1800 if (slicelen <= 0)
1801 return PyList_New(0);
1802 else {
1803 list = PyList_New(slicelen);
1804 if (!list)
1805 return NULL;
1806
1807 for (cur = start, i = 0; i < slicelen;
1808 cur += step, i++) {
1809 PyObject* item = self->extra->children[cur];
1810 Py_INCREF(item);
1811 PyList_SET_ITEM(list, i, item);
1812 }
1813
1814 return list;
1815 }
1816 }
1817 else {
1818 PyErr_SetString(PyExc_TypeError,
1819 "element indices must be integers");
1820 return NULL;
1821 }
1822}
1823
1824static int
1825element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1826{
1827 ElementObject* self = (ElementObject*) self_;
1828
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001829 if (PyIndex_Check(item)) {
1830 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001831
1832 if (i == -1 && PyErr_Occurred()) {
1833 return -1;
1834 }
1835 if (i < 0 && self->extra)
1836 i += self->extra->length;
1837 return element_setitem(self_, i, value);
1838 }
1839 else if (PySlice_Check(item)) {
1840 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1841
1842 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001843 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001844
Victor Stinner5f0af232013-07-11 23:01:36 +02001845 if (!self->extra) {
1846 if (create_extra(self, NULL) < 0)
1847 return -1;
1848 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001849
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001850 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001851 return -1;
1852 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001853 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1854 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001855
Eli Bendersky865756a2012-03-09 13:38:15 +02001856 if (value == NULL) {
1857 /* Delete slice */
1858 size_t cur;
1859 Py_ssize_t i;
1860
1861 if (slicelen <= 0)
1862 return 0;
1863
1864 /* Since we're deleting, the direction of the range doesn't matter,
1865 * so for simplicity make it always ascending.
1866 */
1867 if (step < 0) {
1868 stop = start + 1;
1869 start = stop + step * (slicelen - 1) - 1;
1870 step = -step;
1871 }
1872
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001873 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001874
1875 /* recycle is a list that will contain all the children
1876 * scheduled for removal.
1877 */
1878 if (!(recycle = PyList_New(slicelen))) {
Eli Bendersky865756a2012-03-09 13:38:15 +02001879 return -1;
1880 }
1881
1882 /* This loop walks over all the children that have to be deleted,
1883 * with cur pointing at them. num_moved is the amount of children
1884 * until the next deleted child that have to be "shifted down" to
1885 * occupy the deleted's places.
1886 * Note that in the ith iteration, shifting is done i+i places down
1887 * because i children were already removed.
1888 */
1889 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1890 /* Compute how many children have to be moved, clipping at the
1891 * list end.
1892 */
1893 Py_ssize_t num_moved = step - 1;
1894 if (cur + step >= (size_t)self->extra->length) {
1895 num_moved = self->extra->length - cur - 1;
1896 }
1897
1898 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1899
1900 memmove(
1901 self->extra->children + cur - i,
1902 self->extra->children + cur + 1,
1903 num_moved * sizeof(PyObject *));
1904 }
1905
1906 /* Leftover "tail" after the last removed child */
1907 cur = start + (size_t)slicelen * step;
1908 if (cur < (size_t)self->extra->length) {
1909 memmove(
1910 self->extra->children + cur - slicelen,
1911 self->extra->children + cur,
1912 (self->extra->length - cur) * sizeof(PyObject *));
1913 }
1914
1915 self->extra->length -= slicelen;
1916
1917 /* Discard the recycle list with all the deleted sub-elements */
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -06001918 Py_DECREF(recycle);
Eli Bendersky865756a2012-03-09 13:38:15 +02001919 return 0;
1920 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001921
1922 /* A new slice is actually being assigned */
1923 seq = PySequence_Fast(value, "");
1924 if (!seq) {
1925 PyErr_Format(
1926 PyExc_TypeError,
1927 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1928 );
1929 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001930 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001931 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001932
1933 if (step != 1 && newlen != slicelen)
1934 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001935 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001936 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001937 "attempt to assign sequence of size %zd "
1938 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001939 newlen, slicelen
1940 );
1941 return -1;
1942 }
1943
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001944 /* Resize before creating the recycle bin, to prevent refleaks. */
1945 if (newlen > slicelen) {
1946 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001947 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001948 return -1;
1949 }
1950 }
1951
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001952 for (i = 0; i < newlen; i++) {
1953 PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1954 if (!Element_Check(element)) {
1955 raise_type_error(element);
1956 Py_DECREF(seq);
1957 return -1;
1958 }
1959 }
1960
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001961 if (slicelen > 0) {
1962 /* to avoid recursive calls to this method (via decref), move
1963 old items to the recycle bin here, and get rid of them when
1964 we're done modifying the element */
1965 recycle = PyList_New(slicelen);
1966 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001967 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001968 return -1;
1969 }
1970 for (cur = start, i = 0; i < slicelen;
1971 cur += step, i++)
1972 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1973 }
1974
1975 if (newlen < slicelen) {
1976 /* delete slice */
1977 for (i = stop; i < self->extra->length; i++)
1978 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1979 } else if (newlen > slicelen) {
1980 /* insert slice */
1981 for (i = self->extra->length-1; i >= stop; i--)
1982 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1983 }
1984
1985 /* replace the slice */
1986 for (cur = start, i = 0; i < newlen;
1987 cur += step, i++) {
1988 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1989 Py_INCREF(element);
1990 self->extra->children[cur] = element;
1991 }
1992
1993 self->extra->length += newlen - slicelen;
1994
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001995 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001996
1997 /* discard the recycle bin, and everything in it */
1998 Py_XDECREF(recycle);
1999
2000 return 0;
2001 }
2002 else {
2003 PyErr_SetString(PyExc_TypeError,
2004 "element indices must be integers");
2005 return -1;
2006 }
2007}
2008
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002009static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02002010element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002011{
Serhiy Storchakadde08152015-11-25 15:28:13 +02002012 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002013 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002014 return res;
2015}
2016
Serhiy Storchakadde08152015-11-25 15:28:13 +02002017static PyObject*
2018element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002019{
Serhiy Storchakadde08152015-11-25 15:28:13 +02002020 PyObject *res = element_get_text(self);
2021 Py_XINCREF(res);
2022 return res;
2023}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002024
Serhiy Storchakadde08152015-11-25 15:28:13 +02002025static PyObject*
2026element_tail_getter(ElementObject *self, void *closure)
2027{
2028 PyObject *res = element_get_tail(self);
2029 Py_XINCREF(res);
2030 return res;
2031}
2032
2033static PyObject*
2034element_attrib_getter(ElementObject *self, void *closure)
2035{
2036 PyObject *res;
2037 if (!self->extra) {
2038 if (create_extra(self, NULL) < 0)
2039 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002040 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02002041 res = element_get_attrib(self);
2042 Py_XINCREF(res);
2043 return res;
2044}
Victor Stinner4d463432013-07-11 23:05:03 +02002045
/* macro for setter validation: a NULL value means "delete the attribute",
   which is not supported, so raise AttributeError and return -1 from the
   enclosing setter.  Wrapped in do { } while (0) so that the macro behaves
   as a single statement (no dangling-else surprises) and requires the
   trailing semicolon at the point of use. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2054
Serhiy Storchakadde08152015-11-25 15:28:13 +02002055static int
2056element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2057{
2058 _VALIDATE_ATTR_VALUE(value);
2059 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002060 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002061 return 0;
2062}
2063
2064static int
2065element_text_setter(ElementObject *self, PyObject *value, void *closure)
2066{
2067 _VALIDATE_ATTR_VALUE(value);
2068 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002069 _set_joined_ptr(&self->text, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002070 return 0;
2071}
2072
2073static int
2074element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2075{
2076 _VALIDATE_ATTR_VALUE(value);
2077 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002078 _set_joined_ptr(&self->tail, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002079 return 0;
2080}
2081
2082static int
2083element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2084{
2085 _VALIDATE_ATTR_VALUE(value);
2086 if (!self->extra) {
2087 if (create_extra(self, NULL) < 0)
2088 return -1;
2089 }
2090 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002091 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002092 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002093}
2094
2095static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002096 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002097 0, /* sq_concat */
2098 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002099 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002100 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002101 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002102 0,
2103};
2104
Eli Bendersky64d11e62012-06-15 07:42:50 +03002105/******************************* Element iterator ****************************/
2106
2107/* ElementIterObject represents the iteration state over an XML element in
2108 * pre-order traversal. To keep track of which sub-element should be returned
2109 * next, a stack of parents is maintained. This is a standard stack-based
2110 * iterative pre-order traversal of a tree.
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002111 * The stack is managed using a continuous array.
2112 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002113 * the current one is exhausted, and the next child to examine in that parent.
2114 */
2115typedef struct ParentLocator_t {
2116 ElementObject *parent;
2117 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002118} ParentLocator;
2119
2120typedef struct {
2121 PyObject_HEAD
2122 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002123 Py_ssize_t parent_stack_used;
2124 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002125 ElementObject *root_element;
2126 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002127 int gettext;
2128} ElementIterObject;
2129
2130
2131static void
2132elementiter_dealloc(ElementIterObject *it)
2133{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002134 Py_ssize_t i = it->parent_stack_used;
2135 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002136 /* bpo-31095: UnTrack is needed before calling any callbacks */
2137 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002138 while (i--)
2139 Py_XDECREF(it->parent_stack[i].parent);
2140 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002141
2142 Py_XDECREF(it->sought_tag);
2143 Py_XDECREF(it->root_element);
2144
Eli Bendersky64d11e62012-06-15 07:42:50 +03002145 PyObject_GC_Del(it);
2146}
2147
2148static int
2149elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2150{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002151 Py_ssize_t i = it->parent_stack_used;
2152 while (i--)
2153 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002154
2155 Py_VISIT(it->root_element);
2156 Py_VISIT(it->sought_tag);
2157 return 0;
2158}
2159
2160/* Helper function for elementiter_next. Add a new parent to the parent stack.
2161 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002162static int
2163parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002164{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002165 ParentLocator *item;
2166
2167 if (it->parent_stack_used >= it->parent_stack_size) {
2168 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2169 ParentLocator *parent_stack = it->parent_stack;
2170 PyMem_Resize(parent_stack, ParentLocator, new_size);
2171 if (parent_stack == NULL)
2172 return -1;
2173 it->parent_stack = parent_stack;
2174 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002175 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002176 item = it->parent_stack + it->parent_stack_used++;
2177 Py_INCREF(parent);
2178 item->parent = parent;
2179 item->child_index = 0;
2180 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002181}
2182
2183static PyObject *
2184elementiter_next(ElementIterObject *it)
2185{
2186 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002187 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002188 * A short note on gettext: this function serves both the iter() and
2189 * itertext() methods to avoid code duplication. However, there are a few
2190 * small differences in the way these iterations work. Namely:
2191 * - itertext() only yields text from nodes that have it, and continues
2192 * iterating when a node doesn't have text (so it doesn't return any
2193 * node like iter())
2194 * - itertext() also has to handle tail, after finishing with all the
2195 * children of a node.
2196 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002197 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002198 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002199 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002200
2201 while (1) {
2202 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002203 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002204 * iterator is exhausted.
2205 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002206 if (!it->parent_stack_used) {
2207 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002208 PyErr_SetNone(PyExc_StopIteration);
2209 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002210 }
2211
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002212 elem = it->root_element; /* steals a reference */
2213 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002214 }
2215 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002216 /* See if there are children left to traverse in the current parent. If
2217 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002218 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002219 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2220 Py_ssize_t child_index = item->child_index;
2221 ElementObjectExtra *extra;
2222 elem = item->parent;
2223 extra = elem->extra;
2224 if (!extra || child_index >= extra->length) {
2225 it->parent_stack_used--;
2226 /* Note that extra condition on it->parent_stack_used here;
2227 * this is because itertext() is supposed to only return *inner*
2228 * text, not text following the element it began iteration with.
2229 */
2230 if (it->gettext && it->parent_stack_used) {
2231 text = element_get_tail(elem);
2232 goto gettext;
2233 }
2234 Py_DECREF(elem);
2235 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002236 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002237
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03002238 assert(Element_Check(extra->children[child_index]));
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002239 elem = (ElementObject *)extra->children[child_index];
2240 item->child_index++;
2241 Py_INCREF(elem);
2242 }
2243
2244 if (parent_stack_push_new(it, elem) < 0) {
2245 Py_DECREF(elem);
2246 PyErr_NoMemory();
2247 return NULL;
2248 }
2249 if (it->gettext) {
2250 text = element_get_text(elem);
2251 goto gettext;
2252 }
2253
2254 if (it->sought_tag == Py_None)
2255 return (PyObject *)elem;
2256
2257 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2258 if (rc > 0)
2259 return (PyObject *)elem;
2260
2261 Py_DECREF(elem);
2262 if (rc < 0)
2263 return NULL;
2264 continue;
2265
2266gettext:
2267 if (!text) {
2268 Py_DECREF(elem);
2269 return NULL;
2270 }
2271 if (text == Py_None) {
2272 Py_DECREF(elem);
2273 }
2274 else {
2275 Py_INCREF(text);
2276 Py_DECREF(elem);
2277 rc = PyObject_IsTrue(text);
2278 if (rc > 0)
2279 return text;
2280 Py_DECREF(text);
2281 if (rc < 0)
2282 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002283 }
2284 }
2285
2286 return NULL;
2287}
2288
2289
2290static PyTypeObject ElementIter_Type = {
2291 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002292 /* Using the module's name since the pure-Python implementation does not
2293 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002294 "_elementtree._element_iterator", /* tp_name */
2295 sizeof(ElementIterObject), /* tp_basicsize */
2296 0, /* tp_itemsize */
2297 /* methods */
2298 (destructor)elementiter_dealloc, /* tp_dealloc */
2299 0, /* tp_print */
2300 0, /* tp_getattr */
2301 0, /* tp_setattr */
2302 0, /* tp_reserved */
2303 0, /* tp_repr */
2304 0, /* tp_as_number */
2305 0, /* tp_as_sequence */
2306 0, /* tp_as_mapping */
2307 0, /* tp_hash */
2308 0, /* tp_call */
2309 0, /* tp_str */
2310 0, /* tp_getattro */
2311 0, /* tp_setattro */
2312 0, /* tp_as_buffer */
2313 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2314 0, /* tp_doc */
2315 (traverseproc)elementiter_traverse, /* tp_traverse */
2316 0, /* tp_clear */
2317 0, /* tp_richcompare */
2318 0, /* tp_weaklistoffset */
2319 PyObject_SelfIter, /* tp_iter */
2320 (iternextfunc)elementiter_next, /* tp_iternext */
2321 0, /* tp_methods */
2322 0, /* tp_members */
2323 0, /* tp_getset */
2324 0, /* tp_base */
2325 0, /* tp_dict */
2326 0, /* tp_descr_get */
2327 0, /* tp_descr_set */
2328 0, /* tp_dictoffset */
2329 0, /* tp_init */
2330 0, /* tp_alloc */
2331 0, /* tp_new */
2332};
2333
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002334#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002335
2336static PyObject *
2337create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2338{
2339 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002340
2341 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2342 if (!it)
2343 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002344
Victor Stinner4d463432013-07-11 23:05:03 +02002345 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002346 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002347 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002348 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002349 it->root_element = self;
2350
Eli Bendersky64d11e62012-06-15 07:42:50 +03002351 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002352
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002353 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002354 if (it->parent_stack == NULL) {
2355 Py_DECREF(it);
2356 PyErr_NoMemory();
2357 return NULL;
2358 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002359 it->parent_stack_used = 0;
2360 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002361
Eli Bendersky64d11e62012-06-15 07:42:50 +03002362 return (PyObject *)it;
2363}
2364
2365
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002366/* ==================================================================== */
2367/* the tree builder type */
2368
2369typedef struct {
2370 PyObject_HEAD
2371
Eli Bendersky58d548d2012-05-29 15:45:16 +03002372 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002373
Antoine Pitrouee329312012-10-04 19:53:29 +02002374 PyObject *this; /* current node */
2375 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002376
Eli Bendersky58d548d2012-05-29 15:45:16 +03002377 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002378
Eli Bendersky58d548d2012-05-29 15:45:16 +03002379 PyObject *stack; /* element stack */
2380 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002381
Eli Bendersky48d358b2012-05-30 17:57:50 +03002382 PyObject *element_factory;
2383
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002384 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002385 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002386 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2387 PyObject *end_event_obj;
2388 PyObject *start_ns_event_obj;
2389 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002390} TreeBuilderObject;
2391
/* True when op's type is exactly TreeBuilder_Type (subclasses do not
   qualify). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002393
2394/* -------------------------------------------------------------------- */
2395/* constructor and destructor */
2396
Eli Bendersky58d548d2012-05-29 15:45:16 +03002397static PyObject *
2398treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002399{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002400 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2401 if (t != NULL) {
2402 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002403
Eli Bendersky58d548d2012-05-29 15:45:16 +03002404 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002405 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002406 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002407 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002408
Eli Bendersky58d548d2012-05-29 15:45:16 +03002409 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002410 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002411 t->stack = PyList_New(20);
2412 if (!t->stack) {
2413 Py_DECREF(t->this);
2414 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002415 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002416 return NULL;
2417 }
2418 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002419
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002420 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002421 t->start_event_obj = t->end_event_obj = NULL;
2422 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2423 }
2424 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002425}
2426
Serhiy Storchakacb985562015-05-04 15:32:48 +03002427/*[clinic input]
2428_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002429
Serhiy Storchakacb985562015-05-04 15:32:48 +03002430 element_factory: object = NULL
2431
2432[clinic start generated code]*/
2433
2434static int
2435_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2436 PyObject *element_factory)
2437/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2438{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002439 if (element_factory) {
2440 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002441 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002442 }
2443
Eli Bendersky58d548d2012-05-29 15:45:16 +03002444 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002445}
2446
Eli Bendersky48d358b2012-05-30 17:57:50 +03002447static int
2448treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2449{
Serhiy Storchakad2a75c62018-12-18 22:29:14 +02002450 Py_VISIT(self->end_ns_event_obj);
2451 Py_VISIT(self->start_ns_event_obj);
2452 Py_VISIT(self->end_event_obj);
2453 Py_VISIT(self->start_event_obj);
2454 Py_VISIT(self->events_append);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002455 Py_VISIT(self->root);
2456 Py_VISIT(self->this);
2457 Py_VISIT(self->last);
2458 Py_VISIT(self->data);
2459 Py_VISIT(self->stack);
2460 Py_VISIT(self->element_factory);
2461 return 0;
2462}
2463
2464static int
2465treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002466{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002467 Py_CLEAR(self->end_ns_event_obj);
2468 Py_CLEAR(self->start_ns_event_obj);
2469 Py_CLEAR(self->end_event_obj);
2470 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002471 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002472 Py_CLEAR(self->stack);
2473 Py_CLEAR(self->data);
2474 Py_CLEAR(self->last);
2475 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002476 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002477 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002478 return 0;
2479}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002480
Eli Bendersky48d358b2012-05-30 17:57:50 +03002481static void
2482treebuilder_dealloc(TreeBuilderObject *self)
2483{
2484 PyObject_GC_UnTrack(self);
2485 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002486 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002487}
2488
/* -------------------------------------------------------------------- */
/* helpers for handling of arbitrary element-like objects */
2491
2492static int
Serhiy Storchaka576def02017-03-30 09:47:31 +03002493treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
Antoine Pitrouee329312012-10-04 19:53:29 +02002494 PyObject **dest, _Py_Identifier *name)
2495{
2496 if (Element_CheckExact(element)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002497 PyObject *tmp = JOIN_OBJ(*dest);
2498 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2499 *data = NULL;
2500 Py_DECREF(tmp);
Antoine Pitrouee329312012-10-04 19:53:29 +02002501 return 0;
2502 }
2503 else {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002504 PyObject *joined = list_join(*data);
Antoine Pitrouee329312012-10-04 19:53:29 +02002505 int r;
2506 if (joined == NULL)
2507 return -1;
2508 r = _PyObject_SetAttrId(element, name, joined);
2509 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002510 if (r < 0)
2511 return -1;
2512 Py_CLEAR(*data);
2513 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002514 }
2515}
2516
Serhiy Storchaka576def02017-03-30 09:47:31 +03002517LOCAL(int)
2518treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002519{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002520 PyObject *element = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002521
Serhiy Storchaka576def02017-03-30 09:47:31 +03002522 if (!self->data) {
2523 return 0;
2524 }
2525
2526 if (self->this == element) {
2527 _Py_IDENTIFIER(text);
2528 return treebuilder_set_element_text_or_tail(
2529 element, &self->data,
2530 &((ElementObject *) element)->text, &PyId_text);
2531 }
2532 else {
2533 _Py_IDENTIFIER(tail);
2534 return treebuilder_set_element_text_or_tail(
2535 element, &self->data,
2536 &((ElementObject *) element)->tail, &PyId_tail);
2537 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002538}
2539
2540static int
2541treebuilder_add_subelement(PyObject *element, PyObject *child)
2542{
2543 _Py_IDENTIFIER(append);
2544 if (Element_CheckExact(element)) {
2545 ElementObject *elem = (ElementObject *) element;
2546 return element_add_subelement(elem, child);
2547 }
2548 else {
2549 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002550 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002551 if (res == NULL)
2552 return -1;
2553 Py_DECREF(res);
2554 return 0;
2555 }
2556}
2557
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002558LOCAL(int)
2559treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2560 PyObject *node)
2561{
2562 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002563 PyObject *res;
2564 PyObject *event = PyTuple_Pack(2, action, node);
2565 if (event == NULL)
2566 return -1;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002567 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002568 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002569 if (res == NULL)
2570 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002571 Py_DECREF(res);
2572 }
2573 return 0;
2574}
2575
/* -------------------------------------------------------------------- */
/* handlers */
2578
2579LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002580treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2581 PyObject* attrib)
2582{
2583 PyObject* node;
2584 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002585 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002586
Serhiy Storchaka576def02017-03-30 09:47:31 +03002587 if (treebuilder_flush_data(self) < 0) {
2588 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002589 }
2590
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002591 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002592 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002593 } else if (attrib == Py_None) {
2594 attrib = PyDict_New();
2595 if (!attrib)
2596 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002597 node = PyObject_CallFunctionObjArgs(self->element_factory,
2598 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002599 Py_DECREF(attrib);
2600 }
2601 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002602 node = PyObject_CallFunctionObjArgs(self->element_factory,
2603 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002604 }
2605 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002606 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002607 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002608
Antoine Pitrouee329312012-10-04 19:53:29 +02002609 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002610
2611 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002612 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002613 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002614 } else {
2615 if (self->root) {
2616 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002617 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002618 "multiple elements on top level"
2619 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002620 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002621 }
2622 Py_INCREF(node);
2623 self->root = node;
2624 }
2625
2626 if (self->index < PyList_GET_SIZE(self->stack)) {
2627 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002628 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002629 Py_INCREF(this);
2630 } else {
2631 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002632 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002633 }
2634 self->index++;
2635
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002636 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002637 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002638 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002639 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002640
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002641 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2642 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002643
2644 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002645
2646 error:
2647 Py_DECREF(node);
2648 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002649}
2650
2651LOCAL(PyObject*)
2652treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2653{
2654 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002655 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002656 /* ignore calls to data before the first call to start */
2657 Py_RETURN_NONE;
2658 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002659 /* store the first item as is */
2660 Py_INCREF(data); self->data = data;
2661 } else {
2662 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002663 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2664 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002665 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002666 /* expat often generates single character data sections; handle
2667 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002668 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2669 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002670 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002671 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002672 } else if (PyList_CheckExact(self->data)) {
2673 if (PyList_Append(self->data, data) < 0)
2674 return NULL;
2675 } else {
2676 PyObject* list = PyList_New(2);
2677 if (!list)
2678 return NULL;
2679 PyList_SET_ITEM(list, 0, self->data);
2680 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2681 self->data = list;
2682 }
2683 }
2684
2685 Py_RETURN_NONE;
2686}
2687
2688LOCAL(PyObject*)
2689treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2690{
2691 PyObject* item;
2692
Serhiy Storchaka576def02017-03-30 09:47:31 +03002693 if (treebuilder_flush_data(self) < 0) {
2694 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002695 }
2696
2697 if (self->index == 0) {
2698 PyErr_SetString(
2699 PyExc_IndexError,
2700 "pop from empty stack"
2701 );
2702 return NULL;
2703 }
2704
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002705 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002706 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002707 self->index--;
2708 self->this = PyList_GET_ITEM(self->stack, self->index);
2709 Py_INCREF(self->this);
2710 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002711
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002712 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2713 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002714
2715 Py_INCREF(self->last);
2716 return (PyObject*) self->last;
2717}
2718
/* -------------------------------------------------------------------- */
/* methods (in alphabetical order) */

/*[clinic input]
_elementtree.TreeBuilder.data

    data: object
    /

[clinic start generated code]*/
2729
2730static PyObject *
2731_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2732/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2733{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002734 return treebuilder_handle_data(self, data);
2735}
2736
/*[clinic input]
_elementtree.TreeBuilder.end

    tag: object
    /

[clinic start generated code]*/
2744
2745static PyObject *
2746_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2747/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2748{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002749 return treebuilder_handle_end(self, tag);
2750}
2751
2752LOCAL(PyObject*)
2753treebuilder_done(TreeBuilderObject* self)
2754{
2755 PyObject* res;
2756
2757 /* FIXME: check stack size? */
2758
2759 if (self->root)
2760 res = self->root;
2761 else
2762 res = Py_None;
2763
2764 Py_INCREF(res);
2765 return res;
2766}
2767
/*[clinic input]
_elementtree.TreeBuilder.close

[clinic start generated code]*/
2772
2773static PyObject *
2774_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2775/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2776{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002777 return treebuilder_done(self);
2778}
2779
/*[clinic input]
_elementtree.TreeBuilder.start

    tag: object
    attrs: object = None
    /

[clinic start generated code]*/
2788
2789static PyObject *
2790_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2791 PyObject *attrs)
2792/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002793{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002794 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002795}
2796
/* ==================================================================== */
/* the expat interface */
2799
#include "expat.h"
#include "pyexpat.h"

/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
 * cached globally without being in per-module state.
 */
static struct PyExpat_CAPI *expat_capi;
/* EXPAT(func) selects the entry `func` of the cached dispatch table, e.g.
   EXPAT(GetErrorLineNumber)(parser). */
#define EXPAT(func) (expat_capi->func)

/* Memory handling suite that routes allocations through Python's object
   allocator (malloc, realloc and free entries, in that order). */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2811
/* Instance layout of the XMLParser type.  All object fields start out as
   NULL (see xmlparser_new). */
typedef struct {
    PyObject_HEAD

    /* underlying expat parser handle */
    XML_Parser parser;

    /* object receiving the parse events; an exact TreeBuilder is driven
       directly by the expat_* handlers, bypassing the handle_* callables */
    PyObject *target;
    /* mapping consulted by expat_default_handler to resolve "&name;" */
    PyObject *entity;

    /* cache used by makeuniversal(): raw UTF-8 name (bytes) -> decoded
       universal name (str) */
    PyObject *names;

    /* callables invoked by the corresponding expat_* handlers when the
       target is not an exact TreeBuilder; each may be NULL */
    PyObject *handle_start;
    PyObject *handle_data;
    PyObject *handle_end;

    PyObject *handle_comment;
    PyObject *handle_pi;
    PyObject *handle_doctype;

    /* NOTE(review): not used in this part of the file; presumably the
       target's close() callable -- confirm against xmlparser init/close */
    PyObject *handle_close;

} XMLParserObject;
2833
/* helpers */
2835
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002836LOCAL(PyObject*)
2837makeuniversal(XMLParserObject* self, const char* string)
2838{
2839 /* convert a UTF-8 tag/attribute name from the expat parser
2840 to a universal name string */
2841
Antoine Pitrouc1948842012-10-01 23:40:37 +02002842 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002843 PyObject* key;
2844 PyObject* value;
2845
2846 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002847 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002848 if (!key)
2849 return NULL;
2850
2851 value = PyDict_GetItem(self->names, key);
2852
2853 if (value) {
2854 Py_INCREF(value);
2855 } else {
2856 /* new name. convert to universal name, and decode as
2857 necessary */
2858
2859 PyObject* tag;
2860 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002861 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002862
2863 /* look for namespace separator */
2864 for (i = 0; i < size; i++)
2865 if (string[i] == '}')
2866 break;
2867 if (i != size) {
2868 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002869 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002870 if (tag == NULL) {
2871 Py_DECREF(key);
2872 return NULL;
2873 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002874 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002875 p[0] = '{';
2876 memcpy(p+1, string, size);
2877 size++;
2878 } else {
2879 /* plain name; use key as tag */
2880 Py_INCREF(key);
2881 tag = key;
2882 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002883
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002884 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002885 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002886 value = PyUnicode_DecodeUTF8(p, size, "strict");
2887 Py_DECREF(tag);
2888 if (!value) {
2889 Py_DECREF(key);
2890 return NULL;
2891 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002892
2893 /* add to names dictionary */
2894 if (PyDict_SetItem(self->names, key, value) < 0) {
2895 Py_DECREF(key);
2896 Py_DECREF(value);
2897 return NULL;
2898 }
2899 }
2900
2901 Py_DECREF(key);
2902 return value;
2903}
2904
Eli Bendersky5b77d812012-03-16 08:20:05 +02002905/* Set the ParseError exception with the given parameters.
2906 * If message is not NULL, it's used as the error string. Otherwise, the
2907 * message string is the default for the given error_code.
2908*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002909static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002910expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2911 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002912{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002913 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002914 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002915
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002916 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002917 message ? message : EXPAT(ErrorString)(error_code),
2918 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002919 if (errmsg == NULL)
2920 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002921
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002922 error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002923 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002924 if (!error)
2925 return;
2926
Eli Bendersky5b77d812012-03-16 08:20:05 +02002927 /* Add code and position attributes */
2928 code = PyLong_FromLong((long)error_code);
2929 if (!code) {
2930 Py_DECREF(error);
2931 return;
2932 }
2933 if (PyObject_SetAttrString(error, "code", code) == -1) {
2934 Py_DECREF(error);
2935 Py_DECREF(code);
2936 return;
2937 }
2938 Py_DECREF(code);
2939
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002940 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002941 if (!position) {
2942 Py_DECREF(error);
2943 return;
2944 }
2945 if (PyObject_SetAttrString(error, "position", position) == -1) {
2946 Py_DECREF(error);
2947 Py_DECREF(position);
2948 return;
2949 }
2950 Py_DECREF(position);
2951
Eli Bendersky532d03e2013-08-10 08:00:39 -07002952 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002953 Py_DECREF(error);
2954}
2955
/* -------------------------------------------------------------------- */
/* handlers */
2958
2959static void
2960expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2961 int data_len)
2962{
2963 PyObject* key;
2964 PyObject* value;
2965 PyObject* res;
2966
2967 if (data_len < 2 || data_in[0] != '&')
2968 return;
2969
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002970 if (PyErr_Occurred())
2971 return;
2972
Neal Norwitz0269b912007-08-08 06:56:02 +00002973 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002974 if (!key)
2975 return;
2976
2977 value = PyDict_GetItem(self->entity, key);
2978
2979 if (value) {
2980 if (TreeBuilder_CheckExact(self->target))
2981 res = treebuilder_handle_data(
2982 (TreeBuilderObject*) self->target, value
2983 );
2984 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002985 res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002986 else
2987 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002988 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002989 } else if (!PyErr_Occurred()) {
2990 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002991 char message[128] = "undefined entity ";
2992 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002993 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002994 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002995 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002996 EXPAT(GetErrorColumnNumber)(self->parser),
2997 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002998 );
2999 }
3000
3001 Py_DECREF(key);
3002}
3003
3004static void
3005expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
3006 const XML_Char **attrib_in)
3007{
3008 PyObject* res;
3009 PyObject* tag;
3010 PyObject* attrib;
3011 int ok;
3012
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003013 if (PyErr_Occurred())
3014 return;
3015
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003016 /* tag name */
3017 tag = makeuniversal(self, tag_in);
3018 if (!tag)
3019 return; /* parser will look for errors */
3020
3021 /* attributes */
3022 if (attrib_in[0]) {
3023 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003024 if (!attrib) {
3025 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003026 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003027 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003028 while (attrib_in[0] && attrib_in[1]) {
3029 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00003030 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003031 if (!key || !value) {
3032 Py_XDECREF(value);
3033 Py_XDECREF(key);
3034 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003035 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003036 return;
3037 }
3038 ok = PyDict_SetItem(attrib, key, value);
3039 Py_DECREF(value);
3040 Py_DECREF(key);
3041 if (ok < 0) {
3042 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003043 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003044 return;
3045 }
3046 attrib_in += 2;
3047 }
3048 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003049 Py_INCREF(Py_None);
3050 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03003051 }
3052
3053 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003054 /* shortcut */
3055 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3056 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003057 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003058 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003059 if (attrib == Py_None) {
3060 Py_DECREF(attrib);
3061 attrib = PyDict_New();
3062 if (!attrib) {
3063 Py_DECREF(tag);
3064 return;
3065 }
3066 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003067 res = PyObject_CallFunctionObjArgs(self->handle_start,
3068 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003069 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003070 res = NULL;
3071
3072 Py_DECREF(tag);
3073 Py_DECREF(attrib);
3074
3075 Py_XDECREF(res);
3076}
3077
3078static void
3079expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3080 int data_len)
3081{
3082 PyObject* data;
3083 PyObject* res;
3084
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003085 if (PyErr_Occurred())
3086 return;
3087
Neal Norwitz0269b912007-08-08 06:56:02 +00003088 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003089 if (!data)
3090 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003091
3092 if (TreeBuilder_CheckExact(self->target))
3093 /* shortcut */
3094 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3095 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003096 res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003097 else
3098 res = NULL;
3099
3100 Py_DECREF(data);
3101
3102 Py_XDECREF(res);
3103}
3104
3105static void
3106expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3107{
3108 PyObject* tag;
3109 PyObject* res = NULL;
3110
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003111 if (PyErr_Occurred())
3112 return;
3113
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003114 if (TreeBuilder_CheckExact(self->target))
3115 /* shortcut */
3116 /* the standard tree builder doesn't look at the end tag */
3117 res = treebuilder_handle_end(
3118 (TreeBuilderObject*) self->target, Py_None
3119 );
3120 else if (self->handle_end) {
3121 tag = makeuniversal(self, tag_in);
3122 if (tag) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003123 res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003124 Py_DECREF(tag);
3125 }
3126 }
3127
3128 Py_XDECREF(res);
3129}
3130
3131static void
3132expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3133 const XML_Char *uri)
3134{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003135 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3136 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003137
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003138 if (PyErr_Occurred())
3139 return;
3140
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003141 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003142 return;
3143
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003144 if (!uri)
3145 uri = "";
3146 if (!prefix)
3147 prefix = "";
3148
3149 parcel = Py_BuildValue("ss", prefix, uri);
3150 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003151 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003152 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3153 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003154}
3155
3156static void
3157expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3158{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003159 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3160
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003161 if (PyErr_Occurred())
3162 return;
3163
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003164 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003165 return;
3166
3167 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003168}
3169
3170static void
3171expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3172{
3173 PyObject* comment;
3174 PyObject* res;
3175
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003176 if (PyErr_Occurred())
3177 return;
3178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003179 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003180 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003181 if (comment) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003182 res = PyObject_CallFunctionObjArgs(self->handle_comment,
3183 comment, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003184 Py_XDECREF(res);
3185 Py_DECREF(comment);
3186 }
3187 }
3188}
3189
Eli Bendersky45839902013-01-13 05:14:47 -08003190static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003191expat_start_doctype_handler(XMLParserObject *self,
3192 const XML_Char *doctype_name,
3193 const XML_Char *sysid,
3194 const XML_Char *pubid,
3195 int has_internal_subset)
3196{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003197 _Py_IDENTIFIER(doctype);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003198 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003199 PyObject *res;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003200
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003201 if (PyErr_Occurred())
3202 return;
3203
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003204 doctype_name_obj = makeuniversal(self, doctype_name);
3205 if (!doctype_name_obj)
3206 return;
3207
3208 if (sysid) {
3209 sysid_obj = makeuniversal(self, sysid);
3210 if (!sysid_obj) {
3211 Py_DECREF(doctype_name_obj);
3212 return;
3213 }
3214 } else {
3215 Py_INCREF(Py_None);
3216 sysid_obj = Py_None;
3217 }
3218
3219 if (pubid) {
3220 pubid_obj = makeuniversal(self, pubid);
3221 if (!pubid_obj) {
3222 Py_DECREF(doctype_name_obj);
3223 Py_DECREF(sysid_obj);
3224 return;
3225 }
3226 } else {
3227 Py_INCREF(Py_None);
3228 pubid_obj = Py_None;
3229 }
3230
3231 /* If the target has a handler for doctype, call it. */
3232 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003233 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3234 doctype_name_obj, pubid_obj,
3235 sysid_obj, NULL);
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003236 Py_XDECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003237 }
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003238 else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3239 (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3240 "The doctype() method of XMLParser is ignored. "
3241 "Define doctype() method on the TreeBuilder target.",
3242 1);
Serhiy Storchakaee98e7b2018-07-25 14:52:45 +03003243 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003244 }
3245
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003246 Py_DECREF(doctype_name_obj);
3247 Py_DECREF(pubid_obj);
3248 Py_DECREF(sysid_obj);
3249}
3250
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003251static void
3252expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3253 const XML_Char* data_in)
3254{
3255 PyObject* target;
3256 PyObject* data;
3257 PyObject* res;
3258
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003259 if (PyErr_Occurred())
3260 return;
3261
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003262 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003263 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3264 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003265 if (target && data) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003266 res = PyObject_CallFunctionObjArgs(self->handle_pi,
3267 target, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003268 Py_XDECREF(res);
3269 Py_DECREF(data);
3270 Py_DECREF(target);
3271 } else {
3272 Py_XDECREF(data);
3273 Py_XDECREF(target);
3274 }
3275 }
3276}
3277
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003278/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003279
Eli Bendersky52467b12012-06-01 07:13:08 +03003280static PyObject *
3281xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003282{
Eli Bendersky52467b12012-06-01 07:13:08 +03003283 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3284 if (self) {
3285 self->parser = NULL;
3286 self->target = self->entity = self->names = NULL;
3287 self->handle_start = self->handle_data = self->handle_end = NULL;
3288 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003289 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003290 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003291 return (PyObject *)self;
3292}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003293
scoderc8d8e152017-09-14 22:00:03 +02003294static int
3295ignore_attribute_error(PyObject *value)
3296{
3297 if (value == NULL) {
3298 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3299 return -1;
3300 }
3301 PyErr_Clear();
3302 }
3303 return 0;
3304}
3305
Serhiy Storchakacb985562015-05-04 15:32:48 +03003306/*[clinic input]
3307_elementtree.XMLParser.__init__
3308
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003309 *
Serhiy Storchakacb985562015-05-04 15:32:48 +03003310 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003311 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003312
3313[clinic start generated code]*/
3314
Eli Bendersky52467b12012-06-01 07:13:08 +03003315static int
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003316_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3317 const char *encoding)
3318/*[clinic end generated code: output=3ae45ec6cdf344e4 input=96288fcba916cfce]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003319{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003320 self->entity = PyDict_New();
3321 if (!self->entity)
3322 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003323
Serhiy Storchakacb985562015-05-04 15:32:48 +03003324 self->names = PyDict_New();
3325 if (!self->names) {
3326 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003327 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003328 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003329
Serhiy Storchakacb985562015-05-04 15:32:48 +03003330 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3331 if (!self->parser) {
3332 Py_CLEAR(self->entity);
3333 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003334 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003335 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003336 }
Christian Heimescb5778f2018-09-18 14:38:58 +02003337 /* expat < 2.1.0 has no XML_SetHashSalt() */
3338 if (EXPAT(SetHashSalt) != NULL) {
3339 EXPAT(SetHashSalt)(self->parser,
3340 (unsigned long)_Py_HashSecret.expat.hashsalt);
3341 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003342
Eli Bendersky52467b12012-06-01 07:13:08 +03003343 if (target) {
3344 Py_INCREF(target);
3345 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003346 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003347 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003348 Py_CLEAR(self->entity);
3349 Py_CLEAR(self->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003350 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003351 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003352 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003353 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003354
Serhiy Storchakacb985562015-05-04 15:32:48 +03003355 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003356 if (ignore_attribute_error(self->handle_start)) {
3357 return -1;
3358 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003359 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003360 if (ignore_attribute_error(self->handle_data)) {
3361 return -1;
3362 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003363 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003364 if (ignore_attribute_error(self->handle_end)) {
3365 return -1;
3366 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003367 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003368 if (ignore_attribute_error(self->handle_comment)) {
3369 return -1;
3370 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003371 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003372 if (ignore_attribute_error(self->handle_pi)) {
3373 return -1;
3374 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003375 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003376 if (ignore_attribute_error(self->handle_close)) {
3377 return -1;
3378 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003379 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003380 if (ignore_attribute_error(self->handle_doctype)) {
3381 return -1;
3382 }
Eli Bendersky45839902013-01-13 05:14:47 -08003383
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003384 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003385 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003386 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003387 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003388 (XML_StartElementHandler) expat_start_handler,
3389 (XML_EndElementHandler) expat_end_handler
3390 );
3391 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003392 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003393 (XML_DefaultHandler) expat_default_handler
3394 );
3395 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003396 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003397 (XML_CharacterDataHandler) expat_data_handler
3398 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003399 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003400 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003401 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003402 (XML_CommentHandler) expat_comment_handler
3403 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003404 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003405 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003406 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003407 (XML_ProcessingInstructionHandler) expat_pi_handler
3408 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003409 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003410 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003411 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3412 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003413 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003414 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003415 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003416 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003417
Eli Bendersky52467b12012-06-01 07:13:08 +03003418 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003419}
3420
Eli Bendersky52467b12012-06-01 07:13:08 +03003421static int
3422xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3423{
3424 Py_VISIT(self->handle_close);
3425 Py_VISIT(self->handle_pi);
3426 Py_VISIT(self->handle_comment);
3427 Py_VISIT(self->handle_end);
3428 Py_VISIT(self->handle_data);
3429 Py_VISIT(self->handle_start);
3430
3431 Py_VISIT(self->target);
3432 Py_VISIT(self->entity);
3433 Py_VISIT(self->names);
3434
3435 return 0;
3436}
3437
3438static int
3439xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003440{
Victor Stinnere727d412017-09-18 05:29:37 -07003441 if (self->parser != NULL) {
3442 XML_Parser parser = self->parser;
3443 self->parser = NULL;
3444 EXPAT(ParserFree)(parser);
3445 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003446
Antoine Pitrouc1948842012-10-01 23:40:37 +02003447 Py_CLEAR(self->handle_close);
3448 Py_CLEAR(self->handle_pi);
3449 Py_CLEAR(self->handle_comment);
3450 Py_CLEAR(self->handle_end);
3451 Py_CLEAR(self->handle_data);
3452 Py_CLEAR(self->handle_start);
3453 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003454
Antoine Pitrouc1948842012-10-01 23:40:37 +02003455 Py_CLEAR(self->target);
3456 Py_CLEAR(self->entity);
3457 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003458
Eli Bendersky52467b12012-06-01 07:13:08 +03003459 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003460}
3461
Eli Bendersky52467b12012-06-01 07:13:08 +03003462static void
3463xmlparser_dealloc(XMLParserObject* self)
3464{
3465 PyObject_GC_UnTrack(self);
3466 xmlparser_gc_clear(self);
3467 Py_TYPE(self)->tp_free((PyObject *)self);
3468}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003469
3470LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003471expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003472{
3473 int ok;
3474
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003475 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003476 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3477
3478 if (PyErr_Occurred())
3479 return NULL;
3480
3481 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003482 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003483 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003484 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003485 EXPAT(GetErrorColumnNumber)(self->parser),
3486 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003487 );
3488 return NULL;
3489 }
3490
3491 Py_RETURN_NONE;
3492}
3493
Serhiy Storchakacb985562015-05-04 15:32:48 +03003494/*[clinic input]
3495_elementtree.XMLParser.close
3496
3497[clinic start generated code]*/
3498
3499static PyObject *
3500_elementtree_XMLParser_close_impl(XMLParserObject *self)
3501/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003502{
3503 /* end feeding data to parser */
3504
3505 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003506 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003507 if (!res)
3508 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003509
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003510 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003511 Py_DECREF(res);
3512 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003513 }
3514 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003515 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003516 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003517 }
3518 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003519 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003520 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003521}
3522
Serhiy Storchakacb985562015-05-04 15:32:48 +03003523/*[clinic input]
3524_elementtree.XMLParser.feed
3525
3526 data: object
3527 /
3528
3529[clinic start generated code]*/
3530
3531static PyObject *
3532_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3533/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003534{
3535 /* feed data to parser */
3536
Serhiy Storchakacb985562015-05-04 15:32:48 +03003537 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003538 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003539 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3540 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003541 return NULL;
3542 if (data_len > INT_MAX) {
3543 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3544 return NULL;
3545 }
3546 /* Explicitly set UTF-8 encoding. Return code ignored. */
3547 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003548 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003549 }
3550 else {
3551 Py_buffer view;
3552 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003553 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003554 return NULL;
3555 if (view.len > INT_MAX) {
3556 PyBuffer_Release(&view);
3557 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3558 return NULL;
3559 }
3560 res = expat_parse(self, view.buf, (int)view.len, 0);
3561 PyBuffer_Release(&view);
3562 return res;
3563 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003564}
3565
Serhiy Storchakacb985562015-05-04 15:32:48 +03003566/*[clinic input]
3567_elementtree.XMLParser._parse_whole
3568
3569 file: object
3570 /
3571
3572[clinic start generated code]*/
3573
3574static PyObject *
3575_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3576/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003577{
Eli Benderskya3699232013-05-19 18:47:23 -07003578 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003579 PyObject* reader;
3580 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003581 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003582 PyObject* res;
3583
Serhiy Storchakacb985562015-05-04 15:32:48 +03003584 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003585 if (!reader)
3586 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003587
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003588 /* read from open file object */
3589 for (;;) {
3590
3591 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3592
3593 if (!buffer) {
3594 /* read failed (e.g. due to KeyboardInterrupt) */
3595 Py_DECREF(reader);
3596 return NULL;
3597 }
3598
Eli Benderskyf996e772012-03-16 05:53:30 +02003599 if (PyUnicode_CheckExact(buffer)) {
3600 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003601 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003602 Py_DECREF(buffer);
3603 break;
3604 }
3605 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003606 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003607 if (!temp) {
3608 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003609 Py_DECREF(reader);
3610 return NULL;
3611 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003612 buffer = temp;
3613 }
3614 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003615 Py_DECREF(buffer);
3616 break;
3617 }
3618
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003619 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3620 Py_DECREF(buffer);
3621 Py_DECREF(reader);
3622 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3623 return NULL;
3624 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003625 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003626 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003627 );
3628
3629 Py_DECREF(buffer);
3630
3631 if (!res) {
3632 Py_DECREF(reader);
3633 return NULL;
3634 }
3635 Py_DECREF(res);
3636
3637 }
3638
3639 Py_DECREF(reader);
3640
3641 res = expat_parse(self, "", 0, 1);
3642
3643 if (res && TreeBuilder_CheckExact(self->target)) {
3644 Py_DECREF(res);
3645 return treebuilder_done((TreeBuilderObject*) self->target);
3646 }
3647
3648 return res;
3649}
3650
Serhiy Storchakacb985562015-05-04 15:32:48 +03003651/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03003652_elementtree.XMLParser._setevents
3653
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003654 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003655 events_to_report: object = None
3656 /
3657
3658[clinic start generated code]*/
3659
3660static PyObject *
3661_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3662 PyObject *events_queue,
3663 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003664/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003665{
3666 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003667 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003668 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003669 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003670
3671 if (!TreeBuilder_CheckExact(self->target)) {
3672 PyErr_SetString(
3673 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003674 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003675 "targets"
3676 );
3677 return NULL;
3678 }
3679
3680 target = (TreeBuilderObject*) self->target;
3681
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003682 events_append = PyObject_GetAttrString(events_queue, "append");
3683 if (events_append == NULL)
3684 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003685 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003686
3687 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003688 Py_CLEAR(target->start_event_obj);
3689 Py_CLEAR(target->end_event_obj);
3690 Py_CLEAR(target->start_ns_event_obj);
3691 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003692
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003693 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003694 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003695 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003696 Py_RETURN_NONE;
3697 }
3698
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003699 if (!(events_seq = PySequence_Fast(events_to_report,
3700 "events must be a sequence"))) {
3701 return NULL;
3702 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003703
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03003704 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003705 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02003706 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003707 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003708 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003709 } else if (PyBytes_Check(event_name_obj)) {
3710 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003711 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003712 if (event_name == NULL) {
3713 Py_DECREF(events_seq);
3714 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3715 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003716 }
3717
3718 Py_INCREF(event_name_obj);
3719 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003720 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003721 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003722 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003723 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003724 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003725 EXPAT(SetNamespaceDeclHandler)(
3726 self->parser,
3727 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3728 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3729 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003730 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003731 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003732 EXPAT(SetNamespaceDeclHandler)(
3733 self->parser,
3734 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3735 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3736 );
3737 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003738 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003739 Py_DECREF(events_seq);
3740 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003741 return NULL;
3742 }
3743 }
3744
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003745 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003746 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003747}
3748
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003749static PyMemberDef xmlparser_members[] = {
3750 {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
3751 {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
3752 {NULL}
3753};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003754
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003755static PyObject*
3756xmlparser_version_getter(XMLParserObject *self, void *closure)
3757{
3758 return PyUnicode_FromFormat(
3759 "Expat %d.%d.%d", XML_MAJOR_VERSION,
3760 XML_MINOR_VERSION, XML_MICRO_VERSION);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003761}
3762
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003763static PyGetSetDef xmlparser_getsetlist[] = {
3764 {"version", (getter)xmlparser_version_getter, NULL, NULL},
3765 {NULL},
3766};
3767
Serhiy Storchakacb985562015-05-04 15:32:48 +03003768#include "clinic/_elementtree.c.h"
3769
3770static PyMethodDef element_methods[] = {
3771
3772 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3773
3774 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3775 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3776
3777 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3778 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3779 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3780
3781 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3782 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3783 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3784 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3785
3786 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3787 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3788 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3789
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003790 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003791 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3792
3793 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3794 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3795
3796 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3797
3798 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3799 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3800 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3801 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3802 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3803
3804 {NULL, NULL}
3805};
3806
3807static PyMappingMethods element_as_mapping = {
3808 (lenfunc) element_length,
3809 (binaryfunc) element_subscr,
3810 (objobjargproc) element_ass_subscr,
3811};
3812
Serhiy Storchakadde08152015-11-25 15:28:13 +02003813static PyGetSetDef element_getsetlist[] = {
3814 {"tag",
3815 (getter)element_tag_getter,
3816 (setter)element_tag_setter,
3817 "A string identifying what kind of data this element represents"},
3818 {"text",
3819 (getter)element_text_getter,
3820 (setter)element_text_setter,
3821 "A string of text directly after the start tag, or None"},
3822 {"tail",
3823 (getter)element_tail_getter,
3824 (setter)element_tail_setter,
3825 "A string of text directly after the end tag, or None"},
3826 {"attrib",
3827 (getter)element_attrib_getter,
3828 (setter)element_attrib_setter,
3829 "A dictionary containing the element's attributes"},
3830 {NULL},
3831};
3832
Serhiy Storchakacb985562015-05-04 15:32:48 +03003833static PyTypeObject Element_Type = {
3834 PyVarObject_HEAD_INIT(NULL, 0)
3835 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3836 /* methods */
3837 (destructor)element_dealloc, /* tp_dealloc */
3838 0, /* tp_print */
3839 0, /* tp_getattr */
3840 0, /* tp_setattr */
3841 0, /* tp_reserved */
3842 (reprfunc)element_repr, /* tp_repr */
3843 0, /* tp_as_number */
3844 &element_as_sequence, /* tp_as_sequence */
3845 &element_as_mapping, /* tp_as_mapping */
3846 0, /* tp_hash */
3847 0, /* tp_call */
3848 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003849 PyObject_GenericGetAttr, /* tp_getattro */
3850 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003851 0, /* tp_as_buffer */
3852 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3853 /* tp_flags */
3854 0, /* tp_doc */
3855 (traverseproc)element_gc_traverse, /* tp_traverse */
3856 (inquiry)element_gc_clear, /* tp_clear */
3857 0, /* tp_richcompare */
3858 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3859 0, /* tp_iter */
3860 0, /* tp_iternext */
3861 element_methods, /* tp_methods */
3862 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003863 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003864 0, /* tp_base */
3865 0, /* tp_dict */
3866 0, /* tp_descr_get */
3867 0, /* tp_descr_set */
3868 0, /* tp_dictoffset */
3869 (initproc)element_init, /* tp_init */
3870 PyType_GenericAlloc, /* tp_alloc */
3871 element_new, /* tp_new */
3872 0, /* tp_free */
3873};
3874
3875static PyMethodDef treebuilder_methods[] = {
3876 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3877 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3878 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3879 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3880 {NULL, NULL}
3881};
3882
3883static PyTypeObject TreeBuilder_Type = {
3884 PyVarObject_HEAD_INIT(NULL, 0)
3885 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3886 /* methods */
3887 (destructor)treebuilder_dealloc, /* tp_dealloc */
3888 0, /* tp_print */
3889 0, /* tp_getattr */
3890 0, /* tp_setattr */
3891 0, /* tp_reserved */
3892 0, /* tp_repr */
3893 0, /* tp_as_number */
3894 0, /* tp_as_sequence */
3895 0, /* tp_as_mapping */
3896 0, /* tp_hash */
3897 0, /* tp_call */
3898 0, /* tp_str */
3899 0, /* tp_getattro */
3900 0, /* tp_setattro */
3901 0, /* tp_as_buffer */
3902 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3903 /* tp_flags */
3904 0, /* tp_doc */
3905 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3906 (inquiry)treebuilder_gc_clear, /* tp_clear */
3907 0, /* tp_richcompare */
3908 0, /* tp_weaklistoffset */
3909 0, /* tp_iter */
3910 0, /* tp_iternext */
3911 treebuilder_methods, /* tp_methods */
3912 0, /* tp_members */
3913 0, /* tp_getset */
3914 0, /* tp_base */
3915 0, /* tp_dict */
3916 0, /* tp_descr_get */
3917 0, /* tp_descr_set */
3918 0, /* tp_dictoffset */
3919 _elementtree_TreeBuilder___init__, /* tp_init */
3920 PyType_GenericAlloc, /* tp_alloc */
3921 treebuilder_new, /* tp_new */
3922 0, /* tp_free */
3923};
3924
3925static PyMethodDef xmlparser_methods[] = {
3926 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3927 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3928 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3929 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003930 {NULL, NULL}
3931};
3932
Neal Norwitz227b5332006-03-22 09:28:35 +00003933static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003934 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003935 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003936 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003937 (destructor)xmlparser_dealloc, /* tp_dealloc */
3938 0, /* tp_print */
3939 0, /* tp_getattr */
3940 0, /* tp_setattr */
3941 0, /* tp_reserved */
3942 0, /* tp_repr */
3943 0, /* tp_as_number */
3944 0, /* tp_as_sequence */
3945 0, /* tp_as_mapping */
3946 0, /* tp_hash */
3947 0, /* tp_call */
3948 0, /* tp_str */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003949 0, /* tp_getattro */
Eli Bendersky52467b12012-06-01 07:13:08 +03003950 0, /* tp_setattro */
3951 0, /* tp_as_buffer */
3952 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3953 /* tp_flags */
3954 0, /* tp_doc */
3955 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3956 (inquiry)xmlparser_gc_clear, /* tp_clear */
3957 0, /* tp_richcompare */
3958 0, /* tp_weaklistoffset */
3959 0, /* tp_iter */
3960 0, /* tp_iternext */
3961 xmlparser_methods, /* tp_methods */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003962 xmlparser_members, /* tp_members */
3963 xmlparser_getsetlist, /* tp_getset */
Eli Bendersky52467b12012-06-01 07:13:08 +03003964 0, /* tp_base */
3965 0, /* tp_dict */
3966 0, /* tp_descr_get */
3967 0, /* tp_descr_set */
3968 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003969 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003970 PyType_GenericAlloc, /* tp_alloc */
3971 xmlparser_new, /* tp_new */
3972 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003973};
3974
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003975/* ==================================================================== */
3976/* python module interface */
3977
3978static PyMethodDef _functions[] = {
Serhiy Storchaka62be7422018-11-27 13:27:31 +02003979 {"SubElement", (PyCFunction)(void(*)(void)) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003980 {NULL, NULL}
3981};
3982
Martin v. Löwis1a214512008-06-11 05:26:20 +00003983
Eli Bendersky532d03e2013-08-10 08:00:39 -07003984static struct PyModuleDef elementtreemodule = {
3985 PyModuleDef_HEAD_INIT,
3986 "_elementtree",
3987 NULL,
3988 sizeof(elementtreestate),
3989 _functions,
3990 NULL,
3991 elementtree_traverse,
3992 elementtree_clear,
3993 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003994};
3995
Neal Norwitzf6657e62006-12-28 04:47:50 +00003996PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003997PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003998{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003999 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004000 elementtreestate *st;
4001
4002 m = PyState_FindModule(&elementtreemodule);
4003 if (m) {
4004 Py_INCREF(m);
4005 return m;
4006 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004007
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004008 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02004009 if (PyType_Ready(&ElementIter_Type) < 0)
4010 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004011 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004012 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004013 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004014 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004015 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004016 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004017
Eli Bendersky532d03e2013-08-10 08:00:39 -07004018 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00004019 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00004020 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004021 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00004022
Eli Bendersky828efde2012-04-05 05:40:58 +03004023 if (!(temp = PyImport_ImportModule("copy")))
4024 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004025 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03004026 Py_XDECREF(temp);
4027
Victor Stinnerb136f112017-07-10 22:28:02 +02004028 if (st->deepcopy_obj == NULL) {
4029 return NULL;
4030 }
4031
4032 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004033 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004034 return NULL;
4035
Eli Bendersky20d41742012-06-01 09:48:37 +03004036 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004037 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4038 if (expat_capi) {
4039 /* check that it's usable */
4040 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004041 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004042 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4043 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004044 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004045 PyErr_SetString(PyExc_ImportError,
4046 "pyexpat version is incompatible");
4047 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004048 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004049 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004050 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004051 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004052
Eli Bendersky532d03e2013-08-10 08:00:39 -07004053 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004054 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004055 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004056 Py_INCREF(st->parseerror_obj);
4057 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004058
Eli Bendersky092af1f2012-03-04 07:14:03 +02004059 Py_INCREF((PyObject *)&Element_Type);
4060 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4061
Eli Bendersky58d548d2012-05-29 15:45:16 +03004062 Py_INCREF((PyObject *)&TreeBuilder_Type);
4063 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4064
Eli Bendersky52467b12012-06-01 07:13:08 +03004065 Py_INCREF((PyObject *)&XMLParser_Type);
4066 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004067
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004068 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004069}