blob: f88315d7711ad3a5cbfd67d804a2072872a735b1 [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* Number of child slots embedded directly in an element's extra block;
   an element can hold this many children without extra memory
   allocations. */
#define STATIC_CHILDREN 4

/* Tuning note: for best performance, choose a value such that 80-90% of
   all nodes have no more than the given number of children.  Set this to
   zero to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of eight
   bytes.  For the current C version of ElementTree, this means that the
   number of children should be an even number, at least on 32-bit
   platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  Flip the condition below to 1 to keep a
   running byte total and print it on every ALLOC/RELEASE; in the default
   configuration both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
/* tracing disabled: no-ops */
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) introduces a file-private declaration of
   the given type; MSVC builds additionally ask for inlining and the
   __fastcall convention. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
55
/* Macros used to store 'join' flags in string object pointers.  The flag
   lives in the lowest bit of the pointer value, so every use of text and
   tail as object pointers must be wrapped in JOIN_OBJ.  See the comments
   in the ElementObject definition for more info.

   JOIN_OBJ(p)       - the object pointer with the flag bit masked off
   JOIN_GET(p)       - the flag bit (0 or 1)
   JOIN_SET(p, flag) - the object pointer of p combined with 'flag' */
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
Ronald Oussoren138d0802013-07-19 11:11:25 +020083/* Types defined by this extension */
84static PyTypeObject Element_Type;
85static PyTypeObject ElementIter_Type;
86static PyTypeObject TreeBuilder_Type;
87static PyTypeObject XMLParser_Type;
88
89
Eli Bendersky532d03e2013-08-10 08:00:39 -070090/* Per-module state; PEP 3121 */
91typedef struct {
92 PyObject *parseerror_obj;
93 PyObject *deepcopy_obj;
94 PyObject *elementpath_obj;
95} elementtreestate;
96
97static struct PyModuleDef elementtreemodule;
98
99/* Given a module object (assumed to be _elementtree), get its per-module
100 * state.
101 */
102#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
103
104/* Find the module instance imported in the currently running sub-interpreter
105 * and get its state.
106 */
107#define ET_STATE_GLOBAL \
108 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
109
110static int
111elementtree_clear(PyObject *m)
112{
113 elementtreestate *st = ET_STATE(m);
114 Py_CLEAR(st->parseerror_obj);
115 Py_CLEAR(st->deepcopy_obj);
116 Py_CLEAR(st->elementpath_obj);
117 return 0;
118}
119
120static int
121elementtree_traverse(PyObject *m, visitproc visit, void *arg)
122{
123 elementtreestate *st = ET_STATE(m);
124 Py_VISIT(st->parseerror_obj);
125 Py_VISIT(st->deepcopy_obj);
126 Py_VISIT(st->elementpath_obj);
127 return 0;
128}
129
130static void
131elementtree_free(void *m)
132{
133 elementtree_clear((PyObject *)m);
134}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135
136/* helpers */
137
138LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139list_join(PyObject* list)
140{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300141 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000143 PyObject* result;
144
Antoine Pitrouc1948842012-10-01 23:40:37 +0200145 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000146 if (!joiner)
147 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200148 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000150 return result;
151}
152
Eli Bendersky48d358b2012-05-30 17:57:50 +0300153/* Is the given object an empty dictionary?
154*/
155static int
156is_empty_dict(PyObject *obj)
157{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200158 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300159}
160
161
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000162/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200163/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000164
165typedef struct {
166
167 /* attributes (a dictionary object), or None if no attributes */
168 PyObject* attrib;
169
170 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200171 Py_ssize_t length; /* actual number of items */
172 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000173
174 /* this either points to _children or to a malloced buffer */
175 PyObject* *children;
176
177 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179} ElementObjectExtra;
180
181typedef struct {
182 PyObject_HEAD
183
184 /* element tag (a string). */
185 PyObject* tag;
186
187 /* text before first child. note that this is a tagged pointer;
188 use JOIN_OBJ to get the object pointer. the join flag is used
189 to distinguish lists created by the tree builder from lists
190 assigned to the attribute by application code; the former
191 should be joined before being returned to the user, the latter
192 should be left intact. */
193 PyObject* text;
194
195 /* text after this element, in parent. note that this is a tagged
196 pointer; use JOIN_OBJ to get the object pointer. */
197 PyObject* tail;
198
199 ElementObjectExtra* extra;
200
Eli Benderskyebf37a22012-04-03 22:02:37 +0300201 PyObject *weakreflist; /* For tp_weaklistoffset */
202
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000203} ElementObject;
204
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
/* Element_CheckExact: true only when op's type is exactly Element_Type.
   Element_Check: PyObject_TypeCheck against Element_Type. */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
208
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209
210/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200211/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212
213LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200214create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215{
216 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200217 if (!self->extra) {
218 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000219 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200220 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000221
222 if (!attrib)
223 attrib = Py_None;
224
225 Py_INCREF(attrib);
226 self->extra->attrib = attrib;
227
228 self->extra->length = 0;
229 self->extra->allocated = STATIC_CHILDREN;
230 self->extra->children = self->extra->_children;
231
232 return 0;
233}
234
235LOCAL(void)
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300236dealloc_extra(ElementObjectExtra *extra)
237{
238 Py_ssize_t i;
239
240 if (!extra)
241 return;
242
243 Py_DECREF(extra->attrib);
244
245 for (i = 0; i < extra->length; i++)
246 Py_DECREF(extra->children[i]);
247
248 if (extra->children != extra->_children)
249 PyObject_Free(extra->children);
250
251 PyObject_Free(extra);
252}
253
254LOCAL(void)
255clear_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256{
Eli Bendersky08b85292012-04-04 15:55:07 +0300257 ElementObjectExtra *myextra;
Eli Bendersky08b85292012-04-04 15:55:07 +0300258
Eli Benderskyebf37a22012-04-03 22:02:37 +0300259 if (!self->extra)
260 return;
261
262 /* Avoid DECREFs calling into this code again (cycles, etc.)
263 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300264 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300265 self->extra = NULL;
266
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300267 dealloc_extra(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000268}
269
Eli Bendersky092af1f2012-03-04 07:14:03 +0200270/* Convenience internal function to create new Element objects with the given
271 * tag and attributes.
272*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200274create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000275{
276 ElementObject* self;
277
Eli Bendersky0192ba32012-03-30 16:38:33 +0300278 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000279 if (self == NULL)
280 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000281 self->extra = NULL;
282
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000283 Py_INCREF(tag);
284 self->tag = tag;
285
286 Py_INCREF(Py_None);
287 self->text = Py_None;
288
289 Py_INCREF(Py_None);
290 self->tail = Py_None;
291
Eli Benderskyebf37a22012-04-03 22:02:37 +0300292 self->weakreflist = NULL;
293
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200294 ALLOC(sizeof(ElementObject), "create element");
295 PyObject_GC_Track(self);
296
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200297 if (attrib != Py_None && !is_empty_dict(attrib)) {
298 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200299 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200300 return NULL;
301 }
302 }
303
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000304 return (PyObject*) self;
305}
306
Eli Bendersky092af1f2012-03-04 07:14:03 +0200307static PyObject *
308element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
309{
310 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
311 if (e != NULL) {
312 Py_INCREF(Py_None);
313 e->tag = Py_None;
314
315 Py_INCREF(Py_None);
316 e->text = Py_None;
317
318 Py_INCREF(Py_None);
319 e->tail = Py_None;
320
321 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300322 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200323 }
324 return (PyObject *)e;
325}
326
Eli Bendersky737b1732012-05-29 06:02:56 +0300327/* Helper function for extracting the attrib dictionary from a keywords dict.
328 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800329 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300330 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700331 *
332 * Return a dictionary with the content of kwds merged into the content of
333 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300334 */
335static PyObject*
336get_attrib_from_keywords(PyObject *kwds)
337{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700338 PyObject *attrib_str = PyUnicode_FromString("attrib");
339 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300340
341 if (attrib) {
342 /* If attrib was found in kwds, copy its value and remove it from
343 * kwds
344 */
345 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700346 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300347 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
348 Py_TYPE(attrib)->tp_name);
349 return NULL;
350 }
351 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700352 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300353 } else {
354 attrib = PyDict_New();
355 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700356
357 Py_DECREF(attrib_str);
358
359 /* attrib can be NULL if PyDict_New failed */
360 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200361 if (PyDict_Update(attrib, kwds) < 0)
362 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300363 return attrib;
364}
365
Serhiy Storchakacb985562015-05-04 15:32:48 +0300366/*[clinic input]
367module _elementtree
368class _elementtree.Element "ElementObject *" "&Element_Type"
369class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
370class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
371[clinic start generated code]*/
372/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
373
Eli Bendersky092af1f2012-03-04 07:14:03 +0200374static int
375element_init(PyObject *self, PyObject *args, PyObject *kwds)
376{
377 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200378 PyObject *attrib = NULL;
379 ElementObject *self_elem;
380
381 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
382 return -1;
383
Eli Bendersky737b1732012-05-29 06:02:56 +0300384 if (attrib) {
385 /* attrib passed as positional arg */
386 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200387 if (!attrib)
388 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300389 if (kwds) {
390 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200391 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300392 return -1;
393 }
394 }
395 } else if (kwds) {
396 /* have keywords args */
397 attrib = get_attrib_from_keywords(kwds);
398 if (!attrib)
399 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200400 }
401
402 self_elem = (ElementObject *)self;
403
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200406 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200407 return -1;
408 }
409 }
410
Eli Bendersky48d358b2012-05-30 17:57:50 +0300411 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200412 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413
414 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200415 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300416 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200417
Eli Bendersky092af1f2012-03-04 07:14:03 +0200418 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300419 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200420
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300422 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200423
424 return 0;
425}
426
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000427LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200428element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000429{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200430 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000431 PyObject* *children;
432
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300433 assert(extra >= 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000434 /* make sure self->children can hold the given number of extra
435 elements. set an exception and return -1 if allocation failed */
436
Victor Stinner5f0af232013-07-11 23:01:36 +0200437 if (!self->extra) {
438 if (create_extra(self, NULL) < 0)
439 return -1;
440 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000441
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200442 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000443
444 if (size > self->extra->allocated) {
445 /* use Python 2.4's list growth strategy */
446 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000447 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100448 * which needs at least 4 bytes.
449 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000450 * be safe.
451 */
452 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200453 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
454 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000455 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000456 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100457 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000458 * false alarm always assume at least one child to be safe.
459 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000460 children = PyObject_Realloc(self->extra->children,
461 size * sizeof(PyObject*));
462 if (!children)
463 goto nomemory;
464 } else {
465 children = PyObject_Malloc(size * sizeof(PyObject*));
466 if (!children)
467 goto nomemory;
468 /* copy existing children from static area to malloc buffer */
469 memcpy(children, self->extra->children,
470 self->extra->length * sizeof(PyObject*));
471 }
472 self->extra->children = children;
473 self->extra->allocated = size;
474 }
475
476 return 0;
477
478 nomemory:
479 PyErr_NoMemory();
480 return -1;
481}
482
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300483LOCAL(void)
484raise_type_error(PyObject *element)
485{
486 PyErr_Format(PyExc_TypeError,
487 "expected an Element, not \"%.200s\"",
488 Py_TYPE(element)->tp_name);
489}
490
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000491LOCAL(int)
492element_add_subelement(ElementObject* self, PyObject* element)
493{
494 /* add a child element to a parent */
495
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300496 if (!Element_Check(element)) {
497 raise_type_error(element);
498 return -1;
499 }
500
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000501 if (element_resize(self, 1) < 0)
502 return -1;
503
504 Py_INCREF(element);
505 self->extra->children[self->extra->length] = element;
506
507 self->extra->length++;
508
509 return 0;
510}
511
512LOCAL(PyObject*)
513element_get_attrib(ElementObject* self)
514{
515 /* return borrowed reference to attrib dictionary */
516 /* note: this function assumes that the extra section exists */
517
518 PyObject* res = self->extra->attrib;
519
520 if (res == Py_None) {
521 /* create missing dictionary */
522 res = PyDict_New();
523 if (!res)
524 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200525 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000526 self->extra->attrib = res;
527 }
528
529 return res;
530}
531
532LOCAL(PyObject*)
533element_get_text(ElementObject* self)
534{
535 /* return borrowed reference to text attribute */
536
Serhiy Storchaka576def02017-03-30 09:47:31 +0300537 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000538
539 if (JOIN_GET(res)) {
540 res = JOIN_OBJ(res);
541 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300542 PyObject *tmp = list_join(res);
543 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000544 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300545 self->text = tmp;
546 Py_DECREF(res);
547 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000548 }
549 }
550
551 return res;
552}
553
554LOCAL(PyObject*)
555element_get_tail(ElementObject* self)
556{
557 /* return borrowed reference to text attribute */
558
Serhiy Storchaka576def02017-03-30 09:47:31 +0300559 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000560
561 if (JOIN_GET(res)) {
562 res = JOIN_OBJ(res);
563 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300564 PyObject *tmp = list_join(res);
565 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000566 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300567 self->tail = tmp;
568 Py_DECREF(res);
569 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000570 }
571 }
572
573 return res;
574}
575
576static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300577subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000578{
579 PyObject* elem;
580
581 ElementObject* parent;
582 PyObject* tag;
583 PyObject* attrib = NULL;
584 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
585 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800586 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000587 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800588 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000589
Eli Bendersky737b1732012-05-29 06:02:56 +0300590 if (attrib) {
591 /* attrib passed as positional arg */
592 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000593 if (!attrib)
594 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300595 if (kwds) {
596 if (PyDict_Update(attrib, kwds) < 0) {
597 return NULL;
598 }
599 }
600 } else if (kwds) {
601 /* have keyword args */
602 attrib = get_attrib_from_keywords(kwds);
603 if (!attrib)
604 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000605 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300606 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000607 Py_INCREF(Py_None);
608 attrib = Py_None;
609 }
610
Eli Bendersky092af1f2012-03-04 07:14:03 +0200611 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000612 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200613 if (elem == NULL)
614 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000615
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000616 if (element_add_subelement(parent, elem) < 0) {
617 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000618 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000619 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000620
621 return elem;
622}
623
Eli Bendersky0192ba32012-03-30 16:38:33 +0300624static int
625element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
626{
627 Py_VISIT(self->tag);
628 Py_VISIT(JOIN_OBJ(self->text));
629 Py_VISIT(JOIN_OBJ(self->tail));
630
631 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200632 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300633 Py_VISIT(self->extra->attrib);
634
635 for (i = 0; i < self->extra->length; ++i)
636 Py_VISIT(self->extra->children[i]);
637 }
638 return 0;
639}
640
641static int
642element_gc_clear(ElementObject *self)
643{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300644 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700645 _clear_joined_ptr(&self->text);
646 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300647
648 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300649 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300650 */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300651 clear_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300652 return 0;
653}
654
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000655static void
656element_dealloc(ElementObject* self)
657{
INADA Naokia6296d32017-08-24 14:55:17 +0900658 /* bpo-31095: UnTrack is needed before calling any callbacks */
Eli Bendersky0192ba32012-03-30 16:38:33 +0300659 PyObject_GC_UnTrack(self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200660 Py_TRASHCAN_SAFE_BEGIN(self)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300661
662 if (self->weakreflist != NULL)
663 PyObject_ClearWeakRefs((PyObject *) self);
664
Eli Bendersky0192ba32012-03-30 16:38:33 +0300665 /* element_gc_clear clears all references and deallocates extra
666 */
667 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000668
669 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200670 Py_TYPE(self)->tp_free((PyObject *)self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200671 Py_TRASHCAN_SAFE_END(self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000672}
673
674/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000675
Serhiy Storchakacb985562015-05-04 15:32:48 +0300676/*[clinic input]
677_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000678
Serhiy Storchakacb985562015-05-04 15:32:48 +0300679 subelement: object(subclass_of='&Element_Type')
680 /
681
682[clinic start generated code]*/
683
684static PyObject *
685_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
686/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
687{
688 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000689 return NULL;
690
691 Py_RETURN_NONE;
692}
693
Serhiy Storchakacb985562015-05-04 15:32:48 +0300694/*[clinic input]
695_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000696
Serhiy Storchakacb985562015-05-04 15:32:48 +0300697[clinic start generated code]*/
698
699static PyObject *
700_elementtree_Element_clear_impl(ElementObject *self)
701/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
702{
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300703 clear_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000704
705 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300706 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000707
708 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300709 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000710
711 Py_RETURN_NONE;
712}
713
Serhiy Storchakacb985562015-05-04 15:32:48 +0300714/*[clinic input]
715_elementtree.Element.__copy__
716
717[clinic start generated code]*/
718
719static PyObject *
720_elementtree_Element___copy___impl(ElementObject *self)
721/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000722{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200723 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000724 ElementObject* element;
725
Eli Bendersky092af1f2012-03-04 07:14:03 +0200726 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800727 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000728 if (!element)
729 return NULL;
730
Oren Milman39ecb9c2017-10-10 23:26:24 +0300731 Py_INCREF(JOIN_OBJ(self->text));
732 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000733
Oren Milman39ecb9c2017-10-10 23:26:24 +0300734 Py_INCREF(JOIN_OBJ(self->tail));
735 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000736
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300737 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000738 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000739 if (element_resize(element, self->extra->length) < 0) {
740 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000741 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000742 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000743
744 for (i = 0; i < self->extra->length; i++) {
745 Py_INCREF(self->extra->children[i]);
746 element->extra->children[i] = self->extra->children[i];
747 }
748
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300749 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000750 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000751 }
752
753 return (PyObject*) element;
754}
755
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200756/* Helper for a deep copy. */
757LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
758
Serhiy Storchakacb985562015-05-04 15:32:48 +0300759/*[clinic input]
760_elementtree.Element.__deepcopy__
761
Oren Milmand0568182017-09-12 17:39:15 +0300762 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300763 /
764
765[clinic start generated code]*/
766
767static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300768_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
769/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000770{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200771 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000772 ElementObject* element;
773 PyObject* tag;
774 PyObject* attrib;
775 PyObject* text;
776 PyObject* tail;
777 PyObject* id;
778
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000779 tag = deepcopy(self->tag, memo);
780 if (!tag)
781 return NULL;
782
783 if (self->extra) {
784 attrib = deepcopy(self->extra->attrib, memo);
785 if (!attrib) {
786 Py_DECREF(tag);
787 return NULL;
788 }
789 } else {
790 Py_INCREF(Py_None);
791 attrib = Py_None;
792 }
793
Eli Bendersky092af1f2012-03-04 07:14:03 +0200794 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000795
796 Py_DECREF(tag);
797 Py_DECREF(attrib);
798
799 if (!element)
800 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100801
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000802 text = deepcopy(JOIN_OBJ(self->text), memo);
803 if (!text)
804 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300805 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000806
807 tail = deepcopy(JOIN_OBJ(self->tail), memo);
808 if (!tail)
809 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300810 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000811
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300812 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000813 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000814 if (element_resize(element, self->extra->length) < 0)
815 goto error;
816
817 for (i = 0; i < self->extra->length; i++) {
818 PyObject* child = deepcopy(self->extra->children[i], memo);
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300819 if (!child || !Element_Check(child)) {
820 if (child) {
821 raise_type_error(child);
822 Py_DECREF(child);
823 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000824 element->extra->length = i;
825 goto error;
826 }
827 element->extra->children[i] = child;
828 }
829
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300830 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000831 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000832 }
833
834 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700835 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000836 if (!id)
837 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000838
839 i = PyDict_SetItem(memo, id, (PyObject*) element);
840
841 Py_DECREF(id);
842
843 if (i < 0)
844 goto error;
845
846 return (PyObject*) element;
847
848 error:
849 Py_DECREF(element);
850 return NULL;
851}
852
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200853LOCAL(PyObject *)
854deepcopy(PyObject *object, PyObject *memo)
855{
856 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200857 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200858 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200859
860 /* Fast paths */
861 if (object == Py_None || PyUnicode_CheckExact(object)) {
862 Py_INCREF(object);
863 return object;
864 }
865
866 if (Py_REFCNT(object) == 1) {
867 if (PyDict_CheckExact(object)) {
868 PyObject *key, *value;
869 Py_ssize_t pos = 0;
870 int simple = 1;
871 while (PyDict_Next(object, &pos, &key, &value)) {
872 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
873 simple = 0;
874 break;
875 }
876 }
877 if (simple)
878 return PyDict_Copy(object);
879 /* Fall through to general case */
880 }
881 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300882 return _elementtree_Element___deepcopy___impl(
883 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200884 }
885 }
886
887 /* General case */
888 st = ET_STATE_GLOBAL;
889 if (!st->deepcopy_obj) {
890 PyErr_SetString(PyExc_RuntimeError,
891 "deepcopy helper not found");
892 return NULL;
893 }
894
Victor Stinner7fbac452016-08-20 01:34:44 +0200895 stack[0] = object;
896 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200897 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200898}
899
900
Serhiy Storchakacb985562015-05-04 15:32:48 +0300901/*[clinic input]
902_elementtree.Element.__sizeof__ -> Py_ssize_t
903
904[clinic start generated code]*/
905
906static Py_ssize_t
907_elementtree_Element___sizeof___impl(ElementObject *self)
908/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200909{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200910 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200911 if (self->extra) {
912 result += sizeof(ElementObjectExtra);
913 if (self->extra->children != self->extra->_children)
914 result += sizeof(PyObject*) * self->extra->allocated;
915 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300916 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200917}
918
Eli Bendersky698bdb22013-01-10 06:01:06 -0800919/* dict keys for getstate/setstate. */
920#define PICKLED_TAG "tag"
921#define PICKLED_CHILDREN "_children"
922#define PICKLED_ATTRIB "attrib"
923#define PICKLED_TAIL "tail"
924#define PICKLED_TEXT "text"
925
926/* __getstate__ returns a fabricated instance dict as in the pure-Python
927 * Element implementation, for interoperability/interchangeability. This
928 * makes the pure-Python implementation details an API, but (a) there aren't
929 * any unnecessary structures there; and (b) it buys compatibility with 3.2
930 * pickles. See issue #16076.
931 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300932/*[clinic input]
933_elementtree.Element.__getstate__
934
935[clinic start generated code]*/
936
Eli Bendersky698bdb22013-01-10 06:01:06 -0800937static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300938_elementtree_Element___getstate___impl(ElementObject *self)
939/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800940{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200941 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800942 PyObject *instancedict = NULL, *children;
943
944 /* Build a list of children. */
945 children = PyList_New(self->extra ? self->extra->length : 0);
946 if (!children)
947 return NULL;
948 for (i = 0; i < PyList_GET_SIZE(children); i++) {
949 PyObject *child = self->extra->children[i];
950 Py_INCREF(child);
951 PyList_SET_ITEM(children, i, child);
952 }
953
954 /* Construct the state object. */
955 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
956 if (noattrib)
957 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
958 PICKLED_TAG, self->tag,
959 PICKLED_CHILDREN, children,
960 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700961 PICKLED_TEXT, JOIN_OBJ(self->text),
962 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800963 else
964 instancedict = Py_BuildValue("{sOsOsOsOsO}",
965 PICKLED_TAG, self->tag,
966 PICKLED_CHILDREN, children,
967 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700968 PICKLED_TEXT, JOIN_OBJ(self->text),
969 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800970 if (instancedict) {
971 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800972 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800973 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800974 else {
975 for (i = 0; i < PyList_GET_SIZE(children); i++)
976 Py_DECREF(PyList_GET_ITEM(children, i));
977 Py_DECREF(children);
978
979 return NULL;
980 }
981}
982
/* Replace the state of 'self' with the given components.
 *
 * 'tag' is required; 'attrib', 'text', 'tail' and 'children' may each be
 * NULL.  A NULL text/tail is stored as None.  'children', when given, must
 * be a list whose items all pass Element_Check().
 *
 * Returns None (new reference) on success, or NULL with an exception set.
 * Note that tag, text and tail are assigned before 'children' is validated,
 * so they stay replaced even when a later step fails.
 */
static PyObject *
element_setstate_from_attributes(ElementObject *self,
                                 PyObject *tag,
                                 PyObject *attrib,
                                 PyObject *text,
                                 PyObject *tail,
                                 PyObject *children)
{
    Py_ssize_t i, nchildren;
    ElementObjectExtra *oldextra = NULL;

    if (!tag) {
        PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
        return NULL;
    }

    Py_INCREF(tag);
    Py_XSETREF(self->tag, tag);

    /* NOTE(review): JOIN_SET presumably records, inside the stored pointer,
       whether the value is an exact list -- confirm against the JOIN_*
       macro definitions. */
    text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
    Py_INCREF(JOIN_OBJ(text));
    _set_joined_ptr(&self->text, text);

    tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
    Py_INCREF(JOIN_OBJ(tail));
    _set_joined_ptr(&self->tail, tail);

    /* Handle ATTRIB and CHILDREN.  With neither given there is nothing
       more to do and the existing 'extra' block is left untouched. */
    if (!children && !attrib) {
        Py_RETURN_NONE;
    }

    /* Compute 'nchildren'. */
    if (children) {
        if (!PyList_Check(children)) {
            PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
            return NULL;
        }
        nchildren = PyList_GET_SIZE(children);

        /* (Re-)allocate 'extra'.
           Avoid DECREFs calling into this code again (cycles, etc.)
           The old block is detached first and only released at the end.
         */
        oldextra = self->extra;
        self->extra = NULL;
        if (element_resize(self, nchildren)) {
            /* allocation failed: discard whatever was created and put the
               previous block back */
            assert(!self->extra || !self->extra->length);
            clear_extra(self);
            self->extra = oldextra;
            return NULL;
        }
        assert(self->extra);
        assert(self->extra->allocated >= nchildren);
        if (oldextra) {
            /* Move the previous attrib into the new block; the old block
               gets None in exchange (the value the new block held, per the
               assert). */
            assert(self->extra->attrib == Py_None);
            self->extra->attrib = oldextra->attrib;
            oldextra->attrib = Py_None;
        }

        /* Copy children */
        for (i = 0; i < nchildren; i++) {
            PyObject *child = PyList_GET_ITEM(children, i);
            if (!Element_Check(child)) {
                raise_type_error(child);
                /* keep the i children stored so far, release the old
                   block, and report the error */
                self->extra->length = i;
                dealloc_extra(oldextra);
                return NULL;
            }
            Py_INCREF(child);
            self->extra->children[i] = child;
        }

        /* length is only published once every slot has been filled */
        assert(!self->extra->length);
        self->extra->length = nchildren;
    }
    else {
        /* only attrib was given: make sure an 'extra' block exists to hold
           it (element_resize is called with zero additional children) */
        if (element_resize(self, 0)) {
            return NULL;
        }
    }

    /* Stash attrib. */
    if (attrib) {
        Py_INCREF(attrib);
        Py_XSETREF(self->extra->attrib, attrib);
    }
    /* NOTE(review): 'oldextra' is still NULL here when no children were
       supplied; this assumes dealloc_extra() accepts NULL -- confirm. */
    dealloc_extra(oldextra);

    Py_RETURN_NONE;
}
1073
1074/* __setstate__ for Element instance from the Python implementation.
1075 * 'state' should be the instance dict.
1076 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001077
Eli Bendersky698bdb22013-01-10 06:01:06 -08001078static PyObject *
1079element_setstate_from_Python(ElementObject *self, PyObject *state)
1080{
1081 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1082 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1083 PyObject *args;
1084 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001085 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001086
Eli Bendersky698bdb22013-01-10 06:01:06 -08001087 tag = attrib = text = tail = children = NULL;
1088 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001089 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001090 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001091
1092 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1093 &attrib, &text, &tail, &children))
1094 retval = element_setstate_from_attributes(self, tag, attrib, text,
1095 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001096 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001097 retval = NULL;
1098
1099 Py_DECREF(args);
1100 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001101}
1102
Serhiy Storchakacb985562015-05-04 15:32:48 +03001103/*[clinic input]
1104_elementtree.Element.__setstate__
1105
1106 state: object
1107 /
1108
1109[clinic start generated code]*/
1110
Eli Bendersky698bdb22013-01-10 06:01:06 -08001111static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001112_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1113/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001114{
1115 if (!PyDict_CheckExact(state)) {
1116 PyErr_Format(PyExc_TypeError,
1117 "Don't know how to unpickle \"%.200R\" as an Element",
1118 state);
1119 return NULL;
1120 }
1121 else
1122 return element_setstate_from_Python(self, state);
1123}
1124
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001125LOCAL(int)
1126checkpath(PyObject* tag)
1127{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001128 Py_ssize_t i;
1129 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001130
1131 /* check if a tag contains an xpath character */
1132
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001133#define PATHCHAR(ch) \
1134 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001135
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001136 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001137 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1138 void *data = PyUnicode_DATA(tag);
1139 unsigned int kind = PyUnicode_KIND(tag);
1140 for (i = 0; i < len; i++) {
1141 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1142 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001143 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001144 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001145 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001146 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001147 return 1;
1148 }
1149 return 0;
1150 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001151 if (PyBytes_Check(tag)) {
1152 char *p = PyBytes_AS_STRING(tag);
1153 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001154 if (p[i] == '{')
1155 check = 0;
1156 else if (p[i] == '}')
1157 check = 1;
1158 else if (check && PATHCHAR(p[i]))
1159 return 1;
1160 }
1161 return 0;
1162 }
1163
1164 return 1; /* unknown type; might be path expression */
1165}
1166
Serhiy Storchakacb985562015-05-04 15:32:48 +03001167/*[clinic input]
1168_elementtree.Element.extend
1169
1170 elements: object
1171 /
1172
1173[clinic start generated code]*/
1174
1175static PyObject *
1176_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1177/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001178{
1179 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001180 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001181
Serhiy Storchakacb985562015-05-04 15:32:48 +03001182 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001183 if (!seq) {
1184 PyErr_Format(
1185 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001186 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001187 );
1188 return NULL;
1189 }
1190
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001191 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001192 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001193 Py_INCREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001194 if (element_add_subelement(self, element) < 0) {
1195 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001196 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001197 return NULL;
1198 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001199 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001200 }
1201
1202 Py_DECREF(seq);
1203
1204 Py_RETURN_NONE;
1205}
1206
Serhiy Storchakacb985562015-05-04 15:32:48 +03001207/*[clinic input]
1208_elementtree.Element.find
1209
1210 path: object
1211 namespaces: object = None
1212
1213[clinic start generated code]*/
1214
1215static PyObject *
1216_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1217 PyObject *namespaces)
1218/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001219{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001220 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001221 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001222
Serhiy Storchakacb985562015-05-04 15:32:48 +03001223 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001224 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001225 return _PyObject_CallMethodIdObjArgs(
1226 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001227 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001228 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001229
1230 if (!self->extra)
1231 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001232
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001233 for (i = 0; i < self->extra->length; i++) {
1234 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001235 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001236 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001237 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001238 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001239 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001240 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001241 Py_DECREF(item);
1242 if (rc < 0)
1243 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001244 }
1245
1246 Py_RETURN_NONE;
1247}
1248
Serhiy Storchakacb985562015-05-04 15:32:48 +03001249/*[clinic input]
1250_elementtree.Element.findtext
1251
1252 path: object
1253 default: object = None
1254 namespaces: object = None
1255
1256[clinic start generated code]*/
1257
1258static PyObject *
1259_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1260 PyObject *default_value,
1261 PyObject *namespaces)
1262/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001263{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001264 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001265 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001266 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001267
Serhiy Storchakacb985562015-05-04 15:32:48 +03001268 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001269 return _PyObject_CallMethodIdObjArgs(
1270 st->elementpath_obj, &PyId_findtext,
1271 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001272 );
1273
1274 if (!self->extra) {
1275 Py_INCREF(default_value);
1276 return default_value;
1277 }
1278
1279 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001280 PyObject *item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001281 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001282 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001283 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001284 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001285 if (rc > 0) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001286 PyObject* text = element_get_text((ElementObject*)item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001287 if (text == Py_None) {
1288 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001289 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001290 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001291 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001292 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001293 return text;
1294 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001295 Py_DECREF(item);
1296 if (rc < 0)
1297 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001298 }
1299
1300 Py_INCREF(default_value);
1301 return default_value;
1302}
1303
Serhiy Storchakacb985562015-05-04 15:32:48 +03001304/*[clinic input]
1305_elementtree.Element.findall
1306
1307 path: object
1308 namespaces: object = None
1309
1310[clinic start generated code]*/
1311
1312static PyObject *
1313_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1314 PyObject *namespaces)
1315/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001316{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001317 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001318 PyObject* out;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001319 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001320
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001321 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001322 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001323 return _PyObject_CallMethodIdObjArgs(
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001324 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001325 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001326 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001327
1328 out = PyList_New(0);
1329 if (!out)
1330 return NULL;
1331
1332 if (!self->extra)
1333 return out;
1334
1335 for (i = 0; i < self->extra->length; i++) {
1336 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001337 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001338 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001339 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001340 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001341 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1342 Py_DECREF(item);
1343 Py_DECREF(out);
1344 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001345 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001346 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001347 }
1348
1349 return out;
1350}
1351
Serhiy Storchakacb985562015-05-04 15:32:48 +03001352/*[clinic input]
1353_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001354
Serhiy Storchakacb985562015-05-04 15:32:48 +03001355 path: object
1356 namespaces: object = None
1357
1358[clinic start generated code]*/
1359
1360static PyObject *
1361_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1362 PyObject *namespaces)
1363/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1364{
1365 PyObject* tag = path;
1366 _Py_IDENTIFIER(iterfind);
1367 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001368
Victor Stinnerf5616342016-12-09 15:26:00 +01001369 return _PyObject_CallMethodIdObjArgs(
1370 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001371}
1372
Serhiy Storchakacb985562015-05-04 15:32:48 +03001373/*[clinic input]
1374_elementtree.Element.get
1375
1376 key: object
1377 default: object = None
1378
1379[clinic start generated code]*/
1380
1381static PyObject *
1382_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1383 PyObject *default_value)
1384/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001385{
1386 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001387
1388 if (!self->extra || self->extra->attrib == Py_None)
1389 value = default_value;
1390 else {
1391 value = PyDict_GetItem(self->extra->attrib, key);
1392 if (!value)
1393 value = default_value;
1394 }
1395
1396 Py_INCREF(value);
1397 return value;
1398}
1399
Serhiy Storchakacb985562015-05-04 15:32:48 +03001400/*[clinic input]
1401_elementtree.Element.getchildren
1402
1403[clinic start generated code]*/
1404
1405static PyObject *
1406_elementtree_Element_getchildren_impl(ElementObject *self)
1407/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001408{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001409 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001410 PyObject* list;
1411
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001412 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1413 "This method will be removed in future versions. "
1414 "Use 'list(elem)' or iteration over elem instead.",
1415 1) < 0) {
1416 return NULL;
1417 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001418
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001419 if (!self->extra)
1420 return PyList_New(0);
1421
1422 list = PyList_New(self->extra->length);
1423 if (!list)
1424 return NULL;
1425
1426 for (i = 0; i < self->extra->length; i++) {
1427 PyObject* item = self->extra->children[i];
1428 Py_INCREF(item);
1429 PyList_SET_ITEM(list, i, item);
1430 }
1431
1432 return list;
1433}
1434
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001435
Eli Bendersky64d11e62012-06-15 07:42:50 +03001436static PyObject *
1437create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1438
1439
Serhiy Storchakacb985562015-05-04 15:32:48 +03001440/*[clinic input]
1441_elementtree.Element.iter
1442
1443 tag: object = None
1444
1445[clinic start generated code]*/
1446
Eli Bendersky64d11e62012-06-15 07:42:50 +03001447static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001448_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1449/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001450{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001451 if (PyUnicode_Check(tag)) {
1452 if (PyUnicode_READY(tag) < 0)
1453 return NULL;
1454 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1455 tag = Py_None;
1456 }
1457 else if (PyBytes_Check(tag)) {
1458 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1459 tag = Py_None;
1460 }
1461
Eli Bendersky64d11e62012-06-15 07:42:50 +03001462 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001463}
1464
1465
Serhiy Storchakacb985562015-05-04 15:32:48 +03001466/*[clinic input]
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001467_elementtree.Element.getiterator
1468
1469 tag: object = None
1470
1471[clinic start generated code]*/
1472
1473static PyObject *
1474_elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1475/*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1476{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03001477 if (PyErr_WarnEx(PyExc_DeprecationWarning,
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001478 "This method will be removed in future versions. "
1479 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1480 1) < 0) {
1481 return NULL;
1482 }
1483 return _elementtree_Element_iter_impl(self, tag);
1484}
1485
1486
1487/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03001488_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001489
Serhiy Storchakacb985562015-05-04 15:32:48 +03001490[clinic start generated code]*/
1491
1492static PyObject *
1493_elementtree_Element_itertext_impl(ElementObject *self)
1494/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1495{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001496 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001497}
1498
Eli Bendersky64d11e62012-06-15 07:42:50 +03001499
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001500static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001501element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001502{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001503 ElementObject* self = (ElementObject*) self_;
1504
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001505 if (!self->extra || index < 0 || index >= self->extra->length) {
1506 PyErr_SetString(
1507 PyExc_IndexError,
1508 "child index out of range"
1509 );
1510 return NULL;
1511 }
1512
1513 Py_INCREF(self->extra->children[index]);
1514 return self->extra->children[index];
1515}
1516
Serhiy Storchakacb985562015-05-04 15:32:48 +03001517/*[clinic input]
1518_elementtree.Element.insert
1519
1520 index: Py_ssize_t
1521 subelement: object(subclass_of='&Element_Type')
1522 /
1523
1524[clinic start generated code]*/
1525
1526static PyObject *
1527_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1528 PyObject *subelement)
1529/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001530{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001531 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001532
Victor Stinner5f0af232013-07-11 23:01:36 +02001533 if (!self->extra) {
1534 if (create_extra(self, NULL) < 0)
1535 return NULL;
1536 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001537
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001538 if (index < 0) {
1539 index += self->extra->length;
1540 if (index < 0)
1541 index = 0;
1542 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001543 if (index > self->extra->length)
1544 index = self->extra->length;
1545
1546 if (element_resize(self, 1) < 0)
1547 return NULL;
1548
1549 for (i = self->extra->length; i > index; i--)
1550 self->extra->children[i] = self->extra->children[i-1];
1551
Serhiy Storchakacb985562015-05-04 15:32:48 +03001552 Py_INCREF(subelement);
1553 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001554
1555 self->extra->length++;
1556
1557 Py_RETURN_NONE;
1558}
1559
Serhiy Storchakacb985562015-05-04 15:32:48 +03001560/*[clinic input]
1561_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001562
Serhiy Storchakacb985562015-05-04 15:32:48 +03001563[clinic start generated code]*/
1564
1565static PyObject *
1566_elementtree_Element_items_impl(ElementObject *self)
1567/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1568{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001569 if (!self->extra || self->extra->attrib == Py_None)
1570 return PyList_New(0);
1571
1572 return PyDict_Items(self->extra->attrib);
1573}
1574
Serhiy Storchakacb985562015-05-04 15:32:48 +03001575/*[clinic input]
1576_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001577
Serhiy Storchakacb985562015-05-04 15:32:48 +03001578[clinic start generated code]*/
1579
1580static PyObject *
1581_elementtree_Element_keys_impl(ElementObject *self)
1582/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1583{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001584 if (!self->extra || self->extra->attrib == Py_None)
1585 return PyList_New(0);
1586
1587 return PyDict_Keys(self->extra->attrib);
1588}
1589
Martin v. Löwis18e16552006-02-15 17:27:45 +00001590static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001591element_length(ElementObject* self)
1592{
1593 if (!self->extra)
1594 return 0;
1595
1596 return self->extra->length;
1597}
1598
Serhiy Storchakacb985562015-05-04 15:32:48 +03001599/*[clinic input]
1600_elementtree.Element.makeelement
1601
1602 tag: object
1603 attrib: object
1604 /
1605
1606[clinic start generated code]*/
1607
1608static PyObject *
1609_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1610 PyObject *attrib)
1611/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001612{
1613 PyObject* elem;
1614
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001615 attrib = PyDict_Copy(attrib);
1616 if (!attrib)
1617 return NULL;
1618
Eli Bendersky092af1f2012-03-04 07:14:03 +02001619 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001620
1621 Py_DECREF(attrib);
1622
1623 return elem;
1624}
1625
Serhiy Storchakacb985562015-05-04 15:32:48 +03001626/*[clinic input]
1627_elementtree.Element.remove
1628
1629 subelement: object(subclass_of='&Element_Type')
1630 /
1631
1632[clinic start generated code]*/
1633
1634static PyObject *
1635_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1636/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001637{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001638 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001639 int rc;
1640 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001641
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001642 if (!self->extra) {
1643 /* element has no children, so raise exception */
1644 PyErr_SetString(
1645 PyExc_ValueError,
1646 "list.remove(x): x not in list"
1647 );
1648 return NULL;
1649 }
1650
1651 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001652 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001653 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001654 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001655 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001656 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001657 if (rc < 0)
1658 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001659 }
1660
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001661 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001662 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001663 PyErr_SetString(
1664 PyExc_ValueError,
1665 "list.remove(x): x not in list"
1666 );
1667 return NULL;
1668 }
1669
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001670 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001671
1672 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001673 for (; i < self->extra->length; i++)
1674 self->extra->children[i] = self->extra->children[i+1];
1675
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001676 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001677 Py_RETURN_NONE;
1678}
1679
1680static PyObject*
1681element_repr(ElementObject* self)
1682{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001683 int status;
1684
1685 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001686 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001687
1688 status = Py_ReprEnter((PyObject *)self);
1689 if (status == 0) {
1690 PyObject *res;
1691 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1692 Py_ReprLeave((PyObject *)self);
1693 return res;
1694 }
1695 if (status > 0)
1696 PyErr_Format(PyExc_RuntimeError,
1697 "reentrant call inside %s.__repr__",
1698 Py_TYPE(self)->tp_name);
1699 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001700}
1701
Serhiy Storchakacb985562015-05-04 15:32:48 +03001702/*[clinic input]
1703_elementtree.Element.set
1704
1705 key: object
1706 value: object
1707 /
1708
1709[clinic start generated code]*/
1710
1711static PyObject *
1712_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1713 PyObject *value)
1714/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001715{
1716 PyObject* attrib;
1717
Victor Stinner5f0af232013-07-11 23:01:36 +02001718 if (!self->extra) {
1719 if (create_extra(self, NULL) < 0)
1720 return NULL;
1721 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001722
1723 attrib = element_get_attrib(self);
1724 if (!attrib)
1725 return NULL;
1726
1727 if (PyDict_SetItem(attrib, key, value) < 0)
1728 return NULL;
1729
1730 Py_RETURN_NONE;
1731}
1732
1733static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001734element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001735{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001736 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001737 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001738 PyObject* old;
1739
1740 if (!self->extra || index < 0 || index >= self->extra->length) {
1741 PyErr_SetString(
1742 PyExc_IndexError,
1743 "child assignment index out of range");
1744 return -1;
1745 }
1746
1747 old = self->extra->children[index];
1748
1749 if (item) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001750 if (!Element_Check(item)) {
1751 raise_type_error(item);
1752 return -1;
1753 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001754 Py_INCREF(item);
1755 self->extra->children[index] = item;
1756 } else {
1757 self->extra->length--;
1758 for (i = index; i < self->extra->length; i++)
1759 self->extra->children[i] = self->extra->children[i+1];
1760 }
1761
1762 Py_DECREF(old);
1763
1764 return 0;
1765}
1766
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001767static PyObject*
1768element_subscr(PyObject* self_, PyObject* item)
1769{
1770 ElementObject* self = (ElementObject*) self_;
1771
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001772 if (PyIndex_Check(item)) {
1773 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001774
1775 if (i == -1 && PyErr_Occurred()) {
1776 return NULL;
1777 }
1778 if (i < 0 && self->extra)
1779 i += self->extra->length;
1780 return element_getitem(self_, i);
1781 }
1782 else if (PySlice_Check(item)) {
1783 Py_ssize_t start, stop, step, slicelen, cur, i;
1784 PyObject* list;
1785
1786 if (!self->extra)
1787 return PyList_New(0);
1788
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001789 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001790 return NULL;
1791 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001792 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1793 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001794
1795 if (slicelen <= 0)
1796 return PyList_New(0);
1797 else {
1798 list = PyList_New(slicelen);
1799 if (!list)
1800 return NULL;
1801
1802 for (cur = start, i = 0; i < slicelen;
1803 cur += step, i++) {
1804 PyObject* item = self->extra->children[cur];
1805 Py_INCREF(item);
1806 PyList_SET_ITEM(list, i, item);
1807 }
1808
1809 return list;
1810 }
1811 }
1812 else {
1813 PyErr_SetString(PyExc_TypeError,
1814 "element indices must be integers");
1815 return NULL;
1816 }
1817}
1818
1819static int
1820element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1821{
1822 ElementObject* self = (ElementObject*) self_;
1823
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001824 if (PyIndex_Check(item)) {
1825 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001826
1827 if (i == -1 && PyErr_Occurred()) {
1828 return -1;
1829 }
1830 if (i < 0 && self->extra)
1831 i += self->extra->length;
1832 return element_setitem(self_, i, value);
1833 }
1834 else if (PySlice_Check(item)) {
1835 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1836
1837 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001838 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001839
Victor Stinner5f0af232013-07-11 23:01:36 +02001840 if (!self->extra) {
1841 if (create_extra(self, NULL) < 0)
1842 return -1;
1843 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001844
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001845 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001846 return -1;
1847 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001848 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1849 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001850
Eli Bendersky865756a2012-03-09 13:38:15 +02001851 if (value == NULL) {
1852 /* Delete slice */
1853 size_t cur;
1854 Py_ssize_t i;
1855
1856 if (slicelen <= 0)
1857 return 0;
1858
1859 /* Since we're deleting, the direction of the range doesn't matter,
1860 * so for simplicity make it always ascending.
1861 */
1862 if (step < 0) {
1863 stop = start + 1;
1864 start = stop + step * (slicelen - 1) - 1;
1865 step = -step;
1866 }
1867
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001868 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001869
1870 /* recycle is a list that will contain all the children
1871 * scheduled for removal.
1872 */
1873 if (!(recycle = PyList_New(slicelen))) {
1874 PyErr_NoMemory();
1875 return -1;
1876 }
1877
1878 /* This loop walks over all the children that have to be deleted,
1879 * with cur pointing at them. num_moved is the amount of children
1880 * until the next deleted child that have to be "shifted down" to
1881 * occupy the deleted's places.
1882 * Note that in the ith iteration, shifting is done i+i places down
1883 * because i children were already removed.
1884 */
1885 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1886 /* Compute how many children have to be moved, clipping at the
1887 * list end.
1888 */
1889 Py_ssize_t num_moved = step - 1;
1890 if (cur + step >= (size_t)self->extra->length) {
1891 num_moved = self->extra->length - cur - 1;
1892 }
1893
1894 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1895
1896 memmove(
1897 self->extra->children + cur - i,
1898 self->extra->children + cur + 1,
1899 num_moved * sizeof(PyObject *));
1900 }
1901
1902 /* Leftover "tail" after the last removed child */
1903 cur = start + (size_t)slicelen * step;
1904 if (cur < (size_t)self->extra->length) {
1905 memmove(
1906 self->extra->children + cur - slicelen,
1907 self->extra->children + cur,
1908 (self->extra->length - cur) * sizeof(PyObject *));
1909 }
1910
1911 self->extra->length -= slicelen;
1912
1913 /* Discard the recycle list with all the deleted sub-elements */
1914 Py_XDECREF(recycle);
1915 return 0;
1916 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001917
1918 /* A new slice is actually being assigned */
1919 seq = PySequence_Fast(value, "");
1920 if (!seq) {
1921 PyErr_Format(
1922 PyExc_TypeError,
1923 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1924 );
1925 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001926 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001927 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001928
1929 if (step != 1 && newlen != slicelen)
1930 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001931 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001932 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001933 "attempt to assign sequence of size %zd "
1934 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001935 newlen, slicelen
1936 );
1937 return -1;
1938 }
1939
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001940 /* Resize before creating the recycle bin, to prevent refleaks. */
1941 if (newlen > slicelen) {
1942 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001943 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001944 return -1;
1945 }
1946 }
1947
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001948 for (i = 0; i < newlen; i++) {
1949 PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1950 if (!Element_Check(element)) {
1951 raise_type_error(element);
1952 Py_DECREF(seq);
1953 return -1;
1954 }
1955 }
1956
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001957 if (slicelen > 0) {
1958 /* to avoid recursive calls to this method (via decref), move
1959 old items to the recycle bin here, and get rid of them when
1960 we're done modifying the element */
1961 recycle = PyList_New(slicelen);
1962 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001963 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001964 return -1;
1965 }
1966 for (cur = start, i = 0; i < slicelen;
1967 cur += step, i++)
1968 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1969 }
1970
1971 if (newlen < slicelen) {
1972 /* delete slice */
1973 for (i = stop; i < self->extra->length; i++)
1974 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1975 } else if (newlen > slicelen) {
1976 /* insert slice */
1977 for (i = self->extra->length-1; i >= stop; i--)
1978 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1979 }
1980
1981 /* replace the slice */
1982 for (cur = start, i = 0; i < newlen;
1983 cur += step, i++) {
1984 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1985 Py_INCREF(element);
1986 self->extra->children[cur] = element;
1987 }
1988
1989 self->extra->length += newlen - slicelen;
1990
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001991 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001992
1993 /* discard the recycle bin, and everything in it */
1994 Py_XDECREF(recycle);
1995
1996 return 0;
1997 }
1998 else {
1999 PyErr_SetString(PyExc_TypeError,
2000 "element indices must be integers");
2001 return -1;
2002 }
2003}
2004
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002005static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02002006element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002007{
Serhiy Storchakadde08152015-11-25 15:28:13 +02002008 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002009 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002010 return res;
2011}
2012
Serhiy Storchakadde08152015-11-25 15:28:13 +02002013static PyObject*
2014element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002015{
Serhiy Storchakadde08152015-11-25 15:28:13 +02002016 PyObject *res = element_get_text(self);
2017 Py_XINCREF(res);
2018 return res;
2019}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002020
Serhiy Storchakadde08152015-11-25 15:28:13 +02002021static PyObject*
2022element_tail_getter(ElementObject *self, void *closure)
2023{
2024 PyObject *res = element_get_tail(self);
2025 Py_XINCREF(res);
2026 return res;
2027}
2028
2029static PyObject*
2030element_attrib_getter(ElementObject *self, void *closure)
2031{
2032 PyObject *res;
2033 if (!self->extra) {
2034 if (create_extra(self, NULL) < 0)
2035 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002036 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02002037 res = element_get_attrib(self);
2038 Py_XINCREF(res);
2039 return res;
2040}
Victor Stinner4d463432013-07-11 23:05:03 +02002041
/* macro for setter validation
 *
 * A setter receives V == NULL when the attribute is being deleted; that is
 * refused with AttributeError and the *enclosing function* returns -1.
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe inside an unbraced if/else); use it followed by a semicolon.
 */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2050
Serhiy Storchakadde08152015-11-25 15:28:13 +02002051static int
2052element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2053{
2054 _VALIDATE_ATTR_VALUE(value);
2055 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002056 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002057 return 0;
2058}
2059
2060static int
2061element_text_setter(ElementObject *self, PyObject *value, void *closure)
2062{
2063 _VALIDATE_ATTR_VALUE(value);
2064 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002065 _set_joined_ptr(&self->text, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002066 return 0;
2067}
2068
2069static int
2070element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2071{
2072 _VALIDATE_ATTR_VALUE(value);
2073 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002074 _set_joined_ptr(&self->tail, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002075 return 0;
2076}
2077
2078static int
2079element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2080{
2081 _VALIDATE_ATTR_VALUE(value);
2082 if (!self->extra) {
2083 if (create_extra(self, NULL) < 0)
2084 return -1;
2085 }
2086 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002087 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002088 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002089}
2090
2091static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002092 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002093 0, /* sq_concat */
2094 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002095 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002096 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002097 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002098 0,
2099};
2100
Eli Bendersky64d11e62012-06-15 07:42:50 +03002101/******************************* Element iterator ****************************/
2102
2103/* ElementIterObject represents the iteration state over an XML element in
2104 * pre-order traversal. To keep track of which sub-element should be returned
2105 * next, a stack of parents is maintained. This is a standard stack-based
2106 * iterative pre-order traversal of a tree.
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002107 * The stack is managed using a contiguous array.
2108 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002109 * the current one is exhausted, and the next child to examine in that parent.
2110 */
2111typedef struct ParentLocator_t {
2112 ElementObject *parent;
2113 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002114} ParentLocator;
2115
2116typedef struct {
2117 PyObject_HEAD
2118 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002119 Py_ssize_t parent_stack_used;
2120 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002121 ElementObject *root_element;
2122 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002123 int gettext;
2124} ElementIterObject;
2125
2126
2127static void
2128elementiter_dealloc(ElementIterObject *it)
2129{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002130 Py_ssize_t i = it->parent_stack_used;
2131 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002132 /* bpo-31095: UnTrack is needed before calling any callbacks */
2133 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002134 while (i--)
2135 Py_XDECREF(it->parent_stack[i].parent);
2136 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002137
2138 Py_XDECREF(it->sought_tag);
2139 Py_XDECREF(it->root_element);
2140
Eli Bendersky64d11e62012-06-15 07:42:50 +03002141 PyObject_GC_Del(it);
2142}
2143
2144static int
2145elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2146{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002147 Py_ssize_t i = it->parent_stack_used;
2148 while (i--)
2149 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002150
2151 Py_VISIT(it->root_element);
2152 Py_VISIT(it->sought_tag);
2153 return 0;
2154}
2155
2156/* Helper function for elementiter_next. Add a new parent to the parent stack.
2157 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002158static int
2159parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002160{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002161 ParentLocator *item;
2162
2163 if (it->parent_stack_used >= it->parent_stack_size) {
2164 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2165 ParentLocator *parent_stack = it->parent_stack;
2166 PyMem_Resize(parent_stack, ParentLocator, new_size);
2167 if (parent_stack == NULL)
2168 return -1;
2169 it->parent_stack = parent_stack;
2170 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002171 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002172 item = it->parent_stack + it->parent_stack_used++;
2173 Py_INCREF(parent);
2174 item->parent = parent;
2175 item->child_index = 0;
2176 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002177}
2178
2179static PyObject *
2180elementiter_next(ElementIterObject *it)
2181{
2182 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002183 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002184 * A short note on gettext: this function serves both the iter() and
2185 * itertext() methods to avoid code duplication. However, there are a few
2186 * small differences in the way these iterations work. Namely:
2187 * - itertext() only yields text from nodes that have it, and continues
2188 * iterating when a node doesn't have text (so it doesn't return any
2189 * node like iter())
2190 * - itertext() also has to handle tail, after finishing with all the
2191 * children of a node.
2192 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002193 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002194 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002195 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002196
2197 while (1) {
2198 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002199 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002200 * iterator is exhausted.
2201 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002202 if (!it->parent_stack_used) {
2203 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002204 PyErr_SetNone(PyExc_StopIteration);
2205 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002206 }
2207
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002208 elem = it->root_element; /* steals a reference */
2209 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002210 }
2211 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002212 /* See if there are children left to traverse in the current parent. If
2213 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002214 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002215 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2216 Py_ssize_t child_index = item->child_index;
2217 ElementObjectExtra *extra;
2218 elem = item->parent;
2219 extra = elem->extra;
2220 if (!extra || child_index >= extra->length) {
2221 it->parent_stack_used--;
2222 /* Note that extra condition on it->parent_stack_used here;
2223 * this is because itertext() is supposed to only return *inner*
2224 * text, not text following the element it began iteration with.
2225 */
2226 if (it->gettext && it->parent_stack_used) {
2227 text = element_get_tail(elem);
2228 goto gettext;
2229 }
2230 Py_DECREF(elem);
2231 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002232 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002233
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03002234 assert(Element_Check(extra->children[child_index]));
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002235 elem = (ElementObject *)extra->children[child_index];
2236 item->child_index++;
2237 Py_INCREF(elem);
2238 }
2239
2240 if (parent_stack_push_new(it, elem) < 0) {
2241 Py_DECREF(elem);
2242 PyErr_NoMemory();
2243 return NULL;
2244 }
2245 if (it->gettext) {
2246 text = element_get_text(elem);
2247 goto gettext;
2248 }
2249
2250 if (it->sought_tag == Py_None)
2251 return (PyObject *)elem;
2252
2253 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2254 if (rc > 0)
2255 return (PyObject *)elem;
2256
2257 Py_DECREF(elem);
2258 if (rc < 0)
2259 return NULL;
2260 continue;
2261
2262gettext:
2263 if (!text) {
2264 Py_DECREF(elem);
2265 return NULL;
2266 }
2267 if (text == Py_None) {
2268 Py_DECREF(elem);
2269 }
2270 else {
2271 Py_INCREF(text);
2272 Py_DECREF(elem);
2273 rc = PyObject_IsTrue(text);
2274 if (rc > 0)
2275 return text;
2276 Py_DECREF(text);
2277 if (rc < 0)
2278 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002279 }
2280 }
2281
2282 return NULL;
2283}
2284
2285
2286static PyTypeObject ElementIter_Type = {
2287 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002288 /* Using the module's name since the pure-Python implementation does not
2289 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002290 "_elementtree._element_iterator", /* tp_name */
2291 sizeof(ElementIterObject), /* tp_basicsize */
2292 0, /* tp_itemsize */
2293 /* methods */
2294 (destructor)elementiter_dealloc, /* tp_dealloc */
2295 0, /* tp_print */
2296 0, /* tp_getattr */
2297 0, /* tp_setattr */
2298 0, /* tp_reserved */
2299 0, /* tp_repr */
2300 0, /* tp_as_number */
2301 0, /* tp_as_sequence */
2302 0, /* tp_as_mapping */
2303 0, /* tp_hash */
2304 0, /* tp_call */
2305 0, /* tp_str */
2306 0, /* tp_getattro */
2307 0, /* tp_setattro */
2308 0, /* tp_as_buffer */
2309 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2310 0, /* tp_doc */
2311 (traverseproc)elementiter_traverse, /* tp_traverse */
2312 0, /* tp_clear */
2313 0, /* tp_richcompare */
2314 0, /* tp_weaklistoffset */
2315 PyObject_SelfIter, /* tp_iter */
2316 (iternextfunc)elementiter_next, /* tp_iternext */
2317 0, /* tp_methods */
2318 0, /* tp_members */
2319 0, /* tp_getset */
2320 0, /* tp_base */
2321 0, /* tp_dict */
2322 0, /* tp_descr_get */
2323 0, /* tp_descr_set */
2324 0, /* tp_dictoffset */
2325 0, /* tp_init */
2326 0, /* tp_alloc */
2327 0, /* tp_new */
2328};
2329
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002330#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002331
2332static PyObject *
2333create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2334{
2335 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002336
2337 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2338 if (!it)
2339 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002340
Victor Stinner4d463432013-07-11 23:05:03 +02002341 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002342 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002343 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002344 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002345 it->root_element = self;
2346
Eli Bendersky64d11e62012-06-15 07:42:50 +03002347 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002348
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002349 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002350 if (it->parent_stack == NULL) {
2351 Py_DECREF(it);
2352 PyErr_NoMemory();
2353 return NULL;
2354 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002355 it->parent_stack_used = 0;
2356 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002357
Eli Bendersky64d11e62012-06-15 07:42:50 +03002358 return (PyObject *)it;
2359}
2360
2361
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002362/* ==================================================================== */
2363/* the tree builder type */
2364
2365typedef struct {
2366 PyObject_HEAD
2367
Eli Bendersky58d548d2012-05-29 15:45:16 +03002368 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002369
Antoine Pitrouee329312012-10-04 19:53:29 +02002370 PyObject *this; /* current node */
2371 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002372
Eli Bendersky58d548d2012-05-29 15:45:16 +03002373 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002374
Eli Bendersky58d548d2012-05-29 15:45:16 +03002375 PyObject *stack; /* element stack */
2376 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002377
Eli Bendersky48d358b2012-05-30 17:57:50 +03002378 PyObject *element_factory;
2379
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002380 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002381 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002382 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2383 PyObject *end_event_obj;
2384 PyObject *start_ns_event_obj;
2385 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002386} TreeBuilderObject;
2387
Christian Heimes90aa7642007-12-19 02:45:37 +00002388#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002389
2390/* -------------------------------------------------------------------- */
2391/* constructor and destructor */
2392
Eli Bendersky58d548d2012-05-29 15:45:16 +03002393static PyObject *
2394treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002395{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002396 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2397 if (t != NULL) {
2398 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002399
Eli Bendersky58d548d2012-05-29 15:45:16 +03002400 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002401 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002402 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002403 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002404
Eli Bendersky58d548d2012-05-29 15:45:16 +03002405 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002406 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002407 t->stack = PyList_New(20);
2408 if (!t->stack) {
2409 Py_DECREF(t->this);
2410 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002411 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002412 return NULL;
2413 }
2414 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002415
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002416 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002417 t->start_event_obj = t->end_event_obj = NULL;
2418 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2419 }
2420 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002421}
2422
Serhiy Storchakacb985562015-05-04 15:32:48 +03002423/*[clinic input]
2424_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002425
Serhiy Storchakacb985562015-05-04 15:32:48 +03002426 element_factory: object = NULL
2427
2428[clinic start generated code]*/
2429
2430static int
2431_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2432 PyObject *element_factory)
2433/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2434{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002435 if (element_factory) {
2436 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002437 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002438 }
2439
Eli Bendersky58d548d2012-05-29 15:45:16 +03002440 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002441}
2442
Eli Bendersky48d358b2012-05-30 17:57:50 +03002443static int
2444treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2445{
2446 Py_VISIT(self->root);
2447 Py_VISIT(self->this);
2448 Py_VISIT(self->last);
2449 Py_VISIT(self->data);
2450 Py_VISIT(self->stack);
2451 Py_VISIT(self->element_factory);
2452 return 0;
2453}
2454
2455static int
2456treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002457{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002458 Py_CLEAR(self->end_ns_event_obj);
2459 Py_CLEAR(self->start_ns_event_obj);
2460 Py_CLEAR(self->end_event_obj);
2461 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002462 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002463 Py_CLEAR(self->stack);
2464 Py_CLEAR(self->data);
2465 Py_CLEAR(self->last);
2466 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002467 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002468 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002469 return 0;
2470}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002471
Eli Bendersky48d358b2012-05-30 17:57:50 +03002472static void
2473treebuilder_dealloc(TreeBuilderObject *self)
2474{
2475 PyObject_GC_UnTrack(self);
2476 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002477 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002478}
2479
2480/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002481/* helpers for handling of arbitrary element-like objects */
2482
2483static int
Serhiy Storchaka576def02017-03-30 09:47:31 +03002484treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
Antoine Pitrouee329312012-10-04 19:53:29 +02002485 PyObject **dest, _Py_Identifier *name)
2486{
2487 if (Element_CheckExact(element)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002488 PyObject *tmp = JOIN_OBJ(*dest);
2489 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2490 *data = NULL;
2491 Py_DECREF(tmp);
Antoine Pitrouee329312012-10-04 19:53:29 +02002492 return 0;
2493 }
2494 else {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002495 PyObject *joined = list_join(*data);
Antoine Pitrouee329312012-10-04 19:53:29 +02002496 int r;
2497 if (joined == NULL)
2498 return -1;
2499 r = _PyObject_SetAttrId(element, name, joined);
2500 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002501 if (r < 0)
2502 return -1;
2503 Py_CLEAR(*data);
2504 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002505 }
2506}
2507
Serhiy Storchaka576def02017-03-30 09:47:31 +03002508LOCAL(int)
2509treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002510{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002511 PyObject *element = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002512
Serhiy Storchaka576def02017-03-30 09:47:31 +03002513 if (!self->data) {
2514 return 0;
2515 }
2516
2517 if (self->this == element) {
2518 _Py_IDENTIFIER(text);
2519 return treebuilder_set_element_text_or_tail(
2520 element, &self->data,
2521 &((ElementObject *) element)->text, &PyId_text);
2522 }
2523 else {
2524 _Py_IDENTIFIER(tail);
2525 return treebuilder_set_element_text_or_tail(
2526 element, &self->data,
2527 &((ElementObject *) element)->tail, &PyId_tail);
2528 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002529}
2530
2531static int
2532treebuilder_add_subelement(PyObject *element, PyObject *child)
2533{
2534 _Py_IDENTIFIER(append);
2535 if (Element_CheckExact(element)) {
2536 ElementObject *elem = (ElementObject *) element;
2537 return element_add_subelement(elem, child);
2538 }
2539 else {
2540 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002541 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002542 if (res == NULL)
2543 return -1;
2544 Py_DECREF(res);
2545 return 0;
2546 }
2547}
2548
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002549LOCAL(int)
2550treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2551 PyObject *node)
2552{
2553 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002554 PyObject *res;
2555 PyObject *event = PyTuple_Pack(2, action, node);
2556 if (event == NULL)
2557 return -1;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002558 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002559 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002560 if (res == NULL)
2561 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002562 Py_DECREF(res);
2563 }
2564 return 0;
2565}
2566
Antoine Pitrouee329312012-10-04 19:53:29 +02002567/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002568/* handlers */
2569
2570LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002571treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2572 PyObject* attrib)
2573{
2574 PyObject* node;
2575 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002576 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002577
Serhiy Storchaka576def02017-03-30 09:47:31 +03002578 if (treebuilder_flush_data(self) < 0) {
2579 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002580 }
2581
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002582 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002583 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002584 } else if (attrib == Py_None) {
2585 attrib = PyDict_New();
2586 if (!attrib)
2587 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002588 node = PyObject_CallFunctionObjArgs(self->element_factory,
2589 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002590 Py_DECREF(attrib);
2591 }
2592 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002593 node = PyObject_CallFunctionObjArgs(self->element_factory,
2594 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002595 }
2596 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002597 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002598 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002599
Antoine Pitrouee329312012-10-04 19:53:29 +02002600 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002601
2602 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002603 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002604 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002605 } else {
2606 if (self->root) {
2607 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002608 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002609 "multiple elements on top level"
2610 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002611 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002612 }
2613 Py_INCREF(node);
2614 self->root = node;
2615 }
2616
2617 if (self->index < PyList_GET_SIZE(self->stack)) {
2618 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002619 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002620 Py_INCREF(this);
2621 } else {
2622 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002623 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002624 }
2625 self->index++;
2626
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002627 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002628 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002629 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002630 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002631
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002632 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2633 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002634
2635 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002636
2637 error:
2638 Py_DECREF(node);
2639 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002640}
2641
2642LOCAL(PyObject*)
2643treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2644{
2645 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002646 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002647 /* ignore calls to data before the first call to start */
2648 Py_RETURN_NONE;
2649 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002650 /* store the first item as is */
2651 Py_INCREF(data); self->data = data;
2652 } else {
2653 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002654 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2655 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002656 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002657 /* expat often generates single character data sections; handle
2658 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002659 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2660 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002661 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002662 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002663 } else if (PyList_CheckExact(self->data)) {
2664 if (PyList_Append(self->data, data) < 0)
2665 return NULL;
2666 } else {
2667 PyObject* list = PyList_New(2);
2668 if (!list)
2669 return NULL;
2670 PyList_SET_ITEM(list, 0, self->data);
2671 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2672 self->data = list;
2673 }
2674 }
2675
2676 Py_RETURN_NONE;
2677}
2678
2679LOCAL(PyObject*)
2680treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2681{
2682 PyObject* item;
2683
Serhiy Storchaka576def02017-03-30 09:47:31 +03002684 if (treebuilder_flush_data(self) < 0) {
2685 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002686 }
2687
2688 if (self->index == 0) {
2689 PyErr_SetString(
2690 PyExc_IndexError,
2691 "pop from empty stack"
2692 );
2693 return NULL;
2694 }
2695
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002696 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002697 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002698 self->index--;
2699 self->this = PyList_GET_ITEM(self->stack, self->index);
2700 Py_INCREF(self->this);
2701 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002702
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002703 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2704 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002705
2706 Py_INCREF(self->last);
2707 return (PyObject*) self->last;
2708}
2709
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002710/* -------------------------------------------------------------------- */
2711/* methods (in alphabetical order) */
2712
Serhiy Storchakacb985562015-05-04 15:32:48 +03002713/*[clinic input]
2714_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002715
Serhiy Storchakacb985562015-05-04 15:32:48 +03002716 data: object
2717 /
2718
2719[clinic start generated code]*/
2720
2721static PyObject *
2722_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2723/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2724{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002725 return treebuilder_handle_data(self, data);
2726}
2727
Serhiy Storchakacb985562015-05-04 15:32:48 +03002728/*[clinic input]
2729_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002730
Serhiy Storchakacb985562015-05-04 15:32:48 +03002731 tag: object
2732 /
2733
2734[clinic start generated code]*/
2735
2736static PyObject *
2737_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2738/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2739{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002740 return treebuilder_handle_end(self, tag);
2741}
2742
2743LOCAL(PyObject*)
2744treebuilder_done(TreeBuilderObject* self)
2745{
2746 PyObject* res;
2747
2748 /* FIXME: check stack size? */
2749
2750 if (self->root)
2751 res = self->root;
2752 else
2753 res = Py_None;
2754
2755 Py_INCREF(res);
2756 return res;
2757}
2758
Serhiy Storchakacb985562015-05-04 15:32:48 +03002759/*[clinic input]
2760_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002761
Serhiy Storchakacb985562015-05-04 15:32:48 +03002762[clinic start generated code]*/
2763
2764static PyObject *
2765_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2766/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2767{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002768 return treebuilder_done(self);
2769}
2770
Serhiy Storchakacb985562015-05-04 15:32:48 +03002771/*[clinic input]
2772_elementtree.TreeBuilder.start
2773
2774 tag: object
2775 attrs: object = None
2776 /
2777
2778[clinic start generated code]*/
2779
2780static PyObject *
2781_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2782 PyObject *attrs)
2783/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002784{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002785 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002786}
2787
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002788/* ==================================================================== */
2789/* the expat interface */
2790
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002791#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002792#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002793
2794/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2795 * cached globally without being in per-module state.
2796 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002797static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002798#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002799
Eli Bendersky52467b12012-06-01 07:13:08 +03002800static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2801 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2802
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002803typedef struct {
2804 PyObject_HEAD
2805
2806 XML_Parser parser;
2807
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002808 PyObject *target;
2809 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002810
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002811 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002812
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002813 PyObject *handle_start;
2814 PyObject *handle_data;
2815 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002816
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002817 PyObject *handle_comment;
2818 PyObject *handle_pi;
2819 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002820
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002821 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002822
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002823} XMLParserObject;
2824
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002825/* helpers */
2826
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002827LOCAL(PyObject*)
2828makeuniversal(XMLParserObject* self, const char* string)
2829{
2830 /* convert a UTF-8 tag/attribute name from the expat parser
2831 to a universal name string */
2832
Antoine Pitrouc1948842012-10-01 23:40:37 +02002833 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002834 PyObject* key;
2835 PyObject* value;
2836
2837 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002838 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002839 if (!key)
2840 return NULL;
2841
2842 value = PyDict_GetItem(self->names, key);
2843
2844 if (value) {
2845 Py_INCREF(value);
2846 } else {
2847 /* new name. convert to universal name, and decode as
2848 necessary */
2849
2850 PyObject* tag;
2851 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002852 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002853
2854 /* look for namespace separator */
2855 for (i = 0; i < size; i++)
2856 if (string[i] == '}')
2857 break;
2858 if (i != size) {
2859 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002860 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002861 if (tag == NULL) {
2862 Py_DECREF(key);
2863 return NULL;
2864 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002865 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002866 p[0] = '{';
2867 memcpy(p+1, string, size);
2868 size++;
2869 } else {
2870 /* plain name; use key as tag */
2871 Py_INCREF(key);
2872 tag = key;
2873 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002874
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002875 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002876 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002877 value = PyUnicode_DecodeUTF8(p, size, "strict");
2878 Py_DECREF(tag);
2879 if (!value) {
2880 Py_DECREF(key);
2881 return NULL;
2882 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002883
2884 /* add to names dictionary */
2885 if (PyDict_SetItem(self->names, key, value) < 0) {
2886 Py_DECREF(key);
2887 Py_DECREF(value);
2888 return NULL;
2889 }
2890 }
2891
2892 Py_DECREF(key);
2893 return value;
2894}
2895
Eli Bendersky5b77d812012-03-16 08:20:05 +02002896/* Set the ParseError exception with the given parameters.
2897 * If message is not NULL, it's used as the error string. Otherwise, the
2898 * message string is the default for the given error_code.
2899*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002900static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002901expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2902 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002903{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002904 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002905 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002906
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002907 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002908 message ? message : EXPAT(ErrorString)(error_code),
2909 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002910 if (errmsg == NULL)
2911 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002912
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002913 error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002914 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002915 if (!error)
2916 return;
2917
Eli Bendersky5b77d812012-03-16 08:20:05 +02002918 /* Add code and position attributes */
2919 code = PyLong_FromLong((long)error_code);
2920 if (!code) {
2921 Py_DECREF(error);
2922 return;
2923 }
2924 if (PyObject_SetAttrString(error, "code", code) == -1) {
2925 Py_DECREF(error);
2926 Py_DECREF(code);
2927 return;
2928 }
2929 Py_DECREF(code);
2930
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002931 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002932 if (!position) {
2933 Py_DECREF(error);
2934 return;
2935 }
2936 if (PyObject_SetAttrString(error, "position", position) == -1) {
2937 Py_DECREF(error);
2938 Py_DECREF(position);
2939 return;
2940 }
2941 Py_DECREF(position);
2942
Eli Bendersky532d03e2013-08-10 08:00:39 -07002943 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002944 Py_DECREF(error);
2945}
2946
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002947/* -------------------------------------------------------------------- */
2948/* handlers */
2949
2950static void
2951expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2952 int data_len)
2953{
2954 PyObject* key;
2955 PyObject* value;
2956 PyObject* res;
2957
2958 if (data_len < 2 || data_in[0] != '&')
2959 return;
2960
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002961 if (PyErr_Occurred())
2962 return;
2963
Neal Norwitz0269b912007-08-08 06:56:02 +00002964 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002965 if (!key)
2966 return;
2967
2968 value = PyDict_GetItem(self->entity, key);
2969
2970 if (value) {
2971 if (TreeBuilder_CheckExact(self->target))
2972 res = treebuilder_handle_data(
2973 (TreeBuilderObject*) self->target, value
2974 );
2975 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002976 res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002977 else
2978 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002979 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002980 } else if (!PyErr_Occurred()) {
2981 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002982 char message[128] = "undefined entity ";
2983 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002984 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002985 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002986 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002987 EXPAT(GetErrorColumnNumber)(self->parser),
2988 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002989 );
2990 }
2991
2992 Py_DECREF(key);
2993}
2994
2995static void
2996expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2997 const XML_Char **attrib_in)
2998{
2999 PyObject* res;
3000 PyObject* tag;
3001 PyObject* attrib;
3002 int ok;
3003
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003004 if (PyErr_Occurred())
3005 return;
3006
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003007 /* tag name */
3008 tag = makeuniversal(self, tag_in);
3009 if (!tag)
3010 return; /* parser will look for errors */
3011
3012 /* attributes */
3013 if (attrib_in[0]) {
3014 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003015 if (!attrib) {
3016 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003017 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003018 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003019 while (attrib_in[0] && attrib_in[1]) {
3020 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00003021 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003022 if (!key || !value) {
3023 Py_XDECREF(value);
3024 Py_XDECREF(key);
3025 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003026 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003027 return;
3028 }
3029 ok = PyDict_SetItem(attrib, key, value);
3030 Py_DECREF(value);
3031 Py_DECREF(key);
3032 if (ok < 0) {
3033 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003034 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003035 return;
3036 }
3037 attrib_in += 2;
3038 }
3039 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003040 Py_INCREF(Py_None);
3041 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03003042 }
3043
3044 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003045 /* shortcut */
3046 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3047 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003048 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003049 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003050 if (attrib == Py_None) {
3051 Py_DECREF(attrib);
3052 attrib = PyDict_New();
3053 if (!attrib) {
3054 Py_DECREF(tag);
3055 return;
3056 }
3057 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003058 res = PyObject_CallFunctionObjArgs(self->handle_start,
3059 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003060 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003061 res = NULL;
3062
3063 Py_DECREF(tag);
3064 Py_DECREF(attrib);
3065
3066 Py_XDECREF(res);
3067}
3068
3069static void
3070expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3071 int data_len)
3072{
3073 PyObject* data;
3074 PyObject* res;
3075
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003076 if (PyErr_Occurred())
3077 return;
3078
Neal Norwitz0269b912007-08-08 06:56:02 +00003079 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003080 if (!data)
3081 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003082
3083 if (TreeBuilder_CheckExact(self->target))
3084 /* shortcut */
3085 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3086 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003087 res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003088 else
3089 res = NULL;
3090
3091 Py_DECREF(data);
3092
3093 Py_XDECREF(res);
3094}
3095
3096static void
3097expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3098{
3099 PyObject* tag;
3100 PyObject* res = NULL;
3101
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003102 if (PyErr_Occurred())
3103 return;
3104
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003105 if (TreeBuilder_CheckExact(self->target))
3106 /* shortcut */
3107 /* the standard tree builder doesn't look at the end tag */
3108 res = treebuilder_handle_end(
3109 (TreeBuilderObject*) self->target, Py_None
3110 );
3111 else if (self->handle_end) {
3112 tag = makeuniversal(self, tag_in);
3113 if (tag) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003114 res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003115 Py_DECREF(tag);
3116 }
3117 }
3118
3119 Py_XDECREF(res);
3120}
3121
3122static void
3123expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3124 const XML_Char *uri)
3125{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003126 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3127 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003128
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003129 if (PyErr_Occurred())
3130 return;
3131
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003132 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003133 return;
3134
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003135 if (!uri)
3136 uri = "";
3137 if (!prefix)
3138 prefix = "";
3139
3140 parcel = Py_BuildValue("ss", prefix, uri);
3141 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003142 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003143 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3144 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003145}
3146
3147static void
3148expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3149{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003150 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3151
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003152 if (PyErr_Occurred())
3153 return;
3154
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003155 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003156 return;
3157
3158 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003159}
3160
3161static void
3162expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3163{
3164 PyObject* comment;
3165 PyObject* res;
3166
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003167 if (PyErr_Occurred())
3168 return;
3169
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003170 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003171 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003172 if (comment) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003173 res = PyObject_CallFunctionObjArgs(self->handle_comment,
3174 comment, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003175 Py_XDECREF(res);
3176 Py_DECREF(comment);
3177 }
3178 }
3179}
3180
Eli Bendersky45839902013-01-13 05:14:47 -08003181static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003182expat_start_doctype_handler(XMLParserObject *self,
3183 const XML_Char *doctype_name,
3184 const XML_Char *sysid,
3185 const XML_Char *pubid,
3186 int has_internal_subset)
3187{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003188 _Py_IDENTIFIER(doctype);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003189 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003190 PyObject *res;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003191
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003192 if (PyErr_Occurred())
3193 return;
3194
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003195 doctype_name_obj = makeuniversal(self, doctype_name);
3196 if (!doctype_name_obj)
3197 return;
3198
3199 if (sysid) {
3200 sysid_obj = makeuniversal(self, sysid);
3201 if (!sysid_obj) {
3202 Py_DECREF(doctype_name_obj);
3203 return;
3204 }
3205 } else {
3206 Py_INCREF(Py_None);
3207 sysid_obj = Py_None;
3208 }
3209
3210 if (pubid) {
3211 pubid_obj = makeuniversal(self, pubid);
3212 if (!pubid_obj) {
3213 Py_DECREF(doctype_name_obj);
3214 Py_DECREF(sysid_obj);
3215 return;
3216 }
3217 } else {
3218 Py_INCREF(Py_None);
3219 pubid_obj = Py_None;
3220 }
3221
3222 /* If the target has a handler for doctype, call it. */
3223 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003224 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3225 doctype_name_obj, pubid_obj,
3226 sysid_obj, NULL);
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003227 Py_XDECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003228 }
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003229 else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3230 (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3231 "The doctype() method of XMLParser is ignored. "
3232 "Define doctype() method on the TreeBuilder target.",
3233 1);
Serhiy Storchakaee98e7b2018-07-25 14:52:45 +03003234 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003235 }
3236
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003237 Py_DECREF(doctype_name_obj);
3238 Py_DECREF(pubid_obj);
3239 Py_DECREF(sysid_obj);
3240}
3241
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003242static void
3243expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3244 const XML_Char* data_in)
3245{
3246 PyObject* target;
3247 PyObject* data;
3248 PyObject* res;
3249
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003250 if (PyErr_Occurred())
3251 return;
3252
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003253 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003254 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3255 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003256 if (target && data) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003257 res = PyObject_CallFunctionObjArgs(self->handle_pi,
3258 target, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003259 Py_XDECREF(res);
3260 Py_DECREF(data);
3261 Py_DECREF(target);
3262 } else {
3263 Py_XDECREF(data);
3264 Py_XDECREF(target);
3265 }
3266 }
3267}
3268
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003269/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003270
Eli Bendersky52467b12012-06-01 07:13:08 +03003271static PyObject *
3272xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003273{
Eli Bendersky52467b12012-06-01 07:13:08 +03003274 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3275 if (self) {
3276 self->parser = NULL;
3277 self->target = self->entity = self->names = NULL;
3278 self->handle_start = self->handle_data = self->handle_end = NULL;
3279 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003280 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003281 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003282 return (PyObject *)self;
3283}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003284
scoderc8d8e152017-09-14 22:00:03 +02003285static int
3286ignore_attribute_error(PyObject *value)
3287{
3288 if (value == NULL) {
3289 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3290 return -1;
3291 }
3292 PyErr_Clear();
3293 }
3294 return 0;
3295}
3296
Serhiy Storchakacb985562015-05-04 15:32:48 +03003297/*[clinic input]
3298_elementtree.XMLParser.__init__
3299
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003300 *
Serhiy Storchakacb985562015-05-04 15:32:48 +03003301 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003302 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003303
3304[clinic start generated code]*/
3305
Eli Bendersky52467b12012-06-01 07:13:08 +03003306static int
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003307_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3308 const char *encoding)
3309/*[clinic end generated code: output=3ae45ec6cdf344e4 input=96288fcba916cfce]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003310{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003311 self->entity = PyDict_New();
3312 if (!self->entity)
3313 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003314
Serhiy Storchakacb985562015-05-04 15:32:48 +03003315 self->names = PyDict_New();
3316 if (!self->names) {
3317 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003318 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003319 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003320
Serhiy Storchakacb985562015-05-04 15:32:48 +03003321 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3322 if (!self->parser) {
3323 Py_CLEAR(self->entity);
3324 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003325 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003326 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003327 }
Christian Heimescb5778f2018-09-18 14:38:58 +02003328 /* expat < 2.1.0 has no XML_SetHashSalt() */
3329 if (EXPAT(SetHashSalt) != NULL) {
3330 EXPAT(SetHashSalt)(self->parser,
3331 (unsigned long)_Py_HashSecret.expat.hashsalt);
3332 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003333
Eli Bendersky52467b12012-06-01 07:13:08 +03003334 if (target) {
3335 Py_INCREF(target);
3336 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003337 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003338 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003339 Py_CLEAR(self->entity);
3340 Py_CLEAR(self->names);
3341 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003342 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003343 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003344 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003345 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003346
Serhiy Storchakacb985562015-05-04 15:32:48 +03003347 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003348 if (ignore_attribute_error(self->handle_start)) {
3349 return -1;
3350 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003351 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003352 if (ignore_attribute_error(self->handle_data)) {
3353 return -1;
3354 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003355 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003356 if (ignore_attribute_error(self->handle_end)) {
3357 return -1;
3358 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003359 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003360 if (ignore_attribute_error(self->handle_comment)) {
3361 return -1;
3362 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003363 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003364 if (ignore_attribute_error(self->handle_pi)) {
3365 return -1;
3366 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003367 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003368 if (ignore_attribute_error(self->handle_close)) {
3369 return -1;
3370 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003371 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003372 if (ignore_attribute_error(self->handle_doctype)) {
3373 return -1;
3374 }
Eli Bendersky45839902013-01-13 05:14:47 -08003375
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003376 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003377 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003378 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003379 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003380 (XML_StartElementHandler) expat_start_handler,
3381 (XML_EndElementHandler) expat_end_handler
3382 );
3383 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003384 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003385 (XML_DefaultHandler) expat_default_handler
3386 );
3387 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003388 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003389 (XML_CharacterDataHandler) expat_data_handler
3390 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003391 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003392 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003393 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003394 (XML_CommentHandler) expat_comment_handler
3395 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003396 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003397 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003398 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003399 (XML_ProcessingInstructionHandler) expat_pi_handler
3400 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003401 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003402 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003403 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3404 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003405 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003406 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003407 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003408 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003409
Eli Bendersky52467b12012-06-01 07:13:08 +03003410 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003411}
3412
Eli Bendersky52467b12012-06-01 07:13:08 +03003413static int
3414xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3415{
3416 Py_VISIT(self->handle_close);
3417 Py_VISIT(self->handle_pi);
3418 Py_VISIT(self->handle_comment);
3419 Py_VISIT(self->handle_end);
3420 Py_VISIT(self->handle_data);
3421 Py_VISIT(self->handle_start);
3422
3423 Py_VISIT(self->target);
3424 Py_VISIT(self->entity);
3425 Py_VISIT(self->names);
3426
3427 return 0;
3428}
3429
3430static int
3431xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003432{
Victor Stinnere727d412017-09-18 05:29:37 -07003433 if (self->parser != NULL) {
3434 XML_Parser parser = self->parser;
3435 self->parser = NULL;
3436 EXPAT(ParserFree)(parser);
3437 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003438
Antoine Pitrouc1948842012-10-01 23:40:37 +02003439 Py_CLEAR(self->handle_close);
3440 Py_CLEAR(self->handle_pi);
3441 Py_CLEAR(self->handle_comment);
3442 Py_CLEAR(self->handle_end);
3443 Py_CLEAR(self->handle_data);
3444 Py_CLEAR(self->handle_start);
3445 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003446
Antoine Pitrouc1948842012-10-01 23:40:37 +02003447 Py_CLEAR(self->target);
3448 Py_CLEAR(self->entity);
3449 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003450
Eli Bendersky52467b12012-06-01 07:13:08 +03003451 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003452}
3453
Eli Bendersky52467b12012-06-01 07:13:08 +03003454static void
3455xmlparser_dealloc(XMLParserObject* self)
3456{
3457 PyObject_GC_UnTrack(self);
3458 xmlparser_gc_clear(self);
3459 Py_TYPE(self)->tp_free((PyObject *)self);
3460}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003461
3462LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003463expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003464{
3465 int ok;
3466
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003467 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003468 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3469
3470 if (PyErr_Occurred())
3471 return NULL;
3472
3473 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003474 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003475 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003476 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003477 EXPAT(GetErrorColumnNumber)(self->parser),
3478 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003479 );
3480 return NULL;
3481 }
3482
3483 Py_RETURN_NONE;
3484}
3485
Serhiy Storchakacb985562015-05-04 15:32:48 +03003486/*[clinic input]
3487_elementtree.XMLParser.close
3488
3489[clinic start generated code]*/
3490
3491static PyObject *
3492_elementtree_XMLParser_close_impl(XMLParserObject *self)
3493/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003494{
3495 /* end feeding data to parser */
3496
3497 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003498 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003499 if (!res)
3500 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003501
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003502 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003503 Py_DECREF(res);
3504 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003505 }
3506 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003507 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003508 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003509 }
3510 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003511 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003512 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003513}
3514
Serhiy Storchakacb985562015-05-04 15:32:48 +03003515/*[clinic input]
3516_elementtree.XMLParser.feed
3517
3518 data: object
3519 /
3520
3521[clinic start generated code]*/
3522
3523static PyObject *
3524_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3525/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003526{
3527 /* feed data to parser */
3528
Serhiy Storchakacb985562015-05-04 15:32:48 +03003529 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003530 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003531 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3532 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003533 return NULL;
3534 if (data_len > INT_MAX) {
3535 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3536 return NULL;
3537 }
3538 /* Explicitly set UTF-8 encoding. Return code ignored. */
3539 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003540 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003541 }
3542 else {
3543 Py_buffer view;
3544 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003545 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003546 return NULL;
3547 if (view.len > INT_MAX) {
3548 PyBuffer_Release(&view);
3549 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3550 return NULL;
3551 }
3552 res = expat_parse(self, view.buf, (int)view.len, 0);
3553 PyBuffer_Release(&view);
3554 return res;
3555 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003556}
3557
Serhiy Storchakacb985562015-05-04 15:32:48 +03003558/*[clinic input]
3559_elementtree.XMLParser._parse_whole
3560
3561 file: object
3562 /
3563
3564[clinic start generated code]*/
3565
3566static PyObject *
3567_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3568/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003569{
Eli Benderskya3699232013-05-19 18:47:23 -07003570 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003571 PyObject* reader;
3572 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003573 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003574 PyObject* res;
3575
Serhiy Storchakacb985562015-05-04 15:32:48 +03003576 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003577 if (!reader)
3578 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003579
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003580 /* read from open file object */
3581 for (;;) {
3582
3583 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3584
3585 if (!buffer) {
3586 /* read failed (e.g. due to KeyboardInterrupt) */
3587 Py_DECREF(reader);
3588 return NULL;
3589 }
3590
Eli Benderskyf996e772012-03-16 05:53:30 +02003591 if (PyUnicode_CheckExact(buffer)) {
3592 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003593 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003594 Py_DECREF(buffer);
3595 break;
3596 }
3597 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003598 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003599 if (!temp) {
3600 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003601 Py_DECREF(reader);
3602 return NULL;
3603 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003604 buffer = temp;
3605 }
3606 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003607 Py_DECREF(buffer);
3608 break;
3609 }
3610
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003611 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3612 Py_DECREF(buffer);
3613 Py_DECREF(reader);
3614 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3615 return NULL;
3616 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003617 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003618 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003619 );
3620
3621 Py_DECREF(buffer);
3622
3623 if (!res) {
3624 Py_DECREF(reader);
3625 return NULL;
3626 }
3627 Py_DECREF(res);
3628
3629 }
3630
3631 Py_DECREF(reader);
3632
3633 res = expat_parse(self, "", 0, 1);
3634
3635 if (res && TreeBuilder_CheckExact(self->target)) {
3636 Py_DECREF(res);
3637 return treebuilder_done((TreeBuilderObject*) self->target);
3638 }
3639
3640 return res;
3641}
3642
Serhiy Storchakacb985562015-05-04 15:32:48 +03003643/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03003644_elementtree.XMLParser._setevents
3645
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003646 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003647 events_to_report: object = None
3648 /
3649
3650[clinic start generated code]*/
3651
3652static PyObject *
3653_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3654 PyObject *events_queue,
3655 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003656/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003657{
3658 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003659 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003660 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003661 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003662
3663 if (!TreeBuilder_CheckExact(self->target)) {
3664 PyErr_SetString(
3665 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003666 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003667 "targets"
3668 );
3669 return NULL;
3670 }
3671
3672 target = (TreeBuilderObject*) self->target;
3673
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003674 events_append = PyObject_GetAttrString(events_queue, "append");
3675 if (events_append == NULL)
3676 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003677 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003678
3679 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003680 Py_CLEAR(target->start_event_obj);
3681 Py_CLEAR(target->end_event_obj);
3682 Py_CLEAR(target->start_ns_event_obj);
3683 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003684
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003685 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003686 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003687 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003688 Py_RETURN_NONE;
3689 }
3690
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003691 if (!(events_seq = PySequence_Fast(events_to_report,
3692 "events must be a sequence"))) {
3693 return NULL;
3694 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003695
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03003696 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003697 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02003698 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003699 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003700 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003701 } else if (PyBytes_Check(event_name_obj)) {
3702 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003703 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003704 if (event_name == NULL) {
3705 Py_DECREF(events_seq);
3706 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3707 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003708 }
3709
3710 Py_INCREF(event_name_obj);
3711 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003712 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003713 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003714 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003715 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003716 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003717 EXPAT(SetNamespaceDeclHandler)(
3718 self->parser,
3719 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3720 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3721 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003722 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003723 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003724 EXPAT(SetNamespaceDeclHandler)(
3725 self->parser,
3726 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3727 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3728 );
3729 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003730 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003731 Py_DECREF(events_seq);
3732 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003733 return NULL;
3734 }
3735 }
3736
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003737 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003738 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003739}
3740
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003741static PyMemberDef xmlparser_members[] = {
3742 {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
3743 {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
3744 {NULL}
3745};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003746
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003747static PyObject*
3748xmlparser_version_getter(XMLParserObject *self, void *closure)
3749{
3750 return PyUnicode_FromFormat(
3751 "Expat %d.%d.%d", XML_MAJOR_VERSION,
3752 XML_MINOR_VERSION, XML_MICRO_VERSION);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003753}
3754
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003755static PyGetSetDef xmlparser_getsetlist[] = {
3756 {"version", (getter)xmlparser_version_getter, NULL, NULL},
3757 {NULL},
3758};
3759
Serhiy Storchakacb985562015-05-04 15:32:48 +03003760#include "clinic/_elementtree.c.h"
3761
3762static PyMethodDef element_methods[] = {
3763
3764 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3765
3766 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3767 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3768
3769 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3770 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3771 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3772
3773 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3774 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3775 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3776 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3777
3778 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3779 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3780 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3781
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003782 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003783 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3784
3785 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3786 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3787
3788 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3789
3790 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3791 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3792 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3793 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3794 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3795
3796 {NULL, NULL}
3797};
3798
3799static PyMappingMethods element_as_mapping = {
3800 (lenfunc) element_length,
3801 (binaryfunc) element_subscr,
3802 (objobjargproc) element_ass_subscr,
3803};
3804
Serhiy Storchakadde08152015-11-25 15:28:13 +02003805static PyGetSetDef element_getsetlist[] = {
3806 {"tag",
3807 (getter)element_tag_getter,
3808 (setter)element_tag_setter,
3809 "A string identifying what kind of data this element represents"},
3810 {"text",
3811 (getter)element_text_getter,
3812 (setter)element_text_setter,
3813 "A string of text directly after the start tag, or None"},
3814 {"tail",
3815 (getter)element_tail_getter,
3816 (setter)element_tail_setter,
3817 "A string of text directly after the end tag, or None"},
3818 {"attrib",
3819 (getter)element_attrib_getter,
3820 (setter)element_attrib_setter,
3821 "A dictionary containing the element's attributes"},
3822 {NULL},
3823};
3824
Serhiy Storchakacb985562015-05-04 15:32:48 +03003825static PyTypeObject Element_Type = {
3826 PyVarObject_HEAD_INIT(NULL, 0)
3827 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3828 /* methods */
3829 (destructor)element_dealloc, /* tp_dealloc */
3830 0, /* tp_print */
3831 0, /* tp_getattr */
3832 0, /* tp_setattr */
3833 0, /* tp_reserved */
3834 (reprfunc)element_repr, /* tp_repr */
3835 0, /* tp_as_number */
3836 &element_as_sequence, /* tp_as_sequence */
3837 &element_as_mapping, /* tp_as_mapping */
3838 0, /* tp_hash */
3839 0, /* tp_call */
3840 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003841 PyObject_GenericGetAttr, /* tp_getattro */
3842 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003843 0, /* tp_as_buffer */
3844 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3845 /* tp_flags */
3846 0, /* tp_doc */
3847 (traverseproc)element_gc_traverse, /* tp_traverse */
3848 (inquiry)element_gc_clear, /* tp_clear */
3849 0, /* tp_richcompare */
3850 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3851 0, /* tp_iter */
3852 0, /* tp_iternext */
3853 element_methods, /* tp_methods */
3854 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003855 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003856 0, /* tp_base */
3857 0, /* tp_dict */
3858 0, /* tp_descr_get */
3859 0, /* tp_descr_set */
3860 0, /* tp_dictoffset */
3861 (initproc)element_init, /* tp_init */
3862 PyType_GenericAlloc, /* tp_alloc */
3863 element_new, /* tp_new */
3864 0, /* tp_free */
3865};
3866
3867static PyMethodDef treebuilder_methods[] = {
3868 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3869 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3870 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3871 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3872 {NULL, NULL}
3873};
3874
3875static PyTypeObject TreeBuilder_Type = {
3876 PyVarObject_HEAD_INIT(NULL, 0)
3877 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3878 /* methods */
3879 (destructor)treebuilder_dealloc, /* tp_dealloc */
3880 0, /* tp_print */
3881 0, /* tp_getattr */
3882 0, /* tp_setattr */
3883 0, /* tp_reserved */
3884 0, /* tp_repr */
3885 0, /* tp_as_number */
3886 0, /* tp_as_sequence */
3887 0, /* tp_as_mapping */
3888 0, /* tp_hash */
3889 0, /* tp_call */
3890 0, /* tp_str */
3891 0, /* tp_getattro */
3892 0, /* tp_setattro */
3893 0, /* tp_as_buffer */
3894 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3895 /* tp_flags */
3896 0, /* tp_doc */
3897 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3898 (inquiry)treebuilder_gc_clear, /* tp_clear */
3899 0, /* tp_richcompare */
3900 0, /* tp_weaklistoffset */
3901 0, /* tp_iter */
3902 0, /* tp_iternext */
3903 treebuilder_methods, /* tp_methods */
3904 0, /* tp_members */
3905 0, /* tp_getset */
3906 0, /* tp_base */
3907 0, /* tp_dict */
3908 0, /* tp_descr_get */
3909 0, /* tp_descr_set */
3910 0, /* tp_dictoffset */
3911 _elementtree_TreeBuilder___init__, /* tp_init */
3912 PyType_GenericAlloc, /* tp_alloc */
3913 treebuilder_new, /* tp_new */
3914 0, /* tp_free */
3915};
3916
3917static PyMethodDef xmlparser_methods[] = {
3918 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3919 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3920 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3921 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003922 {NULL, NULL}
3923};
3924
Neal Norwitz227b5332006-03-22 09:28:35 +00003925static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003926 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003927 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003928 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003929 (destructor)xmlparser_dealloc, /* tp_dealloc */
3930 0, /* tp_print */
3931 0, /* tp_getattr */
3932 0, /* tp_setattr */
3933 0, /* tp_reserved */
3934 0, /* tp_repr */
3935 0, /* tp_as_number */
3936 0, /* tp_as_sequence */
3937 0, /* tp_as_mapping */
3938 0, /* tp_hash */
3939 0, /* tp_call */
3940 0, /* tp_str */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003941 0, /* tp_getattro */
Eli Bendersky52467b12012-06-01 07:13:08 +03003942 0, /* tp_setattro */
3943 0, /* tp_as_buffer */
3944 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3945 /* tp_flags */
3946 0, /* tp_doc */
3947 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3948 (inquiry)xmlparser_gc_clear, /* tp_clear */
3949 0, /* tp_richcompare */
3950 0, /* tp_weaklistoffset */
3951 0, /* tp_iter */
3952 0, /* tp_iternext */
3953 xmlparser_methods, /* tp_methods */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003954 xmlparser_members, /* tp_members */
3955 xmlparser_getsetlist, /* tp_getset */
Eli Bendersky52467b12012-06-01 07:13:08 +03003956 0, /* tp_base */
3957 0, /* tp_dict */
3958 0, /* tp_descr_get */
3959 0, /* tp_descr_set */
3960 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003961 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003962 PyType_GenericAlloc, /* tp_alloc */
3963 xmlparser_new, /* tp_new */
3964 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003965};
3966
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003967/* ==================================================================== */
3968/* python module interface */
3969
3970static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003971 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003972 {NULL, NULL}
3973};
3974
Martin v. Löwis1a214512008-06-11 05:26:20 +00003975
Eli Bendersky532d03e2013-08-10 08:00:39 -07003976static struct PyModuleDef elementtreemodule = {
3977 PyModuleDef_HEAD_INIT,
3978 "_elementtree",
3979 NULL,
3980 sizeof(elementtreestate),
3981 _functions,
3982 NULL,
3983 elementtree_traverse,
3984 elementtree_clear,
3985 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003986};
3987
Neal Norwitzf6657e62006-12-28 04:47:50 +00003988PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003989PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003990{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003991 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003992 elementtreestate *st;
3993
3994 m = PyState_FindModule(&elementtreemodule);
3995 if (m) {
3996 Py_INCREF(m);
3997 return m;
3998 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003999
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004000 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02004001 if (PyType_Ready(&ElementIter_Type) < 0)
4002 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004003 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004004 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004005 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004006 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004007 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004008 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004009
Eli Bendersky532d03e2013-08-10 08:00:39 -07004010 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00004011 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00004012 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004013 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00004014
Eli Bendersky828efde2012-04-05 05:40:58 +03004015 if (!(temp = PyImport_ImportModule("copy")))
4016 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004017 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03004018 Py_XDECREF(temp);
4019
Victor Stinnerb136f112017-07-10 22:28:02 +02004020 if (st->deepcopy_obj == NULL) {
4021 return NULL;
4022 }
4023
4024 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004025 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004026 return NULL;
4027
Eli Bendersky20d41742012-06-01 09:48:37 +03004028 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004029 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4030 if (expat_capi) {
4031 /* check that it's usable */
4032 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004033 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004034 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4035 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004036 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004037 PyErr_SetString(PyExc_ImportError,
4038 "pyexpat version is incompatible");
4039 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004040 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004041 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004042 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004043 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004044
Eli Bendersky532d03e2013-08-10 08:00:39 -07004045 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004046 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004047 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004048 Py_INCREF(st->parseerror_obj);
4049 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004050
Eli Bendersky092af1f2012-03-04 07:14:03 +02004051 Py_INCREF((PyObject *)&Element_Type);
4052 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4053
Eli Bendersky58d548d2012-05-29 15:45:16 +03004054 Py_INCREF((PyObject *)&TreeBuilder_Type);
4055 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4056
Eli Bendersky52467b12012-06-01 07:13:08 +03004057 Py_INCREF((PyObject *)&XMLParser_Type);
4058 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004059
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004060 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004061}