blob: 1500a6de270dcdc7405498864e70bab46eb8d32b [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000022/* An element can hold this many children without extra memory
23 allocations. */
24#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
38#if 0
39static int memory = 0;
40#define ALLOC(size, comment)\
41do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
42#define RELEASE(size, comment)\
43do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
44#else
45#define ALLOC(size, comment)
46#define RELEASE(size, comment)
47#endif
48
49/* compiler tweaks */
50#if defined(_MSC_VER)
51#define LOCAL(type) static __inline type __fastcall
52#else
53#define LOCAL(type) static type
54#endif
55
/* The text and tail slots of an element are tagged pointers: the lowest
   bit of the stored address carries a 'join' flag.  Every access to the
   underlying object must therefore go through JOIN_OBJ.  See the comments
   in the ElementObject definition for details. */

/* strip the flag bit and yield the real object pointer */
#define JOIN_OBJ(p) ((PyObject *)(((uintptr_t)(p)) & ~((uintptr_t)1)))
/* yield the flag bit (0 or 1) stored in the pointer */
#define JOIN_GET(p) (((uintptr_t)(p)) & 1)
/* yield the object pointer of p combined with the given flag bit */
#define JOIN_SET(p, flag) ((void *)(((uintptr_t)JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
Ronald Oussoren138d0802013-07-19 11:11:25 +020083/* Types defined by this extension */
84static PyTypeObject Element_Type;
85static PyTypeObject ElementIter_Type;
86static PyTypeObject TreeBuilder_Type;
87static PyTypeObject XMLParser_Type;
88
89
Eli Bendersky532d03e2013-08-10 08:00:39 -070090/* Per-module state; PEP 3121 */
91typedef struct {
92 PyObject *parseerror_obj;
93 PyObject *deepcopy_obj;
94 PyObject *elementpath_obj;
95} elementtreestate;
96
97static struct PyModuleDef elementtreemodule;
98
99/* Given a module object (assumed to be _elementtree), get its per-module
100 * state.
101 */
102#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
103
104/* Find the module instance imported in the currently running sub-interpreter
105 * and get its state.
106 */
107#define ET_STATE_GLOBAL \
108 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
109
110static int
111elementtree_clear(PyObject *m)
112{
113 elementtreestate *st = ET_STATE(m);
114 Py_CLEAR(st->parseerror_obj);
115 Py_CLEAR(st->deepcopy_obj);
116 Py_CLEAR(st->elementpath_obj);
117 return 0;
118}
119
120static int
121elementtree_traverse(PyObject *m, visitproc visit, void *arg)
122{
123 elementtreestate *st = ET_STATE(m);
124 Py_VISIT(st->parseerror_obj);
125 Py_VISIT(st->deepcopy_obj);
126 Py_VISIT(st->elementpath_obj);
127 return 0;
128}
129
130static void
131elementtree_free(void *m)
132{
133 elementtree_clear((PyObject *)m);
134}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135
136/* helpers */
137
138LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139list_join(PyObject* list)
140{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300141 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000143 PyObject* result;
144
Antoine Pitrouc1948842012-10-01 23:40:37 +0200145 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000146 if (!joiner)
147 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200148 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000150 return result;
151}
152
Eli Bendersky48d358b2012-05-30 17:57:50 +0300153/* Is the given object an empty dictionary?
154*/
155static int
156is_empty_dict(PyObject *obj)
157{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200158 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300159}
160
161
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000162/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200163/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000164
165typedef struct {
166
167 /* attributes (a dictionary object), or None if no attributes */
168 PyObject* attrib;
169
170 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200171 Py_ssize_t length; /* actual number of items */
172 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000173
174 /* this either points to _children or to a malloced buffer */
175 PyObject* *children;
176
177 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179} ElementObjectExtra;
180
181typedef struct {
182 PyObject_HEAD
183
184 /* element tag (a string). */
185 PyObject* tag;
186
187 /* text before first child. note that this is a tagged pointer;
188 use JOIN_OBJ to get the object pointer. the join flag is used
189 to distinguish lists created by the tree builder from lists
190 assigned to the attribute by application code; the former
191 should be joined before being returned to the user, the latter
192 should be left intact. */
193 PyObject* text;
194
195 /* text after this element, in parent. note that this is a tagged
196 pointer; use JOIN_OBJ to get the object pointer. */
197 PyObject* tail;
198
199 ElementObjectExtra* extra;
200
Eli Benderskyebf37a22012-04-03 22:02:37 +0300201 PyObject *weakreflist; /* For tp_weaklistoffset */
202
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000203} ElementObject;
204
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
Christian Heimes90aa7642007-12-19 02:45:37 +0000206#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000207
208/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200209/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000210
211LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200212create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000213{
214 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200215 if (!self->extra) {
216 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000217 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200218 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000219
220 if (!attrib)
221 attrib = Py_None;
222
223 Py_INCREF(attrib);
224 self->extra->attrib = attrib;
225
226 self->extra->length = 0;
227 self->extra->allocated = STATIC_CHILDREN;
228 self->extra->children = self->extra->_children;
229
230 return 0;
231}
232
233LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200234dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000235{
Eli Bendersky08b85292012-04-04 15:55:07 +0300236 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200237 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300238
Eli Benderskyebf37a22012-04-03 22:02:37 +0300239 if (!self->extra)
240 return;
241
242 /* Avoid DECREFs calling into this code again (cycles, etc.)
243 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300244 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300245 self->extra = NULL;
246
247 Py_DECREF(myextra->attrib);
248
Eli Benderskyebf37a22012-04-03 22:02:37 +0300249 for (i = 0; i < myextra->length; i++)
250 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000251
Eli Benderskyebf37a22012-04-03 22:02:37 +0300252 if (myextra->children != myextra->_children)
253 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000254
Eli Benderskyebf37a22012-04-03 22:02:37 +0300255 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256}
257
Eli Bendersky092af1f2012-03-04 07:14:03 +0200258/* Convenience internal function to create new Element objects with the given
259 * tag and attributes.
260*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000261LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200262create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000263{
264 ElementObject* self;
265
Eli Bendersky0192ba32012-03-30 16:38:33 +0300266 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000267 if (self == NULL)
268 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000269 self->extra = NULL;
270
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000271 Py_INCREF(tag);
272 self->tag = tag;
273
274 Py_INCREF(Py_None);
275 self->text = Py_None;
276
277 Py_INCREF(Py_None);
278 self->tail = Py_None;
279
Eli Benderskyebf37a22012-04-03 22:02:37 +0300280 self->weakreflist = NULL;
281
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200282 ALLOC(sizeof(ElementObject), "create element");
283 PyObject_GC_Track(self);
284
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200285 if (attrib != Py_None && !is_empty_dict(attrib)) {
286 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200287 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200288 return NULL;
289 }
290 }
291
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000292 return (PyObject*) self;
293}
294
Eli Bendersky092af1f2012-03-04 07:14:03 +0200295static PyObject *
296element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
297{
298 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
299 if (e != NULL) {
300 Py_INCREF(Py_None);
301 e->tag = Py_None;
302
303 Py_INCREF(Py_None);
304 e->text = Py_None;
305
306 Py_INCREF(Py_None);
307 e->tail = Py_None;
308
309 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300310 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200311 }
312 return (PyObject *)e;
313}
314
Eli Bendersky737b1732012-05-29 06:02:56 +0300315/* Helper function for extracting the attrib dictionary from a keywords dict.
316 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800317 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300318 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700319 *
320 * Return a dictionary with the content of kwds merged into the content of
321 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300322 */
323static PyObject*
324get_attrib_from_keywords(PyObject *kwds)
325{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700326 PyObject *attrib_str = PyUnicode_FromString("attrib");
327 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300328
329 if (attrib) {
330 /* If attrib was found in kwds, copy its value and remove it from
331 * kwds
332 */
333 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700334 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300335 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
336 Py_TYPE(attrib)->tp_name);
337 return NULL;
338 }
339 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700340 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300341 } else {
342 attrib = PyDict_New();
343 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700344
345 Py_DECREF(attrib_str);
346
347 /* attrib can be NULL if PyDict_New failed */
348 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200349 if (PyDict_Update(attrib, kwds) < 0)
350 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300351 return attrib;
352}
353
Serhiy Storchakacb985562015-05-04 15:32:48 +0300354/*[clinic input]
355module _elementtree
356class _elementtree.Element "ElementObject *" "&Element_Type"
357class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
358class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
359[clinic start generated code]*/
360/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
361
Eli Bendersky092af1f2012-03-04 07:14:03 +0200362static int
363element_init(PyObject *self, PyObject *args, PyObject *kwds)
364{
365 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200366 PyObject *attrib = NULL;
367 ElementObject *self_elem;
368
369 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
370 return -1;
371
Eli Bendersky737b1732012-05-29 06:02:56 +0300372 if (attrib) {
373 /* attrib passed as positional arg */
374 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200375 if (!attrib)
376 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300377 if (kwds) {
378 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200379 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300380 return -1;
381 }
382 }
383 } else if (kwds) {
384 /* have keywords args */
385 attrib = get_attrib_from_keywords(kwds);
386 if (!attrib)
387 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200388 }
389
390 self_elem = (ElementObject *)self;
391
Antoine Pitrouc1948842012-10-01 23:40:37 +0200392 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200393 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200394 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200395 return -1;
396 }
397 }
398
Eli Bendersky48d358b2012-05-30 17:57:50 +0300399 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200400 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200401
402 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300404 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405
Eli Bendersky092af1f2012-03-04 07:14:03 +0200406 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300407 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408
Eli Bendersky092af1f2012-03-04 07:14:03 +0200409 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300410 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200411
412 return 0;
413}
414
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000415LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200416element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200418 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000419 PyObject* *children;
420
421 /* make sure self->children can hold the given number of extra
422 elements. set an exception and return -1 if allocation failed */
423
Victor Stinner5f0af232013-07-11 23:01:36 +0200424 if (!self->extra) {
425 if (create_extra(self, NULL) < 0)
426 return -1;
427 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000428
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200429 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000430
431 if (size > self->extra->allocated) {
432 /* use Python 2.4's list growth strategy */
433 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000434 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100435 * which needs at least 4 bytes.
436 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000437 * be safe.
438 */
439 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200440 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
441 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000442 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000443 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100444 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000445 * false alarm always assume at least one child to be safe.
446 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447 children = PyObject_Realloc(self->extra->children,
448 size * sizeof(PyObject*));
449 if (!children)
450 goto nomemory;
451 } else {
452 children = PyObject_Malloc(size * sizeof(PyObject*));
453 if (!children)
454 goto nomemory;
455 /* copy existing children from static area to malloc buffer */
456 memcpy(children, self->extra->children,
457 self->extra->length * sizeof(PyObject*));
458 }
459 self->extra->children = children;
460 self->extra->allocated = size;
461 }
462
463 return 0;
464
465 nomemory:
466 PyErr_NoMemory();
467 return -1;
468}
469
470LOCAL(int)
471element_add_subelement(ElementObject* self, PyObject* element)
472{
473 /* add a child element to a parent */
474
475 if (element_resize(self, 1) < 0)
476 return -1;
477
478 Py_INCREF(element);
479 self->extra->children[self->extra->length] = element;
480
481 self->extra->length++;
482
483 return 0;
484}
485
486LOCAL(PyObject*)
487element_get_attrib(ElementObject* self)
488{
489 /* return borrowed reference to attrib dictionary */
490 /* note: this function assumes that the extra section exists */
491
492 PyObject* res = self->extra->attrib;
493
494 if (res == Py_None) {
495 /* create missing dictionary */
496 res = PyDict_New();
497 if (!res)
498 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200499 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000500 self->extra->attrib = res;
501 }
502
503 return res;
504}
505
506LOCAL(PyObject*)
507element_get_text(ElementObject* self)
508{
509 /* return borrowed reference to text attribute */
510
Serhiy Storchaka576def02017-03-30 09:47:31 +0300511 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000512
513 if (JOIN_GET(res)) {
514 res = JOIN_OBJ(res);
515 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300516 PyObject *tmp = list_join(res);
517 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000518 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300519 self->text = tmp;
520 Py_DECREF(res);
521 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000522 }
523 }
524
525 return res;
526}
527
528LOCAL(PyObject*)
529element_get_tail(ElementObject* self)
530{
531 /* return borrowed reference to text attribute */
532
Serhiy Storchaka576def02017-03-30 09:47:31 +0300533 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000534
535 if (JOIN_GET(res)) {
536 res = JOIN_OBJ(res);
537 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300538 PyObject *tmp = list_join(res);
539 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000540 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300541 self->tail = tmp;
542 Py_DECREF(res);
543 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000544 }
545 }
546
547 return res;
548}
549
550static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300551subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000552{
553 PyObject* elem;
554
555 ElementObject* parent;
556 PyObject* tag;
557 PyObject* attrib = NULL;
558 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
559 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800560 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800562 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563
Eli Bendersky737b1732012-05-29 06:02:56 +0300564 if (attrib) {
565 /* attrib passed as positional arg */
566 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000567 if (!attrib)
568 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300569 if (kwds) {
570 if (PyDict_Update(attrib, kwds) < 0) {
571 return NULL;
572 }
573 }
574 } else if (kwds) {
575 /* have keyword args */
576 attrib = get_attrib_from_keywords(kwds);
577 if (!attrib)
578 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000579 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300580 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000581 Py_INCREF(Py_None);
582 attrib = Py_None;
583 }
584
Eli Bendersky092af1f2012-03-04 07:14:03 +0200585 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000586 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200587 if (elem == NULL)
588 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000589
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000590 if (element_add_subelement(parent, elem) < 0) {
591 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000592 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000593 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000594
595 return elem;
596}
597
Eli Bendersky0192ba32012-03-30 16:38:33 +0300598static int
599element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
600{
601 Py_VISIT(self->tag);
602 Py_VISIT(JOIN_OBJ(self->text));
603 Py_VISIT(JOIN_OBJ(self->tail));
604
605 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200606 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300607 Py_VISIT(self->extra->attrib);
608
609 for (i = 0; i < self->extra->length; ++i)
610 Py_VISIT(self->extra->children[i]);
611 }
612 return 0;
613}
614
615static int
616element_gc_clear(ElementObject *self)
617{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300618 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700619 _clear_joined_ptr(&self->text);
620 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300621
622 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300623 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300624 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300625 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300626 return 0;
627}
628
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000629static void
630element_dealloc(ElementObject* self)
631{
INADA Naokia6296d32017-08-24 14:55:17 +0900632 /* bpo-31095: UnTrack is needed before calling any callbacks */
Eli Bendersky0192ba32012-03-30 16:38:33 +0300633 PyObject_GC_UnTrack(self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200634 Py_TRASHCAN_SAFE_BEGIN(self)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300635
636 if (self->weakreflist != NULL)
637 PyObject_ClearWeakRefs((PyObject *) self);
638
Eli Bendersky0192ba32012-03-30 16:38:33 +0300639 /* element_gc_clear clears all references and deallocates extra
640 */
641 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000642
643 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200644 Py_TYPE(self)->tp_free((PyObject *)self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200645 Py_TRASHCAN_SAFE_END(self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000646}
647
648/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000649
Serhiy Storchakacb985562015-05-04 15:32:48 +0300650/*[clinic input]
651_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000652
Serhiy Storchakacb985562015-05-04 15:32:48 +0300653 subelement: object(subclass_of='&Element_Type')
654 /
655
656[clinic start generated code]*/
657
658static PyObject *
659_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
660/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
661{
662 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000663 return NULL;
664
665 Py_RETURN_NONE;
666}
667
Serhiy Storchakacb985562015-05-04 15:32:48 +0300668/*[clinic input]
669_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000670
Serhiy Storchakacb985562015-05-04 15:32:48 +0300671[clinic start generated code]*/
672
673static PyObject *
674_elementtree_Element_clear_impl(ElementObject *self)
675/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
676{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300677 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000678
679 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300680 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000681
682 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300683 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000684
685 Py_RETURN_NONE;
686}
687
Serhiy Storchakacb985562015-05-04 15:32:48 +0300688/*[clinic input]
689_elementtree.Element.__copy__
690
691[clinic start generated code]*/
692
693static PyObject *
694_elementtree_Element___copy___impl(ElementObject *self)
695/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000696{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200697 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000698 ElementObject* element;
699
Eli Bendersky092af1f2012-03-04 07:14:03 +0200700 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800701 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000702 if (!element)
703 return NULL;
704
Oren Milman39ecb9c2017-10-10 23:26:24 +0300705 Py_INCREF(JOIN_OBJ(self->text));
706 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000707
Oren Milman39ecb9c2017-10-10 23:26:24 +0300708 Py_INCREF(JOIN_OBJ(self->tail));
709 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000710
711 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000712 if (element_resize(element, self->extra->length) < 0) {
713 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000714 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000715 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716
717 for (i = 0; i < self->extra->length; i++) {
718 Py_INCREF(self->extra->children[i]);
719 element->extra->children[i] = self->extra->children[i];
720 }
721
722 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000723 }
724
725 return (PyObject*) element;
726}
727
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200728/* Helper for a deep copy. */
729LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
730
Serhiy Storchakacb985562015-05-04 15:32:48 +0300731/*[clinic input]
732_elementtree.Element.__deepcopy__
733
Oren Milmand0568182017-09-12 17:39:15 +0300734 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300735 /
736
737[clinic start generated code]*/
738
739static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300740_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
741/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000742{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200743 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000744 ElementObject* element;
745 PyObject* tag;
746 PyObject* attrib;
747 PyObject* text;
748 PyObject* tail;
749 PyObject* id;
750
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000751 tag = deepcopy(self->tag, memo);
752 if (!tag)
753 return NULL;
754
755 if (self->extra) {
756 attrib = deepcopy(self->extra->attrib, memo);
757 if (!attrib) {
758 Py_DECREF(tag);
759 return NULL;
760 }
761 } else {
762 Py_INCREF(Py_None);
763 attrib = Py_None;
764 }
765
Eli Bendersky092af1f2012-03-04 07:14:03 +0200766 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000767
768 Py_DECREF(tag);
769 Py_DECREF(attrib);
770
771 if (!element)
772 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100773
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000774 text = deepcopy(JOIN_OBJ(self->text), memo);
775 if (!text)
776 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300777 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000778
779 tail = deepcopy(JOIN_OBJ(self->tail), memo);
780 if (!tail)
781 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300782 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000783
784 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000785 if (element_resize(element, self->extra->length) < 0)
786 goto error;
787
788 for (i = 0; i < self->extra->length; i++) {
789 PyObject* child = deepcopy(self->extra->children[i], memo);
790 if (!child) {
791 element->extra->length = i;
792 goto error;
793 }
794 element->extra->children[i] = child;
795 }
796
797 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000798 }
799
800 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700801 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000802 if (!id)
803 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000804
805 i = PyDict_SetItem(memo, id, (PyObject*) element);
806
807 Py_DECREF(id);
808
809 if (i < 0)
810 goto error;
811
812 return (PyObject*) element;
813
814 error:
815 Py_DECREF(element);
816 return NULL;
817}
818
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200819LOCAL(PyObject *)
820deepcopy(PyObject *object, PyObject *memo)
821{
822 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200823 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200824 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200825
826 /* Fast paths */
827 if (object == Py_None || PyUnicode_CheckExact(object)) {
828 Py_INCREF(object);
829 return object;
830 }
831
832 if (Py_REFCNT(object) == 1) {
833 if (PyDict_CheckExact(object)) {
834 PyObject *key, *value;
835 Py_ssize_t pos = 0;
836 int simple = 1;
837 while (PyDict_Next(object, &pos, &key, &value)) {
838 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
839 simple = 0;
840 break;
841 }
842 }
843 if (simple)
844 return PyDict_Copy(object);
845 /* Fall through to general case */
846 }
847 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300848 return _elementtree_Element___deepcopy___impl(
849 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200850 }
851 }
852
853 /* General case */
854 st = ET_STATE_GLOBAL;
855 if (!st->deepcopy_obj) {
856 PyErr_SetString(PyExc_RuntimeError,
857 "deepcopy helper not found");
858 return NULL;
859 }
860
Victor Stinner7fbac452016-08-20 01:34:44 +0200861 stack[0] = object;
862 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200863 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200864}
865
866
Serhiy Storchakacb985562015-05-04 15:32:48 +0300867/*[clinic input]
868_elementtree.Element.__sizeof__ -> Py_ssize_t
869
870[clinic start generated code]*/
871
872static Py_ssize_t
873_elementtree_Element___sizeof___impl(ElementObject *self)
874/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200875{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200876 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200877 if (self->extra) {
878 result += sizeof(ElementObjectExtra);
879 if (self->extra->children != self->extra->_children)
880 result += sizeof(PyObject*) * self->extra->allocated;
881 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300882 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200883}
884
Eli Bendersky698bdb22013-01-10 06:01:06 -0800885/* dict keys for getstate/setstate. */
886#define PICKLED_TAG "tag"
887#define PICKLED_CHILDREN "_children"
888#define PICKLED_ATTRIB "attrib"
889#define PICKLED_TAIL "tail"
890#define PICKLED_TEXT "text"
891
892/* __getstate__ returns a fabricated instance dict as in the pure-Python
893 * Element implementation, for interoperability/interchangeability. This
894 * makes the pure-Python implementation details an API, but (a) there aren't
895 * any unnecessary structures there; and (b) it buys compatibility with 3.2
896 * pickles. See issue #16076.
897 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300898/*[clinic input]
899_elementtree.Element.__getstate__
900
901[clinic start generated code]*/
902
Eli Bendersky698bdb22013-01-10 06:01:06 -0800903static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300904_elementtree_Element___getstate___impl(ElementObject *self)
905/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800906{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200907 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800908 PyObject *instancedict = NULL, *children;
909
910 /* Build a list of children. */
911 children = PyList_New(self->extra ? self->extra->length : 0);
912 if (!children)
913 return NULL;
914 for (i = 0; i < PyList_GET_SIZE(children); i++) {
915 PyObject *child = self->extra->children[i];
916 Py_INCREF(child);
917 PyList_SET_ITEM(children, i, child);
918 }
919
920 /* Construct the state object. */
921 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
922 if (noattrib)
923 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
924 PICKLED_TAG, self->tag,
925 PICKLED_CHILDREN, children,
926 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700927 PICKLED_TEXT, JOIN_OBJ(self->text),
928 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800929 else
930 instancedict = Py_BuildValue("{sOsOsOsOsO}",
931 PICKLED_TAG, self->tag,
932 PICKLED_CHILDREN, children,
933 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700934 PICKLED_TEXT, JOIN_OBJ(self->text),
935 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800936 if (instancedict) {
937 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800938 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800939 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800940 else {
941 for (i = 0; i < PyList_GET_SIZE(children); i++)
942 Py_DECREF(PyList_GET_ITEM(children, i));
943 Py_DECREF(children);
944
945 return NULL;
946 }
947}
948
949static PyObject *
950element_setstate_from_attributes(ElementObject *self,
951 PyObject *tag,
952 PyObject *attrib,
953 PyObject *text,
954 PyObject *tail,
955 PyObject *children)
956{
957 Py_ssize_t i, nchildren;
958
959 if (!tag) {
960 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
961 return NULL;
962 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800963
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200964 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300965 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800966
Oren Milman39ecb9c2017-10-10 23:26:24 +0300967 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
968 Py_INCREF(JOIN_OBJ(text));
969 _set_joined_ptr(&self->text, text);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800970
Oren Milman39ecb9c2017-10-10 23:26:24 +0300971 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
972 Py_INCREF(JOIN_OBJ(tail));
973 _set_joined_ptr(&self->tail, tail);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800974
975 /* Handle ATTRIB and CHILDREN. */
976 if (!children && !attrib)
977 Py_RETURN_NONE;
978
979 /* Compute 'nchildren'. */
980 if (children) {
981 if (!PyList_Check(children)) {
982 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
983 return NULL;
984 }
985 nchildren = PyList_Size(children);
986 }
987 else {
988 nchildren = 0;
989 }
990
991 /* Allocate 'extra'. */
992 if (element_resize(self, nchildren)) {
993 return NULL;
994 }
995 assert(self->extra && self->extra->allocated >= nchildren);
996
997 /* Copy children */
998 for (i = 0; i < nchildren; i++) {
999 self->extra->children[i] = PyList_GET_ITEM(children, i);
1000 Py_INCREF(self->extra->children[i]);
1001 }
1002
1003 self->extra->length = nchildren;
1004 self->extra->allocated = nchildren;
1005
1006 /* Stash attrib. */
1007 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001008 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001009 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001010 }
1011
1012 Py_RETURN_NONE;
1013}
1014
1015/* __setstate__ for Element instance from the Python implementation.
1016 * 'state' should be the instance dict.
1017 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001018
Eli Bendersky698bdb22013-01-10 06:01:06 -08001019static PyObject *
1020element_setstate_from_Python(ElementObject *self, PyObject *state)
1021{
1022 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1023 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1024 PyObject *args;
1025 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001026 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001027
Eli Bendersky698bdb22013-01-10 06:01:06 -08001028 tag = attrib = text = tail = children = NULL;
1029 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001030 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001031 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001032
1033 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1034 &attrib, &text, &tail, &children))
1035 retval = element_setstate_from_attributes(self, tag, attrib, text,
1036 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001037 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001038 retval = NULL;
1039
1040 Py_DECREF(args);
1041 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001042}
1043
Serhiy Storchakacb985562015-05-04 15:32:48 +03001044/*[clinic input]
1045_elementtree.Element.__setstate__
1046
1047 state: object
1048 /
1049
1050[clinic start generated code]*/
1051
Eli Bendersky698bdb22013-01-10 06:01:06 -08001052static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001053_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1054/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001055{
1056 if (!PyDict_CheckExact(state)) {
1057 PyErr_Format(PyExc_TypeError,
1058 "Don't know how to unpickle \"%.200R\" as an Element",
1059 state);
1060 return NULL;
1061 }
1062 else
1063 return element_setstate_from_Python(self, state);
1064}
1065
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001066LOCAL(int)
1067checkpath(PyObject* tag)
1068{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001069 Py_ssize_t i;
1070 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001071
1072 /* check if a tag contains an xpath character */
1073
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001074#define PATHCHAR(ch) \
1075 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001076
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001077 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001078 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1079 void *data = PyUnicode_DATA(tag);
1080 unsigned int kind = PyUnicode_KIND(tag);
1081 for (i = 0; i < len; i++) {
1082 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1083 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001084 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001085 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001086 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001087 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001088 return 1;
1089 }
1090 return 0;
1091 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001092 if (PyBytes_Check(tag)) {
1093 char *p = PyBytes_AS_STRING(tag);
1094 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001095 if (p[i] == '{')
1096 check = 0;
1097 else if (p[i] == '}')
1098 check = 1;
1099 else if (check && PATHCHAR(p[i]))
1100 return 1;
1101 }
1102 return 0;
1103 }
1104
1105 return 1; /* unknown type; might be path expression */
1106}
1107
Serhiy Storchakacb985562015-05-04 15:32:48 +03001108/*[clinic input]
1109_elementtree.Element.extend
1110
1111 elements: object
1112 /
1113
1114[clinic start generated code]*/
1115
1116static PyObject *
1117_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1118/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001119{
1120 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001121 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001122
Serhiy Storchakacb985562015-05-04 15:32:48 +03001123 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001124 if (!seq) {
1125 PyErr_Format(
1126 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001127 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001128 );
1129 return NULL;
1130 }
1131
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001132 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001133 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001134 Py_INCREF(element);
1135 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001136 PyErr_Format(
1137 PyExc_TypeError,
1138 "expected an Element, not \"%.200s\"",
1139 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001140 Py_DECREF(seq);
1141 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001142 return NULL;
1143 }
1144
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001145 if (element_add_subelement(self, element) < 0) {
1146 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001147 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001148 return NULL;
1149 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001150 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001151 }
1152
1153 Py_DECREF(seq);
1154
1155 Py_RETURN_NONE;
1156}
1157
Serhiy Storchakacb985562015-05-04 15:32:48 +03001158/*[clinic input]
1159_elementtree.Element.find
1160
1161 path: object
1162 namespaces: object = None
1163
1164[clinic start generated code]*/
1165
1166static PyObject *
1167_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1168 PyObject *namespaces)
1169/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001170{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001171 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001172 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001173
Serhiy Storchakacb985562015-05-04 15:32:48 +03001174 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001175 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001176 return _PyObject_CallMethodIdObjArgs(
1177 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001178 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001179 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001180
1181 if (!self->extra)
1182 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001183
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001184 for (i = 0; i < self->extra->length; i++) {
1185 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001186 int rc;
1187 if (!Element_CheckExact(item))
1188 continue;
1189 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001190 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001191 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001192 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001193 Py_DECREF(item);
1194 if (rc < 0)
1195 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001196 }
1197
1198 Py_RETURN_NONE;
1199}
1200
Serhiy Storchakacb985562015-05-04 15:32:48 +03001201/*[clinic input]
1202_elementtree.Element.findtext
1203
1204 path: object
1205 default: object = None
1206 namespaces: object = None
1207
1208[clinic start generated code]*/
1209
1210static PyObject *
1211_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1212 PyObject *default_value,
1213 PyObject *namespaces)
1214/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001215{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001216 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001217 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001218 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001219
Serhiy Storchakacb985562015-05-04 15:32:48 +03001220 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001221 return _PyObject_CallMethodIdObjArgs(
1222 st->elementpath_obj, &PyId_findtext,
1223 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001224 );
1225
1226 if (!self->extra) {
1227 Py_INCREF(default_value);
1228 return default_value;
1229 }
1230
1231 for (i = 0; i < self->extra->length; i++) {
1232 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001233 int rc;
1234 if (!Element_CheckExact(item))
1235 continue;
1236 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001237 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001238 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001239 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001240 if (text == Py_None) {
1241 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001242 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001243 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001244 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001245 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001246 return text;
1247 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001248 Py_DECREF(item);
1249 if (rc < 0)
1250 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001251 }
1252
1253 Py_INCREF(default_value);
1254 return default_value;
1255}
1256
Serhiy Storchakacb985562015-05-04 15:32:48 +03001257/*[clinic input]
1258_elementtree.Element.findall
1259
1260 path: object
1261 namespaces: object = None
1262
1263[clinic start generated code]*/
1264
1265static PyObject *
1266_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1267 PyObject *namespaces)
1268/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001269{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001270 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001271 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001272 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001273 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001274
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001275 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001276 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001277 return _PyObject_CallMethodIdObjArgs(
1278 st->elementpath_obj, &PyId_findall, self, tag, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001279 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001280 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001281
1282 out = PyList_New(0);
1283 if (!out)
1284 return NULL;
1285
1286 if (!self->extra)
1287 return out;
1288
1289 for (i = 0; i < self->extra->length; i++) {
1290 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001291 int rc;
1292 if (!Element_CheckExact(item))
1293 continue;
1294 Py_INCREF(item);
1295 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1296 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1297 Py_DECREF(item);
1298 Py_DECREF(out);
1299 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001300 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001301 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001302 }
1303
1304 return out;
1305}
1306
Serhiy Storchakacb985562015-05-04 15:32:48 +03001307/*[clinic input]
1308_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001309
Serhiy Storchakacb985562015-05-04 15:32:48 +03001310 path: object
1311 namespaces: object = None
1312
1313[clinic start generated code]*/
1314
1315static PyObject *
1316_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1317 PyObject *namespaces)
1318/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1319{
1320 PyObject* tag = path;
1321 _Py_IDENTIFIER(iterfind);
1322 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001323
Victor Stinnerf5616342016-12-09 15:26:00 +01001324 return _PyObject_CallMethodIdObjArgs(
1325 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001326}
1327
Serhiy Storchakacb985562015-05-04 15:32:48 +03001328/*[clinic input]
1329_elementtree.Element.get
1330
1331 key: object
1332 default: object = None
1333
1334[clinic start generated code]*/
1335
1336static PyObject *
1337_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1338 PyObject *default_value)
1339/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001340{
1341 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001342
1343 if (!self->extra || self->extra->attrib == Py_None)
1344 value = default_value;
1345 else {
1346 value = PyDict_GetItem(self->extra->attrib, key);
1347 if (!value)
1348 value = default_value;
1349 }
1350
1351 Py_INCREF(value);
1352 return value;
1353}
1354
Serhiy Storchakacb985562015-05-04 15:32:48 +03001355/*[clinic input]
1356_elementtree.Element.getchildren
1357
1358[clinic start generated code]*/
1359
1360static PyObject *
1361_elementtree_Element_getchildren_impl(ElementObject *self)
1362/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001363{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001364 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001365 PyObject* list;
1366
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001367 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1368 "This method will be removed in future versions. "
1369 "Use 'list(elem)' or iteration over elem instead.",
1370 1) < 0) {
1371 return NULL;
1372 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001373
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001374 if (!self->extra)
1375 return PyList_New(0);
1376
1377 list = PyList_New(self->extra->length);
1378 if (!list)
1379 return NULL;
1380
1381 for (i = 0; i < self->extra->length; i++) {
1382 PyObject* item = self->extra->children[i];
1383 Py_INCREF(item);
1384 PyList_SET_ITEM(list, i, item);
1385 }
1386
1387 return list;
1388}
1389
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001390
Eli Bendersky64d11e62012-06-15 07:42:50 +03001391static PyObject *
1392create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1393
1394
Serhiy Storchakacb985562015-05-04 15:32:48 +03001395/*[clinic input]
1396_elementtree.Element.iter
1397
1398 tag: object = None
1399
1400[clinic start generated code]*/
1401
Eli Bendersky64d11e62012-06-15 07:42:50 +03001402static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001403_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1404/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001405{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001406 if (PyUnicode_Check(tag)) {
1407 if (PyUnicode_READY(tag) < 0)
1408 return NULL;
1409 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1410 tag = Py_None;
1411 }
1412 else if (PyBytes_Check(tag)) {
1413 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1414 tag = Py_None;
1415 }
1416
Eli Bendersky64d11e62012-06-15 07:42:50 +03001417 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001418}
1419
1420
Serhiy Storchakacb985562015-05-04 15:32:48 +03001421/*[clinic input]
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001422_elementtree.Element.getiterator
1423
1424 tag: object = None
1425
1426[clinic start generated code]*/
1427
1428static PyObject *
1429_elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1430/*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1431{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03001432 if (PyErr_WarnEx(PyExc_DeprecationWarning,
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001433 "This method will be removed in future versions. "
1434 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1435 1) < 0) {
1436 return NULL;
1437 }
1438 return _elementtree_Element_iter_impl(self, tag);
1439}
1440
1441
1442/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03001443_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001444
Serhiy Storchakacb985562015-05-04 15:32:48 +03001445[clinic start generated code]*/
1446
1447static PyObject *
1448_elementtree_Element_itertext_impl(ElementObject *self)
1449/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1450{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001451 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001452}
1453
Eli Bendersky64d11e62012-06-15 07:42:50 +03001454
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001455static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001456element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001457{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001458 ElementObject* self = (ElementObject*) self_;
1459
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001460 if (!self->extra || index < 0 || index >= self->extra->length) {
1461 PyErr_SetString(
1462 PyExc_IndexError,
1463 "child index out of range"
1464 );
1465 return NULL;
1466 }
1467
1468 Py_INCREF(self->extra->children[index]);
1469 return self->extra->children[index];
1470}
1471
Serhiy Storchakacb985562015-05-04 15:32:48 +03001472/*[clinic input]
1473_elementtree.Element.insert
1474
1475 index: Py_ssize_t
1476 subelement: object(subclass_of='&Element_Type')
1477 /
1478
1479[clinic start generated code]*/
1480
1481static PyObject *
1482_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1483 PyObject *subelement)
1484/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001485{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001486 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001487
Victor Stinner5f0af232013-07-11 23:01:36 +02001488 if (!self->extra) {
1489 if (create_extra(self, NULL) < 0)
1490 return NULL;
1491 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001492
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001493 if (index < 0) {
1494 index += self->extra->length;
1495 if (index < 0)
1496 index = 0;
1497 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001498 if (index > self->extra->length)
1499 index = self->extra->length;
1500
1501 if (element_resize(self, 1) < 0)
1502 return NULL;
1503
1504 for (i = self->extra->length; i > index; i--)
1505 self->extra->children[i] = self->extra->children[i-1];
1506
Serhiy Storchakacb985562015-05-04 15:32:48 +03001507 Py_INCREF(subelement);
1508 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001509
1510 self->extra->length++;
1511
1512 Py_RETURN_NONE;
1513}
1514
Serhiy Storchakacb985562015-05-04 15:32:48 +03001515/*[clinic input]
1516_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001517
Serhiy Storchakacb985562015-05-04 15:32:48 +03001518[clinic start generated code]*/
1519
1520static PyObject *
1521_elementtree_Element_items_impl(ElementObject *self)
1522/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1523{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001524 if (!self->extra || self->extra->attrib == Py_None)
1525 return PyList_New(0);
1526
1527 return PyDict_Items(self->extra->attrib);
1528}
1529
Serhiy Storchakacb985562015-05-04 15:32:48 +03001530/*[clinic input]
1531_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001532
Serhiy Storchakacb985562015-05-04 15:32:48 +03001533[clinic start generated code]*/
1534
1535static PyObject *
1536_elementtree_Element_keys_impl(ElementObject *self)
1537/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1538{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001539 if (!self->extra || self->extra->attrib == Py_None)
1540 return PyList_New(0);
1541
1542 return PyDict_Keys(self->extra->attrib);
1543}
1544
Martin v. Löwis18e16552006-02-15 17:27:45 +00001545static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001546element_length(ElementObject* self)
1547{
1548 if (!self->extra)
1549 return 0;
1550
1551 return self->extra->length;
1552}
1553
Serhiy Storchakacb985562015-05-04 15:32:48 +03001554/*[clinic input]
1555_elementtree.Element.makeelement
1556
1557 tag: object
1558 attrib: object
1559 /
1560
1561[clinic start generated code]*/
1562
1563static PyObject *
1564_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1565 PyObject *attrib)
1566/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001567{
1568 PyObject* elem;
1569
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001570 attrib = PyDict_Copy(attrib);
1571 if (!attrib)
1572 return NULL;
1573
Eli Bendersky092af1f2012-03-04 07:14:03 +02001574 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001575
1576 Py_DECREF(attrib);
1577
1578 return elem;
1579}
1580
Serhiy Storchakacb985562015-05-04 15:32:48 +03001581/*[clinic input]
1582_elementtree.Element.remove
1583
1584 subelement: object(subclass_of='&Element_Type')
1585 /
1586
1587[clinic start generated code]*/
1588
1589static PyObject *
1590_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1591/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001592{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001593 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001594 int rc;
1595 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001596
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001597 if (!self->extra) {
1598 /* element has no children, so raise exception */
1599 PyErr_SetString(
1600 PyExc_ValueError,
1601 "list.remove(x): x not in list"
1602 );
1603 return NULL;
1604 }
1605
1606 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001607 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001608 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001609 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001610 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001611 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001612 if (rc < 0)
1613 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001614 }
1615
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001616 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001617 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001618 PyErr_SetString(
1619 PyExc_ValueError,
1620 "list.remove(x): x not in list"
1621 );
1622 return NULL;
1623 }
1624
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001625 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001626
1627 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001628 for (; i < self->extra->length; i++)
1629 self->extra->children[i] = self->extra->children[i+1];
1630
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001631 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001632 Py_RETURN_NONE;
1633}
1634
1635static PyObject*
1636element_repr(ElementObject* self)
1637{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001638 int status;
1639
1640 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001641 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001642
1643 status = Py_ReprEnter((PyObject *)self);
1644 if (status == 0) {
1645 PyObject *res;
1646 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1647 Py_ReprLeave((PyObject *)self);
1648 return res;
1649 }
1650 if (status > 0)
1651 PyErr_Format(PyExc_RuntimeError,
1652 "reentrant call inside %s.__repr__",
1653 Py_TYPE(self)->tp_name);
1654 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001655}
1656
Serhiy Storchakacb985562015-05-04 15:32:48 +03001657/*[clinic input]
1658_elementtree.Element.set
1659
1660 key: object
1661 value: object
1662 /
1663
1664[clinic start generated code]*/
1665
1666static PyObject *
1667_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1668 PyObject *value)
1669/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001670{
1671 PyObject* attrib;
1672
Victor Stinner5f0af232013-07-11 23:01:36 +02001673 if (!self->extra) {
1674 if (create_extra(self, NULL) < 0)
1675 return NULL;
1676 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001677
1678 attrib = element_get_attrib(self);
1679 if (!attrib)
1680 return NULL;
1681
1682 if (PyDict_SetItem(attrib, key, value) < 0)
1683 return NULL;
1684
1685 Py_RETURN_NONE;
1686}
1687
1688static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001689element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001690{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001691 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001692 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001693 PyObject* old;
1694
1695 if (!self->extra || index < 0 || index >= self->extra->length) {
1696 PyErr_SetString(
1697 PyExc_IndexError,
1698 "child assignment index out of range");
1699 return -1;
1700 }
1701
1702 old = self->extra->children[index];
1703
1704 if (item) {
1705 Py_INCREF(item);
1706 self->extra->children[index] = item;
1707 } else {
1708 self->extra->length--;
1709 for (i = index; i < self->extra->length; i++)
1710 self->extra->children[i] = self->extra->children[i+1];
1711 }
1712
1713 Py_DECREF(old);
1714
1715 return 0;
1716}
1717
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001718static PyObject*
1719element_subscr(PyObject* self_, PyObject* item)
1720{
1721 ElementObject* self = (ElementObject*) self_;
1722
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001723 if (PyIndex_Check(item)) {
1724 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001725
1726 if (i == -1 && PyErr_Occurred()) {
1727 return NULL;
1728 }
1729 if (i < 0 && self->extra)
1730 i += self->extra->length;
1731 return element_getitem(self_, i);
1732 }
1733 else if (PySlice_Check(item)) {
1734 Py_ssize_t start, stop, step, slicelen, cur, i;
1735 PyObject* list;
1736
1737 if (!self->extra)
1738 return PyList_New(0);
1739
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001740 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001741 return NULL;
1742 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001743 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1744 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001745
1746 if (slicelen <= 0)
1747 return PyList_New(0);
1748 else {
1749 list = PyList_New(slicelen);
1750 if (!list)
1751 return NULL;
1752
1753 for (cur = start, i = 0; i < slicelen;
1754 cur += step, i++) {
1755 PyObject* item = self->extra->children[cur];
1756 Py_INCREF(item);
1757 PyList_SET_ITEM(list, i, item);
1758 }
1759
1760 return list;
1761 }
1762 }
1763 else {
1764 PyErr_SetString(PyExc_TypeError,
1765 "element indices must be integers");
1766 return NULL;
1767 }
1768}
1769
1770static int
1771element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1772{
1773 ElementObject* self = (ElementObject*) self_;
1774
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001775 if (PyIndex_Check(item)) {
1776 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001777
1778 if (i == -1 && PyErr_Occurred()) {
1779 return -1;
1780 }
1781 if (i < 0 && self->extra)
1782 i += self->extra->length;
1783 return element_setitem(self_, i, value);
1784 }
1785 else if (PySlice_Check(item)) {
1786 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1787
1788 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001789 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001790
Victor Stinner5f0af232013-07-11 23:01:36 +02001791 if (!self->extra) {
1792 if (create_extra(self, NULL) < 0)
1793 return -1;
1794 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001795
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001796 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001797 return -1;
1798 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001799 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1800 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001801
Eli Bendersky865756a2012-03-09 13:38:15 +02001802 if (value == NULL) {
1803 /* Delete slice */
1804 size_t cur;
1805 Py_ssize_t i;
1806
1807 if (slicelen <= 0)
1808 return 0;
1809
1810 /* Since we're deleting, the direction of the range doesn't matter,
1811 * so for simplicity make it always ascending.
1812 */
1813 if (step < 0) {
1814 stop = start + 1;
1815 start = stop + step * (slicelen - 1) - 1;
1816 step = -step;
1817 }
1818
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001819 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001820
1821 /* recycle is a list that will contain all the children
1822 * scheduled for removal.
1823 */
1824 if (!(recycle = PyList_New(slicelen))) {
1825 PyErr_NoMemory();
1826 return -1;
1827 }
1828
1829 /* This loop walks over all the children that have to be deleted,
1830 * with cur pointing at them. num_moved is the amount of children
1831 * until the next deleted child that have to be "shifted down" to
1832 * occupy the deleted's places.
1833 * Note that in the ith iteration, shifting is done i+i places down
1834 * because i children were already removed.
1835 */
1836 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1837 /* Compute how many children have to be moved, clipping at the
1838 * list end.
1839 */
1840 Py_ssize_t num_moved = step - 1;
1841 if (cur + step >= (size_t)self->extra->length) {
1842 num_moved = self->extra->length - cur - 1;
1843 }
1844
1845 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1846
1847 memmove(
1848 self->extra->children + cur - i,
1849 self->extra->children + cur + 1,
1850 num_moved * sizeof(PyObject *));
1851 }
1852
1853 /* Leftover "tail" after the last removed child */
1854 cur = start + (size_t)slicelen * step;
1855 if (cur < (size_t)self->extra->length) {
1856 memmove(
1857 self->extra->children + cur - slicelen,
1858 self->extra->children + cur,
1859 (self->extra->length - cur) * sizeof(PyObject *));
1860 }
1861
1862 self->extra->length -= slicelen;
1863
1864 /* Discard the recycle list with all the deleted sub-elements */
1865 Py_XDECREF(recycle);
1866 return 0;
1867 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001868
1869 /* A new slice is actually being assigned */
1870 seq = PySequence_Fast(value, "");
1871 if (!seq) {
1872 PyErr_Format(
1873 PyExc_TypeError,
1874 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1875 );
1876 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001877 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001878 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001879
1880 if (step != 1 && newlen != slicelen)
1881 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001882 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001883 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001884 "attempt to assign sequence of size %zd "
1885 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001886 newlen, slicelen
1887 );
1888 return -1;
1889 }
1890
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001891 /* Resize before creating the recycle bin, to prevent refleaks. */
1892 if (newlen > slicelen) {
1893 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001894 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001895 return -1;
1896 }
1897 }
1898
1899 if (slicelen > 0) {
1900 /* to avoid recursive calls to this method (via decref), move
1901 old items to the recycle bin here, and get rid of them when
1902 we're done modifying the element */
1903 recycle = PyList_New(slicelen);
1904 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001905 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001906 return -1;
1907 }
1908 for (cur = start, i = 0; i < slicelen;
1909 cur += step, i++)
1910 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1911 }
1912
1913 if (newlen < slicelen) {
1914 /* delete slice */
1915 for (i = stop; i < self->extra->length; i++)
1916 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1917 } else if (newlen > slicelen) {
1918 /* insert slice */
1919 for (i = self->extra->length-1; i >= stop; i--)
1920 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1921 }
1922
1923 /* replace the slice */
1924 for (cur = start, i = 0; i < newlen;
1925 cur += step, i++) {
1926 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1927 Py_INCREF(element);
1928 self->extra->children[cur] = element;
1929 }
1930
1931 self->extra->length += newlen - slicelen;
1932
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001933 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001934
1935 /* discard the recycle bin, and everything in it */
1936 Py_XDECREF(recycle);
1937
1938 return 0;
1939 }
1940 else {
1941 PyErr_SetString(PyExc_TypeError,
1942 "element indices must be integers");
1943 return -1;
1944 }
1945}
1946
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001947static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001948element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001949{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001950 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001951 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001952 return res;
1953}
1954
Serhiy Storchakadde08152015-11-25 15:28:13 +02001955static PyObject*
1956element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001957{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001958 PyObject *res = element_get_text(self);
1959 Py_XINCREF(res);
1960 return res;
1961}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001962
Serhiy Storchakadde08152015-11-25 15:28:13 +02001963static PyObject*
1964element_tail_getter(ElementObject *self, void *closure)
1965{
1966 PyObject *res = element_get_tail(self);
1967 Py_XINCREF(res);
1968 return res;
1969}
1970
1971static PyObject*
1972element_attrib_getter(ElementObject *self, void *closure)
1973{
1974 PyObject *res;
1975 if (!self->extra) {
1976 if (create_extra(self, NULL) < 0)
1977 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001978 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001979 res = element_get_attrib(self);
1980 Py_XINCREF(res);
1981 return res;
1982}
Victor Stinner4d463432013-07-11 23:05:03 +02001983
/* macro for setter validation: a NULL value means "del obj.attr", which the
 * element setters refuse with AttributeError.  Returns -1 from the enclosing
 * function in that case.  Wrapped in do { } while (0) so that the macro
 * behaves as a single statement (safe inside an unbraced if/else) and
 * requires the trailing semicolon at the call site.
 */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
1992
Serhiy Storchakadde08152015-11-25 15:28:13 +02001993static int
1994element_tag_setter(ElementObject *self, PyObject *value, void *closure)
1995{
1996 _VALIDATE_ATTR_VALUE(value);
1997 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03001998 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02001999 return 0;
2000}
2001
2002static int
2003element_text_setter(ElementObject *self, PyObject *value, void *closure)
2004{
2005 _VALIDATE_ATTR_VALUE(value);
2006 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002007 _set_joined_ptr(&self->text, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002008 return 0;
2009}
2010
2011static int
2012element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2013{
2014 _VALIDATE_ATTR_VALUE(value);
2015 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002016 _set_joined_ptr(&self->tail, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002017 return 0;
2018}
2019
2020static int
2021element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2022{
2023 _VALIDATE_ATTR_VALUE(value);
2024 if (!self->extra) {
2025 if (create_extra(self, NULL) < 0)
2026 return -1;
2027 }
2028 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002029 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002030 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002031}
2032
/* Sequence protocol table for Element: length, indexed read and indexed
 * assignment/deletion of children.  Slot order follows PySequenceMethods;
 * the two unnamed zero entries are the retired slice slots.
 */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length,       /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem,                /* sq_item */
    0,                              /* was_sq_slice (unused) */
    element_setitem,                /* sq_ass_item */
    0,                              /* was_sq_ass_slice (unused) */
};
2042
Eli Bendersky64d11e62012-06-15 07:42:50 +03002043/******************************* Element iterator ****************************/
2044
2045/* ElementIterObject represents the iteration state over an XML element in
2046 * pre-order traversal. To keep track of which sub-element should be returned
2047 * next, a stack of parents is maintained. This is a standard stack-based
2048 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
2050 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002051 * the current one is exhausted, and the next child to examine in that parent.
2052 */
/* One entry of the iterator's parent stack. */
typedef struct ParentLocator_t {
    ElementObject *parent;      /* element whose children are being walked;
                                   the stack entry holds a strong reference */
    Py_ssize_t child_index;     /* index of the next child of `parent` to
                                   examine */
} ParentLocator;
2057
/* State of an element iterator (shared by element and text iteration). */
typedef struct {
    PyObject_HEAD
    ParentLocator *parent_stack;    /* array of stack entries (PyMem-allocated) */
    Py_ssize_t parent_stack_used;   /* number of entries currently in use */
    Py_ssize_t parent_stack_size;   /* number of entries allocated */
    ElementObject *root_element;    /* element iteration starts from; reset to
                                       NULL once it has been consumed */
    PyObject *sought_tag;           /* tag to match; Py_None matches every
                                       element */
    int gettext;                    /* non-zero: yield text/tail strings
                                       instead of elements */
} ElementIterObject;
2067
2068
2069static void
2070elementiter_dealloc(ElementIterObject *it)
2071{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002072 Py_ssize_t i = it->parent_stack_used;
2073 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002074 /* bpo-31095: UnTrack is needed before calling any callbacks */
2075 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002076 while (i--)
2077 Py_XDECREF(it->parent_stack[i].parent);
2078 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002079
2080 Py_XDECREF(it->sought_tag);
2081 Py_XDECREF(it->root_element);
2082
Eli Bendersky64d11e62012-06-15 07:42:50 +03002083 PyObject_GC_Del(it);
2084}
2085
2086static int
2087elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2088{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002089 Py_ssize_t i = it->parent_stack_used;
2090 while (i--)
2091 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002092
2093 Py_VISIT(it->root_element);
2094 Py_VISIT(it->sought_tag);
2095 return 0;
2096}
2097
2098/* Helper function for elementiter_next. Add a new parent to the parent stack.
2099 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002100static int
2101parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002102{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002103 ParentLocator *item;
2104
2105 if (it->parent_stack_used >= it->parent_stack_size) {
2106 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2107 ParentLocator *parent_stack = it->parent_stack;
2108 PyMem_Resize(parent_stack, ParentLocator, new_size);
2109 if (parent_stack == NULL)
2110 return -1;
2111 it->parent_stack = parent_stack;
2112 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002113 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002114 item = it->parent_stack + it->parent_stack_used++;
2115 Py_INCREF(parent);
2116 item->parent = parent;
2117 item->child_index = 0;
2118 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002119}
2120
/* tp_iternext for the element iterator.
 *
 * Returns a new reference to the next element (or, when it->gettext is set,
 * to the next non-empty text/tail object) in pre-order, or NULL with
 * StopIteration set once the traversal is exhausted.  NULL is also returned
 * when an error occurred along the way.
 */
static PyObject *
elementiter_next(ElementIterObject *it)
{
    /* Sub-element iterator.
     *
     * A short note on gettext: this function serves both the iter() and
     * itertext() methods to avoid code duplication. However, there are a few
     * small differences in the way these iterations work. Namely:
     *   - itertext() only yields text from nodes that have it, and continues
     *     iterating when a node doesn't have text (so it doesn't return any
     *     node like iter())
     *   - itertext() also has to handle tail, after finishing with all the
     *     children of a node.
     */
    int rc;
    ElementObject *elem;
    PyObject *text;

    while (1) {
        /* Handle the case reached in the beginning and end of iteration, where
         * the parent stack is empty. If root_element is NULL and we're here, the
         * iterator is exhausted.
         */
        if (!it->parent_stack_used) {
            if (!it->root_element) {
                PyErr_SetNone(PyExc_StopIteration);
                return NULL;
            }

            elem = it->root_element;  /* steals a reference */
            it->root_element = NULL;
        }
        else {
            /* See if there are children left to traverse in the current parent. If
             * yes, visit the next child. If not, pop the stack and try again.
             */
            ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
            Py_ssize_t child_index = item->child_index;
            ElementObjectExtra *extra;
            elem = item->parent;  /* borrowed from the stack entry for now */
            extra = elem->extra;
            if (!extra || child_index >= extra->length) {
                /* Popping the entry transfers its reference to elem; it is
                 * released below, either directly or at the gettext label.
                 */
                it->parent_stack_used--;
                /* Note that extra condition on it->parent_stack_used here;
                 * this is because itertext() is supposed to only return *inner*
                 * text, not text following the element it began iteration with.
                 */
                if (it->gettext && it->parent_stack_used) {
                    text = element_get_tail(elem);
                    goto gettext;
                }
                Py_DECREF(elem);
                continue;
            }

            /* Children are not guaranteed to be Element instances; refuse to
             * descend into anything else.
             */
            if (!PyObject_TypeCheck(extra->children[child_index], &Element_Type)) {
                PyErr_Format(PyExc_AttributeError,
                             "'%.100s' object has no attribute 'iter'",
                             Py_TYPE(extra->children[child_index])->tp_name);
                return NULL;
            }
            elem = (ElementObject *)extra->children[child_index];
            item->child_index++;
            Py_INCREF(elem);
        }

        /* At this point elem is an owned reference; the stack entry created
         * here takes an additional one of its own.
         */
        if (parent_stack_push_new(it, elem) < 0) {
            Py_DECREF(elem);
            PyErr_NoMemory();
            return NULL;
        }
        if (it->gettext) {
            text = element_get_text(elem);
            goto gettext;
        }

        /* A sought tag of None matches every element. */
        if (it->sought_tag == Py_None)
            return (PyObject *)elem;

        rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
        if (rc > 0)
            return (PyObject *)elem;

        /* No match (rc == 0) or comparison error (rc < 0). */
        Py_DECREF(elem);
        if (rc < 0)
            return NULL;
        continue;

gettext:
        /* Reached with an owned elem and the text/tail fetched from it.
         * elem is always released here; text is yielded only when it is
         * neither None nor falsy.
         */
        if (!text) {
            Py_DECREF(elem);
            return NULL;
        }
        if (text == Py_None) {
            Py_DECREF(elem);
        }
        else {
            /* Take our own reference before releasing elem. */
            Py_INCREF(text);
            Py_DECREF(elem);
            rc = PyObject_IsTrue(text);
            if (rc > 0)
                return text;
            Py_DECREF(text);
            if (rc < 0)
                return NULL;
        }
    }

    return NULL;
}
2231
2232
/* Type object for the iterator created by create_elementiter().  It is a
 * GC-enabled type that only implements the iterator protocol (tp_iter /
 * tp_iternext) plus deallocation and traversal; it has no tp_new.
 */
static PyTypeObject ElementIter_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* Using the module's name since the pure-Python implementation does not
       have such a type. */
    "_elementtree._element_iterator",           /* tp_name */
    sizeof(ElementIterObject),                  /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)elementiter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)elementiter_traverse,         /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)elementiter_next,             /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    0,                                          /* tp_new */
};
2276
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002277#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002278
2279static PyObject *
2280create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2281{
2282 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002283
2284 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2285 if (!it)
2286 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002287
Victor Stinner4d463432013-07-11 23:05:03 +02002288 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002289 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002290 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002291 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002292 it->root_element = self;
2293
Eli Bendersky64d11e62012-06-15 07:42:50 +03002294 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002295
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002296 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002297 if (it->parent_stack == NULL) {
2298 Py_DECREF(it);
2299 PyErr_NoMemory();
2300 return NULL;
2301 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002302 it->parent_stack_used = 0;
2303 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002304
Eli Bendersky64d11e62012-06-15 07:42:50 +03002305 return (PyObject *)it;
2306}
2307
2308
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002309/* ==================================================================== */
2310/* the tree builder type */
2311
/* Instance layout of the C TreeBuilder. */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    PyObject *element_factory; /* element_factory argument given to
                                  __init__, or NULL when none was given */

    /* element tracing */
    PyObject *events_append; /* the append method of the list of events, or NULL */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
} TreeBuilderObject;
2334
/* True when op's type is exactly TreeBuilder_Type (subclasses excluded). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002336
2337/* -------------------------------------------------------------------- */
2338/* constructor and destructor */
2339
Eli Bendersky58d548d2012-05-29 15:45:16 +03002340static PyObject *
2341treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002342{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002343 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2344 if (t != NULL) {
2345 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002346
Eli Bendersky58d548d2012-05-29 15:45:16 +03002347 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002348 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002349 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002350 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002351
Eli Bendersky58d548d2012-05-29 15:45:16 +03002352 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002353 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002354 t->stack = PyList_New(20);
2355 if (!t->stack) {
2356 Py_DECREF(t->this);
2357 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002358 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002359 return NULL;
2360 }
2361 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002362
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002363 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002364 t->start_event_obj = t->end_event_obj = NULL;
2365 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2366 }
2367 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002368}
2369
Serhiy Storchakacb985562015-05-04 15:32:48 +03002370/*[clinic input]
2371_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002372
Serhiy Storchakacb985562015-05-04 15:32:48 +03002373 element_factory: object = NULL
2374
2375[clinic start generated code]*/
2376
2377static int
2378_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2379 PyObject *element_factory)
2380/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2381{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002382 if (element_factory) {
2383 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002384 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002385 }
2386
Eli Bendersky58d548d2012-05-29 15:45:16 +03002387 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002388}
2389
Eli Bendersky48d358b2012-05-30 17:57:50 +03002390static int
2391treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2392{
2393 Py_VISIT(self->root);
2394 Py_VISIT(self->this);
2395 Py_VISIT(self->last);
2396 Py_VISIT(self->data);
2397 Py_VISIT(self->stack);
2398 Py_VISIT(self->element_factory);
2399 return 0;
2400}
2401
/* tp_clear for TreeBuilder (also used by treebuilder_dealloc): release every
 * object reference owned by the builder.  Py_CLEAR resets each member to
 * NULL before dropping the reference, so calling this more than once is
 * harmless.  Always returns 0.
 */
static int
treebuilder_gc_clear(TreeBuilderObject *self)
{
    /* event-tracing members */
    Py_CLEAR(self->end_ns_event_obj);
    Py_CLEAR(self->start_ns_event_obj);
    Py_CLEAR(self->end_event_obj);
    Py_CLEAR(self->start_event_obj);
    Py_CLEAR(self->events_append);
    /* tree-building state */
    Py_CLEAR(self->stack);
    Py_CLEAR(self->data);
    Py_CLEAR(self->last);
    Py_CLEAR(self->this);
    Py_CLEAR(self->element_factory);
    Py_CLEAR(self->root);
    return 0;
}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002418
Eli Bendersky48d358b2012-05-30 17:57:50 +03002419static void
2420treebuilder_dealloc(TreeBuilderObject *self)
2421{
2422 PyObject_GC_UnTrack(self);
2423 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002424 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002425}
2426
2427/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002428/* helpers for handling of arbitrary element-like objects */
2429
2430static int
Serhiy Storchaka576def02017-03-30 09:47:31 +03002431treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
Antoine Pitrouee329312012-10-04 19:53:29 +02002432 PyObject **dest, _Py_Identifier *name)
2433{
2434 if (Element_CheckExact(element)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002435 PyObject *tmp = JOIN_OBJ(*dest);
2436 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2437 *data = NULL;
2438 Py_DECREF(tmp);
Antoine Pitrouee329312012-10-04 19:53:29 +02002439 return 0;
2440 }
2441 else {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002442 PyObject *joined = list_join(*data);
Antoine Pitrouee329312012-10-04 19:53:29 +02002443 int r;
2444 if (joined == NULL)
2445 return -1;
2446 r = _PyObject_SetAttrId(element, name, joined);
2447 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002448 if (r < 0)
2449 return -1;
2450 Py_CLEAR(*data);
2451 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002452 }
2453}
2454
Serhiy Storchaka576def02017-03-30 09:47:31 +03002455LOCAL(int)
2456treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002457{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002458 PyObject *element = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002459
Serhiy Storchaka576def02017-03-30 09:47:31 +03002460 if (!self->data) {
2461 return 0;
2462 }
2463
2464 if (self->this == element) {
2465 _Py_IDENTIFIER(text);
2466 return treebuilder_set_element_text_or_tail(
2467 element, &self->data,
2468 &((ElementObject *) element)->text, &PyId_text);
2469 }
2470 else {
2471 _Py_IDENTIFIER(tail);
2472 return treebuilder_set_element_text_or_tail(
2473 element, &self->data,
2474 &((ElementObject *) element)->tail, &PyId_tail);
2475 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002476}
2477
2478static int
2479treebuilder_add_subelement(PyObject *element, PyObject *child)
2480{
2481 _Py_IDENTIFIER(append);
2482 if (Element_CheckExact(element)) {
2483 ElementObject *elem = (ElementObject *) element;
2484 return element_add_subelement(elem, child);
2485 }
2486 else {
2487 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002488 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002489 if (res == NULL)
2490 return -1;
2491 Py_DECREF(res);
2492 return 0;
2493 }
2494}
2495
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002496LOCAL(int)
2497treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2498 PyObject *node)
2499{
2500 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002501 PyObject *res;
2502 PyObject *event = PyTuple_Pack(2, action, node);
2503 if (event == NULL)
2504 return -1;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002505 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002506 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002507 if (res == NULL)
2508 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002509 Py_DECREF(res);
2510 }
2511 return 0;
2512}
2513
Antoine Pitrouee329312012-10-04 19:53:29 +02002514/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002515/* handlers */
2516
2517LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002518treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2519 PyObject* attrib)
2520{
2521 PyObject* node;
2522 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002523 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002524
Serhiy Storchaka576def02017-03-30 09:47:31 +03002525 if (treebuilder_flush_data(self) < 0) {
2526 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002527 }
2528
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002529 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002530 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002531 } else if (attrib == Py_None) {
2532 attrib = PyDict_New();
2533 if (!attrib)
2534 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002535 node = PyObject_CallFunctionObjArgs(self->element_factory,
2536 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002537 Py_DECREF(attrib);
2538 }
2539 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002540 node = PyObject_CallFunctionObjArgs(self->element_factory,
2541 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002542 }
2543 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002544 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002545 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002546
Antoine Pitrouee329312012-10-04 19:53:29 +02002547 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002548
2549 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002550 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002551 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002552 } else {
2553 if (self->root) {
2554 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002555 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002556 "multiple elements on top level"
2557 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002558 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002559 }
2560 Py_INCREF(node);
2561 self->root = node;
2562 }
2563
2564 if (self->index < PyList_GET_SIZE(self->stack)) {
2565 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002566 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002567 Py_INCREF(this);
2568 } else {
2569 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002570 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002571 }
2572 self->index++;
2573
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002574 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002575 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002576 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002577 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002578
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002579 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2580 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002581
2582 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002583
2584 error:
2585 Py_DECREF(node);
2586 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002587}
2588
2589LOCAL(PyObject*)
2590treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2591{
2592 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002593 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002594 /* ignore calls to data before the first call to start */
2595 Py_RETURN_NONE;
2596 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002597 /* store the first item as is */
2598 Py_INCREF(data); self->data = data;
2599 } else {
2600 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002601 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2602 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002603 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002604 /* expat often generates single character data sections; handle
2605 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002606 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2607 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002608 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002609 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002610 } else if (PyList_CheckExact(self->data)) {
2611 if (PyList_Append(self->data, data) < 0)
2612 return NULL;
2613 } else {
2614 PyObject* list = PyList_New(2);
2615 if (!list)
2616 return NULL;
2617 PyList_SET_ITEM(list, 0, self->data);
2618 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2619 self->data = list;
2620 }
2621 }
2622
2623 Py_RETURN_NONE;
2624}
2625
2626LOCAL(PyObject*)
2627treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2628{
2629 PyObject* item;
2630
Serhiy Storchaka576def02017-03-30 09:47:31 +03002631 if (treebuilder_flush_data(self) < 0) {
2632 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002633 }
2634
2635 if (self->index == 0) {
2636 PyErr_SetString(
2637 PyExc_IndexError,
2638 "pop from empty stack"
2639 );
2640 return NULL;
2641 }
2642
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002643 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002644 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002645 self->index--;
2646 self->this = PyList_GET_ITEM(self->stack, self->index);
2647 Py_INCREF(self->this);
2648 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002649
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002650 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2651 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002652
2653 Py_INCREF(self->last);
2654 return (PyObject*) self->last;
2655}
2656
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002657/* -------------------------------------------------------------------- */
2658/* methods (in alphabetical order) */
2659
Serhiy Storchakacb985562015-05-04 15:32:48 +03002660/*[clinic input]
2661_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002662
Serhiy Storchakacb985562015-05-04 15:32:48 +03002663 data: object
2664 /
2665
2666[clinic start generated code]*/
2667
2668static PyObject *
2669_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2670/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2671{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002672 return treebuilder_handle_data(self, data);
2673}
2674
Serhiy Storchakacb985562015-05-04 15:32:48 +03002675/*[clinic input]
2676_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002677
Serhiy Storchakacb985562015-05-04 15:32:48 +03002678 tag: object
2679 /
2680
2681[clinic start generated code]*/
2682
2683static PyObject *
2684_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2685/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2686{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002687 return treebuilder_handle_end(self, tag);
2688}
2689
2690LOCAL(PyObject*)
2691treebuilder_done(TreeBuilderObject* self)
2692{
2693 PyObject* res;
2694
2695 /* FIXME: check stack size? */
2696
2697 if (self->root)
2698 res = self->root;
2699 else
2700 res = Py_None;
2701
2702 Py_INCREF(res);
2703 return res;
2704}
2705
Serhiy Storchakacb985562015-05-04 15:32:48 +03002706/*[clinic input]
2707_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002708
Serhiy Storchakacb985562015-05-04 15:32:48 +03002709[clinic start generated code]*/
2710
2711static PyObject *
2712_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2713/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2714{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002715 return treebuilder_done(self);
2716}
2717
Serhiy Storchakacb985562015-05-04 15:32:48 +03002718/*[clinic input]
2719_elementtree.TreeBuilder.start
2720
2721 tag: object
2722 attrs: object = None
2723 /
2724
2725[clinic start generated code]*/
2726
2727static PyObject *
2728_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2729 PyObject *attrs)
2730/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002731{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002732 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002733}
2734
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002735/* ==================================================================== */
2736/* the expat interface */
2737
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002738#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002739#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002740
2741/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2742 * cached globally without being in per-module state.
2743 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002744static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002745#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002746
Eli Bendersky52467b12012-06-01 07:13:08 +03002747static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2748 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2749
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002750typedef struct {
2751 PyObject_HEAD
2752
2753 XML_Parser parser;
2754
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002755 PyObject *target;
2756 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002757
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002758 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002759
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002760 PyObject *handle_start;
2761 PyObject *handle_data;
2762 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002763
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002764 PyObject *handle_comment;
2765 PyObject *handle_pi;
2766 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002767
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002768 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002769
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002770} XMLParserObject;
2771
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002772/* helpers */
2773
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002774LOCAL(PyObject*)
2775makeuniversal(XMLParserObject* self, const char* string)
2776{
2777 /* convert a UTF-8 tag/attribute name from the expat parser
2778 to a universal name string */
2779
Antoine Pitrouc1948842012-10-01 23:40:37 +02002780 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002781 PyObject* key;
2782 PyObject* value;
2783
2784 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002785 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002786 if (!key)
2787 return NULL;
2788
2789 value = PyDict_GetItem(self->names, key);
2790
2791 if (value) {
2792 Py_INCREF(value);
2793 } else {
2794 /* new name. convert to universal name, and decode as
2795 necessary */
2796
2797 PyObject* tag;
2798 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002799 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002800
2801 /* look for namespace separator */
2802 for (i = 0; i < size; i++)
2803 if (string[i] == '}')
2804 break;
2805 if (i != size) {
2806 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002807 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002808 if (tag == NULL) {
2809 Py_DECREF(key);
2810 return NULL;
2811 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002812 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002813 p[0] = '{';
2814 memcpy(p+1, string, size);
2815 size++;
2816 } else {
2817 /* plain name; use key as tag */
2818 Py_INCREF(key);
2819 tag = key;
2820 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002821
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002822 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002823 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002824 value = PyUnicode_DecodeUTF8(p, size, "strict");
2825 Py_DECREF(tag);
2826 if (!value) {
2827 Py_DECREF(key);
2828 return NULL;
2829 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002830
2831 /* add to names dictionary */
2832 if (PyDict_SetItem(self->names, key, value) < 0) {
2833 Py_DECREF(key);
2834 Py_DECREF(value);
2835 return NULL;
2836 }
2837 }
2838
2839 Py_DECREF(key);
2840 return value;
2841}
2842
Eli Bendersky5b77d812012-03-16 08:20:05 +02002843/* Set the ParseError exception with the given parameters.
2844 * If message is not NULL, it's used as the error string. Otherwise, the
2845 * message string is the default for the given error_code.
2846*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002847static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002848expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2849 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002850{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002851 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002852 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002853
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002854 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002855 message ? message : EXPAT(ErrorString)(error_code),
2856 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002857 if (errmsg == NULL)
2858 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002859
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002860 error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002861 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002862 if (!error)
2863 return;
2864
Eli Bendersky5b77d812012-03-16 08:20:05 +02002865 /* Add code and position attributes */
2866 code = PyLong_FromLong((long)error_code);
2867 if (!code) {
2868 Py_DECREF(error);
2869 return;
2870 }
2871 if (PyObject_SetAttrString(error, "code", code) == -1) {
2872 Py_DECREF(error);
2873 Py_DECREF(code);
2874 return;
2875 }
2876 Py_DECREF(code);
2877
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002878 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002879 if (!position) {
2880 Py_DECREF(error);
2881 return;
2882 }
2883 if (PyObject_SetAttrString(error, "position", position) == -1) {
2884 Py_DECREF(error);
2885 Py_DECREF(position);
2886 return;
2887 }
2888 Py_DECREF(position);
2889
Eli Bendersky532d03e2013-08-10 08:00:39 -07002890 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002891 Py_DECREF(error);
2892}
2893
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002894/* -------------------------------------------------------------------- */
2895/* handlers */
2896
2897static void
2898expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2899 int data_len)
2900{
2901 PyObject* key;
2902 PyObject* value;
2903 PyObject* res;
2904
2905 if (data_len < 2 || data_in[0] != '&')
2906 return;
2907
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002908 if (PyErr_Occurred())
2909 return;
2910
Neal Norwitz0269b912007-08-08 06:56:02 +00002911 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002912 if (!key)
2913 return;
2914
2915 value = PyDict_GetItem(self->entity, key);
2916
2917 if (value) {
2918 if (TreeBuilder_CheckExact(self->target))
2919 res = treebuilder_handle_data(
2920 (TreeBuilderObject*) self->target, value
2921 );
2922 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002923 res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002924 else
2925 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002926 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002927 } else if (!PyErr_Occurred()) {
2928 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002929 char message[128] = "undefined entity ";
2930 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002931 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002932 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002933 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002934 EXPAT(GetErrorColumnNumber)(self->parser),
2935 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002936 );
2937 }
2938
2939 Py_DECREF(key);
2940}
2941
2942static void
2943expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2944 const XML_Char **attrib_in)
2945{
2946 PyObject* res;
2947 PyObject* tag;
2948 PyObject* attrib;
2949 int ok;
2950
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002951 if (PyErr_Occurred())
2952 return;
2953
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002954 /* tag name */
2955 tag = makeuniversal(self, tag_in);
2956 if (!tag)
2957 return; /* parser will look for errors */
2958
2959 /* attributes */
2960 if (attrib_in[0]) {
2961 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002962 if (!attrib) {
2963 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002964 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002965 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002966 while (attrib_in[0] && attrib_in[1]) {
2967 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002968 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002969 if (!key || !value) {
2970 Py_XDECREF(value);
2971 Py_XDECREF(key);
2972 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002973 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002974 return;
2975 }
2976 ok = PyDict_SetItem(attrib, key, value);
2977 Py_DECREF(value);
2978 Py_DECREF(key);
2979 if (ok < 0) {
2980 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002981 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002982 return;
2983 }
2984 attrib_in += 2;
2985 }
2986 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002987 Py_INCREF(Py_None);
2988 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002989 }
2990
2991 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002992 /* shortcut */
2993 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2994 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002995 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002996 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002997 if (attrib == Py_None) {
2998 Py_DECREF(attrib);
2999 attrib = PyDict_New();
3000 if (!attrib) {
3001 Py_DECREF(tag);
3002 return;
3003 }
3004 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003005 res = PyObject_CallFunctionObjArgs(self->handle_start,
3006 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003007 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003008 res = NULL;
3009
3010 Py_DECREF(tag);
3011 Py_DECREF(attrib);
3012
3013 Py_XDECREF(res);
3014}
3015
3016static void
3017expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3018 int data_len)
3019{
3020 PyObject* data;
3021 PyObject* res;
3022
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003023 if (PyErr_Occurred())
3024 return;
3025
Neal Norwitz0269b912007-08-08 06:56:02 +00003026 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003027 if (!data)
3028 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003029
3030 if (TreeBuilder_CheckExact(self->target))
3031 /* shortcut */
3032 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3033 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003034 res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003035 else
3036 res = NULL;
3037
3038 Py_DECREF(data);
3039
3040 Py_XDECREF(res);
3041}
3042
3043static void
3044expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3045{
3046 PyObject* tag;
3047 PyObject* res = NULL;
3048
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003049 if (PyErr_Occurred())
3050 return;
3051
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003052 if (TreeBuilder_CheckExact(self->target))
3053 /* shortcut */
3054 /* the standard tree builder doesn't look at the end tag */
3055 res = treebuilder_handle_end(
3056 (TreeBuilderObject*) self->target, Py_None
3057 );
3058 else if (self->handle_end) {
3059 tag = makeuniversal(self, tag_in);
3060 if (tag) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003061 res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003062 Py_DECREF(tag);
3063 }
3064 }
3065
3066 Py_XDECREF(res);
3067}
3068
3069static void
3070expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3071 const XML_Char *uri)
3072{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003073 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3074 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003075
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003076 if (PyErr_Occurred())
3077 return;
3078
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003079 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003080 return;
3081
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003082 if (!uri)
3083 uri = "";
3084 if (!prefix)
3085 prefix = "";
3086
3087 parcel = Py_BuildValue("ss", prefix, uri);
3088 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003089 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003090 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3091 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003092}
3093
3094static void
3095expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3096{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003097 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3098
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003099 if (PyErr_Occurred())
3100 return;
3101
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003102 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003103 return;
3104
3105 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003106}
3107
3108static void
3109expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3110{
3111 PyObject* comment;
3112 PyObject* res;
3113
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003114 if (PyErr_Occurred())
3115 return;
3116
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003117 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003118 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003119 if (comment) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003120 res = PyObject_CallFunctionObjArgs(self->handle_comment,
3121 comment, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003122 Py_XDECREF(res);
3123 Py_DECREF(comment);
3124 }
3125 }
3126}
3127
Eli Bendersky45839902013-01-13 05:14:47 -08003128static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003129expat_start_doctype_handler(XMLParserObject *self,
3130 const XML_Char *doctype_name,
3131 const XML_Char *sysid,
3132 const XML_Char *pubid,
3133 int has_internal_subset)
3134{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003135 _Py_IDENTIFIER(doctype);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003136 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003137 PyObject *res;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003138
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003139 if (PyErr_Occurred())
3140 return;
3141
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003142 doctype_name_obj = makeuniversal(self, doctype_name);
3143 if (!doctype_name_obj)
3144 return;
3145
3146 if (sysid) {
3147 sysid_obj = makeuniversal(self, sysid);
3148 if (!sysid_obj) {
3149 Py_DECREF(doctype_name_obj);
3150 return;
3151 }
3152 } else {
3153 Py_INCREF(Py_None);
3154 sysid_obj = Py_None;
3155 }
3156
3157 if (pubid) {
3158 pubid_obj = makeuniversal(self, pubid);
3159 if (!pubid_obj) {
3160 Py_DECREF(doctype_name_obj);
3161 Py_DECREF(sysid_obj);
3162 return;
3163 }
3164 } else {
3165 Py_INCREF(Py_None);
3166 pubid_obj = Py_None;
3167 }
3168
3169 /* If the target has a handler for doctype, call it. */
3170 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003171 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3172 doctype_name_obj, pubid_obj,
3173 sysid_obj, NULL);
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003174 Py_XDECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003175 }
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003176 else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3177 (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3178 "The doctype() method of XMLParser is ignored. "
3179 "Define doctype() method on the TreeBuilder target.",
3180 1);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003181 }
3182
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003183 Py_DECREF(doctype_name_obj);
3184 Py_DECREF(pubid_obj);
3185 Py_DECREF(sysid_obj);
3186}
3187
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003188static void
3189expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3190 const XML_Char* data_in)
3191{
3192 PyObject* target;
3193 PyObject* data;
3194 PyObject* res;
3195
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003196 if (PyErr_Occurred())
3197 return;
3198
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003199 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003200 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3201 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003202 if (target && data) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003203 res = PyObject_CallFunctionObjArgs(self->handle_pi,
3204 target, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003205 Py_XDECREF(res);
3206 Py_DECREF(data);
3207 Py_DECREF(target);
3208 } else {
3209 Py_XDECREF(data);
3210 Py_XDECREF(target);
3211 }
3212 }
3213}
3214
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003215/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003216
Eli Bendersky52467b12012-06-01 07:13:08 +03003217static PyObject *
3218xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003219{
Eli Bendersky52467b12012-06-01 07:13:08 +03003220 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3221 if (self) {
3222 self->parser = NULL;
3223 self->target = self->entity = self->names = NULL;
3224 self->handle_start = self->handle_data = self->handle_end = NULL;
3225 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003226 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003227 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003228 return (PyObject *)self;
3229}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003230
scoderc8d8e152017-09-14 22:00:03 +02003231static int
3232ignore_attribute_error(PyObject *value)
3233{
3234 if (value == NULL) {
3235 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3236 return -1;
3237 }
3238 PyErr_Clear();
3239 }
3240 return 0;
3241}
3242
Serhiy Storchakacb985562015-05-04 15:32:48 +03003243/*[clinic input]
3244_elementtree.XMLParser.__init__
3245
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003246 *
Serhiy Storchakacb985562015-05-04 15:32:48 +03003247 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003248 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003249
3250[clinic start generated code]*/
3251
Eli Bendersky52467b12012-06-01 07:13:08 +03003252static int
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003253_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3254 const char *encoding)
3255/*[clinic end generated code: output=3ae45ec6cdf344e4 input=96288fcba916cfce]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003256{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003257 self->entity = PyDict_New();
3258 if (!self->entity)
3259 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003260
Serhiy Storchakacb985562015-05-04 15:32:48 +03003261 self->names = PyDict_New();
3262 if (!self->names) {
3263 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003264 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003265 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003266
Serhiy Storchakacb985562015-05-04 15:32:48 +03003267 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3268 if (!self->parser) {
3269 Py_CLEAR(self->entity);
3270 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003271 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003272 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003273 }
3274
Eli Bendersky52467b12012-06-01 07:13:08 +03003275 if (target) {
3276 Py_INCREF(target);
3277 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003278 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003279 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003280 Py_CLEAR(self->entity);
3281 Py_CLEAR(self->names);
3282 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003283 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003284 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003285 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003286 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003287
Serhiy Storchakacb985562015-05-04 15:32:48 +03003288 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003289 if (ignore_attribute_error(self->handle_start)) {
3290 return -1;
3291 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003292 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003293 if (ignore_attribute_error(self->handle_data)) {
3294 return -1;
3295 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003296 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003297 if (ignore_attribute_error(self->handle_end)) {
3298 return -1;
3299 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003300 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003301 if (ignore_attribute_error(self->handle_comment)) {
3302 return -1;
3303 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003304 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003305 if (ignore_attribute_error(self->handle_pi)) {
3306 return -1;
3307 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003308 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003309 if (ignore_attribute_error(self->handle_close)) {
3310 return -1;
3311 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003312 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003313 if (ignore_attribute_error(self->handle_doctype)) {
3314 return -1;
3315 }
Eli Bendersky45839902013-01-13 05:14:47 -08003316
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003317 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003318 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003319 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003320 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003321 (XML_StartElementHandler) expat_start_handler,
3322 (XML_EndElementHandler) expat_end_handler
3323 );
3324 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003325 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003326 (XML_DefaultHandler) expat_default_handler
3327 );
3328 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003329 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003330 (XML_CharacterDataHandler) expat_data_handler
3331 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003332 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003333 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003334 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003335 (XML_CommentHandler) expat_comment_handler
3336 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003337 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003338 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003339 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003340 (XML_ProcessingInstructionHandler) expat_pi_handler
3341 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003342 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003343 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003344 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3345 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003346 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003347 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003348 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003349 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003350
Eli Bendersky52467b12012-06-01 07:13:08 +03003351 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003352}
3353
Eli Bendersky52467b12012-06-01 07:13:08 +03003354static int
3355xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3356{
3357 Py_VISIT(self->handle_close);
3358 Py_VISIT(self->handle_pi);
3359 Py_VISIT(self->handle_comment);
3360 Py_VISIT(self->handle_end);
3361 Py_VISIT(self->handle_data);
3362 Py_VISIT(self->handle_start);
3363
3364 Py_VISIT(self->target);
3365 Py_VISIT(self->entity);
3366 Py_VISIT(self->names);
3367
3368 return 0;
3369}
3370
3371static int
3372xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003373{
Victor Stinnere727d412017-09-18 05:29:37 -07003374 if (self->parser != NULL) {
3375 XML_Parser parser = self->parser;
3376 self->parser = NULL;
3377 EXPAT(ParserFree)(parser);
3378 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003379
Antoine Pitrouc1948842012-10-01 23:40:37 +02003380 Py_CLEAR(self->handle_close);
3381 Py_CLEAR(self->handle_pi);
3382 Py_CLEAR(self->handle_comment);
3383 Py_CLEAR(self->handle_end);
3384 Py_CLEAR(self->handle_data);
3385 Py_CLEAR(self->handle_start);
3386 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003387
Antoine Pitrouc1948842012-10-01 23:40:37 +02003388 Py_CLEAR(self->target);
3389 Py_CLEAR(self->entity);
3390 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003391
Eli Bendersky52467b12012-06-01 07:13:08 +03003392 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003393}
3394
Eli Bendersky52467b12012-06-01 07:13:08 +03003395static void
3396xmlparser_dealloc(XMLParserObject* self)
3397{
3398 PyObject_GC_UnTrack(self);
3399 xmlparser_gc_clear(self);
3400 Py_TYPE(self)->tp_free((PyObject *)self);
3401}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003402
3403LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003404expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003405{
3406 int ok;
3407
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003408 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003409 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3410
3411 if (PyErr_Occurred())
3412 return NULL;
3413
3414 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003415 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003416 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003417 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003418 EXPAT(GetErrorColumnNumber)(self->parser),
3419 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003420 );
3421 return NULL;
3422 }
3423
3424 Py_RETURN_NONE;
3425}
3426
Serhiy Storchakacb985562015-05-04 15:32:48 +03003427/*[clinic input]
3428_elementtree.XMLParser.close
3429
3430[clinic start generated code]*/
3431
3432static PyObject *
3433_elementtree_XMLParser_close_impl(XMLParserObject *self)
3434/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003435{
3436 /* end feeding data to parser */
3437
3438 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003439 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003440 if (!res)
3441 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003442
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003443 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003444 Py_DECREF(res);
3445 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003446 }
3447 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003448 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003449 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003450 }
3451 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003452 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003453 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003454}
3455
Serhiy Storchakacb985562015-05-04 15:32:48 +03003456/*[clinic input]
3457_elementtree.XMLParser.feed
3458
3459 data: object
3460 /
3461
3462[clinic start generated code]*/
3463
3464static PyObject *
3465_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3466/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003467{
3468 /* feed data to parser */
3469
Serhiy Storchakacb985562015-05-04 15:32:48 +03003470 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003471 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003472 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3473 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003474 return NULL;
3475 if (data_len > INT_MAX) {
3476 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3477 return NULL;
3478 }
3479 /* Explicitly set UTF-8 encoding. Return code ignored. */
3480 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003481 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003482 }
3483 else {
3484 Py_buffer view;
3485 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003486 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003487 return NULL;
3488 if (view.len > INT_MAX) {
3489 PyBuffer_Release(&view);
3490 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3491 return NULL;
3492 }
3493 res = expat_parse(self, view.buf, (int)view.len, 0);
3494 PyBuffer_Release(&view);
3495 return res;
3496 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003497}
3498
Serhiy Storchakacb985562015-05-04 15:32:48 +03003499/*[clinic input]
3500_elementtree.XMLParser._parse_whole
3501
3502 file: object
3503 /
3504
3505[clinic start generated code]*/
3506
3507static PyObject *
3508_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3509/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003510{
Eli Benderskya3699232013-05-19 18:47:23 -07003511 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003512 PyObject* reader;
3513 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003514 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003515 PyObject* res;
3516
Serhiy Storchakacb985562015-05-04 15:32:48 +03003517 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003518 if (!reader)
3519 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003520
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003521 /* read from open file object */
3522 for (;;) {
3523
3524 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3525
3526 if (!buffer) {
3527 /* read failed (e.g. due to KeyboardInterrupt) */
3528 Py_DECREF(reader);
3529 return NULL;
3530 }
3531
Eli Benderskyf996e772012-03-16 05:53:30 +02003532 if (PyUnicode_CheckExact(buffer)) {
3533 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003534 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003535 Py_DECREF(buffer);
3536 break;
3537 }
3538 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003539 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003540 if (!temp) {
3541 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003542 Py_DECREF(reader);
3543 return NULL;
3544 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003545 buffer = temp;
3546 }
3547 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003548 Py_DECREF(buffer);
3549 break;
3550 }
3551
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003552 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3553 Py_DECREF(buffer);
3554 Py_DECREF(reader);
3555 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3556 return NULL;
3557 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003558 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003559 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003560 );
3561
3562 Py_DECREF(buffer);
3563
3564 if (!res) {
3565 Py_DECREF(reader);
3566 return NULL;
3567 }
3568 Py_DECREF(res);
3569
3570 }
3571
3572 Py_DECREF(reader);
3573
3574 res = expat_parse(self, "", 0, 1);
3575
3576 if (res && TreeBuilder_CheckExact(self->target)) {
3577 Py_DECREF(res);
3578 return treebuilder_done((TreeBuilderObject*) self->target);
3579 }
3580
3581 return res;
3582}
3583
Serhiy Storchakacb985562015-05-04 15:32:48 +03003584/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03003585_elementtree.XMLParser._setevents
3586
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003587 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003588 events_to_report: object = None
3589 /
3590
3591[clinic start generated code]*/
3592
3593static PyObject *
3594_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3595 PyObject *events_queue,
3596 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003597/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003598{
3599 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003600 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003601 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003602 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003603
3604 if (!TreeBuilder_CheckExact(self->target)) {
3605 PyErr_SetString(
3606 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003607 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003608 "targets"
3609 );
3610 return NULL;
3611 }
3612
3613 target = (TreeBuilderObject*) self->target;
3614
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003615 events_append = PyObject_GetAttrString(events_queue, "append");
3616 if (events_append == NULL)
3617 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003618 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003619
3620 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003621 Py_CLEAR(target->start_event_obj);
3622 Py_CLEAR(target->end_event_obj);
3623 Py_CLEAR(target->start_ns_event_obj);
3624 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003625
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003626 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003627 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003628 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003629 Py_RETURN_NONE;
3630 }
3631
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003632 if (!(events_seq = PySequence_Fast(events_to_report,
3633 "events must be a sequence"))) {
3634 return NULL;
3635 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003636
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03003637 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003638 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02003639 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003640 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003641 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003642 } else if (PyBytes_Check(event_name_obj)) {
3643 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003644 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003645 if (event_name == NULL) {
3646 Py_DECREF(events_seq);
3647 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3648 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003649 }
3650
3651 Py_INCREF(event_name_obj);
3652 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003653 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003654 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003655 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003656 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003657 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003658 EXPAT(SetNamespaceDeclHandler)(
3659 self->parser,
3660 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3661 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3662 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003663 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003664 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003665 EXPAT(SetNamespaceDeclHandler)(
3666 self->parser,
3667 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3668 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3669 );
3670 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003671 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003672 Py_DECREF(events_seq);
3673 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003674 return NULL;
3675 }
3676 }
3677
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003678 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003679 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003680}
3681
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003682static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003683xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003684{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003685 if (PyUnicode_Check(nameobj)) {
3686 PyObject* res;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003687 if (_PyUnicode_EqualToASCIIString(nameobj, "entity"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003688 res = self->entity;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003689 else if (_PyUnicode_EqualToASCIIString(nameobj, "target"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003690 res = self->target;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003691 else if (_PyUnicode_EqualToASCIIString(nameobj, "version")) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00003692 return PyUnicode_FromFormat(
3693 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003694 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003695 }
3696 else
3697 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003698
Alexander Belopolskye239d232010-12-08 23:31:48 +00003699 Py_INCREF(res);
3700 return res;
3701 }
3702 generic:
3703 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003704}
3705
Serhiy Storchakacb985562015-05-04 15:32:48 +03003706#include "clinic/_elementtree.c.h"
3707
3708static PyMethodDef element_methods[] = {
3709
3710 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3711
3712 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3713 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3714
3715 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3716 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3717 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3718
3719 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3720 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3721 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3722 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3723
3724 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3725 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3726 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3727
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003728 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003729 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3730
3731 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3732 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3733
3734 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3735
3736 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3737 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3738 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3739 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3740 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3741
3742 {NULL, NULL}
3743};
3744
3745static PyMappingMethods element_as_mapping = {
3746 (lenfunc) element_length,
3747 (binaryfunc) element_subscr,
3748 (objobjargproc) element_ass_subscr,
3749};
3750
Serhiy Storchakadde08152015-11-25 15:28:13 +02003751static PyGetSetDef element_getsetlist[] = {
3752 {"tag",
3753 (getter)element_tag_getter,
3754 (setter)element_tag_setter,
3755 "A string identifying what kind of data this element represents"},
3756 {"text",
3757 (getter)element_text_getter,
3758 (setter)element_text_setter,
3759 "A string of text directly after the start tag, or None"},
3760 {"tail",
3761 (getter)element_tail_getter,
3762 (setter)element_tail_setter,
3763 "A string of text directly after the end tag, or None"},
3764 {"attrib",
3765 (getter)element_attrib_getter,
3766 (setter)element_attrib_setter,
3767 "A dictionary containing the element's attributes"},
3768 {NULL},
3769};
3770
Serhiy Storchakacb985562015-05-04 15:32:48 +03003771static PyTypeObject Element_Type = {
3772 PyVarObject_HEAD_INIT(NULL, 0)
3773 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3774 /* methods */
3775 (destructor)element_dealloc, /* tp_dealloc */
3776 0, /* tp_print */
3777 0, /* tp_getattr */
3778 0, /* tp_setattr */
3779 0, /* tp_reserved */
3780 (reprfunc)element_repr, /* tp_repr */
3781 0, /* tp_as_number */
3782 &element_as_sequence, /* tp_as_sequence */
3783 &element_as_mapping, /* tp_as_mapping */
3784 0, /* tp_hash */
3785 0, /* tp_call */
3786 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003787 PyObject_GenericGetAttr, /* tp_getattro */
3788 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003789 0, /* tp_as_buffer */
3790 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3791 /* tp_flags */
3792 0, /* tp_doc */
3793 (traverseproc)element_gc_traverse, /* tp_traverse */
3794 (inquiry)element_gc_clear, /* tp_clear */
3795 0, /* tp_richcompare */
3796 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3797 0, /* tp_iter */
3798 0, /* tp_iternext */
3799 element_methods, /* tp_methods */
3800 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003801 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003802 0, /* tp_base */
3803 0, /* tp_dict */
3804 0, /* tp_descr_get */
3805 0, /* tp_descr_set */
3806 0, /* tp_dictoffset */
3807 (initproc)element_init, /* tp_init */
3808 PyType_GenericAlloc, /* tp_alloc */
3809 element_new, /* tp_new */
3810 0, /* tp_free */
3811};
3812
3813static PyMethodDef treebuilder_methods[] = {
3814 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3815 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3816 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3817 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3818 {NULL, NULL}
3819};
3820
3821static PyTypeObject TreeBuilder_Type = {
3822 PyVarObject_HEAD_INIT(NULL, 0)
3823 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3824 /* methods */
3825 (destructor)treebuilder_dealloc, /* tp_dealloc */
3826 0, /* tp_print */
3827 0, /* tp_getattr */
3828 0, /* tp_setattr */
3829 0, /* tp_reserved */
3830 0, /* tp_repr */
3831 0, /* tp_as_number */
3832 0, /* tp_as_sequence */
3833 0, /* tp_as_mapping */
3834 0, /* tp_hash */
3835 0, /* tp_call */
3836 0, /* tp_str */
3837 0, /* tp_getattro */
3838 0, /* tp_setattro */
3839 0, /* tp_as_buffer */
3840 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3841 /* tp_flags */
3842 0, /* tp_doc */
3843 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3844 (inquiry)treebuilder_gc_clear, /* tp_clear */
3845 0, /* tp_richcompare */
3846 0, /* tp_weaklistoffset */
3847 0, /* tp_iter */
3848 0, /* tp_iternext */
3849 treebuilder_methods, /* tp_methods */
3850 0, /* tp_members */
3851 0, /* tp_getset */
3852 0, /* tp_base */
3853 0, /* tp_dict */
3854 0, /* tp_descr_get */
3855 0, /* tp_descr_set */
3856 0, /* tp_dictoffset */
3857 _elementtree_TreeBuilder___init__, /* tp_init */
3858 PyType_GenericAlloc, /* tp_alloc */
3859 treebuilder_new, /* tp_new */
3860 0, /* tp_free */
3861};
3862
3863static PyMethodDef xmlparser_methods[] = {
3864 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3865 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3866 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3867 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003868 {NULL, NULL}
3869};
3870
Neal Norwitz227b5332006-03-22 09:28:35 +00003871static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003872 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003873 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003874 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003875 (destructor)xmlparser_dealloc, /* tp_dealloc */
3876 0, /* tp_print */
3877 0, /* tp_getattr */
3878 0, /* tp_setattr */
3879 0, /* tp_reserved */
3880 0, /* tp_repr */
3881 0, /* tp_as_number */
3882 0, /* tp_as_sequence */
3883 0, /* tp_as_mapping */
3884 0, /* tp_hash */
3885 0, /* tp_call */
3886 0, /* tp_str */
3887 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3888 0, /* tp_setattro */
3889 0, /* tp_as_buffer */
3890 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3891 /* tp_flags */
3892 0, /* tp_doc */
3893 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3894 (inquiry)xmlparser_gc_clear, /* tp_clear */
3895 0, /* tp_richcompare */
3896 0, /* tp_weaklistoffset */
3897 0, /* tp_iter */
3898 0, /* tp_iternext */
3899 xmlparser_methods, /* tp_methods */
3900 0, /* tp_members */
3901 0, /* tp_getset */
3902 0, /* tp_base */
3903 0, /* tp_dict */
3904 0, /* tp_descr_get */
3905 0, /* tp_descr_set */
3906 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003907 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003908 PyType_GenericAlloc, /* tp_alloc */
3909 xmlparser_new, /* tp_new */
3910 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003911};
3912
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003913/* ==================================================================== */
3914/* python module interface */
3915
3916static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003917 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003918 {NULL, NULL}
3919};
3920
Martin v. Löwis1a214512008-06-11 05:26:20 +00003921
Eli Bendersky532d03e2013-08-10 08:00:39 -07003922static struct PyModuleDef elementtreemodule = {
3923 PyModuleDef_HEAD_INIT,
3924 "_elementtree",
3925 NULL,
3926 sizeof(elementtreestate),
3927 _functions,
3928 NULL,
3929 elementtree_traverse,
3930 elementtree_clear,
3931 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003932};
3933
Neal Norwitzf6657e62006-12-28 04:47:50 +00003934PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003935PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003936{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003937 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003938 elementtreestate *st;
3939
3940 m = PyState_FindModule(&elementtreemodule);
3941 if (m) {
3942 Py_INCREF(m);
3943 return m;
3944 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003945
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003946 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003947 if (PyType_Ready(&ElementIter_Type) < 0)
3948 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003949 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003950 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003951 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003952 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003953 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003954 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003955
Eli Bendersky532d03e2013-08-10 08:00:39 -07003956 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003957 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003958 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003959 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003960
Eli Bendersky828efde2012-04-05 05:40:58 +03003961 if (!(temp = PyImport_ImportModule("copy")))
3962 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003963 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003964 Py_XDECREF(temp);
3965
Victor Stinnerb136f112017-07-10 22:28:02 +02003966 if (st->deepcopy_obj == NULL) {
3967 return NULL;
3968 }
3969
3970 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07003971 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003972 return NULL;
3973
Eli Bendersky20d41742012-06-01 09:48:37 +03003974 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003975 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3976 if (expat_capi) {
3977 /* check that it's usable */
3978 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02003979 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003980 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3981 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003982 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003983 PyErr_SetString(PyExc_ImportError,
3984 "pyexpat version is incompatible");
3985 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003986 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003987 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003988 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003989 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003990
Eli Bendersky532d03e2013-08-10 08:00:39 -07003991 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003992 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003993 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003994 Py_INCREF(st->parseerror_obj);
3995 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003996
Eli Bendersky092af1f2012-03-04 07:14:03 +02003997 Py_INCREF((PyObject *)&Element_Type);
3998 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3999
Eli Bendersky58d548d2012-05-29 15:45:16 +03004000 Py_INCREF((PyObject *)&TreeBuilder_Type);
4001 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4002
Eli Bendersky52467b12012-06-01 07:13:08 +03004003 Py_INCREF((PyObject *)&XMLParser_Type);
4004 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004005
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004006 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004007}