blob: 919591467c0da0d6983e59fe0d4d0f2ff8c462e6 [file] [log] [blame]
/*--------------------------------------------------------------------
 * Licensed to PSF under a Contributor Agreement.
 * See http://www.python.org/psf/license for licensing details.
 *
 * _elementtree - C accelerator for xml.etree.ElementTree
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 *--------------------------------------------------------------------
 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* Number of child slots embedded directly in an element's extra section;
   an element can hold this many children without extra memory
   allocations. */
#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children. Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  Change the condition below to 1 to keep a
   running byte total and print it on every ALLOC/RELEASE; in the default
   configuration both macros expand to nothing.  The macro parameters are
   parenthesized so that compound argument expressions expand safely. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += (size); printf("%8d - %s\n", memory, (comment)); } while (0)
#define RELEASE(size, comment)\
do { memory -= (size); printf("%8d - %s\n", memory, (comment)); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) introduces a file-private helper returning
   `type`.  MSVC builds additionally request inlining and the __fastcall
   calling convention. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
55
/* The `text` and `tail` slots of an element are tagged pointers: bit 0
   carries a 'join' flag and the remaining bits are the PyObject address.
   Every use of text or tail as an object pointer must go through JOIN_OBJ.
   See the comments in the ElementObject definition for what the flag
   means.

   JOIN_OBJ(p)        object pointer with the flag bit masked off
   JOIN_GET(p)        the flag bit (0 or 1)
   JOIN_SET(p, flag)  pointer value for the same object with `flag` or-ed in */
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
Ronald Oussoren138d0802013-07-19 11:11:25 +020083/* Types defined by this extension */
84static PyTypeObject Element_Type;
85static PyTypeObject ElementIter_Type;
86static PyTypeObject TreeBuilder_Type;
87static PyTypeObject XMLParser_Type;
88
89
Eli Bendersky532d03e2013-08-10 08:00:39 -070090/* Per-module state; PEP 3121 */
91typedef struct {
92 PyObject *parseerror_obj;
93 PyObject *deepcopy_obj;
94 PyObject *elementpath_obj;
95} elementtreestate;
96
97static struct PyModuleDef elementtreemodule;
98
/* Fetch the per-module state of `mod`, which is assumed to be the
 * _elementtree module object.
 */
#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))

/* Locate the module instance imported in the currently running
 * sub-interpreter and fetch its state.
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
109
110static int
111elementtree_clear(PyObject *m)
112{
113 elementtreestate *st = ET_STATE(m);
114 Py_CLEAR(st->parseerror_obj);
115 Py_CLEAR(st->deepcopy_obj);
116 Py_CLEAR(st->elementpath_obj);
117 return 0;
118}
119
120static int
121elementtree_traverse(PyObject *m, visitproc visit, void *arg)
122{
123 elementtreestate *st = ET_STATE(m);
124 Py_VISIT(st->parseerror_obj);
125 Py_VISIT(st->deepcopy_obj);
126 Py_VISIT(st->elementpath_obj);
127 return 0;
128}
129
130static void
131elementtree_free(void *m)
132{
133 elementtree_clear((PyObject *)m);
134}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135
136/* helpers */
137
138LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139list_join(PyObject* list)
140{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300141 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000143 PyObject* result;
144
Antoine Pitrouc1948842012-10-01 23:40:37 +0200145 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000146 if (!joiner)
147 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200148 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000150 return result;
151}
152
Eli Bendersky48d358b2012-05-30 17:57:50 +0300153/* Is the given object an empty dictionary?
154*/
155static int
156is_empty_dict(PyObject *obj)
157{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200158 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300159}
160
161
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000162/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200163/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000164
165typedef struct {
166
167 /* attributes (a dictionary object), or None if no attributes */
168 PyObject* attrib;
169
170 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200171 Py_ssize_t length; /* actual number of items */
172 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000173
174 /* this either points to _children or to a malloced buffer */
175 PyObject* *children;
176
177 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179} ElementObjectExtra;
180
181typedef struct {
182 PyObject_HEAD
183
184 /* element tag (a string). */
185 PyObject* tag;
186
187 /* text before first child. note that this is a tagged pointer;
188 use JOIN_OBJ to get the object pointer. the join flag is used
189 to distinguish lists created by the tree builder from lists
190 assigned to the attribute by application code; the former
191 should be joined before being returned to the user, the latter
192 should be left intact. */
193 PyObject* text;
194
195 /* text after this element, in parent. note that this is a tagged
196 pointer; use JOIN_OBJ to get the object pointer. */
197 PyObject* tail;
198
199 ElementObjectExtra* extra;
200
Eli Benderskyebf37a22012-04-03 22:02:37 +0300201 PyObject *weakreflist; /* For tp_weaklistoffset */
202
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000203} ElementObject;
204
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
/* Element_CheckExact: true only when op's type is Element_Type itself.
   Element_Check: true for Element_Type and for its subtypes. */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
208
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209
210/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200211/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212
213LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200214create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215{
216 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200217 if (!self->extra) {
218 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000219 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200220 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000221
222 if (!attrib)
223 attrib = Py_None;
224
225 Py_INCREF(attrib);
226 self->extra->attrib = attrib;
227
228 self->extra->length = 0;
229 self->extra->allocated = STATIC_CHILDREN;
230 self->extra->children = self->extra->_children;
231
232 return 0;
233}
234
235LOCAL(void)
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300236dealloc_extra(ElementObjectExtra *extra)
237{
238 Py_ssize_t i;
239
240 if (!extra)
241 return;
242
243 Py_DECREF(extra->attrib);
244
245 for (i = 0; i < extra->length; i++)
246 Py_DECREF(extra->children[i]);
247
248 if (extra->children != extra->_children)
249 PyObject_Free(extra->children);
250
251 PyObject_Free(extra);
252}
253
254LOCAL(void)
255clear_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256{
Eli Bendersky08b85292012-04-04 15:55:07 +0300257 ElementObjectExtra *myextra;
Eli Bendersky08b85292012-04-04 15:55:07 +0300258
Eli Benderskyebf37a22012-04-03 22:02:37 +0300259 if (!self->extra)
260 return;
261
262 /* Avoid DECREFs calling into this code again (cycles, etc.)
263 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300264 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300265 self->extra = NULL;
266
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300267 dealloc_extra(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000268}
269
Eli Bendersky092af1f2012-03-04 07:14:03 +0200270/* Convenience internal function to create new Element objects with the given
271 * tag and attributes.
272*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200274create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000275{
276 ElementObject* self;
277
Eli Bendersky0192ba32012-03-30 16:38:33 +0300278 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000279 if (self == NULL)
280 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000281 self->extra = NULL;
282
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000283 Py_INCREF(tag);
284 self->tag = tag;
285
286 Py_INCREF(Py_None);
287 self->text = Py_None;
288
289 Py_INCREF(Py_None);
290 self->tail = Py_None;
291
Eli Benderskyebf37a22012-04-03 22:02:37 +0300292 self->weakreflist = NULL;
293
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200294 ALLOC(sizeof(ElementObject), "create element");
295 PyObject_GC_Track(self);
296
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200297 if (attrib != Py_None && !is_empty_dict(attrib)) {
298 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200299 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200300 return NULL;
301 }
302 }
303
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000304 return (PyObject*) self;
305}
306
Eli Bendersky092af1f2012-03-04 07:14:03 +0200307static PyObject *
308element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
309{
310 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
311 if (e != NULL) {
312 Py_INCREF(Py_None);
313 e->tag = Py_None;
314
315 Py_INCREF(Py_None);
316 e->text = Py_None;
317
318 Py_INCREF(Py_None);
319 e->tail = Py_None;
320
321 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300322 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200323 }
324 return (PyObject *)e;
325}
326
Eli Bendersky737b1732012-05-29 06:02:56 +0300327/* Helper function for extracting the attrib dictionary from a keywords dict.
328 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800329 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300330 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700331 *
332 * Return a dictionary with the content of kwds merged into the content of
333 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300334 */
335static PyObject*
336get_attrib_from_keywords(PyObject *kwds)
337{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700338 PyObject *attrib_str = PyUnicode_FromString("attrib");
339 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300340
341 if (attrib) {
342 /* If attrib was found in kwds, copy its value and remove it from
343 * kwds
344 */
345 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700346 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300347 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
348 Py_TYPE(attrib)->tp_name);
349 return NULL;
350 }
351 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700352 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300353 } else {
354 attrib = PyDict_New();
355 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700356
357 Py_DECREF(attrib_str);
358
359 /* attrib can be NULL if PyDict_New failed */
360 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200361 if (PyDict_Update(attrib, kwds) < 0)
362 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300363 return attrib;
364}
365
Serhiy Storchakacb985562015-05-04 15:32:48 +0300366/*[clinic input]
367module _elementtree
368class _elementtree.Element "ElementObject *" "&Element_Type"
369class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
370class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
371[clinic start generated code]*/
372/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
373
Eli Bendersky092af1f2012-03-04 07:14:03 +0200374static int
375element_init(PyObject *self, PyObject *args, PyObject *kwds)
376{
377 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200378 PyObject *attrib = NULL;
379 ElementObject *self_elem;
380
381 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
382 return -1;
383
Eli Bendersky737b1732012-05-29 06:02:56 +0300384 if (attrib) {
385 /* attrib passed as positional arg */
386 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200387 if (!attrib)
388 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300389 if (kwds) {
390 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200391 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300392 return -1;
393 }
394 }
395 } else if (kwds) {
396 /* have keywords args */
397 attrib = get_attrib_from_keywords(kwds);
398 if (!attrib)
399 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200400 }
401
402 self_elem = (ElementObject *)self;
403
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200406 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200407 return -1;
408 }
409 }
410
Eli Bendersky48d358b2012-05-30 17:57:50 +0300411 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200412 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413
414 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200415 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300416 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200417
Eli Bendersky092af1f2012-03-04 07:14:03 +0200418 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300419 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200420
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300422 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200423
424 return 0;
425}
426
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000427LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200428element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000429{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200430 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000431 PyObject* *children;
432
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300433 assert(extra >= 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000434 /* make sure self->children can hold the given number of extra
435 elements. set an exception and return -1 if allocation failed */
436
Victor Stinner5f0af232013-07-11 23:01:36 +0200437 if (!self->extra) {
438 if (create_extra(self, NULL) < 0)
439 return -1;
440 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000441
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200442 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000443
444 if (size > self->extra->allocated) {
445 /* use Python 2.4's list growth strategy */
446 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000447 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100448 * which needs at least 4 bytes.
449 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000450 * be safe.
451 */
452 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200453 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
454 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000455 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000456 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100457 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000458 * false alarm always assume at least one child to be safe.
459 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000460 children = PyObject_Realloc(self->extra->children,
461 size * sizeof(PyObject*));
462 if (!children)
463 goto nomemory;
464 } else {
465 children = PyObject_Malloc(size * sizeof(PyObject*));
466 if (!children)
467 goto nomemory;
468 /* copy existing children from static area to malloc buffer */
469 memcpy(children, self->extra->children,
470 self->extra->length * sizeof(PyObject*));
471 }
472 self->extra->children = children;
473 self->extra->allocated = size;
474 }
475
476 return 0;
477
478 nomemory:
479 PyErr_NoMemory();
480 return -1;
481}
482
483LOCAL(int)
484element_add_subelement(ElementObject* self, PyObject* element)
485{
486 /* add a child element to a parent */
487
488 if (element_resize(self, 1) < 0)
489 return -1;
490
491 Py_INCREF(element);
492 self->extra->children[self->extra->length] = element;
493
494 self->extra->length++;
495
496 return 0;
497}
498
499LOCAL(PyObject*)
500element_get_attrib(ElementObject* self)
501{
502 /* return borrowed reference to attrib dictionary */
503 /* note: this function assumes that the extra section exists */
504
505 PyObject* res = self->extra->attrib;
506
507 if (res == Py_None) {
508 /* create missing dictionary */
509 res = PyDict_New();
510 if (!res)
511 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200512 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000513 self->extra->attrib = res;
514 }
515
516 return res;
517}
518
519LOCAL(PyObject*)
520element_get_text(ElementObject* self)
521{
522 /* return borrowed reference to text attribute */
523
Serhiy Storchaka576def02017-03-30 09:47:31 +0300524 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000525
526 if (JOIN_GET(res)) {
527 res = JOIN_OBJ(res);
528 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300529 PyObject *tmp = list_join(res);
530 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000531 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300532 self->text = tmp;
533 Py_DECREF(res);
534 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000535 }
536 }
537
538 return res;
539}
540
541LOCAL(PyObject*)
542element_get_tail(ElementObject* self)
543{
544 /* return borrowed reference to text attribute */
545
Serhiy Storchaka576def02017-03-30 09:47:31 +0300546 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000547
548 if (JOIN_GET(res)) {
549 res = JOIN_OBJ(res);
550 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300551 PyObject *tmp = list_join(res);
552 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000553 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300554 self->tail = tmp;
555 Py_DECREF(res);
556 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000557 }
558 }
559
560 return res;
561}
562
563static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300564subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000565{
566 PyObject* elem;
567
568 ElementObject* parent;
569 PyObject* tag;
570 PyObject* attrib = NULL;
571 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
572 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800573 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000574 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800575 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000576
Eli Bendersky737b1732012-05-29 06:02:56 +0300577 if (attrib) {
578 /* attrib passed as positional arg */
579 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000580 if (!attrib)
581 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300582 if (kwds) {
583 if (PyDict_Update(attrib, kwds) < 0) {
584 return NULL;
585 }
586 }
587 } else if (kwds) {
588 /* have keyword args */
589 attrib = get_attrib_from_keywords(kwds);
590 if (!attrib)
591 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000592 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300593 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000594 Py_INCREF(Py_None);
595 attrib = Py_None;
596 }
597
Eli Bendersky092af1f2012-03-04 07:14:03 +0200598 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000599 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200600 if (elem == NULL)
601 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000602
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000603 if (element_add_subelement(parent, elem) < 0) {
604 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000605 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000606 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000607
608 return elem;
609}
610
Eli Bendersky0192ba32012-03-30 16:38:33 +0300611static int
612element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
613{
614 Py_VISIT(self->tag);
615 Py_VISIT(JOIN_OBJ(self->text));
616 Py_VISIT(JOIN_OBJ(self->tail));
617
618 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200619 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620 Py_VISIT(self->extra->attrib);
621
622 for (i = 0; i < self->extra->length; ++i)
623 Py_VISIT(self->extra->children[i]);
624 }
625 return 0;
626}
627
628static int
629element_gc_clear(ElementObject *self)
630{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300631 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700632 _clear_joined_ptr(&self->text);
633 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300634
635 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300636 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300637 */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300638 clear_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300639 return 0;
640}
641
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000642static void
643element_dealloc(ElementObject* self)
644{
INADA Naokia6296d32017-08-24 14:55:17 +0900645 /* bpo-31095: UnTrack is needed before calling any callbacks */
Eli Bendersky0192ba32012-03-30 16:38:33 +0300646 PyObject_GC_UnTrack(self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200647 Py_TRASHCAN_SAFE_BEGIN(self)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300648
649 if (self->weakreflist != NULL)
650 PyObject_ClearWeakRefs((PyObject *) self);
651
Eli Bendersky0192ba32012-03-30 16:38:33 +0300652 /* element_gc_clear clears all references and deallocates extra
653 */
654 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000655
656 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200657 Py_TYPE(self)->tp_free((PyObject *)self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200658 Py_TRASHCAN_SAFE_END(self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000659}
660
661/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000662
Serhiy Storchakacb985562015-05-04 15:32:48 +0300663/*[clinic input]
664_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665
Serhiy Storchakacb985562015-05-04 15:32:48 +0300666 subelement: object(subclass_of='&Element_Type')
667 /
668
669[clinic start generated code]*/
670
671static PyObject *
672_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
673/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
674{
675 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000676 return NULL;
677
678 Py_RETURN_NONE;
679}
680
Serhiy Storchakacb985562015-05-04 15:32:48 +0300681/*[clinic input]
682_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000683
Serhiy Storchakacb985562015-05-04 15:32:48 +0300684[clinic start generated code]*/
685
686static PyObject *
687_elementtree_Element_clear_impl(ElementObject *self)
688/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
689{
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300690 clear_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000691
692 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300693 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000694
695 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300696 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000697
698 Py_RETURN_NONE;
699}
700
Serhiy Storchakacb985562015-05-04 15:32:48 +0300701/*[clinic input]
702_elementtree.Element.__copy__
703
704[clinic start generated code]*/
705
706static PyObject *
707_elementtree_Element___copy___impl(ElementObject *self)
708/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000709{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200710 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000711 ElementObject* element;
712
Eli Bendersky092af1f2012-03-04 07:14:03 +0200713 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800714 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000715 if (!element)
716 return NULL;
717
Oren Milman39ecb9c2017-10-10 23:26:24 +0300718 Py_INCREF(JOIN_OBJ(self->text));
719 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000720
Oren Milman39ecb9c2017-10-10 23:26:24 +0300721 Py_INCREF(JOIN_OBJ(self->tail));
722 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000723
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300724 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000725 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000726 if (element_resize(element, self->extra->length) < 0) {
727 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000728 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000729 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000730
731 for (i = 0; i < self->extra->length; i++) {
732 Py_INCREF(self->extra->children[i]);
733 element->extra->children[i] = self->extra->children[i];
734 }
735
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300736 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000737 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000738 }
739
740 return (PyObject*) element;
741}
742
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200743/* Helper for a deep copy. */
744LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
745
Serhiy Storchakacb985562015-05-04 15:32:48 +0300746/*[clinic input]
747_elementtree.Element.__deepcopy__
748
Oren Milmand0568182017-09-12 17:39:15 +0300749 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300750 /
751
752[clinic start generated code]*/
753
754static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300755_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
756/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000757{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200758 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000759 ElementObject* element;
760 PyObject* tag;
761 PyObject* attrib;
762 PyObject* text;
763 PyObject* tail;
764 PyObject* id;
765
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000766 tag = deepcopy(self->tag, memo);
767 if (!tag)
768 return NULL;
769
770 if (self->extra) {
771 attrib = deepcopy(self->extra->attrib, memo);
772 if (!attrib) {
773 Py_DECREF(tag);
774 return NULL;
775 }
776 } else {
777 Py_INCREF(Py_None);
778 attrib = Py_None;
779 }
780
Eli Bendersky092af1f2012-03-04 07:14:03 +0200781 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000782
783 Py_DECREF(tag);
784 Py_DECREF(attrib);
785
786 if (!element)
787 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100788
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000789 text = deepcopy(JOIN_OBJ(self->text), memo);
790 if (!text)
791 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300792 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000793
794 tail = deepcopy(JOIN_OBJ(self->tail), memo);
795 if (!tail)
796 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300797 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000798
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300799 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000800 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000801 if (element_resize(element, self->extra->length) < 0)
802 goto error;
803
804 for (i = 0; i < self->extra->length; i++) {
805 PyObject* child = deepcopy(self->extra->children[i], memo);
806 if (!child) {
807 element->extra->length = i;
808 goto error;
809 }
810 element->extra->children[i] = child;
811 }
812
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300813 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000814 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000815 }
816
817 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700818 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000819 if (!id)
820 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000821
822 i = PyDict_SetItem(memo, id, (PyObject*) element);
823
824 Py_DECREF(id);
825
826 if (i < 0)
827 goto error;
828
829 return (PyObject*) element;
830
831 error:
832 Py_DECREF(element);
833 return NULL;
834}
835
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200836LOCAL(PyObject *)
837deepcopy(PyObject *object, PyObject *memo)
838{
839 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200840 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200841 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200842
843 /* Fast paths */
844 if (object == Py_None || PyUnicode_CheckExact(object)) {
845 Py_INCREF(object);
846 return object;
847 }
848
849 if (Py_REFCNT(object) == 1) {
850 if (PyDict_CheckExact(object)) {
851 PyObject *key, *value;
852 Py_ssize_t pos = 0;
853 int simple = 1;
854 while (PyDict_Next(object, &pos, &key, &value)) {
855 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
856 simple = 0;
857 break;
858 }
859 }
860 if (simple)
861 return PyDict_Copy(object);
862 /* Fall through to general case */
863 }
864 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300865 return _elementtree_Element___deepcopy___impl(
866 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200867 }
868 }
869
870 /* General case */
871 st = ET_STATE_GLOBAL;
872 if (!st->deepcopy_obj) {
873 PyErr_SetString(PyExc_RuntimeError,
874 "deepcopy helper not found");
875 return NULL;
876 }
877
Victor Stinner7fbac452016-08-20 01:34:44 +0200878 stack[0] = object;
879 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200880 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200881}
882
883
Serhiy Storchakacb985562015-05-04 15:32:48 +0300884/*[clinic input]
885_elementtree.Element.__sizeof__ -> Py_ssize_t
886
887[clinic start generated code]*/
888
889static Py_ssize_t
890_elementtree_Element___sizeof___impl(ElementObject *self)
891/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200892{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200893 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200894 if (self->extra) {
895 result += sizeof(ElementObjectExtra);
896 if (self->extra->children != self->extra->_children)
897 result += sizeof(PyObject*) * self->extra->allocated;
898 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300899 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200900}
901
Eli Bendersky698bdb22013-01-10 06:01:06 -0800902/* dict keys for getstate/setstate. */
903#define PICKLED_TAG "tag"
904#define PICKLED_CHILDREN "_children"
905#define PICKLED_ATTRIB "attrib"
906#define PICKLED_TAIL "tail"
907#define PICKLED_TEXT "text"
908
909/* __getstate__ returns a fabricated instance dict as in the pure-Python
910 * Element implementation, for interoperability/interchangeability. This
911 * makes the pure-Python implementation details an API, but (a) there aren't
912 * any unnecessary structures there; and (b) it buys compatibility with 3.2
913 * pickles. See issue #16076.
914 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300915/*[clinic input]
916_elementtree.Element.__getstate__
917
918[clinic start generated code]*/
919
Eli Bendersky698bdb22013-01-10 06:01:06 -0800920static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300921_elementtree_Element___getstate___impl(ElementObject *self)
922/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800923{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200924 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800925 PyObject *instancedict = NULL, *children;
926
927 /* Build a list of children. */
928 children = PyList_New(self->extra ? self->extra->length : 0);
929 if (!children)
930 return NULL;
931 for (i = 0; i < PyList_GET_SIZE(children); i++) {
932 PyObject *child = self->extra->children[i];
933 Py_INCREF(child);
934 PyList_SET_ITEM(children, i, child);
935 }
936
937 /* Construct the state object. */
938 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
939 if (noattrib)
940 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
941 PICKLED_TAG, self->tag,
942 PICKLED_CHILDREN, children,
943 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700944 PICKLED_TEXT, JOIN_OBJ(self->text),
945 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800946 else
947 instancedict = Py_BuildValue("{sOsOsOsOsO}",
948 PICKLED_TAG, self->tag,
949 PICKLED_CHILDREN, children,
950 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700951 PICKLED_TEXT, JOIN_OBJ(self->text),
952 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800953 if (instancedict) {
954 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800955 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800956 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800957 else {
958 for (i = 0; i < PyList_GET_SIZE(children); i++)
959 Py_DECREF(PyList_GET_ITEM(children, i));
960 Py_DECREF(children);
961
962 return NULL;
963 }
964}
965
966static PyObject *
967element_setstate_from_attributes(ElementObject *self,
968 PyObject *tag,
969 PyObject *attrib,
970 PyObject *text,
971 PyObject *tail,
972 PyObject *children)
973{
974 Py_ssize_t i, nchildren;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300975 ElementObjectExtra *oldextra = NULL;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800976
977 if (!tag) {
978 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
979 return NULL;
980 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800981
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200982 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300983 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800984
Oren Milman39ecb9c2017-10-10 23:26:24 +0300985 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
986 Py_INCREF(JOIN_OBJ(text));
987 _set_joined_ptr(&self->text, text);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800988
Oren Milman39ecb9c2017-10-10 23:26:24 +0300989 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
990 Py_INCREF(JOIN_OBJ(tail));
991 _set_joined_ptr(&self->tail, tail);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800992
993 /* Handle ATTRIB and CHILDREN. */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300994 if (!children && !attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -0800995 Py_RETURN_NONE;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300996 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800997
998 /* Compute 'nchildren'. */
999 if (children) {
1000 if (!PyList_Check(children)) {
1001 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1002 return NULL;
1003 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001004 nchildren = PyList_GET_SIZE(children);
1005
1006 /* (Re-)allocate 'extra'.
1007 Avoid DECREFs calling into this code again (cycles, etc.)
1008 */
1009 oldextra = self->extra;
1010 self->extra = NULL;
1011 if (element_resize(self, nchildren)) {
1012 assert(!self->extra || !self->extra->length);
1013 clear_extra(self);
1014 self->extra = oldextra;
1015 return NULL;
1016 }
1017 assert(self->extra);
1018 assert(self->extra->allocated >= nchildren);
1019 if (oldextra) {
1020 assert(self->extra->attrib == Py_None);
1021 self->extra->attrib = oldextra->attrib;
1022 oldextra->attrib = Py_None;
1023 }
1024
1025 /* Copy children */
1026 for (i = 0; i < nchildren; i++) {
1027 self->extra->children[i] = PyList_GET_ITEM(children, i);
1028 Py_INCREF(self->extra->children[i]);
1029 }
1030
1031 assert(!self->extra->length);
1032 self->extra->length = nchildren;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001033 }
1034 else {
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001035 if (element_resize(self, 0)) {
1036 return NULL;
1037 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001038 }
1039
Eli Bendersky698bdb22013-01-10 06:01:06 -08001040 /* Stash attrib. */
1041 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001042 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001043 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001044 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001045 dealloc_extra(oldextra);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001046
1047 Py_RETURN_NONE;
1048}
1049
1050/* __setstate__ for Element instance from the Python implementation.
1051 * 'state' should be the instance dict.
1052 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001053
Eli Bendersky698bdb22013-01-10 06:01:06 -08001054static PyObject *
1055element_setstate_from_Python(ElementObject *self, PyObject *state)
1056{
1057 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1058 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1059 PyObject *args;
1060 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001061 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001062
Eli Bendersky698bdb22013-01-10 06:01:06 -08001063 tag = attrib = text = tail = children = NULL;
1064 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001065 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001066 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001067
1068 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1069 &attrib, &text, &tail, &children))
1070 retval = element_setstate_from_attributes(self, tag, attrib, text,
1071 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001072 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001073 retval = NULL;
1074
1075 Py_DECREF(args);
1076 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001077}
1078
Serhiy Storchakacb985562015-05-04 15:32:48 +03001079/*[clinic input]
1080_elementtree.Element.__setstate__
1081
1082 state: object
1083 /
1084
1085[clinic start generated code]*/
1086
Eli Bendersky698bdb22013-01-10 06:01:06 -08001087static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001088_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1089/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001090{
1091 if (!PyDict_CheckExact(state)) {
1092 PyErr_Format(PyExc_TypeError,
1093 "Don't know how to unpickle \"%.200R\" as an Element",
1094 state);
1095 return NULL;
1096 }
1097 else
1098 return element_setstate_from_Python(self, state);
1099}
1100
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001101LOCAL(int)
1102checkpath(PyObject* tag)
1103{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001104 Py_ssize_t i;
1105 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001106
1107 /* check if a tag contains an xpath character */
1108
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001109#define PATHCHAR(ch) \
1110 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001111
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001112 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001113 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1114 void *data = PyUnicode_DATA(tag);
1115 unsigned int kind = PyUnicode_KIND(tag);
1116 for (i = 0; i < len; i++) {
1117 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1118 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001119 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001120 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001121 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001122 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001123 return 1;
1124 }
1125 return 0;
1126 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001127 if (PyBytes_Check(tag)) {
1128 char *p = PyBytes_AS_STRING(tag);
1129 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001130 if (p[i] == '{')
1131 check = 0;
1132 else if (p[i] == '}')
1133 check = 1;
1134 else if (check && PATHCHAR(p[i]))
1135 return 1;
1136 }
1137 return 0;
1138 }
1139
1140 return 1; /* unknown type; might be path expression */
1141}
1142
Serhiy Storchakacb985562015-05-04 15:32:48 +03001143/*[clinic input]
1144_elementtree.Element.extend
1145
1146 elements: object
1147 /
1148
1149[clinic start generated code]*/
1150
1151static PyObject *
1152_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1153/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001154{
1155 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001156 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001157
Serhiy Storchakacb985562015-05-04 15:32:48 +03001158 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001159 if (!seq) {
1160 PyErr_Format(
1161 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001162 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001163 );
1164 return NULL;
1165 }
1166
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001167 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001168 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001169 Py_INCREF(element);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001170 if (!Element_Check(element)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001171 PyErr_Format(
1172 PyExc_TypeError,
1173 "expected an Element, not \"%.200s\"",
1174 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001175 Py_DECREF(seq);
1176 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001177 return NULL;
1178 }
1179
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001180 if (element_add_subelement(self, element) < 0) {
1181 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001182 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001183 return NULL;
1184 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001185 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001186 }
1187
1188 Py_DECREF(seq);
1189
1190 Py_RETURN_NONE;
1191}
1192
Serhiy Storchakacb985562015-05-04 15:32:48 +03001193/*[clinic input]
1194_elementtree.Element.find
1195
1196 path: object
1197 namespaces: object = None
1198
1199[clinic start generated code]*/
1200
1201static PyObject *
1202_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1203 PyObject *namespaces)
1204/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001205{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001206 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001207 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001208
Serhiy Storchakacb985562015-05-04 15:32:48 +03001209 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001210 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001211 return _PyObject_CallMethodIdObjArgs(
1212 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001213 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001214 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001215
1216 if (!self->extra)
1217 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001218
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001219 for (i = 0; i < self->extra->length; i++) {
1220 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001221 int rc;
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001222 if (!Element_Check(item))
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001223 continue;
1224 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001225 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001226 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001227 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001228 Py_DECREF(item);
1229 if (rc < 0)
1230 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001231 }
1232
1233 Py_RETURN_NONE;
1234}
1235
Serhiy Storchakacb985562015-05-04 15:32:48 +03001236/*[clinic input]
1237_elementtree.Element.findtext
1238
1239 path: object
1240 default: object = None
1241 namespaces: object = None
1242
1243[clinic start generated code]*/
1244
1245static PyObject *
1246_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1247 PyObject *default_value,
1248 PyObject *namespaces)
1249/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001250{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001251 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001252 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001253 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001254
Serhiy Storchakacb985562015-05-04 15:32:48 +03001255 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001256 return _PyObject_CallMethodIdObjArgs(
1257 st->elementpath_obj, &PyId_findtext,
1258 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001259 );
1260
1261 if (!self->extra) {
1262 Py_INCREF(default_value);
1263 return default_value;
1264 }
1265
1266 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001267 PyObject *item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001268 int rc;
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001269 if (!Element_Check(item))
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001270 continue;
1271 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001272 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001273 if (rc > 0) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001274 PyObject* text = element_get_text((ElementObject*)item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001275 if (text == Py_None) {
1276 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001277 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001278 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001279 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001280 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001281 return text;
1282 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001283 Py_DECREF(item);
1284 if (rc < 0)
1285 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001286 }
1287
1288 Py_INCREF(default_value);
1289 return default_value;
1290}
1291
Serhiy Storchakacb985562015-05-04 15:32:48 +03001292/*[clinic input]
1293_elementtree.Element.findall
1294
1295 path: object
1296 namespaces: object = None
1297
1298[clinic start generated code]*/
1299
1300static PyObject *
1301_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1302 PyObject *namespaces)
1303/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001304{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001305 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001306 PyObject* out;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001307 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001308
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001309 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001310 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001311 return _PyObject_CallMethodIdObjArgs(
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001312 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001313 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001314 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001315
1316 out = PyList_New(0);
1317 if (!out)
1318 return NULL;
1319
1320 if (!self->extra)
1321 return out;
1322
1323 for (i = 0; i < self->extra->length; i++) {
1324 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001325 int rc;
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001326 if (!Element_Check(item))
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001327 continue;
1328 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001329 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001330 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1331 Py_DECREF(item);
1332 Py_DECREF(out);
1333 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001334 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001335 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001336 }
1337
1338 return out;
1339}
1340
Serhiy Storchakacb985562015-05-04 15:32:48 +03001341/*[clinic input]
1342_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001343
Serhiy Storchakacb985562015-05-04 15:32:48 +03001344 path: object
1345 namespaces: object = None
1346
1347[clinic start generated code]*/
1348
1349static PyObject *
1350_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1351 PyObject *namespaces)
1352/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1353{
1354 PyObject* tag = path;
1355 _Py_IDENTIFIER(iterfind);
1356 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001357
Victor Stinnerf5616342016-12-09 15:26:00 +01001358 return _PyObject_CallMethodIdObjArgs(
1359 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001360}
1361
Serhiy Storchakacb985562015-05-04 15:32:48 +03001362/*[clinic input]
1363_elementtree.Element.get
1364
1365 key: object
1366 default: object = None
1367
1368[clinic start generated code]*/
1369
1370static PyObject *
1371_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1372 PyObject *default_value)
1373/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001374{
1375 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001376
1377 if (!self->extra || self->extra->attrib == Py_None)
1378 value = default_value;
1379 else {
1380 value = PyDict_GetItem(self->extra->attrib, key);
1381 if (!value)
1382 value = default_value;
1383 }
1384
1385 Py_INCREF(value);
1386 return value;
1387}
1388
Serhiy Storchakacb985562015-05-04 15:32:48 +03001389/*[clinic input]
1390_elementtree.Element.getchildren
1391
1392[clinic start generated code]*/
1393
1394static PyObject *
1395_elementtree_Element_getchildren_impl(ElementObject *self)
1396/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001397{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001398 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001399 PyObject* list;
1400
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001401 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1402 "This method will be removed in future versions. "
1403 "Use 'list(elem)' or iteration over elem instead.",
1404 1) < 0) {
1405 return NULL;
1406 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001407
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001408 if (!self->extra)
1409 return PyList_New(0);
1410
1411 list = PyList_New(self->extra->length);
1412 if (!list)
1413 return NULL;
1414
1415 for (i = 0; i < self->extra->length; i++) {
1416 PyObject* item = self->extra->children[i];
1417 Py_INCREF(item);
1418 PyList_SET_ITEM(list, i, item);
1419 }
1420
1421 return list;
1422}
1423
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001424
Eli Bendersky64d11e62012-06-15 07:42:50 +03001425static PyObject *
1426create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1427
1428
Serhiy Storchakacb985562015-05-04 15:32:48 +03001429/*[clinic input]
1430_elementtree.Element.iter
1431
1432 tag: object = None
1433
1434[clinic start generated code]*/
1435
Eli Bendersky64d11e62012-06-15 07:42:50 +03001436static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001437_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1438/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001439{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001440 if (PyUnicode_Check(tag)) {
1441 if (PyUnicode_READY(tag) < 0)
1442 return NULL;
1443 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1444 tag = Py_None;
1445 }
1446 else if (PyBytes_Check(tag)) {
1447 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1448 tag = Py_None;
1449 }
1450
Eli Bendersky64d11e62012-06-15 07:42:50 +03001451 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001452}
1453
1454
Serhiy Storchakacb985562015-05-04 15:32:48 +03001455/*[clinic input]
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001456_elementtree.Element.getiterator
1457
1458 tag: object = None
1459
1460[clinic start generated code]*/
1461
1462static PyObject *
1463_elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1464/*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1465{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03001466 if (PyErr_WarnEx(PyExc_DeprecationWarning,
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001467 "This method will be removed in future versions. "
1468 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1469 1) < 0) {
1470 return NULL;
1471 }
1472 return _elementtree_Element_iter_impl(self, tag);
1473}
1474
1475
1476/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03001477_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001478
Serhiy Storchakacb985562015-05-04 15:32:48 +03001479[clinic start generated code]*/
1480
1481static PyObject *
1482_elementtree_Element_itertext_impl(ElementObject *self)
1483/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1484{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001485 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001486}
1487
Eli Bendersky64d11e62012-06-15 07:42:50 +03001488
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001489static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001490element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001491{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001492 ElementObject* self = (ElementObject*) self_;
1493
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001494 if (!self->extra || index < 0 || index >= self->extra->length) {
1495 PyErr_SetString(
1496 PyExc_IndexError,
1497 "child index out of range"
1498 );
1499 return NULL;
1500 }
1501
1502 Py_INCREF(self->extra->children[index]);
1503 return self->extra->children[index];
1504}
1505
Serhiy Storchakacb985562015-05-04 15:32:48 +03001506/*[clinic input]
1507_elementtree.Element.insert
1508
1509 index: Py_ssize_t
1510 subelement: object(subclass_of='&Element_Type')
1511 /
1512
1513[clinic start generated code]*/
1514
1515static PyObject *
1516_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1517 PyObject *subelement)
1518/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001519{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001520 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001521
Victor Stinner5f0af232013-07-11 23:01:36 +02001522 if (!self->extra) {
1523 if (create_extra(self, NULL) < 0)
1524 return NULL;
1525 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001526
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001527 if (index < 0) {
1528 index += self->extra->length;
1529 if (index < 0)
1530 index = 0;
1531 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001532 if (index > self->extra->length)
1533 index = self->extra->length;
1534
1535 if (element_resize(self, 1) < 0)
1536 return NULL;
1537
1538 for (i = self->extra->length; i > index; i--)
1539 self->extra->children[i] = self->extra->children[i-1];
1540
Serhiy Storchakacb985562015-05-04 15:32:48 +03001541 Py_INCREF(subelement);
1542 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001543
1544 self->extra->length++;
1545
1546 Py_RETURN_NONE;
1547}
1548
Serhiy Storchakacb985562015-05-04 15:32:48 +03001549/*[clinic input]
1550_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001551
Serhiy Storchakacb985562015-05-04 15:32:48 +03001552[clinic start generated code]*/
1553
1554static PyObject *
1555_elementtree_Element_items_impl(ElementObject *self)
1556/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1557{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001558 if (!self->extra || self->extra->attrib == Py_None)
1559 return PyList_New(0);
1560
1561 return PyDict_Items(self->extra->attrib);
1562}
1563
Serhiy Storchakacb985562015-05-04 15:32:48 +03001564/*[clinic input]
1565_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001566
Serhiy Storchakacb985562015-05-04 15:32:48 +03001567[clinic start generated code]*/
1568
1569static PyObject *
1570_elementtree_Element_keys_impl(ElementObject *self)
1571/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1572{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001573 if (!self->extra || self->extra->attrib == Py_None)
1574 return PyList_New(0);
1575
1576 return PyDict_Keys(self->extra->attrib);
1577}
1578
Martin v. Löwis18e16552006-02-15 17:27:45 +00001579static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001580element_length(ElementObject* self)
1581{
1582 if (!self->extra)
1583 return 0;
1584
1585 return self->extra->length;
1586}
1587
Serhiy Storchakacb985562015-05-04 15:32:48 +03001588/*[clinic input]
1589_elementtree.Element.makeelement
1590
1591 tag: object
1592 attrib: object
1593 /
1594
1595[clinic start generated code]*/
1596
1597static PyObject *
1598_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1599 PyObject *attrib)
1600/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001601{
1602 PyObject* elem;
1603
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001604 attrib = PyDict_Copy(attrib);
1605 if (!attrib)
1606 return NULL;
1607
Eli Bendersky092af1f2012-03-04 07:14:03 +02001608 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001609
1610 Py_DECREF(attrib);
1611
1612 return elem;
1613}
1614
Serhiy Storchakacb985562015-05-04 15:32:48 +03001615/*[clinic input]
1616_elementtree.Element.remove
1617
1618 subelement: object(subclass_of='&Element_Type')
1619 /
1620
1621[clinic start generated code]*/
1622
1623static PyObject *
1624_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1625/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001626{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001627 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001628 int rc;
1629 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001630
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001631 if (!self->extra) {
1632 /* element has no children, so raise exception */
1633 PyErr_SetString(
1634 PyExc_ValueError,
1635 "list.remove(x): x not in list"
1636 );
1637 return NULL;
1638 }
1639
1640 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001641 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001642 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001643 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001644 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001645 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001646 if (rc < 0)
1647 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001648 }
1649
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001650 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001651 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001652 PyErr_SetString(
1653 PyExc_ValueError,
1654 "list.remove(x): x not in list"
1655 );
1656 return NULL;
1657 }
1658
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001659 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001660
1661 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001662 for (; i < self->extra->length; i++)
1663 self->extra->children[i] = self->extra->children[i+1];
1664
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001665 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001666 Py_RETURN_NONE;
1667}
1668
1669static PyObject*
1670element_repr(ElementObject* self)
1671{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001672 int status;
1673
1674 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001675 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001676
1677 status = Py_ReprEnter((PyObject *)self);
1678 if (status == 0) {
1679 PyObject *res;
1680 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1681 Py_ReprLeave((PyObject *)self);
1682 return res;
1683 }
1684 if (status > 0)
1685 PyErr_Format(PyExc_RuntimeError,
1686 "reentrant call inside %s.__repr__",
1687 Py_TYPE(self)->tp_name);
1688 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001689}
1690
Serhiy Storchakacb985562015-05-04 15:32:48 +03001691/*[clinic input]
1692_elementtree.Element.set
1693
1694 key: object
1695 value: object
1696 /
1697
1698[clinic start generated code]*/
1699
1700static PyObject *
1701_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1702 PyObject *value)
1703/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001704{
1705 PyObject* attrib;
1706
Victor Stinner5f0af232013-07-11 23:01:36 +02001707 if (!self->extra) {
1708 if (create_extra(self, NULL) < 0)
1709 return NULL;
1710 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001711
1712 attrib = element_get_attrib(self);
1713 if (!attrib)
1714 return NULL;
1715
1716 if (PyDict_SetItem(attrib, key, value) < 0)
1717 return NULL;
1718
1719 Py_RETURN_NONE;
1720}
1721
1722static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001723element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001724{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001725 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001726 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001727 PyObject* old;
1728
1729 if (!self->extra || index < 0 || index >= self->extra->length) {
1730 PyErr_SetString(
1731 PyExc_IndexError,
1732 "child assignment index out of range");
1733 return -1;
1734 }
1735
1736 old = self->extra->children[index];
1737
1738 if (item) {
1739 Py_INCREF(item);
1740 self->extra->children[index] = item;
1741 } else {
1742 self->extra->length--;
1743 for (i = index; i < self->extra->length; i++)
1744 self->extra->children[i] = self->extra->children[i+1];
1745 }
1746
1747 Py_DECREF(old);
1748
1749 return 0;
1750}
1751
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001752static PyObject*
1753element_subscr(PyObject* self_, PyObject* item)
1754{
1755 ElementObject* self = (ElementObject*) self_;
1756
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001757 if (PyIndex_Check(item)) {
1758 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001759
1760 if (i == -1 && PyErr_Occurred()) {
1761 return NULL;
1762 }
1763 if (i < 0 && self->extra)
1764 i += self->extra->length;
1765 return element_getitem(self_, i);
1766 }
1767 else if (PySlice_Check(item)) {
1768 Py_ssize_t start, stop, step, slicelen, cur, i;
1769 PyObject* list;
1770
1771 if (!self->extra)
1772 return PyList_New(0);
1773
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001774 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001775 return NULL;
1776 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001777 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1778 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001779
1780 if (slicelen <= 0)
1781 return PyList_New(0);
1782 else {
1783 list = PyList_New(slicelen);
1784 if (!list)
1785 return NULL;
1786
1787 for (cur = start, i = 0; i < slicelen;
1788 cur += step, i++) {
1789 PyObject* item = self->extra->children[cur];
1790 Py_INCREF(item);
1791 PyList_SET_ITEM(list, i, item);
1792 }
1793
1794 return list;
1795 }
1796 }
1797 else {
1798 PyErr_SetString(PyExc_TypeError,
1799 "element indices must be integers");
1800 return NULL;
1801 }
1802}
1803
1804static int
1805element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1806{
1807 ElementObject* self = (ElementObject*) self_;
1808
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001809 if (PyIndex_Check(item)) {
1810 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001811
1812 if (i == -1 && PyErr_Occurred()) {
1813 return -1;
1814 }
1815 if (i < 0 && self->extra)
1816 i += self->extra->length;
1817 return element_setitem(self_, i, value);
1818 }
1819 else if (PySlice_Check(item)) {
1820 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1821
1822 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001823 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001824
Victor Stinner5f0af232013-07-11 23:01:36 +02001825 if (!self->extra) {
1826 if (create_extra(self, NULL) < 0)
1827 return -1;
1828 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001829
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001830 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001831 return -1;
1832 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001833 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1834 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001835
Eli Bendersky865756a2012-03-09 13:38:15 +02001836 if (value == NULL) {
1837 /* Delete slice */
1838 size_t cur;
1839 Py_ssize_t i;
1840
1841 if (slicelen <= 0)
1842 return 0;
1843
1844 /* Since we're deleting, the direction of the range doesn't matter,
1845 * so for simplicity make it always ascending.
1846 */
1847 if (step < 0) {
1848 stop = start + 1;
1849 start = stop + step * (slicelen - 1) - 1;
1850 step = -step;
1851 }
1852
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001853 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001854
1855 /* recycle is a list that will contain all the children
1856 * scheduled for removal.
1857 */
1858 if (!(recycle = PyList_New(slicelen))) {
1859 PyErr_NoMemory();
1860 return -1;
1861 }
1862
1863 /* This loop walks over all the children that have to be deleted,
1864 * with cur pointing at them. num_moved is the amount of children
1865 * until the next deleted child that have to be "shifted down" to
1866 * occupy the deleted's places.
1867 * Note that in the ith iteration, shifting is done i+i places down
1868 * because i children were already removed.
1869 */
1870 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1871 /* Compute how many children have to be moved, clipping at the
1872 * list end.
1873 */
1874 Py_ssize_t num_moved = step - 1;
1875 if (cur + step >= (size_t)self->extra->length) {
1876 num_moved = self->extra->length - cur - 1;
1877 }
1878
1879 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1880
1881 memmove(
1882 self->extra->children + cur - i,
1883 self->extra->children + cur + 1,
1884 num_moved * sizeof(PyObject *));
1885 }
1886
1887 /* Leftover "tail" after the last removed child */
1888 cur = start + (size_t)slicelen * step;
1889 if (cur < (size_t)self->extra->length) {
1890 memmove(
1891 self->extra->children + cur - slicelen,
1892 self->extra->children + cur,
1893 (self->extra->length - cur) * sizeof(PyObject *));
1894 }
1895
1896 self->extra->length -= slicelen;
1897
1898 /* Discard the recycle list with all the deleted sub-elements */
1899 Py_XDECREF(recycle);
1900 return 0;
1901 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001902
1903 /* A new slice is actually being assigned */
1904 seq = PySequence_Fast(value, "");
1905 if (!seq) {
1906 PyErr_Format(
1907 PyExc_TypeError,
1908 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1909 );
1910 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001911 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001912 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001913
1914 if (step != 1 && newlen != slicelen)
1915 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001916 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001917 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001918 "attempt to assign sequence of size %zd "
1919 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001920 newlen, slicelen
1921 );
1922 return -1;
1923 }
1924
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001925 /* Resize before creating the recycle bin, to prevent refleaks. */
1926 if (newlen > slicelen) {
1927 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001928 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001929 return -1;
1930 }
1931 }
1932
1933 if (slicelen > 0) {
1934 /* to avoid recursive calls to this method (via decref), move
1935 old items to the recycle bin here, and get rid of them when
1936 we're done modifying the element */
1937 recycle = PyList_New(slicelen);
1938 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001939 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001940 return -1;
1941 }
1942 for (cur = start, i = 0; i < slicelen;
1943 cur += step, i++)
1944 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1945 }
1946
1947 if (newlen < slicelen) {
1948 /* delete slice */
1949 for (i = stop; i < self->extra->length; i++)
1950 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1951 } else if (newlen > slicelen) {
1952 /* insert slice */
1953 for (i = self->extra->length-1; i >= stop; i--)
1954 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1955 }
1956
1957 /* replace the slice */
1958 for (cur = start, i = 0; i < newlen;
1959 cur += step, i++) {
1960 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1961 Py_INCREF(element);
1962 self->extra->children[cur] = element;
1963 }
1964
1965 self->extra->length += newlen - slicelen;
1966
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001967 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001968
1969 /* discard the recycle bin, and everything in it */
1970 Py_XDECREF(recycle);
1971
1972 return 0;
1973 }
1974 else {
1975 PyErr_SetString(PyExc_TypeError,
1976 "element indices must be integers");
1977 return -1;
1978 }
1979}
1980
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001981static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001982element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001983{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001984 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001985 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001986 return res;
1987}
1988
Serhiy Storchakadde08152015-11-25 15:28:13 +02001989static PyObject*
1990element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001991{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001992 PyObject *res = element_get_text(self);
1993 Py_XINCREF(res);
1994 return res;
1995}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001996
Serhiy Storchakadde08152015-11-25 15:28:13 +02001997static PyObject*
1998element_tail_getter(ElementObject *self, void *closure)
1999{
2000 PyObject *res = element_get_tail(self);
2001 Py_XINCREF(res);
2002 return res;
2003}
2004
2005static PyObject*
2006element_attrib_getter(ElementObject *self, void *closure)
2007{
2008 PyObject *res;
2009 if (!self->extra) {
2010 if (create_extra(self, NULL) < 0)
2011 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002012 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02002013 res = element_get_attrib(self);
2014 Py_XINCREF(res);
2015 return res;
2016}
Victor Stinner4d463432013-07-11 23:05:03 +02002017
/* macro for setter validation: a setter is called with a NULL value when
   the attribute is being deleted, which elements do not support.  In that
   case AttributeError is set and the *enclosing function* returns -1.
   Wrapped in do { } while (0) so that the macro behaves as one statement
   and must be followed by a semicolon (safe inside unbraced if/else). */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2026
Serhiy Storchakadde08152015-11-25 15:28:13 +02002027static int
2028element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2029{
2030 _VALIDATE_ATTR_VALUE(value);
2031 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002032 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002033 return 0;
2034}
2035
2036static int
2037element_text_setter(ElementObject *self, PyObject *value, void *closure)
2038{
2039 _VALIDATE_ATTR_VALUE(value);
2040 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002041 _set_joined_ptr(&self->text, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002042 return 0;
2043}
2044
2045static int
2046element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2047{
2048 _VALIDATE_ATTR_VALUE(value);
2049 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002050 _set_joined_ptr(&self->tail, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002051 return 0;
2052}
2053
2054static int
2055element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2056{
2057 _VALIDATE_ATTR_VALUE(value);
2058 if (!self->extra) {
2059 if (create_extra(self, NULL) < 0)
2060 return -1;
2061 }
2062 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002063 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002064 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002065}
2066
2067static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002068 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002069 0, /* sq_concat */
2070 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002071 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002072 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002073 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002074 0,
2075};
2076
Eli Bendersky64d11e62012-06-15 07:42:50 +03002077/******************************* Element iterator ****************************/
2078
2079/* ElementIterObject represents the iteration state over an XML element in
2080 * pre-order traversal. To keep track of which sub-element should be returned
2081 * next, a stack of parents is maintained. This is a standard stack-based
2082 * iterative pre-order traversal of a tree.
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002083 * The stack is managed using a continuous array.
2084 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002085 * the current one is exhausted, and the next child to examine in that parent.
2086 */
2087typedef struct ParentLocator_t {
2088 ElementObject *parent;
2089 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002090} ParentLocator;
2091
2092typedef struct {
2093 PyObject_HEAD
2094 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002095 Py_ssize_t parent_stack_used;
2096 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002097 ElementObject *root_element;
2098 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002099 int gettext;
2100} ElementIterObject;
2101
2102
2103static void
2104elementiter_dealloc(ElementIterObject *it)
2105{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002106 Py_ssize_t i = it->parent_stack_used;
2107 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002108 /* bpo-31095: UnTrack is needed before calling any callbacks */
2109 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002110 while (i--)
2111 Py_XDECREF(it->parent_stack[i].parent);
2112 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002113
2114 Py_XDECREF(it->sought_tag);
2115 Py_XDECREF(it->root_element);
2116
Eli Bendersky64d11e62012-06-15 07:42:50 +03002117 PyObject_GC_Del(it);
2118}
2119
2120static int
2121elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2122{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002123 Py_ssize_t i = it->parent_stack_used;
2124 while (i--)
2125 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002126
2127 Py_VISIT(it->root_element);
2128 Py_VISIT(it->sought_tag);
2129 return 0;
2130}
2131
2132/* Helper function for elementiter_next. Add a new parent to the parent stack.
2133 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002134static int
2135parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002136{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002137 ParentLocator *item;
2138
2139 if (it->parent_stack_used >= it->parent_stack_size) {
2140 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2141 ParentLocator *parent_stack = it->parent_stack;
2142 PyMem_Resize(parent_stack, ParentLocator, new_size);
2143 if (parent_stack == NULL)
2144 return -1;
2145 it->parent_stack = parent_stack;
2146 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002147 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002148 item = it->parent_stack + it->parent_stack_used++;
2149 Py_INCREF(parent);
2150 item->parent = parent;
2151 item->child_index = 0;
2152 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002153}
2154
2155static PyObject *
2156elementiter_next(ElementIterObject *it)
2157{
2158 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002159 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002160 * A short note on gettext: this function serves both the iter() and
2161 * itertext() methods to avoid code duplication. However, there are a few
2162 * small differences in the way these iterations work. Namely:
2163 * - itertext() only yields text from nodes that have it, and continues
2164 * iterating when a node doesn't have text (so it doesn't return any
2165 * node like iter())
2166 * - itertext() also has to handle tail, after finishing with all the
2167 * children of a node.
2168 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002169 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002170 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002171 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002172
2173 while (1) {
2174 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002175 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002176 * iterator is exhausted.
2177 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002178 if (!it->parent_stack_used) {
2179 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002180 PyErr_SetNone(PyExc_StopIteration);
2181 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002182 }
2183
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002184 elem = it->root_element; /* steals a reference */
2185 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002186 }
2187 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002188 /* See if there are children left to traverse in the current parent. If
2189 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002190 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002191 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2192 Py_ssize_t child_index = item->child_index;
2193 ElementObjectExtra *extra;
2194 elem = item->parent;
2195 extra = elem->extra;
2196 if (!extra || child_index >= extra->length) {
2197 it->parent_stack_used--;
2198 /* Note that extra condition on it->parent_stack_used here;
2199 * this is because itertext() is supposed to only return *inner*
2200 * text, not text following the element it began iteration with.
2201 */
2202 if (it->gettext && it->parent_stack_used) {
2203 text = element_get_tail(elem);
2204 goto gettext;
2205 }
2206 Py_DECREF(elem);
2207 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002208 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002209
Serhiy Storchakab11c5662018-10-14 10:32:19 +03002210 if (!Element_Check(extra->children[child_index])) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002211 PyErr_Format(PyExc_AttributeError,
2212 "'%.100s' object has no attribute 'iter'",
2213 Py_TYPE(extra->children[child_index])->tp_name);
2214 return NULL;
2215 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002216 elem = (ElementObject *)extra->children[child_index];
2217 item->child_index++;
2218 Py_INCREF(elem);
2219 }
2220
2221 if (parent_stack_push_new(it, elem) < 0) {
2222 Py_DECREF(elem);
2223 PyErr_NoMemory();
2224 return NULL;
2225 }
2226 if (it->gettext) {
2227 text = element_get_text(elem);
2228 goto gettext;
2229 }
2230
2231 if (it->sought_tag == Py_None)
2232 return (PyObject *)elem;
2233
2234 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2235 if (rc > 0)
2236 return (PyObject *)elem;
2237
2238 Py_DECREF(elem);
2239 if (rc < 0)
2240 return NULL;
2241 continue;
2242
2243gettext:
2244 if (!text) {
2245 Py_DECREF(elem);
2246 return NULL;
2247 }
2248 if (text == Py_None) {
2249 Py_DECREF(elem);
2250 }
2251 else {
2252 Py_INCREF(text);
2253 Py_DECREF(elem);
2254 rc = PyObject_IsTrue(text);
2255 if (rc > 0)
2256 return text;
2257 Py_DECREF(text);
2258 if (rc < 0)
2259 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002260 }
2261 }
2262
2263 return NULL;
2264}
2265
2266
2267static PyTypeObject ElementIter_Type = {
2268 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002269 /* Using the module's name since the pure-Python implementation does not
2270 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002271 "_elementtree._element_iterator", /* tp_name */
2272 sizeof(ElementIterObject), /* tp_basicsize */
2273 0, /* tp_itemsize */
2274 /* methods */
2275 (destructor)elementiter_dealloc, /* tp_dealloc */
2276 0, /* tp_print */
2277 0, /* tp_getattr */
2278 0, /* tp_setattr */
2279 0, /* tp_reserved */
2280 0, /* tp_repr */
2281 0, /* tp_as_number */
2282 0, /* tp_as_sequence */
2283 0, /* tp_as_mapping */
2284 0, /* tp_hash */
2285 0, /* tp_call */
2286 0, /* tp_str */
2287 0, /* tp_getattro */
2288 0, /* tp_setattro */
2289 0, /* tp_as_buffer */
2290 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2291 0, /* tp_doc */
2292 (traverseproc)elementiter_traverse, /* tp_traverse */
2293 0, /* tp_clear */
2294 0, /* tp_richcompare */
2295 0, /* tp_weaklistoffset */
2296 PyObject_SelfIter, /* tp_iter */
2297 (iternextfunc)elementiter_next, /* tp_iternext */
2298 0, /* tp_methods */
2299 0, /* tp_members */
2300 0, /* tp_getset */
2301 0, /* tp_base */
2302 0, /* tp_dict */
2303 0, /* tp_descr_get */
2304 0, /* tp_descr_set */
2305 0, /* tp_dictoffset */
2306 0, /* tp_init */
2307 0, /* tp_alloc */
2308 0, /* tp_new */
2309};
2310
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002311#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002312
2313static PyObject *
2314create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2315{
2316 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002317
2318 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2319 if (!it)
2320 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002321
Victor Stinner4d463432013-07-11 23:05:03 +02002322 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002323 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002324 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002325 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002326 it->root_element = self;
2327
Eli Bendersky64d11e62012-06-15 07:42:50 +03002328 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002329
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002330 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002331 if (it->parent_stack == NULL) {
2332 Py_DECREF(it);
2333 PyErr_NoMemory();
2334 return NULL;
2335 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002336 it->parent_stack_used = 0;
2337 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002338
Eli Bendersky64d11e62012-06-15 07:42:50 +03002339 return (PyObject *)it;
2340}
2341
2342
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002343/* ==================================================================== */
2344/* the tree builder type */
2345
2346typedef struct {
2347 PyObject_HEAD
2348
Eli Bendersky58d548d2012-05-29 15:45:16 +03002349 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002350
Antoine Pitrouee329312012-10-04 19:53:29 +02002351 PyObject *this; /* current node */
2352 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002353
Eli Bendersky58d548d2012-05-29 15:45:16 +03002354 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002355
Eli Bendersky58d548d2012-05-29 15:45:16 +03002356 PyObject *stack; /* element stack */
2357 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002358
Eli Bendersky48d358b2012-05-30 17:57:50 +03002359 PyObject *element_factory;
2360
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002361 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002362 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002363 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2364 PyObject *end_event_obj;
2365 PyObject *start_ns_event_obj;
2366 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002367} TreeBuilderObject;
2368
/* True only when `op` is an instance of TreeBuilder_Type itself; instances
   of subclasses do not qualify. */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002370
2371/* -------------------------------------------------------------------- */
2372/* constructor and destructor */
2373
Eli Bendersky58d548d2012-05-29 15:45:16 +03002374static PyObject *
2375treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002376{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002377 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2378 if (t != NULL) {
2379 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002380
Eli Bendersky58d548d2012-05-29 15:45:16 +03002381 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002382 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002383 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002384 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002385
Eli Bendersky58d548d2012-05-29 15:45:16 +03002386 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002387 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002388 t->stack = PyList_New(20);
2389 if (!t->stack) {
2390 Py_DECREF(t->this);
2391 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002392 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002393 return NULL;
2394 }
2395 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002396
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002397 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002398 t->start_event_obj = t->end_event_obj = NULL;
2399 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2400 }
2401 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002402}
2403
Serhiy Storchakacb985562015-05-04 15:32:48 +03002404/*[clinic input]
2405_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002406
Serhiy Storchakacb985562015-05-04 15:32:48 +03002407 element_factory: object = NULL
2408
2409[clinic start generated code]*/
2410
2411static int
2412_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2413 PyObject *element_factory)
2414/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2415{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002416 if (element_factory) {
2417 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002418 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002419 }
2420
Eli Bendersky58d548d2012-05-29 15:45:16 +03002421 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002422}
2423
Eli Bendersky48d358b2012-05-30 17:57:50 +03002424static int
2425treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2426{
2427 Py_VISIT(self->root);
2428 Py_VISIT(self->this);
2429 Py_VISIT(self->last);
2430 Py_VISIT(self->data);
2431 Py_VISIT(self->stack);
2432 Py_VISIT(self->element_factory);
2433 return 0;
2434}
2435
2436static int
2437treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002438{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002439 Py_CLEAR(self->end_ns_event_obj);
2440 Py_CLEAR(self->start_ns_event_obj);
2441 Py_CLEAR(self->end_event_obj);
2442 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002443 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002444 Py_CLEAR(self->stack);
2445 Py_CLEAR(self->data);
2446 Py_CLEAR(self->last);
2447 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002448 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002449 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002450 return 0;
2451}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002452
Eli Bendersky48d358b2012-05-30 17:57:50 +03002453static void
2454treebuilder_dealloc(TreeBuilderObject *self)
2455{
2456 PyObject_GC_UnTrack(self);
2457 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002458 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002459}
2460
2461/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002462/* helpers for handling of arbitrary element-like objects */
2463
2464static int
Serhiy Storchaka576def02017-03-30 09:47:31 +03002465treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
Antoine Pitrouee329312012-10-04 19:53:29 +02002466 PyObject **dest, _Py_Identifier *name)
2467{
2468 if (Element_CheckExact(element)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002469 PyObject *tmp = JOIN_OBJ(*dest);
2470 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2471 *data = NULL;
2472 Py_DECREF(tmp);
Antoine Pitrouee329312012-10-04 19:53:29 +02002473 return 0;
2474 }
2475 else {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002476 PyObject *joined = list_join(*data);
Antoine Pitrouee329312012-10-04 19:53:29 +02002477 int r;
2478 if (joined == NULL)
2479 return -1;
2480 r = _PyObject_SetAttrId(element, name, joined);
2481 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002482 if (r < 0)
2483 return -1;
2484 Py_CLEAR(*data);
2485 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002486 }
2487}
2488
Serhiy Storchaka576def02017-03-30 09:47:31 +03002489LOCAL(int)
2490treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002491{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002492 PyObject *element = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002493
Serhiy Storchaka576def02017-03-30 09:47:31 +03002494 if (!self->data) {
2495 return 0;
2496 }
2497
2498 if (self->this == element) {
2499 _Py_IDENTIFIER(text);
2500 return treebuilder_set_element_text_or_tail(
2501 element, &self->data,
2502 &((ElementObject *) element)->text, &PyId_text);
2503 }
2504 else {
2505 _Py_IDENTIFIER(tail);
2506 return treebuilder_set_element_text_or_tail(
2507 element, &self->data,
2508 &((ElementObject *) element)->tail, &PyId_tail);
2509 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002510}
2511
2512static int
2513treebuilder_add_subelement(PyObject *element, PyObject *child)
2514{
2515 _Py_IDENTIFIER(append);
2516 if (Element_CheckExact(element)) {
2517 ElementObject *elem = (ElementObject *) element;
2518 return element_add_subelement(elem, child);
2519 }
2520 else {
2521 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002522 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002523 if (res == NULL)
2524 return -1;
2525 Py_DECREF(res);
2526 return 0;
2527 }
2528}
2529
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002530LOCAL(int)
2531treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2532 PyObject *node)
2533{
2534 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002535 PyObject *res;
2536 PyObject *event = PyTuple_Pack(2, action, node);
2537 if (event == NULL)
2538 return -1;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002539 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002540 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002541 if (res == NULL)
2542 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002543 Py_DECREF(res);
2544 }
2545 return 0;
2546}
2547
Antoine Pitrouee329312012-10-04 19:53:29 +02002548/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002549/* handlers */
2550
2551LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002552treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2553 PyObject* attrib)
2554{
2555 PyObject* node;
2556 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002557 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002558
Serhiy Storchaka576def02017-03-30 09:47:31 +03002559 if (treebuilder_flush_data(self) < 0) {
2560 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002561 }
2562
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002563 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002564 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002565 } else if (attrib == Py_None) {
2566 attrib = PyDict_New();
2567 if (!attrib)
2568 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002569 node = PyObject_CallFunctionObjArgs(self->element_factory,
2570 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002571 Py_DECREF(attrib);
2572 }
2573 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002574 node = PyObject_CallFunctionObjArgs(self->element_factory,
2575 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002576 }
2577 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002578 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002579 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002580
Antoine Pitrouee329312012-10-04 19:53:29 +02002581 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002582
2583 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002584 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002585 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002586 } else {
2587 if (self->root) {
2588 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002589 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002590 "multiple elements on top level"
2591 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002592 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002593 }
2594 Py_INCREF(node);
2595 self->root = node;
2596 }
2597
2598 if (self->index < PyList_GET_SIZE(self->stack)) {
2599 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002600 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002601 Py_INCREF(this);
2602 } else {
2603 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002604 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002605 }
2606 self->index++;
2607
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002608 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002609 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002610 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002611 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002612
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002613 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2614 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002615
2616 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002617
2618 error:
2619 Py_DECREF(node);
2620 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002621}
2622
2623LOCAL(PyObject*)
2624treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2625{
2626 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002627 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002628 /* ignore calls to data before the first call to start */
2629 Py_RETURN_NONE;
2630 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002631 /* store the first item as is */
2632 Py_INCREF(data); self->data = data;
2633 } else {
2634 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002635 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2636 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002637 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002638 /* expat often generates single character data sections; handle
2639 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002640 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2641 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002642 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002643 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002644 } else if (PyList_CheckExact(self->data)) {
2645 if (PyList_Append(self->data, data) < 0)
2646 return NULL;
2647 } else {
2648 PyObject* list = PyList_New(2);
2649 if (!list)
2650 return NULL;
2651 PyList_SET_ITEM(list, 0, self->data);
2652 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2653 self->data = list;
2654 }
2655 }
2656
2657 Py_RETURN_NONE;
2658}
2659
2660LOCAL(PyObject*)
2661treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2662{
2663 PyObject* item;
2664
Serhiy Storchaka576def02017-03-30 09:47:31 +03002665 if (treebuilder_flush_data(self) < 0) {
2666 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002667 }
2668
2669 if (self->index == 0) {
2670 PyErr_SetString(
2671 PyExc_IndexError,
2672 "pop from empty stack"
2673 );
2674 return NULL;
2675 }
2676
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002677 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002678 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002679 self->index--;
2680 self->this = PyList_GET_ITEM(self->stack, self->index);
2681 Py_INCREF(self->this);
2682 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002683
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002684 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2685 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002686
2687 Py_INCREF(self->last);
2688 return (PyObject*) self->last;
2689}
2690
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002691/* -------------------------------------------------------------------- */
2692/* methods (in alphabetical order) */
2693
Serhiy Storchakacb985562015-05-04 15:32:48 +03002694/*[clinic input]
2695_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002696
Serhiy Storchakacb985562015-05-04 15:32:48 +03002697 data: object
2698 /
2699
2700[clinic start generated code]*/
2701
2702static PyObject *
2703_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2704/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2705{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002706 return treebuilder_handle_data(self, data);
2707}
2708
Serhiy Storchakacb985562015-05-04 15:32:48 +03002709/*[clinic input]
2710_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002711
Serhiy Storchakacb985562015-05-04 15:32:48 +03002712 tag: object
2713 /
2714
2715[clinic start generated code]*/
2716
2717static PyObject *
2718_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2719/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2720{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002721 return treebuilder_handle_end(self, tag);
2722}
2723
2724LOCAL(PyObject*)
2725treebuilder_done(TreeBuilderObject* self)
2726{
2727 PyObject* res;
2728
2729 /* FIXME: check stack size? */
2730
2731 if (self->root)
2732 res = self->root;
2733 else
2734 res = Py_None;
2735
2736 Py_INCREF(res);
2737 return res;
2738}
2739
Serhiy Storchakacb985562015-05-04 15:32:48 +03002740/*[clinic input]
2741_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002742
Serhiy Storchakacb985562015-05-04 15:32:48 +03002743[clinic start generated code]*/
2744
2745static PyObject *
2746_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2747/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2748{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002749 return treebuilder_done(self);
2750}
2751
Serhiy Storchakacb985562015-05-04 15:32:48 +03002752/*[clinic input]
2753_elementtree.TreeBuilder.start
2754
2755 tag: object
2756 attrs: object = None
2757 /
2758
2759[clinic start generated code]*/
2760
2761static PyObject *
2762_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2763 PyObject *attrs)
2764/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002765{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002766 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002767}
2768
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002769/* ==================================================================== */
2770/* the expat interface */
2771
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002772#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002773#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002774
/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
 * cached globally without being in per-module state.
 */
static struct PyExpat_CAPI *expat_capi;
/* EXPAT(func) names the member `func` of the cached dispatch table, e.g.
   EXPAT(ParserCreate_MM)(...). */
#define EXPAT(func) (expat_capi->func)

/* Allocator suite handed to EXPAT(ParserCreate_MM) when a parser is
   created: malloc, realloc and free, in that order, all backed by the
   Python object allocator. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2783
/* Instance layout of the XMLParser type.  All object members start out
   as NULL (see xmlparser_new). */
typedef struct {
    PyObject_HEAD

    /* expat parser handle */
    XML_Parser parser;

    /* object receiving the parse events; an exact TreeBuilder is driven
       through direct C calls instead of the handle_* callables below */
    PyObject *target;
    /* mapping consulted by expat_default_handler to resolve "&name;"
       references (keyed by the decoded name) */
    PyObject *entity;

    /* cache used by makeuniversal: raw UTF-8 name (bytes) -> decoded,
       universal name (str) */
    PyObject *names;

    /* callables invoked by the expat handlers; a NULL member means the
       corresponding event is not forwarded */
    PyObject *handle_start;     /* called with (tag, attrib) */
    PyObject *handle_data;      /* called with (data) */
    PyObject *handle_end;       /* called with (tag) */

    PyObject *handle_comment;   /* called with (comment) */
    PyObject *handle_pi;        /* called with (target, data) */
    PyObject *handle_doctype;   /* called with (name, pubid, sysid) */

    /* NOTE(review): not used in this part of the file; presumably the
       target's close() -- confirm where it is assigned */
    PyObject *handle_close;

} XMLParserObject;
2805
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002806/* helpers */
2807
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002808LOCAL(PyObject*)
2809makeuniversal(XMLParserObject* self, const char* string)
2810{
2811 /* convert a UTF-8 tag/attribute name from the expat parser
2812 to a universal name string */
2813
Antoine Pitrouc1948842012-10-01 23:40:37 +02002814 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002815 PyObject* key;
2816 PyObject* value;
2817
2818 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002819 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002820 if (!key)
2821 return NULL;
2822
2823 value = PyDict_GetItem(self->names, key);
2824
2825 if (value) {
2826 Py_INCREF(value);
2827 } else {
2828 /* new name. convert to universal name, and decode as
2829 necessary */
2830
2831 PyObject* tag;
2832 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002833 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002834
2835 /* look for namespace separator */
2836 for (i = 0; i < size; i++)
2837 if (string[i] == '}')
2838 break;
2839 if (i != size) {
2840 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002841 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002842 if (tag == NULL) {
2843 Py_DECREF(key);
2844 return NULL;
2845 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002846 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002847 p[0] = '{';
2848 memcpy(p+1, string, size);
2849 size++;
2850 } else {
2851 /* plain name; use key as tag */
2852 Py_INCREF(key);
2853 tag = key;
2854 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002855
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002856 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002857 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002858 value = PyUnicode_DecodeUTF8(p, size, "strict");
2859 Py_DECREF(tag);
2860 if (!value) {
2861 Py_DECREF(key);
2862 return NULL;
2863 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002864
2865 /* add to names dictionary */
2866 if (PyDict_SetItem(self->names, key, value) < 0) {
2867 Py_DECREF(key);
2868 Py_DECREF(value);
2869 return NULL;
2870 }
2871 }
2872
2873 Py_DECREF(key);
2874 return value;
2875}
2876
Eli Bendersky5b77d812012-03-16 08:20:05 +02002877/* Set the ParseError exception with the given parameters.
2878 * If message is not NULL, it's used as the error string. Otherwise, the
2879 * message string is the default for the given error_code.
2880*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002881static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002882expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2883 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002884{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002885 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002886 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002887
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002888 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002889 message ? message : EXPAT(ErrorString)(error_code),
2890 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002891 if (errmsg == NULL)
2892 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002893
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002894 error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002895 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002896 if (!error)
2897 return;
2898
Eli Bendersky5b77d812012-03-16 08:20:05 +02002899 /* Add code and position attributes */
2900 code = PyLong_FromLong((long)error_code);
2901 if (!code) {
2902 Py_DECREF(error);
2903 return;
2904 }
2905 if (PyObject_SetAttrString(error, "code", code) == -1) {
2906 Py_DECREF(error);
2907 Py_DECREF(code);
2908 return;
2909 }
2910 Py_DECREF(code);
2911
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002912 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002913 if (!position) {
2914 Py_DECREF(error);
2915 return;
2916 }
2917 if (PyObject_SetAttrString(error, "position", position) == -1) {
2918 Py_DECREF(error);
2919 Py_DECREF(position);
2920 return;
2921 }
2922 Py_DECREF(position);
2923
Eli Bendersky532d03e2013-08-10 08:00:39 -07002924 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002925 Py_DECREF(error);
2926}
2927
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002928/* -------------------------------------------------------------------- */
2929/* handlers */
2930
2931static void
2932expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2933 int data_len)
2934{
2935 PyObject* key;
2936 PyObject* value;
2937 PyObject* res;
2938
2939 if (data_len < 2 || data_in[0] != '&')
2940 return;
2941
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002942 if (PyErr_Occurred())
2943 return;
2944
Neal Norwitz0269b912007-08-08 06:56:02 +00002945 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002946 if (!key)
2947 return;
2948
2949 value = PyDict_GetItem(self->entity, key);
2950
2951 if (value) {
2952 if (TreeBuilder_CheckExact(self->target))
2953 res = treebuilder_handle_data(
2954 (TreeBuilderObject*) self->target, value
2955 );
2956 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002957 res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002958 else
2959 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002960 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002961 } else if (!PyErr_Occurred()) {
2962 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002963 char message[128] = "undefined entity ";
2964 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002965 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002966 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002967 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002968 EXPAT(GetErrorColumnNumber)(self->parser),
2969 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002970 );
2971 }
2972
2973 Py_DECREF(key);
2974}
2975
2976static void
2977expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2978 const XML_Char **attrib_in)
2979{
2980 PyObject* res;
2981 PyObject* tag;
2982 PyObject* attrib;
2983 int ok;
2984
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002985 if (PyErr_Occurred())
2986 return;
2987
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002988 /* tag name */
2989 tag = makeuniversal(self, tag_in);
2990 if (!tag)
2991 return; /* parser will look for errors */
2992
2993 /* attributes */
2994 if (attrib_in[0]) {
2995 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002996 if (!attrib) {
2997 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002998 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002999 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003000 while (attrib_in[0] && attrib_in[1]) {
3001 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00003002 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003003 if (!key || !value) {
3004 Py_XDECREF(value);
3005 Py_XDECREF(key);
3006 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003007 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003008 return;
3009 }
3010 ok = PyDict_SetItem(attrib, key, value);
3011 Py_DECREF(value);
3012 Py_DECREF(key);
3013 if (ok < 0) {
3014 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003015 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003016 return;
3017 }
3018 attrib_in += 2;
3019 }
3020 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003021 Py_INCREF(Py_None);
3022 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03003023 }
3024
3025 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003026 /* shortcut */
3027 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3028 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003029 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003030 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003031 if (attrib == Py_None) {
3032 Py_DECREF(attrib);
3033 attrib = PyDict_New();
3034 if (!attrib) {
3035 Py_DECREF(tag);
3036 return;
3037 }
3038 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003039 res = PyObject_CallFunctionObjArgs(self->handle_start,
3040 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003041 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003042 res = NULL;
3043
3044 Py_DECREF(tag);
3045 Py_DECREF(attrib);
3046
3047 Py_XDECREF(res);
3048}
3049
3050static void
3051expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3052 int data_len)
3053{
3054 PyObject* data;
3055 PyObject* res;
3056
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003057 if (PyErr_Occurred())
3058 return;
3059
Neal Norwitz0269b912007-08-08 06:56:02 +00003060 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003061 if (!data)
3062 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003063
3064 if (TreeBuilder_CheckExact(self->target))
3065 /* shortcut */
3066 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3067 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003068 res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003069 else
3070 res = NULL;
3071
3072 Py_DECREF(data);
3073
3074 Py_XDECREF(res);
3075}
3076
3077static void
3078expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3079{
3080 PyObject* tag;
3081 PyObject* res = NULL;
3082
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003083 if (PyErr_Occurred())
3084 return;
3085
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003086 if (TreeBuilder_CheckExact(self->target))
3087 /* shortcut */
3088 /* the standard tree builder doesn't look at the end tag */
3089 res = treebuilder_handle_end(
3090 (TreeBuilderObject*) self->target, Py_None
3091 );
3092 else if (self->handle_end) {
3093 tag = makeuniversal(self, tag_in);
3094 if (tag) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003095 res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003096 Py_DECREF(tag);
3097 }
3098 }
3099
3100 Py_XDECREF(res);
3101}
3102
3103static void
3104expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3105 const XML_Char *uri)
3106{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003107 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3108 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003109
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003110 if (PyErr_Occurred())
3111 return;
3112
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003113 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003114 return;
3115
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003116 if (!uri)
3117 uri = "";
3118 if (!prefix)
3119 prefix = "";
3120
3121 parcel = Py_BuildValue("ss", prefix, uri);
3122 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003123 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003124 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3125 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003126}
3127
3128static void
3129expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3130{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003131 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3132
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003133 if (PyErr_Occurred())
3134 return;
3135
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003136 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003137 return;
3138
3139 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003140}
3141
3142static void
3143expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3144{
3145 PyObject* comment;
3146 PyObject* res;
3147
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003148 if (PyErr_Occurred())
3149 return;
3150
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003151 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003152 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003153 if (comment) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003154 res = PyObject_CallFunctionObjArgs(self->handle_comment,
3155 comment, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003156 Py_XDECREF(res);
3157 Py_DECREF(comment);
3158 }
3159 }
3160}
3161
Eli Bendersky45839902013-01-13 05:14:47 -08003162static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003163expat_start_doctype_handler(XMLParserObject *self,
3164 const XML_Char *doctype_name,
3165 const XML_Char *sysid,
3166 const XML_Char *pubid,
3167 int has_internal_subset)
3168{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003169 _Py_IDENTIFIER(doctype);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003170 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003171 PyObject *res;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003172
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003173 if (PyErr_Occurred())
3174 return;
3175
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003176 doctype_name_obj = makeuniversal(self, doctype_name);
3177 if (!doctype_name_obj)
3178 return;
3179
3180 if (sysid) {
3181 sysid_obj = makeuniversal(self, sysid);
3182 if (!sysid_obj) {
3183 Py_DECREF(doctype_name_obj);
3184 return;
3185 }
3186 } else {
3187 Py_INCREF(Py_None);
3188 sysid_obj = Py_None;
3189 }
3190
3191 if (pubid) {
3192 pubid_obj = makeuniversal(self, pubid);
3193 if (!pubid_obj) {
3194 Py_DECREF(doctype_name_obj);
3195 Py_DECREF(sysid_obj);
3196 return;
3197 }
3198 } else {
3199 Py_INCREF(Py_None);
3200 pubid_obj = Py_None;
3201 }
3202
3203 /* If the target has a handler for doctype, call it. */
3204 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003205 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3206 doctype_name_obj, pubid_obj,
3207 sysid_obj, NULL);
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003208 Py_XDECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003209 }
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003210 else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3211 (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3212 "The doctype() method of XMLParser is ignored. "
3213 "Define doctype() method on the TreeBuilder target.",
3214 1);
Serhiy Storchakaee98e7b2018-07-25 14:52:45 +03003215 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003216 }
3217
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003218 Py_DECREF(doctype_name_obj);
3219 Py_DECREF(pubid_obj);
3220 Py_DECREF(sysid_obj);
3221}
3222
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003223static void
3224expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3225 const XML_Char* data_in)
3226{
3227 PyObject* target;
3228 PyObject* data;
3229 PyObject* res;
3230
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003231 if (PyErr_Occurred())
3232 return;
3233
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003234 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003235 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3236 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003237 if (target && data) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003238 res = PyObject_CallFunctionObjArgs(self->handle_pi,
3239 target, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003240 Py_XDECREF(res);
3241 Py_DECREF(data);
3242 Py_DECREF(target);
3243 } else {
3244 Py_XDECREF(data);
3245 Py_XDECREF(target);
3246 }
3247 }
3248}
3249
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003250/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003251
Eli Bendersky52467b12012-06-01 07:13:08 +03003252static PyObject *
3253xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003254{
Eli Bendersky52467b12012-06-01 07:13:08 +03003255 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3256 if (self) {
3257 self->parser = NULL;
3258 self->target = self->entity = self->names = NULL;
3259 self->handle_start = self->handle_data = self->handle_end = NULL;
3260 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003261 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003262 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003263 return (PyObject *)self;
3264}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003265
scoderc8d8e152017-09-14 22:00:03 +02003266static int
3267ignore_attribute_error(PyObject *value)
3268{
3269 if (value == NULL) {
3270 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3271 return -1;
3272 }
3273 PyErr_Clear();
3274 }
3275 return 0;
3276}
3277
Serhiy Storchakacb985562015-05-04 15:32:48 +03003278/*[clinic input]
3279_elementtree.XMLParser.__init__
3280
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003281 *
Serhiy Storchakacb985562015-05-04 15:32:48 +03003282 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003283 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003284
3285[clinic start generated code]*/
3286
Eli Bendersky52467b12012-06-01 07:13:08 +03003287static int
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003288_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3289 const char *encoding)
3290/*[clinic end generated code: output=3ae45ec6cdf344e4 input=96288fcba916cfce]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003291{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003292 self->entity = PyDict_New();
3293 if (!self->entity)
3294 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003295
Serhiy Storchakacb985562015-05-04 15:32:48 +03003296 self->names = PyDict_New();
3297 if (!self->names) {
3298 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003299 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003300 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003301
Serhiy Storchakacb985562015-05-04 15:32:48 +03003302 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3303 if (!self->parser) {
3304 Py_CLEAR(self->entity);
3305 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003306 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003307 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003308 }
Christian Heimescb5778f2018-09-18 14:38:58 +02003309 /* expat < 2.1.0 has no XML_SetHashSalt() */
3310 if (EXPAT(SetHashSalt) != NULL) {
3311 EXPAT(SetHashSalt)(self->parser,
3312 (unsigned long)_Py_HashSecret.expat.hashsalt);
3313 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003314
Eli Bendersky52467b12012-06-01 07:13:08 +03003315 if (target) {
3316 Py_INCREF(target);
3317 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003318 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003319 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003320 Py_CLEAR(self->entity);
3321 Py_CLEAR(self->names);
3322 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003323 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003324 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003325 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003326 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003327
Serhiy Storchakacb985562015-05-04 15:32:48 +03003328 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003329 if (ignore_attribute_error(self->handle_start)) {
3330 return -1;
3331 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003332 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003333 if (ignore_attribute_error(self->handle_data)) {
3334 return -1;
3335 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003336 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003337 if (ignore_attribute_error(self->handle_end)) {
3338 return -1;
3339 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003340 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003341 if (ignore_attribute_error(self->handle_comment)) {
3342 return -1;
3343 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003344 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003345 if (ignore_attribute_error(self->handle_pi)) {
3346 return -1;
3347 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003348 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003349 if (ignore_attribute_error(self->handle_close)) {
3350 return -1;
3351 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003352 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003353 if (ignore_attribute_error(self->handle_doctype)) {
3354 return -1;
3355 }
Eli Bendersky45839902013-01-13 05:14:47 -08003356
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003357 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003358 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003359 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003360 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003361 (XML_StartElementHandler) expat_start_handler,
3362 (XML_EndElementHandler) expat_end_handler
3363 );
3364 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003365 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003366 (XML_DefaultHandler) expat_default_handler
3367 );
3368 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003369 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003370 (XML_CharacterDataHandler) expat_data_handler
3371 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003372 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003373 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003374 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003375 (XML_CommentHandler) expat_comment_handler
3376 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003377 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003378 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003379 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003380 (XML_ProcessingInstructionHandler) expat_pi_handler
3381 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003382 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003383 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003384 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3385 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003386 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003387 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003388 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003389 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003390
Eli Bendersky52467b12012-06-01 07:13:08 +03003391 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003392}
3393
Eli Bendersky52467b12012-06-01 07:13:08 +03003394static int
3395xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3396{
3397 Py_VISIT(self->handle_close);
3398 Py_VISIT(self->handle_pi);
3399 Py_VISIT(self->handle_comment);
3400 Py_VISIT(self->handle_end);
3401 Py_VISIT(self->handle_data);
3402 Py_VISIT(self->handle_start);
3403
3404 Py_VISIT(self->target);
3405 Py_VISIT(self->entity);
3406 Py_VISIT(self->names);
3407
3408 return 0;
3409}
3410
3411static int
3412xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003413{
Victor Stinnere727d412017-09-18 05:29:37 -07003414 if (self->parser != NULL) {
3415 XML_Parser parser = self->parser;
3416 self->parser = NULL;
3417 EXPAT(ParserFree)(parser);
3418 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003419
Antoine Pitrouc1948842012-10-01 23:40:37 +02003420 Py_CLEAR(self->handle_close);
3421 Py_CLEAR(self->handle_pi);
3422 Py_CLEAR(self->handle_comment);
3423 Py_CLEAR(self->handle_end);
3424 Py_CLEAR(self->handle_data);
3425 Py_CLEAR(self->handle_start);
3426 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003427
Antoine Pitrouc1948842012-10-01 23:40:37 +02003428 Py_CLEAR(self->target);
3429 Py_CLEAR(self->entity);
3430 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003431
Eli Bendersky52467b12012-06-01 07:13:08 +03003432 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003433}
3434
Eli Bendersky52467b12012-06-01 07:13:08 +03003435static void
3436xmlparser_dealloc(XMLParserObject* self)
3437{
3438 PyObject_GC_UnTrack(self);
3439 xmlparser_gc_clear(self);
3440 Py_TYPE(self)->tp_free((PyObject *)self);
3441}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003442
3443LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003444expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003445{
3446 int ok;
3447
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003448 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003449 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3450
3451 if (PyErr_Occurred())
3452 return NULL;
3453
3454 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003455 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003456 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003457 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003458 EXPAT(GetErrorColumnNumber)(self->parser),
3459 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003460 );
3461 return NULL;
3462 }
3463
3464 Py_RETURN_NONE;
3465}
3466
Serhiy Storchakacb985562015-05-04 15:32:48 +03003467/*[clinic input]
3468_elementtree.XMLParser.close
3469
3470[clinic start generated code]*/
3471
3472static PyObject *
3473_elementtree_XMLParser_close_impl(XMLParserObject *self)
3474/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003475{
3476 /* end feeding data to parser */
3477
3478 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003479 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003480 if (!res)
3481 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003482
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003483 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003484 Py_DECREF(res);
3485 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003486 }
3487 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003488 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003489 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003490 }
3491 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003492 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003493 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003494}
3495
Serhiy Storchakacb985562015-05-04 15:32:48 +03003496/*[clinic input]
3497_elementtree.XMLParser.feed
3498
3499 data: object
3500 /
3501
3502[clinic start generated code]*/
3503
3504static PyObject *
3505_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3506/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003507{
3508 /* feed data to parser */
3509
Serhiy Storchakacb985562015-05-04 15:32:48 +03003510 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003511 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003512 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3513 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003514 return NULL;
3515 if (data_len > INT_MAX) {
3516 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3517 return NULL;
3518 }
3519 /* Explicitly set UTF-8 encoding. Return code ignored. */
3520 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003521 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003522 }
3523 else {
3524 Py_buffer view;
3525 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003526 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003527 return NULL;
3528 if (view.len > INT_MAX) {
3529 PyBuffer_Release(&view);
3530 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3531 return NULL;
3532 }
3533 res = expat_parse(self, view.buf, (int)view.len, 0);
3534 PyBuffer_Release(&view);
3535 return res;
3536 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003537}
3538
Serhiy Storchakacb985562015-05-04 15:32:48 +03003539/*[clinic input]
3540_elementtree.XMLParser._parse_whole
3541
3542 file: object
3543 /
3544
3545[clinic start generated code]*/
3546
3547static PyObject *
3548_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3549/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003550{
Eli Benderskya3699232013-05-19 18:47:23 -07003551 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003552 PyObject* reader;
3553 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003554 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003555 PyObject* res;
3556
Serhiy Storchakacb985562015-05-04 15:32:48 +03003557 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003558 if (!reader)
3559 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003560
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003561 /* read from open file object */
3562 for (;;) {
3563
3564 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3565
3566 if (!buffer) {
3567 /* read failed (e.g. due to KeyboardInterrupt) */
3568 Py_DECREF(reader);
3569 return NULL;
3570 }
3571
Eli Benderskyf996e772012-03-16 05:53:30 +02003572 if (PyUnicode_CheckExact(buffer)) {
3573 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003574 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003575 Py_DECREF(buffer);
3576 break;
3577 }
3578 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003579 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003580 if (!temp) {
3581 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003582 Py_DECREF(reader);
3583 return NULL;
3584 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003585 buffer = temp;
3586 }
3587 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003588 Py_DECREF(buffer);
3589 break;
3590 }
3591
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003592 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3593 Py_DECREF(buffer);
3594 Py_DECREF(reader);
3595 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3596 return NULL;
3597 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003598 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003599 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003600 );
3601
3602 Py_DECREF(buffer);
3603
3604 if (!res) {
3605 Py_DECREF(reader);
3606 return NULL;
3607 }
3608 Py_DECREF(res);
3609
3610 }
3611
3612 Py_DECREF(reader);
3613
3614 res = expat_parse(self, "", 0, 1);
3615
3616 if (res && TreeBuilder_CheckExact(self->target)) {
3617 Py_DECREF(res);
3618 return treebuilder_done((TreeBuilderObject*) self->target);
3619 }
3620
3621 return res;
3622}
3623
Serhiy Storchakacb985562015-05-04 15:32:48 +03003624/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03003625_elementtree.XMLParser._setevents
3626
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003627 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003628 events_to_report: object = None
3629 /
3630
3631[clinic start generated code]*/
3632
3633static PyObject *
3634_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3635 PyObject *events_queue,
3636 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003637/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003638{
3639 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003640 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003641 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003642 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003643
3644 if (!TreeBuilder_CheckExact(self->target)) {
3645 PyErr_SetString(
3646 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003647 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003648 "targets"
3649 );
3650 return NULL;
3651 }
3652
3653 target = (TreeBuilderObject*) self->target;
3654
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003655 events_append = PyObject_GetAttrString(events_queue, "append");
3656 if (events_append == NULL)
3657 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003658 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003659
3660 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003661 Py_CLEAR(target->start_event_obj);
3662 Py_CLEAR(target->end_event_obj);
3663 Py_CLEAR(target->start_ns_event_obj);
3664 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003665
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003666 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003667 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003668 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003669 Py_RETURN_NONE;
3670 }
3671
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003672 if (!(events_seq = PySequence_Fast(events_to_report,
3673 "events must be a sequence"))) {
3674 return NULL;
3675 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003676
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03003677 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003678 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02003679 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003680 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003681 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003682 } else if (PyBytes_Check(event_name_obj)) {
3683 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003684 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003685 if (event_name == NULL) {
3686 Py_DECREF(events_seq);
3687 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3688 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003689 }
3690
3691 Py_INCREF(event_name_obj);
3692 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003693 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003694 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003695 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003696 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003697 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003698 EXPAT(SetNamespaceDeclHandler)(
3699 self->parser,
3700 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3701 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3702 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003703 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003704 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003705 EXPAT(SetNamespaceDeclHandler)(
3706 self->parser,
3707 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3708 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3709 );
3710 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003711 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003712 Py_DECREF(events_seq);
3713 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003714 return NULL;
3715 }
3716 }
3717
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003718 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003719 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003720}
3721
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003722static PyMemberDef xmlparser_members[] = {
3723 {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
3724 {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
3725 {NULL}
3726};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003727
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003728static PyObject*
3729xmlparser_version_getter(XMLParserObject *self, void *closure)
3730{
3731 return PyUnicode_FromFormat(
3732 "Expat %d.%d.%d", XML_MAJOR_VERSION,
3733 XML_MINOR_VERSION, XML_MICRO_VERSION);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003734}
3735
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003736static PyGetSetDef xmlparser_getsetlist[] = {
3737 {"version", (getter)xmlparser_version_getter, NULL, NULL},
3738 {NULL},
3739};
3740
Serhiy Storchakacb985562015-05-04 15:32:48 +03003741#include "clinic/_elementtree.c.h"
3742
3743static PyMethodDef element_methods[] = {
3744
3745 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3746
3747 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3748 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3749
3750 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3751 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3752 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3753
3754 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3755 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3756 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3757 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3758
3759 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3760 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3761 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3762
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003763 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003764 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3765
3766 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3767 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3768
3769 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3770
3771 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3772 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3773 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3774 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3775 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3776
3777 {NULL, NULL}
3778};
3779
3780static PyMappingMethods element_as_mapping = {
3781 (lenfunc) element_length,
3782 (binaryfunc) element_subscr,
3783 (objobjargproc) element_ass_subscr,
3784};
3785
Serhiy Storchakadde08152015-11-25 15:28:13 +02003786static PyGetSetDef element_getsetlist[] = {
3787 {"tag",
3788 (getter)element_tag_getter,
3789 (setter)element_tag_setter,
3790 "A string identifying what kind of data this element represents"},
3791 {"text",
3792 (getter)element_text_getter,
3793 (setter)element_text_setter,
3794 "A string of text directly after the start tag, or None"},
3795 {"tail",
3796 (getter)element_tail_getter,
3797 (setter)element_tail_setter,
3798 "A string of text directly after the end tag, or None"},
3799 {"attrib",
3800 (getter)element_attrib_getter,
3801 (setter)element_attrib_setter,
3802 "A dictionary containing the element's attributes"},
3803 {NULL},
3804};
3805
Serhiy Storchakacb985562015-05-04 15:32:48 +03003806static PyTypeObject Element_Type = {
3807 PyVarObject_HEAD_INIT(NULL, 0)
3808 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3809 /* methods */
3810 (destructor)element_dealloc, /* tp_dealloc */
3811 0, /* tp_print */
3812 0, /* tp_getattr */
3813 0, /* tp_setattr */
3814 0, /* tp_reserved */
3815 (reprfunc)element_repr, /* tp_repr */
3816 0, /* tp_as_number */
3817 &element_as_sequence, /* tp_as_sequence */
3818 &element_as_mapping, /* tp_as_mapping */
3819 0, /* tp_hash */
3820 0, /* tp_call */
3821 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003822 PyObject_GenericGetAttr, /* tp_getattro */
3823 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003824 0, /* tp_as_buffer */
3825 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3826 /* tp_flags */
3827 0, /* tp_doc */
3828 (traverseproc)element_gc_traverse, /* tp_traverse */
3829 (inquiry)element_gc_clear, /* tp_clear */
3830 0, /* tp_richcompare */
3831 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3832 0, /* tp_iter */
3833 0, /* tp_iternext */
3834 element_methods, /* tp_methods */
3835 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003836 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003837 0, /* tp_base */
3838 0, /* tp_dict */
3839 0, /* tp_descr_get */
3840 0, /* tp_descr_set */
3841 0, /* tp_dictoffset */
3842 (initproc)element_init, /* tp_init */
3843 PyType_GenericAlloc, /* tp_alloc */
3844 element_new, /* tp_new */
3845 0, /* tp_free */
3846};
3847
3848static PyMethodDef treebuilder_methods[] = {
3849 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3850 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3851 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3852 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3853 {NULL, NULL}
3854};
3855
3856static PyTypeObject TreeBuilder_Type = {
3857 PyVarObject_HEAD_INIT(NULL, 0)
3858 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3859 /* methods */
3860 (destructor)treebuilder_dealloc, /* tp_dealloc */
3861 0, /* tp_print */
3862 0, /* tp_getattr */
3863 0, /* tp_setattr */
3864 0, /* tp_reserved */
3865 0, /* tp_repr */
3866 0, /* tp_as_number */
3867 0, /* tp_as_sequence */
3868 0, /* tp_as_mapping */
3869 0, /* tp_hash */
3870 0, /* tp_call */
3871 0, /* tp_str */
3872 0, /* tp_getattro */
3873 0, /* tp_setattro */
3874 0, /* tp_as_buffer */
3875 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3876 /* tp_flags */
3877 0, /* tp_doc */
3878 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3879 (inquiry)treebuilder_gc_clear, /* tp_clear */
3880 0, /* tp_richcompare */
3881 0, /* tp_weaklistoffset */
3882 0, /* tp_iter */
3883 0, /* tp_iternext */
3884 treebuilder_methods, /* tp_methods */
3885 0, /* tp_members */
3886 0, /* tp_getset */
3887 0, /* tp_base */
3888 0, /* tp_dict */
3889 0, /* tp_descr_get */
3890 0, /* tp_descr_set */
3891 0, /* tp_dictoffset */
3892 _elementtree_TreeBuilder___init__, /* tp_init */
3893 PyType_GenericAlloc, /* tp_alloc */
3894 treebuilder_new, /* tp_new */
3895 0, /* tp_free */
3896};
3897
3898static PyMethodDef xmlparser_methods[] = {
3899 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3900 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3901 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3902 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003903 {NULL, NULL}
3904};
3905
Neal Norwitz227b5332006-03-22 09:28:35 +00003906static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003907 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003908 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003909 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003910 (destructor)xmlparser_dealloc, /* tp_dealloc */
3911 0, /* tp_print */
3912 0, /* tp_getattr */
3913 0, /* tp_setattr */
3914 0, /* tp_reserved */
3915 0, /* tp_repr */
3916 0, /* tp_as_number */
3917 0, /* tp_as_sequence */
3918 0, /* tp_as_mapping */
3919 0, /* tp_hash */
3920 0, /* tp_call */
3921 0, /* tp_str */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003922 0, /* tp_getattro */
Eli Bendersky52467b12012-06-01 07:13:08 +03003923 0, /* tp_setattro */
3924 0, /* tp_as_buffer */
3925 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3926 /* tp_flags */
3927 0, /* tp_doc */
3928 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3929 (inquiry)xmlparser_gc_clear, /* tp_clear */
3930 0, /* tp_richcompare */
3931 0, /* tp_weaklistoffset */
3932 0, /* tp_iter */
3933 0, /* tp_iternext */
3934 xmlparser_methods, /* tp_methods */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003935 xmlparser_members, /* tp_members */
3936 xmlparser_getsetlist, /* tp_getset */
Eli Bendersky52467b12012-06-01 07:13:08 +03003937 0, /* tp_base */
3938 0, /* tp_dict */
3939 0, /* tp_descr_get */
3940 0, /* tp_descr_set */
3941 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003942 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003943 PyType_GenericAlloc, /* tp_alloc */
3944 xmlparser_new, /* tp_new */
3945 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003946};
3947
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003948/* ==================================================================== */
3949/* python module interface */
3950
3951static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003952 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003953 {NULL, NULL}
3954};
3955
Martin v. Löwis1a214512008-06-11 05:26:20 +00003956
Eli Bendersky532d03e2013-08-10 08:00:39 -07003957static struct PyModuleDef elementtreemodule = {
3958 PyModuleDef_HEAD_INIT,
3959 "_elementtree",
3960 NULL,
3961 sizeof(elementtreestate),
3962 _functions,
3963 NULL,
3964 elementtree_traverse,
3965 elementtree_clear,
3966 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003967};
3968
Neal Norwitzf6657e62006-12-28 04:47:50 +00003969PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003970PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003971{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003972 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003973 elementtreestate *st;
3974
3975 m = PyState_FindModule(&elementtreemodule);
3976 if (m) {
3977 Py_INCREF(m);
3978 return m;
3979 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003980
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003981 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003982 if (PyType_Ready(&ElementIter_Type) < 0)
3983 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003984 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003985 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003986 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003987 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003988 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003989 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003990
Eli Bendersky532d03e2013-08-10 08:00:39 -07003991 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003992 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003993 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003994 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003995
Eli Bendersky828efde2012-04-05 05:40:58 +03003996 if (!(temp = PyImport_ImportModule("copy")))
3997 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003998 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003999 Py_XDECREF(temp);
4000
Victor Stinnerb136f112017-07-10 22:28:02 +02004001 if (st->deepcopy_obj == NULL) {
4002 return NULL;
4003 }
4004
4005 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004006 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004007 return NULL;
4008
Eli Bendersky20d41742012-06-01 09:48:37 +03004009 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004010 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4011 if (expat_capi) {
4012 /* check that it's usable */
4013 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004014 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004015 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4016 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004017 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004018 PyErr_SetString(PyExc_ImportError,
4019 "pyexpat version is incompatible");
4020 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004021 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004022 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004023 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004024 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004025
Eli Bendersky532d03e2013-08-10 08:00:39 -07004026 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004027 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004028 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004029 Py_INCREF(st->parseerror_obj);
4030 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004031
Eli Bendersky092af1f2012-03-04 07:14:03 +02004032 Py_INCREF((PyObject *)&Element_Type);
4033 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4034
Eli Bendersky58d548d2012-05-29 15:45:16 +03004035 Py_INCREF((PyObject *)&TreeBuilder_Type);
4036 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4037
Eli Bendersky52467b12012-06-01 07:13:08 +03004038 Py_INCREF((PyObject *)&XMLParser_Type);
4039 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004040
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004041 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004042}