blob: 2f1c4c02e82af5066dcb7bec2b244d06c13e8075 [file] [log] [blame]
/*--------------------------------------------------------------------
 * Licensed to PSF under a Contributor Agreement.
 * See http://www.python.org/psf/license for licensing details.
 *
 * _elementtree - C accelerator for xml.etree.ElementTree
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 *--------------------------------------------------------------------
 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
/* -------------------------------------------------------------------- */
/* configuration */
21
/* Number of child slots stored inline in an element's extra structure.
   An element can hold this many children without extra memory
   allocations. */
#define STATIC_CHILDREN 4

/* Tuning notes:

   - For best performance, choose a value so that 80-90% of all nodes
     have no more than the given number of children.

   - Set this to zero to minimize the size of the element structure
     itself (this only helps if you have lots of leaf nodes with
     attributes).

   - pymalloc always allocates blocks in multiples of eight bytes.  For
     the current C version of ElementTree, this means that the number of
     children should be an even number, at least on 32-bit platforms. */
35
/* -------------------------------------------------------------------- */
37
/* Allocation tracing hooks.  Change the condition below to 1 to make
   every ALLOC/RELEASE site print a running byte total; otherwise both
   macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment) \
    do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) introduces a file-private function that
   returns `type`; under MSVC it also asks for inlining and the
   __fastcall calling convention. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
55
/* Macros used to store 'join' flags in string object pointers: bit 0 of
   the pointer carries the flag, the remaining bits are the object
   pointer.  Note that all use of text and tail as object pointers must
   be wrapped in JOIN_OBJ.  See comments in the ElementObject definition
   for more info.

     JOIN_OBJ(p)        the pointer with the flag bit masked off
     JOIN_GET(p)        the flag bit (0 or 1)
     JOIN_SET(p, flag)  the object pointer of p or'ed with `flag` */
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
Ronald Oussoren138d0802013-07-19 11:11:25 +020083/* Types defined by this extension */
84static PyTypeObject Element_Type;
85static PyTypeObject ElementIter_Type;
86static PyTypeObject TreeBuilder_Type;
87static PyTypeObject XMLParser_Type;
88
89
Eli Bendersky532d03e2013-08-10 08:00:39 -070090/* Per-module state; PEP 3121 */
91typedef struct {
92 PyObject *parseerror_obj;
93 PyObject *deepcopy_obj;
94 PyObject *elementpath_obj;
95} elementtreestate;
96
97static struct PyModuleDef elementtreemodule;
98
99/* Given a module object (assumed to be _elementtree), get its per-module
100 * state.
101 */
102#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
103
104/* Find the module instance imported in the currently running sub-interpreter
105 * and get its state.
106 */
107#define ET_STATE_GLOBAL \
108 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
109
110static int
111elementtree_clear(PyObject *m)
112{
113 elementtreestate *st = ET_STATE(m);
114 Py_CLEAR(st->parseerror_obj);
115 Py_CLEAR(st->deepcopy_obj);
116 Py_CLEAR(st->elementpath_obj);
117 return 0;
118}
119
120static int
121elementtree_traverse(PyObject *m, visitproc visit, void *arg)
122{
123 elementtreestate *st = ET_STATE(m);
124 Py_VISIT(st->parseerror_obj);
125 Py_VISIT(st->deepcopy_obj);
126 Py_VISIT(st->elementpath_obj);
127 return 0;
128}
129
130static void
131elementtree_free(void *m)
132{
133 elementtree_clear((PyObject *)m);
134}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135
/* helpers */
137
138LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139list_join(PyObject* list)
140{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300141 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000143 PyObject* result;
144
Antoine Pitrouc1948842012-10-01 23:40:37 +0200145 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000146 if (!joiner)
147 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200148 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000150 return result;
151}
152
Eli Bendersky48d358b2012-05-30 17:57:50 +0300153/* Is the given object an empty dictionary?
154*/
155static int
156is_empty_dict(PyObject *obj)
157{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200158 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300159}
160
161
/* -------------------------------------------------------------------- */
/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000164
165typedef struct {
166
167 /* attributes (a dictionary object), or None if no attributes */
168 PyObject* attrib;
169
170 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200171 Py_ssize_t length; /* actual number of items */
172 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000173
174 /* this either points to _children or to a malloced buffer */
175 PyObject* *children;
176
177 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179} ElementObjectExtra;
180
181typedef struct {
182 PyObject_HEAD
183
184 /* element tag (a string). */
185 PyObject* tag;
186
187 /* text before first child. note that this is a tagged pointer;
188 use JOIN_OBJ to get the object pointer. the join flag is used
189 to distinguish lists created by the tree builder from lists
190 assigned to the attribute by application code; the former
191 should be joined before being returned to the user, the latter
192 should be left intact. */
193 PyObject* text;
194
195 /* text after this element, in parent. note that this is a tagged
196 pointer; use JOIN_OBJ to get the object pointer. */
197 PyObject* tail;
198
199 ElementObjectExtra* extra;
200
Eli Benderskyebf37a22012-04-03 22:02:37 +0300201 PyObject *weakreflist; /* For tp_weaklistoffset */
202
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000203} ElementObject;
204
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
/* Element_CheckExact: op's type is Element_Type itself.
   Element_Check: op's type is Element_Type or a subtype of it. */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
208
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209
/* -------------------------------------------------------------------- */
/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212
213LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200214create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215{
216 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200217 if (!self->extra) {
218 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000219 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200220 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000221
222 if (!attrib)
223 attrib = Py_None;
224
225 Py_INCREF(attrib);
226 self->extra->attrib = attrib;
227
228 self->extra->length = 0;
229 self->extra->allocated = STATIC_CHILDREN;
230 self->extra->children = self->extra->_children;
231
232 return 0;
233}
234
235LOCAL(void)
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300236dealloc_extra(ElementObjectExtra *extra)
237{
238 Py_ssize_t i;
239
240 if (!extra)
241 return;
242
243 Py_DECREF(extra->attrib);
244
245 for (i = 0; i < extra->length; i++)
246 Py_DECREF(extra->children[i]);
247
248 if (extra->children != extra->_children)
249 PyObject_Free(extra->children);
250
251 PyObject_Free(extra);
252}
253
254LOCAL(void)
255clear_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256{
Eli Bendersky08b85292012-04-04 15:55:07 +0300257 ElementObjectExtra *myextra;
Eli Bendersky08b85292012-04-04 15:55:07 +0300258
Eli Benderskyebf37a22012-04-03 22:02:37 +0300259 if (!self->extra)
260 return;
261
262 /* Avoid DECREFs calling into this code again (cycles, etc.)
263 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300264 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300265 self->extra = NULL;
266
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300267 dealloc_extra(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000268}
269
Eli Bendersky092af1f2012-03-04 07:14:03 +0200270/* Convenience internal function to create new Element objects with the given
271 * tag and attributes.
272*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200274create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000275{
276 ElementObject* self;
277
Eli Bendersky0192ba32012-03-30 16:38:33 +0300278 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000279 if (self == NULL)
280 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000281 self->extra = NULL;
282
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000283 Py_INCREF(tag);
284 self->tag = tag;
285
286 Py_INCREF(Py_None);
287 self->text = Py_None;
288
289 Py_INCREF(Py_None);
290 self->tail = Py_None;
291
Eli Benderskyebf37a22012-04-03 22:02:37 +0300292 self->weakreflist = NULL;
293
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200294 ALLOC(sizeof(ElementObject), "create element");
295 PyObject_GC_Track(self);
296
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200297 if (attrib != Py_None && !is_empty_dict(attrib)) {
298 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200299 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200300 return NULL;
301 }
302 }
303
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000304 return (PyObject*) self;
305}
306
Eli Bendersky092af1f2012-03-04 07:14:03 +0200307static PyObject *
308element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
309{
310 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
311 if (e != NULL) {
312 Py_INCREF(Py_None);
313 e->tag = Py_None;
314
315 Py_INCREF(Py_None);
316 e->text = Py_None;
317
318 Py_INCREF(Py_None);
319 e->tail = Py_None;
320
321 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300322 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200323 }
324 return (PyObject *)e;
325}
326
Eli Bendersky737b1732012-05-29 06:02:56 +0300327/* Helper function for extracting the attrib dictionary from a keywords dict.
328 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800329 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300330 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700331 *
332 * Return a dictionary with the content of kwds merged into the content of
333 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300334 */
335static PyObject*
336get_attrib_from_keywords(PyObject *kwds)
337{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700338 PyObject *attrib_str = PyUnicode_FromString("attrib");
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600339 if (attrib_str == NULL) {
340 return NULL;
341 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700342 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300343
344 if (attrib) {
345 /* If attrib was found in kwds, copy its value and remove it from
346 * kwds
347 */
348 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700349 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300350 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
351 Py_TYPE(attrib)->tp_name);
352 return NULL;
353 }
354 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700355 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300356 } else {
357 attrib = PyDict_New();
358 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700359
360 Py_DECREF(attrib_str);
361
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600362 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
363 Py_DECREF(attrib);
364 return NULL;
365 }
Eli Bendersky737b1732012-05-29 06:02:56 +0300366 return attrib;
367}
368
/*[clinic input]
module _elementtree
class _elementtree.Element "ElementObject *" "&Element_Type"
class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
376
Eli Bendersky092af1f2012-03-04 07:14:03 +0200377static int
378element_init(PyObject *self, PyObject *args, PyObject *kwds)
379{
380 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200381 PyObject *attrib = NULL;
382 ElementObject *self_elem;
383
384 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
385 return -1;
386
Eli Bendersky737b1732012-05-29 06:02:56 +0300387 if (attrib) {
388 /* attrib passed as positional arg */
389 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200390 if (!attrib)
391 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300392 if (kwds) {
393 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200394 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300395 return -1;
396 }
397 }
398 } else if (kwds) {
399 /* have keywords args */
400 attrib = get_attrib_from_keywords(kwds);
401 if (!attrib)
402 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 }
404
405 self_elem = (ElementObject *)self;
406
Antoine Pitrouc1948842012-10-01 23:40:37 +0200407 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200409 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200410 return -1;
411 }
412 }
413
Eli Bendersky48d358b2012-05-30 17:57:50 +0300414 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200415 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200416
417 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200418 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300419 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200420
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300422 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200423
Eli Bendersky092af1f2012-03-04 07:14:03 +0200424 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300425 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426
427 return 0;
428}
429
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000430LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200431element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000432{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200433 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000434 PyObject* *children;
435
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300436 assert(extra >= 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000437 /* make sure self->children can hold the given number of extra
438 elements. set an exception and return -1 if allocation failed */
439
Victor Stinner5f0af232013-07-11 23:01:36 +0200440 if (!self->extra) {
441 if (create_extra(self, NULL) < 0)
442 return -1;
443 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000444
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200445 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000446
447 if (size > self->extra->allocated) {
448 /* use Python 2.4's list growth strategy */
449 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000450 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100451 * which needs at least 4 bytes.
452 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000453 * be safe.
454 */
455 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200456 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
457 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000458 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000459 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100460 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000461 * false alarm always assume at least one child to be safe.
462 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000463 children = PyObject_Realloc(self->extra->children,
464 size * sizeof(PyObject*));
465 if (!children)
466 goto nomemory;
467 } else {
468 children = PyObject_Malloc(size * sizeof(PyObject*));
469 if (!children)
470 goto nomemory;
471 /* copy existing children from static area to malloc buffer */
472 memcpy(children, self->extra->children,
473 self->extra->length * sizeof(PyObject*));
474 }
475 self->extra->children = children;
476 self->extra->allocated = size;
477 }
478
479 return 0;
480
481 nomemory:
482 PyErr_NoMemory();
483 return -1;
484}
485
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300486LOCAL(void)
487raise_type_error(PyObject *element)
488{
489 PyErr_Format(PyExc_TypeError,
490 "expected an Element, not \"%.200s\"",
491 Py_TYPE(element)->tp_name);
492}
493
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000494LOCAL(int)
495element_add_subelement(ElementObject* self, PyObject* element)
496{
497 /* add a child element to a parent */
498
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300499 if (!Element_Check(element)) {
500 raise_type_error(element);
501 return -1;
502 }
503
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000504 if (element_resize(self, 1) < 0)
505 return -1;
506
507 Py_INCREF(element);
508 self->extra->children[self->extra->length] = element;
509
510 self->extra->length++;
511
512 return 0;
513}
514
515LOCAL(PyObject*)
516element_get_attrib(ElementObject* self)
517{
518 /* return borrowed reference to attrib dictionary */
519 /* note: this function assumes that the extra section exists */
520
521 PyObject* res = self->extra->attrib;
522
523 if (res == Py_None) {
524 /* create missing dictionary */
525 res = PyDict_New();
526 if (!res)
527 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200528 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000529 self->extra->attrib = res;
530 }
531
532 return res;
533}
534
535LOCAL(PyObject*)
536element_get_text(ElementObject* self)
537{
538 /* return borrowed reference to text attribute */
539
Serhiy Storchaka576def02017-03-30 09:47:31 +0300540 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000541
542 if (JOIN_GET(res)) {
543 res = JOIN_OBJ(res);
544 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300545 PyObject *tmp = list_join(res);
546 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000547 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300548 self->text = tmp;
549 Py_DECREF(res);
550 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000551 }
552 }
553
554 return res;
555}
556
557LOCAL(PyObject*)
558element_get_tail(ElementObject* self)
559{
560 /* return borrowed reference to text attribute */
561
Serhiy Storchaka576def02017-03-30 09:47:31 +0300562 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563
564 if (JOIN_GET(res)) {
565 res = JOIN_OBJ(res);
566 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300567 PyObject *tmp = list_join(res);
568 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000569 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300570 self->tail = tmp;
571 Py_DECREF(res);
572 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573 }
574 }
575
576 return res;
577}
578
579static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300580subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000581{
582 PyObject* elem;
583
584 ElementObject* parent;
585 PyObject* tag;
586 PyObject* attrib = NULL;
587 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
588 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800589 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000590 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800591 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000592
Eli Bendersky737b1732012-05-29 06:02:56 +0300593 if (attrib) {
594 /* attrib passed as positional arg */
595 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000596 if (!attrib)
597 return NULL;
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600598 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
599 Py_DECREF(attrib);
600 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300601 }
602 } else if (kwds) {
603 /* have keyword args */
604 attrib = get_attrib_from_keywords(kwds);
605 if (!attrib)
606 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000607 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300608 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000609 Py_INCREF(Py_None);
610 attrib = Py_None;
611 }
612
Eli Bendersky092af1f2012-03-04 07:14:03 +0200613 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000614 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200615 if (elem == NULL)
616 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000617
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000618 if (element_add_subelement(parent, elem) < 0) {
619 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000620 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000621 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000622
623 return elem;
624}
625
Eli Bendersky0192ba32012-03-30 16:38:33 +0300626static int
627element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
628{
629 Py_VISIT(self->tag);
630 Py_VISIT(JOIN_OBJ(self->text));
631 Py_VISIT(JOIN_OBJ(self->tail));
632
633 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200634 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300635 Py_VISIT(self->extra->attrib);
636
637 for (i = 0; i < self->extra->length; ++i)
638 Py_VISIT(self->extra->children[i]);
639 }
640 return 0;
641}
642
643static int
644element_gc_clear(ElementObject *self)
645{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300646 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700647 _clear_joined_ptr(&self->text);
648 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300649
650 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300651 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300652 */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300653 clear_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300654 return 0;
655}
656
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000657static void
658element_dealloc(ElementObject* self)
659{
INADA Naokia6296d32017-08-24 14:55:17 +0900660 /* bpo-31095: UnTrack is needed before calling any callbacks */
Eli Bendersky0192ba32012-03-30 16:38:33 +0300661 PyObject_GC_UnTrack(self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200662 Py_TRASHCAN_SAFE_BEGIN(self)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300663
664 if (self->weakreflist != NULL)
665 PyObject_ClearWeakRefs((PyObject *) self);
666
Eli Bendersky0192ba32012-03-30 16:38:33 +0300667 /* element_gc_clear clears all references and deallocates extra
668 */
669 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000670
671 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200672 Py_TYPE(self)->tp_free((PyObject *)self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200673 Py_TRASHCAN_SAFE_END(self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000674}
675
/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000677
Serhiy Storchakacb985562015-05-04 15:32:48 +0300678/*[clinic input]
679_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000680
Serhiy Storchakacb985562015-05-04 15:32:48 +0300681 subelement: object(subclass_of='&Element_Type')
682 /
683
684[clinic start generated code]*/
685
686static PyObject *
687_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
688/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
689{
690 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000691 return NULL;
692
693 Py_RETURN_NONE;
694}
695
Serhiy Storchakacb985562015-05-04 15:32:48 +0300696/*[clinic input]
697_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000698
Serhiy Storchakacb985562015-05-04 15:32:48 +0300699[clinic start generated code]*/
700
701static PyObject *
702_elementtree_Element_clear_impl(ElementObject *self)
703/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
704{
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300705 clear_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000706
707 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300708 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000709
710 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300711 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000712
713 Py_RETURN_NONE;
714}
715
Serhiy Storchakacb985562015-05-04 15:32:48 +0300716/*[clinic input]
717_elementtree.Element.__copy__
718
719[clinic start generated code]*/
720
721static PyObject *
722_elementtree_Element___copy___impl(ElementObject *self)
723/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000724{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200725 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000726 ElementObject* element;
727
Eli Bendersky092af1f2012-03-04 07:14:03 +0200728 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800729 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000730 if (!element)
731 return NULL;
732
Oren Milman39ecb9c2017-10-10 23:26:24 +0300733 Py_INCREF(JOIN_OBJ(self->text));
734 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000735
Oren Milman39ecb9c2017-10-10 23:26:24 +0300736 Py_INCREF(JOIN_OBJ(self->tail));
737 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000738
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300739 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000740 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000741 if (element_resize(element, self->extra->length) < 0) {
742 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000743 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000744 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000745
746 for (i = 0; i < self->extra->length; i++) {
747 Py_INCREF(self->extra->children[i]);
748 element->extra->children[i] = self->extra->children[i];
749 }
750
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300751 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000752 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000753 }
754
755 return (PyObject*) element;
756}
757
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200758/* Helper for a deep copy. */
759LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
760
Serhiy Storchakacb985562015-05-04 15:32:48 +0300761/*[clinic input]
762_elementtree.Element.__deepcopy__
763
Oren Milmand0568182017-09-12 17:39:15 +0300764 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300765 /
766
767[clinic start generated code]*/
768
769static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300770_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
771/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000772{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200773 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000774 ElementObject* element;
775 PyObject* tag;
776 PyObject* attrib;
777 PyObject* text;
778 PyObject* tail;
779 PyObject* id;
780
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000781 tag = deepcopy(self->tag, memo);
782 if (!tag)
783 return NULL;
784
785 if (self->extra) {
786 attrib = deepcopy(self->extra->attrib, memo);
787 if (!attrib) {
788 Py_DECREF(tag);
789 return NULL;
790 }
791 } else {
792 Py_INCREF(Py_None);
793 attrib = Py_None;
794 }
795
Eli Bendersky092af1f2012-03-04 07:14:03 +0200796 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000797
798 Py_DECREF(tag);
799 Py_DECREF(attrib);
800
801 if (!element)
802 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100803
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000804 text = deepcopy(JOIN_OBJ(self->text), memo);
805 if (!text)
806 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300807 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000808
809 tail = deepcopy(JOIN_OBJ(self->tail), memo);
810 if (!tail)
811 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300812 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000813
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300814 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000815 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000816 if (element_resize(element, self->extra->length) < 0)
817 goto error;
818
819 for (i = 0; i < self->extra->length; i++) {
820 PyObject* child = deepcopy(self->extra->children[i], memo);
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300821 if (!child || !Element_Check(child)) {
822 if (child) {
823 raise_type_error(child);
824 Py_DECREF(child);
825 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000826 element->extra->length = i;
827 goto error;
828 }
829 element->extra->children[i] = child;
830 }
831
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300832 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000833 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000834 }
835
836 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700837 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000838 if (!id)
839 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000840
841 i = PyDict_SetItem(memo, id, (PyObject*) element);
842
843 Py_DECREF(id);
844
845 if (i < 0)
846 goto error;
847
848 return (PyObject*) element;
849
850 error:
851 Py_DECREF(element);
852 return NULL;
853}
854
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200855LOCAL(PyObject *)
856deepcopy(PyObject *object, PyObject *memo)
857{
858 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200859 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200860 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200861
862 /* Fast paths */
863 if (object == Py_None || PyUnicode_CheckExact(object)) {
864 Py_INCREF(object);
865 return object;
866 }
867
868 if (Py_REFCNT(object) == 1) {
869 if (PyDict_CheckExact(object)) {
870 PyObject *key, *value;
871 Py_ssize_t pos = 0;
872 int simple = 1;
873 while (PyDict_Next(object, &pos, &key, &value)) {
874 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
875 simple = 0;
876 break;
877 }
878 }
879 if (simple)
880 return PyDict_Copy(object);
881 /* Fall through to general case */
882 }
883 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300884 return _elementtree_Element___deepcopy___impl(
885 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200886 }
887 }
888
889 /* General case */
890 st = ET_STATE_GLOBAL;
891 if (!st->deepcopy_obj) {
892 PyErr_SetString(PyExc_RuntimeError,
893 "deepcopy helper not found");
894 return NULL;
895 }
896
Victor Stinner7fbac452016-08-20 01:34:44 +0200897 stack[0] = object;
898 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200899 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200900}
901
902
Serhiy Storchakacb985562015-05-04 15:32:48 +0300903/*[clinic input]
904_elementtree.Element.__sizeof__ -> Py_ssize_t
905
906[clinic start generated code]*/
907
908static Py_ssize_t
909_elementtree_Element___sizeof___impl(ElementObject *self)
910/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200911{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200912 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200913 if (self->extra) {
914 result += sizeof(ElementObjectExtra);
915 if (self->extra->children != self->extra->_children)
916 result += sizeof(PyObject*) * self->extra->allocated;
917 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300918 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200919}
920
/* Keys of the state dict exchanged by __getstate__ and __setstate__. */
#define PICKLED_TAG      "tag"
#define PICKLED_ATTRIB   "attrib"
#define PICKLED_TEXT     "text"
#define PICKLED_TAIL     "tail"
#define PICKLED_CHILDREN "_children"
927
928/* __getstate__ returns a fabricated instance dict as in the pure-Python
929 * Element implementation, for interoperability/interchangeability. This
930 * makes the pure-Python implementation details an API, but (a) there aren't
931 * any unnecessary structures there; and (b) it buys compatibility with 3.2
932 * pickles. See issue #16076.
933 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300934/*[clinic input]
935_elementtree.Element.__getstate__
936
937[clinic start generated code]*/
938
Eli Bendersky698bdb22013-01-10 06:01:06 -0800939static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300940_elementtree_Element___getstate___impl(ElementObject *self)
941/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800942{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200943 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800944 PyObject *instancedict = NULL, *children;
945
946 /* Build a list of children. */
947 children = PyList_New(self->extra ? self->extra->length : 0);
948 if (!children)
949 return NULL;
950 for (i = 0; i < PyList_GET_SIZE(children); i++) {
951 PyObject *child = self->extra->children[i];
952 Py_INCREF(child);
953 PyList_SET_ITEM(children, i, child);
954 }
955
956 /* Construct the state object. */
957 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
958 if (noattrib)
959 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
960 PICKLED_TAG, self->tag,
961 PICKLED_CHILDREN, children,
962 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700963 PICKLED_TEXT, JOIN_OBJ(self->text),
964 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800965 else
966 instancedict = Py_BuildValue("{sOsOsOsOsO}",
967 PICKLED_TAG, self->tag,
968 PICKLED_CHILDREN, children,
969 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700970 PICKLED_TEXT, JOIN_OBJ(self->text),
971 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800972 if (instancedict) {
973 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800974 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800975 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800976 else {
977 for (i = 0; i < PyList_GET_SIZE(children); i++)
978 Py_DECREF(PyList_GET_ITEM(children, i));
979 Py_DECREF(children);
980
981 return NULL;
982 }
983}
984
985static PyObject *
986element_setstate_from_attributes(ElementObject *self,
987 PyObject *tag,
988 PyObject *attrib,
989 PyObject *text,
990 PyObject *tail,
991 PyObject *children)
992{
993 Py_ssize_t i, nchildren;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300994 ElementObjectExtra *oldextra = NULL;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800995
996 if (!tag) {
997 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
998 return NULL;
999 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001000
Serhiy Storchaka191321d2015-12-27 15:41:34 +02001001 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001002 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001003
Oren Milman39ecb9c2017-10-10 23:26:24 +03001004 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
1005 Py_INCREF(JOIN_OBJ(text));
1006 _set_joined_ptr(&self->text, text);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001007
Oren Milman39ecb9c2017-10-10 23:26:24 +03001008 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
1009 Py_INCREF(JOIN_OBJ(tail));
1010 _set_joined_ptr(&self->tail, tail);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001011
1012 /* Handle ATTRIB and CHILDREN. */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001013 if (!children && !attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001014 Py_RETURN_NONE;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001015 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001016
1017 /* Compute 'nchildren'. */
1018 if (children) {
1019 if (!PyList_Check(children)) {
1020 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1021 return NULL;
1022 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001023 nchildren = PyList_GET_SIZE(children);
1024
1025 /* (Re-)allocate 'extra'.
1026 Avoid DECREFs calling into this code again (cycles, etc.)
1027 */
1028 oldextra = self->extra;
1029 self->extra = NULL;
1030 if (element_resize(self, nchildren)) {
1031 assert(!self->extra || !self->extra->length);
1032 clear_extra(self);
1033 self->extra = oldextra;
1034 return NULL;
1035 }
1036 assert(self->extra);
1037 assert(self->extra->allocated >= nchildren);
1038 if (oldextra) {
1039 assert(self->extra->attrib == Py_None);
1040 self->extra->attrib = oldextra->attrib;
1041 oldextra->attrib = Py_None;
1042 }
1043
1044 /* Copy children */
1045 for (i = 0; i < nchildren; i++) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001046 PyObject *child = PyList_GET_ITEM(children, i);
1047 if (!Element_Check(child)) {
1048 raise_type_error(child);
1049 self->extra->length = i;
1050 dealloc_extra(oldextra);
1051 return NULL;
1052 }
1053 Py_INCREF(child);
1054 self->extra->children[i] = child;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001055 }
1056
1057 assert(!self->extra->length);
1058 self->extra->length = nchildren;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001059 }
1060 else {
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001061 if (element_resize(self, 0)) {
1062 return NULL;
1063 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001064 }
1065
Eli Bendersky698bdb22013-01-10 06:01:06 -08001066 /* Stash attrib. */
1067 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001068 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001069 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001070 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001071 dealloc_extra(oldextra);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001072
1073 Py_RETURN_NONE;
1074}
1075
1076/* __setstate__ for Element instance from the Python implementation.
1077 * 'state' should be the instance dict.
1078 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001079
Eli Bendersky698bdb22013-01-10 06:01:06 -08001080static PyObject *
1081element_setstate_from_Python(ElementObject *self, PyObject *state)
1082{
1083 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1084 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1085 PyObject *args;
1086 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001087 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001088
Eli Bendersky698bdb22013-01-10 06:01:06 -08001089 tag = attrib = text = tail = children = NULL;
1090 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001091 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001092 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001093
1094 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1095 &attrib, &text, &tail, &children))
1096 retval = element_setstate_from_attributes(self, tag, attrib, text,
1097 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001098 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001099 retval = NULL;
1100
1101 Py_DECREF(args);
1102 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001103}
1104
Serhiy Storchakacb985562015-05-04 15:32:48 +03001105/*[clinic input]
1106_elementtree.Element.__setstate__
1107
1108 state: object
1109 /
1110
1111[clinic start generated code]*/
1112
Eli Bendersky698bdb22013-01-10 06:01:06 -08001113static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001114_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1115/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001116{
1117 if (!PyDict_CheckExact(state)) {
1118 PyErr_Format(PyExc_TypeError,
1119 "Don't know how to unpickle \"%.200R\" as an Element",
1120 state);
1121 return NULL;
1122 }
1123 else
1124 return element_setstate_from_Python(self, state);
1125}
1126
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001127LOCAL(int)
1128checkpath(PyObject* tag)
1129{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001130 Py_ssize_t i;
1131 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001132
1133 /* check if a tag contains an xpath character */
1134
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001135#define PATHCHAR(ch) \
1136 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001137
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001138 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001139 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1140 void *data = PyUnicode_DATA(tag);
1141 unsigned int kind = PyUnicode_KIND(tag);
1142 for (i = 0; i < len; i++) {
1143 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1144 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001145 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001146 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001147 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001148 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001149 return 1;
1150 }
1151 return 0;
1152 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001153 if (PyBytes_Check(tag)) {
1154 char *p = PyBytes_AS_STRING(tag);
1155 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001156 if (p[i] == '{')
1157 check = 0;
1158 else if (p[i] == '}')
1159 check = 1;
1160 else if (check && PATHCHAR(p[i]))
1161 return 1;
1162 }
1163 return 0;
1164 }
1165
1166 return 1; /* unknown type; might be path expression */
1167}
1168
Serhiy Storchakacb985562015-05-04 15:32:48 +03001169/*[clinic input]
1170_elementtree.Element.extend
1171
1172 elements: object
1173 /
1174
1175[clinic start generated code]*/
1176
1177static PyObject *
1178_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1179/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001180{
1181 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001182 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001183
Serhiy Storchakacb985562015-05-04 15:32:48 +03001184 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001185 if (!seq) {
1186 PyErr_Format(
1187 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001188 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001189 );
1190 return NULL;
1191 }
1192
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001193 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001194 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001195 Py_INCREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001196 if (element_add_subelement(self, element) < 0) {
1197 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001198 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001199 return NULL;
1200 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001201 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001202 }
1203
1204 Py_DECREF(seq);
1205
1206 Py_RETURN_NONE;
1207}
1208
Serhiy Storchakacb985562015-05-04 15:32:48 +03001209/*[clinic input]
1210_elementtree.Element.find
1211
1212 path: object
1213 namespaces: object = None
1214
1215[clinic start generated code]*/
1216
1217static PyObject *
1218_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1219 PyObject *namespaces)
1220/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001221{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001222 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001223 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001224
Serhiy Storchakacb985562015-05-04 15:32:48 +03001225 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001226 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001227 return _PyObject_CallMethodIdObjArgs(
1228 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001229 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001230 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001231
1232 if (!self->extra)
1233 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001234
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001235 for (i = 0; i < self->extra->length; i++) {
1236 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001237 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001238 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001239 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001240 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001241 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001242 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001243 Py_DECREF(item);
1244 if (rc < 0)
1245 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001246 }
1247
1248 Py_RETURN_NONE;
1249}
1250
Serhiy Storchakacb985562015-05-04 15:32:48 +03001251/*[clinic input]
1252_elementtree.Element.findtext
1253
1254 path: object
1255 default: object = None
1256 namespaces: object = None
1257
1258[clinic start generated code]*/
1259
1260static PyObject *
1261_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1262 PyObject *default_value,
1263 PyObject *namespaces)
1264/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001265{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001266 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001267 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001268 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001269
Serhiy Storchakacb985562015-05-04 15:32:48 +03001270 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001271 return _PyObject_CallMethodIdObjArgs(
1272 st->elementpath_obj, &PyId_findtext,
1273 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001274 );
1275
1276 if (!self->extra) {
1277 Py_INCREF(default_value);
1278 return default_value;
1279 }
1280
1281 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001282 PyObject *item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001283 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001284 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001285 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001286 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001287 if (rc > 0) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001288 PyObject* text = element_get_text((ElementObject*)item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001289 if (text == Py_None) {
1290 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001291 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001292 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001293 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001294 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001295 return text;
1296 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001297 Py_DECREF(item);
1298 if (rc < 0)
1299 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001300 }
1301
1302 Py_INCREF(default_value);
1303 return default_value;
1304}
1305
Serhiy Storchakacb985562015-05-04 15:32:48 +03001306/*[clinic input]
1307_elementtree.Element.findall
1308
1309 path: object
1310 namespaces: object = None
1311
1312[clinic start generated code]*/
1313
1314static PyObject *
1315_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1316 PyObject *namespaces)
1317/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001318{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001319 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001320 PyObject* out;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001321 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001322
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001323 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001324 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001325 return _PyObject_CallMethodIdObjArgs(
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001326 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001327 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001328 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001329
1330 out = PyList_New(0);
1331 if (!out)
1332 return NULL;
1333
1334 if (!self->extra)
1335 return out;
1336
1337 for (i = 0; i < self->extra->length; i++) {
1338 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001339 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001340 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001341 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001342 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001343 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1344 Py_DECREF(item);
1345 Py_DECREF(out);
1346 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001347 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001348 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001349 }
1350
1351 return out;
1352}
1353
Serhiy Storchakacb985562015-05-04 15:32:48 +03001354/*[clinic input]
1355_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001356
Serhiy Storchakacb985562015-05-04 15:32:48 +03001357 path: object
1358 namespaces: object = None
1359
1360[clinic start generated code]*/
1361
1362static PyObject *
1363_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1364 PyObject *namespaces)
1365/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1366{
1367 PyObject* tag = path;
1368 _Py_IDENTIFIER(iterfind);
1369 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001370
Victor Stinnerf5616342016-12-09 15:26:00 +01001371 return _PyObject_CallMethodIdObjArgs(
1372 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001373}
1374
Serhiy Storchakacb985562015-05-04 15:32:48 +03001375/*[clinic input]
1376_elementtree.Element.get
1377
1378 key: object
1379 default: object = None
1380
1381[clinic start generated code]*/
1382
1383static PyObject *
1384_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1385 PyObject *default_value)
1386/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001387{
1388 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001389
1390 if (!self->extra || self->extra->attrib == Py_None)
1391 value = default_value;
1392 else {
1393 value = PyDict_GetItem(self->extra->attrib, key);
1394 if (!value)
1395 value = default_value;
1396 }
1397
1398 Py_INCREF(value);
1399 return value;
1400}
1401
Serhiy Storchakacb985562015-05-04 15:32:48 +03001402/*[clinic input]
1403_elementtree.Element.getchildren
1404
1405[clinic start generated code]*/
1406
1407static PyObject *
1408_elementtree_Element_getchildren_impl(ElementObject *self)
1409/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001410{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001411 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001412 PyObject* list;
1413
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001414 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1415 "This method will be removed in future versions. "
1416 "Use 'list(elem)' or iteration over elem instead.",
1417 1) < 0) {
1418 return NULL;
1419 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001420
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001421 if (!self->extra)
1422 return PyList_New(0);
1423
1424 list = PyList_New(self->extra->length);
1425 if (!list)
1426 return NULL;
1427
1428 for (i = 0; i < self->extra->length; i++) {
1429 PyObject* item = self->extra->children[i];
1430 Py_INCREF(item);
1431 PyList_SET_ITEM(list, i, item);
1432 }
1433
1434 return list;
1435}
1436
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001437
Eli Bendersky64d11e62012-06-15 07:42:50 +03001438static PyObject *
1439create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1440
1441
Serhiy Storchakacb985562015-05-04 15:32:48 +03001442/*[clinic input]
1443_elementtree.Element.iter
1444
1445 tag: object = None
1446
1447[clinic start generated code]*/
1448
Eli Bendersky64d11e62012-06-15 07:42:50 +03001449static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001450_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1451/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001452{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001453 if (PyUnicode_Check(tag)) {
1454 if (PyUnicode_READY(tag) < 0)
1455 return NULL;
1456 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1457 tag = Py_None;
1458 }
1459 else if (PyBytes_Check(tag)) {
1460 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1461 tag = Py_None;
1462 }
1463
Eli Bendersky64d11e62012-06-15 07:42:50 +03001464 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001465}
1466
1467
Serhiy Storchakacb985562015-05-04 15:32:48 +03001468/*[clinic input]
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001469_elementtree.Element.getiterator
1470
1471 tag: object = None
1472
1473[clinic start generated code]*/
1474
1475static PyObject *
1476_elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1477/*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1478{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03001479 if (PyErr_WarnEx(PyExc_DeprecationWarning,
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001480 "This method will be removed in future versions. "
1481 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1482 1) < 0) {
1483 return NULL;
1484 }
1485 return _elementtree_Element_iter_impl(self, tag);
1486}
1487
1488
1489/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03001490_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001491
Serhiy Storchakacb985562015-05-04 15:32:48 +03001492[clinic start generated code]*/
1493
1494static PyObject *
1495_elementtree_Element_itertext_impl(ElementObject *self)
1496/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1497{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001498 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001499}
1500
Eli Bendersky64d11e62012-06-15 07:42:50 +03001501
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001502static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001503element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001504{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001505 ElementObject* self = (ElementObject*) self_;
1506
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001507 if (!self->extra || index < 0 || index >= self->extra->length) {
1508 PyErr_SetString(
1509 PyExc_IndexError,
1510 "child index out of range"
1511 );
1512 return NULL;
1513 }
1514
1515 Py_INCREF(self->extra->children[index]);
1516 return self->extra->children[index];
1517}
1518
Serhiy Storchakacb985562015-05-04 15:32:48 +03001519/*[clinic input]
1520_elementtree.Element.insert
1521
1522 index: Py_ssize_t
1523 subelement: object(subclass_of='&Element_Type')
1524 /
1525
1526[clinic start generated code]*/
1527
1528static PyObject *
1529_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1530 PyObject *subelement)
1531/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001532{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001533 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001534
Victor Stinner5f0af232013-07-11 23:01:36 +02001535 if (!self->extra) {
1536 if (create_extra(self, NULL) < 0)
1537 return NULL;
1538 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001539
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001540 if (index < 0) {
1541 index += self->extra->length;
1542 if (index < 0)
1543 index = 0;
1544 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001545 if (index > self->extra->length)
1546 index = self->extra->length;
1547
1548 if (element_resize(self, 1) < 0)
1549 return NULL;
1550
1551 for (i = self->extra->length; i > index; i--)
1552 self->extra->children[i] = self->extra->children[i-1];
1553
Serhiy Storchakacb985562015-05-04 15:32:48 +03001554 Py_INCREF(subelement);
1555 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001556
1557 self->extra->length++;
1558
1559 Py_RETURN_NONE;
1560}
1561
Serhiy Storchakacb985562015-05-04 15:32:48 +03001562/*[clinic input]
1563_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001564
Serhiy Storchakacb985562015-05-04 15:32:48 +03001565[clinic start generated code]*/
1566
1567static PyObject *
1568_elementtree_Element_items_impl(ElementObject *self)
1569/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1570{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001571 if (!self->extra || self->extra->attrib == Py_None)
1572 return PyList_New(0);
1573
1574 return PyDict_Items(self->extra->attrib);
1575}
1576
Serhiy Storchakacb985562015-05-04 15:32:48 +03001577/*[clinic input]
1578_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001579
Serhiy Storchakacb985562015-05-04 15:32:48 +03001580[clinic start generated code]*/
1581
1582static PyObject *
1583_elementtree_Element_keys_impl(ElementObject *self)
1584/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1585{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001586 if (!self->extra || self->extra->attrib == Py_None)
1587 return PyList_New(0);
1588
1589 return PyDict_Keys(self->extra->attrib);
1590}
1591
Martin v. Löwis18e16552006-02-15 17:27:45 +00001592static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001593element_length(ElementObject* self)
1594{
1595 if (!self->extra)
1596 return 0;
1597
1598 return self->extra->length;
1599}
1600
Serhiy Storchakacb985562015-05-04 15:32:48 +03001601/*[clinic input]
1602_elementtree.Element.makeelement
1603
1604 tag: object
1605 attrib: object
1606 /
1607
1608[clinic start generated code]*/
1609
1610static PyObject *
1611_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1612 PyObject *attrib)
1613/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001614{
1615 PyObject* elem;
1616
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001617 attrib = PyDict_Copy(attrib);
1618 if (!attrib)
1619 return NULL;
1620
Eli Bendersky092af1f2012-03-04 07:14:03 +02001621 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001622
1623 Py_DECREF(attrib);
1624
1625 return elem;
1626}
1627
Serhiy Storchakacb985562015-05-04 15:32:48 +03001628/*[clinic input]
1629_elementtree.Element.remove
1630
1631 subelement: object(subclass_of='&Element_Type')
1632 /
1633
1634[clinic start generated code]*/
1635
1636static PyObject *
1637_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1638/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001639{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001640 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001641 int rc;
1642 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001643
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001644 if (!self->extra) {
1645 /* element has no children, so raise exception */
1646 PyErr_SetString(
1647 PyExc_ValueError,
1648 "list.remove(x): x not in list"
1649 );
1650 return NULL;
1651 }
1652
1653 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001654 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001655 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001656 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001657 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001658 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001659 if (rc < 0)
1660 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001661 }
1662
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001663 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001664 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001665 PyErr_SetString(
1666 PyExc_ValueError,
1667 "list.remove(x): x not in list"
1668 );
1669 return NULL;
1670 }
1671
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001672 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001673
1674 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001675 for (; i < self->extra->length; i++)
1676 self->extra->children[i] = self->extra->children[i+1];
1677
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001678 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001679 Py_RETURN_NONE;
1680}
1681
1682static PyObject*
1683element_repr(ElementObject* self)
1684{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001685 int status;
1686
1687 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001688 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001689
1690 status = Py_ReprEnter((PyObject *)self);
1691 if (status == 0) {
1692 PyObject *res;
1693 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1694 Py_ReprLeave((PyObject *)self);
1695 return res;
1696 }
1697 if (status > 0)
1698 PyErr_Format(PyExc_RuntimeError,
1699 "reentrant call inside %s.__repr__",
1700 Py_TYPE(self)->tp_name);
1701 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001702}
1703
Serhiy Storchakacb985562015-05-04 15:32:48 +03001704/*[clinic input]
1705_elementtree.Element.set
1706
1707 key: object
1708 value: object
1709 /
1710
1711[clinic start generated code]*/
1712
1713static PyObject *
1714_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1715 PyObject *value)
1716/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001717{
1718 PyObject* attrib;
1719
Victor Stinner5f0af232013-07-11 23:01:36 +02001720 if (!self->extra) {
1721 if (create_extra(self, NULL) < 0)
1722 return NULL;
1723 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001724
1725 attrib = element_get_attrib(self);
1726 if (!attrib)
1727 return NULL;
1728
1729 if (PyDict_SetItem(attrib, key, value) < 0)
1730 return NULL;
1731
1732 Py_RETURN_NONE;
1733}
1734
1735static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001736element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001737{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001738 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001739 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001740 PyObject* old;
1741
1742 if (!self->extra || index < 0 || index >= self->extra->length) {
1743 PyErr_SetString(
1744 PyExc_IndexError,
1745 "child assignment index out of range");
1746 return -1;
1747 }
1748
1749 old = self->extra->children[index];
1750
1751 if (item) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001752 if (!Element_Check(item)) {
1753 raise_type_error(item);
1754 return -1;
1755 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001756 Py_INCREF(item);
1757 self->extra->children[index] = item;
1758 } else {
1759 self->extra->length--;
1760 for (i = index; i < self->extra->length; i++)
1761 self->extra->children[i] = self->extra->children[i+1];
1762 }
1763
1764 Py_DECREF(old);
1765
1766 return 0;
1767}
1768
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001769static PyObject*
1770element_subscr(PyObject* self_, PyObject* item)
1771{
1772 ElementObject* self = (ElementObject*) self_;
1773
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001774 if (PyIndex_Check(item)) {
1775 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001776
1777 if (i == -1 && PyErr_Occurred()) {
1778 return NULL;
1779 }
1780 if (i < 0 && self->extra)
1781 i += self->extra->length;
1782 return element_getitem(self_, i);
1783 }
1784 else if (PySlice_Check(item)) {
1785 Py_ssize_t start, stop, step, slicelen, cur, i;
1786 PyObject* list;
1787
1788 if (!self->extra)
1789 return PyList_New(0);
1790
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001791 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001792 return NULL;
1793 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001794 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1795 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001796
1797 if (slicelen <= 0)
1798 return PyList_New(0);
1799 else {
1800 list = PyList_New(slicelen);
1801 if (!list)
1802 return NULL;
1803
1804 for (cur = start, i = 0; i < slicelen;
1805 cur += step, i++) {
1806 PyObject* item = self->extra->children[cur];
1807 Py_INCREF(item);
1808 PyList_SET_ITEM(list, i, item);
1809 }
1810
1811 return list;
1812 }
1813 }
1814 else {
1815 PyErr_SetString(PyExc_TypeError,
1816 "element indices must be integers");
1817 return NULL;
1818 }
1819}
1820
1821static int
1822element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1823{
1824 ElementObject* self = (ElementObject*) self_;
1825
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001826 if (PyIndex_Check(item)) {
1827 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001828
1829 if (i == -1 && PyErr_Occurred()) {
1830 return -1;
1831 }
1832 if (i < 0 && self->extra)
1833 i += self->extra->length;
1834 return element_setitem(self_, i, value);
1835 }
1836 else if (PySlice_Check(item)) {
1837 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1838
1839 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001840 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001841
Victor Stinner5f0af232013-07-11 23:01:36 +02001842 if (!self->extra) {
1843 if (create_extra(self, NULL) < 0)
1844 return -1;
1845 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001846
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001847 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001848 return -1;
1849 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001850 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1851 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001852
Eli Bendersky865756a2012-03-09 13:38:15 +02001853 if (value == NULL) {
1854 /* Delete slice */
1855 size_t cur;
1856 Py_ssize_t i;
1857
1858 if (slicelen <= 0)
1859 return 0;
1860
1861 /* Since we're deleting, the direction of the range doesn't matter,
1862 * so for simplicity make it always ascending.
1863 */
1864 if (step < 0) {
1865 stop = start + 1;
1866 start = stop + step * (slicelen - 1) - 1;
1867 step = -step;
1868 }
1869
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001870 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001871
1872 /* recycle is a list that will contain all the children
1873 * scheduled for removal.
1874 */
1875 if (!(recycle = PyList_New(slicelen))) {
Eli Bendersky865756a2012-03-09 13:38:15 +02001876 return -1;
1877 }
1878
1879 /* This loop walks over all the children that have to be deleted,
1880 * with cur pointing at them. num_moved is the amount of children
1881 * until the next deleted child that have to be "shifted down" to
1882 * occupy the deleted's places.
1883 * Note that in the ith iteration, shifting is done i+i places down
1884 * because i children were already removed.
1885 */
1886 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1887 /* Compute how many children have to be moved, clipping at the
1888 * list end.
1889 */
1890 Py_ssize_t num_moved = step - 1;
1891 if (cur + step >= (size_t)self->extra->length) {
1892 num_moved = self->extra->length - cur - 1;
1893 }
1894
1895 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1896
1897 memmove(
1898 self->extra->children + cur - i,
1899 self->extra->children + cur + 1,
1900 num_moved * sizeof(PyObject *));
1901 }
1902
1903 /* Leftover "tail" after the last removed child */
1904 cur = start + (size_t)slicelen * step;
1905 if (cur < (size_t)self->extra->length) {
1906 memmove(
1907 self->extra->children + cur - slicelen,
1908 self->extra->children + cur,
1909 (self->extra->length - cur) * sizeof(PyObject *));
1910 }
1911
1912 self->extra->length -= slicelen;
1913
1914 /* Discard the recycle list with all the deleted sub-elements */
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -06001915 Py_DECREF(recycle);
Eli Bendersky865756a2012-03-09 13:38:15 +02001916 return 0;
1917 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001918
1919 /* A new slice is actually being assigned */
1920 seq = PySequence_Fast(value, "");
1921 if (!seq) {
1922 PyErr_Format(
1923 PyExc_TypeError,
1924 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1925 );
1926 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001927 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001928 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001929
1930 if (step != 1 && newlen != slicelen)
1931 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001932 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001933 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001934 "attempt to assign sequence of size %zd "
1935 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001936 newlen, slicelen
1937 );
1938 return -1;
1939 }
1940
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001941 /* Resize before creating the recycle bin, to prevent refleaks. */
1942 if (newlen > slicelen) {
1943 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001944 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001945 return -1;
1946 }
1947 }
1948
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001949 for (i = 0; i < newlen; i++) {
1950 PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1951 if (!Element_Check(element)) {
1952 raise_type_error(element);
1953 Py_DECREF(seq);
1954 return -1;
1955 }
1956 }
1957
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001958 if (slicelen > 0) {
1959 /* to avoid recursive calls to this method (via decref), move
1960 old items to the recycle bin here, and get rid of them when
1961 we're done modifying the element */
1962 recycle = PyList_New(slicelen);
1963 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001964 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001965 return -1;
1966 }
1967 for (cur = start, i = 0; i < slicelen;
1968 cur += step, i++)
1969 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1970 }
1971
1972 if (newlen < slicelen) {
1973 /* delete slice */
1974 for (i = stop; i < self->extra->length; i++)
1975 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1976 } else if (newlen > slicelen) {
1977 /* insert slice */
1978 for (i = self->extra->length-1; i >= stop; i--)
1979 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1980 }
1981
1982 /* replace the slice */
1983 for (cur = start, i = 0; i < newlen;
1984 cur += step, i++) {
1985 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1986 Py_INCREF(element);
1987 self->extra->children[cur] = element;
1988 }
1989
1990 self->extra->length += newlen - slicelen;
1991
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001992 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001993
1994 /* discard the recycle bin, and everything in it */
1995 Py_XDECREF(recycle);
1996
1997 return 0;
1998 }
1999 else {
2000 PyErr_SetString(PyExc_TypeError,
2001 "element indices must be integers");
2002 return -1;
2003 }
2004}
2005
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002006static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02002007element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002008{
Serhiy Storchakadde08152015-11-25 15:28:13 +02002009 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002010 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002011 return res;
2012}
2013
Serhiy Storchakadde08152015-11-25 15:28:13 +02002014static PyObject*
2015element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002016{
Serhiy Storchakadde08152015-11-25 15:28:13 +02002017 PyObject *res = element_get_text(self);
2018 Py_XINCREF(res);
2019 return res;
2020}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002021
Serhiy Storchakadde08152015-11-25 15:28:13 +02002022static PyObject*
2023element_tail_getter(ElementObject *self, void *closure)
2024{
2025 PyObject *res = element_get_tail(self);
2026 Py_XINCREF(res);
2027 return res;
2028}
2029
2030static PyObject*
2031element_attrib_getter(ElementObject *self, void *closure)
2032{
2033 PyObject *res;
2034 if (!self->extra) {
2035 if (create_extra(self, NULL) < 0)
2036 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002037 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02002038 res = element_get_attrib(self);
2039 Py_XINCREF(res);
2040 return res;
2041}
Victor Stinner4d463432013-07-11 23:05:03 +02002042
/* Macro for setter validation.
 *
 * A setter receives V == NULL when the attribute is being deleted; that is
 * refused with AttributeError and the *calling function* returns -1.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement: it must be followed by a semicolon and can be used
 * safely inside an unbraced if/else.
 */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2051
Serhiy Storchakadde08152015-11-25 15:28:13 +02002052static int
2053element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2054{
2055 _VALIDATE_ATTR_VALUE(value);
2056 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002057 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002058 return 0;
2059}
2060
2061static int
2062element_text_setter(ElementObject *self, PyObject *value, void *closure)
2063{
2064 _VALIDATE_ATTR_VALUE(value);
2065 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002066 _set_joined_ptr(&self->text, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002067 return 0;
2068}
2069
2070static int
2071element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2072{
2073 _VALIDATE_ATTR_VALUE(value);
2074 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002075 _set_joined_ptr(&self->tail, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002076 return 0;
2077}
2078
2079static int
2080element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2081{
2082 _VALIDATE_ATTR_VALUE(value);
2083 if (!self->extra) {
2084 if (create_extra(self, NULL) < 0)
2085 return -1;
2086 }
2087 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002088 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002089 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002090}
2091
2092static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002093 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002094 0, /* sq_concat */
2095 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002096 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002097 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002098 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002099 0,
2100};
2101
Eli Bendersky64d11e62012-06-15 07:42:50 +03002102/******************************* Element iterator ****************************/
2103
2104/* ElementIterObject represents the iteration state over an XML element in
2105 * pre-order traversal. To keep track of which sub-element should be returned
2106 * next, a stack of parents is maintained. This is a standard stack-based
2107 * iterative pre-order traversal of a tree.
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002108 * The stack is managed using a continuous array.
2109 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002110 * the current one is exhausted, and the next child to examine in that parent.
2111 */
2112typedef struct ParentLocator_t {
2113 ElementObject *parent;
2114 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002115} ParentLocator;
2116
2117typedef struct {
2118 PyObject_HEAD
2119 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002120 Py_ssize_t parent_stack_used;
2121 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002122 ElementObject *root_element;
2123 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002124 int gettext;
2125} ElementIterObject;
2126
2127
2128static void
2129elementiter_dealloc(ElementIterObject *it)
2130{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002131 Py_ssize_t i = it->parent_stack_used;
2132 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002133 /* bpo-31095: UnTrack is needed before calling any callbacks */
2134 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002135 while (i--)
2136 Py_XDECREF(it->parent_stack[i].parent);
2137 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002138
2139 Py_XDECREF(it->sought_tag);
2140 Py_XDECREF(it->root_element);
2141
Eli Bendersky64d11e62012-06-15 07:42:50 +03002142 PyObject_GC_Del(it);
2143}
2144
2145static int
2146elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2147{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002148 Py_ssize_t i = it->parent_stack_used;
2149 while (i--)
2150 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002151
2152 Py_VISIT(it->root_element);
2153 Py_VISIT(it->sought_tag);
2154 return 0;
2155}
2156
2157/* Helper function for elementiter_next. Add a new parent to the parent stack.
2158 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002159static int
2160parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002161{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002162 ParentLocator *item;
2163
2164 if (it->parent_stack_used >= it->parent_stack_size) {
2165 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2166 ParentLocator *parent_stack = it->parent_stack;
2167 PyMem_Resize(parent_stack, ParentLocator, new_size);
2168 if (parent_stack == NULL)
2169 return -1;
2170 it->parent_stack = parent_stack;
2171 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002172 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002173 item = it->parent_stack + it->parent_stack_used++;
2174 Py_INCREF(parent);
2175 item->parent = parent;
2176 item->child_index = 0;
2177 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002178}
2179
2180static PyObject *
2181elementiter_next(ElementIterObject *it)
2182{
2183 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002184 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002185 * A short note on gettext: this function serves both the iter() and
2186 * itertext() methods to avoid code duplication. However, there are a few
2187 * small differences in the way these iterations work. Namely:
2188 * - itertext() only yields text from nodes that have it, and continues
2189 * iterating when a node doesn't have text (so it doesn't return any
2190 * node like iter())
2191 * - itertext() also has to handle tail, after finishing with all the
2192 * children of a node.
2193 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002194 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002195 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002196 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002197
2198 while (1) {
2199 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002200 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002201 * iterator is exhausted.
2202 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002203 if (!it->parent_stack_used) {
2204 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002205 PyErr_SetNone(PyExc_StopIteration);
2206 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002207 }
2208
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002209 elem = it->root_element; /* steals a reference */
2210 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002211 }
2212 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002213 /* See if there are children left to traverse in the current parent. If
2214 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002215 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002216 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2217 Py_ssize_t child_index = item->child_index;
2218 ElementObjectExtra *extra;
2219 elem = item->parent;
2220 extra = elem->extra;
2221 if (!extra || child_index >= extra->length) {
2222 it->parent_stack_used--;
2223 /* Note that extra condition on it->parent_stack_used here;
2224 * this is because itertext() is supposed to only return *inner*
2225 * text, not text following the element it began iteration with.
2226 */
2227 if (it->gettext && it->parent_stack_used) {
2228 text = element_get_tail(elem);
2229 goto gettext;
2230 }
2231 Py_DECREF(elem);
2232 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002233 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002234
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03002235 assert(Element_Check(extra->children[child_index]));
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002236 elem = (ElementObject *)extra->children[child_index];
2237 item->child_index++;
2238 Py_INCREF(elem);
2239 }
2240
2241 if (parent_stack_push_new(it, elem) < 0) {
2242 Py_DECREF(elem);
2243 PyErr_NoMemory();
2244 return NULL;
2245 }
2246 if (it->gettext) {
2247 text = element_get_text(elem);
2248 goto gettext;
2249 }
2250
2251 if (it->sought_tag == Py_None)
2252 return (PyObject *)elem;
2253
2254 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2255 if (rc > 0)
2256 return (PyObject *)elem;
2257
2258 Py_DECREF(elem);
2259 if (rc < 0)
2260 return NULL;
2261 continue;
2262
2263gettext:
2264 if (!text) {
2265 Py_DECREF(elem);
2266 return NULL;
2267 }
2268 if (text == Py_None) {
2269 Py_DECREF(elem);
2270 }
2271 else {
2272 Py_INCREF(text);
2273 Py_DECREF(elem);
2274 rc = PyObject_IsTrue(text);
2275 if (rc > 0)
2276 return text;
2277 Py_DECREF(text);
2278 if (rc < 0)
2279 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002280 }
2281 }
2282
2283 return NULL;
2284}
2285
2286
2287static PyTypeObject ElementIter_Type = {
2288 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002289 /* Using the module's name since the pure-Python implementation does not
2290 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002291 "_elementtree._element_iterator", /* tp_name */
2292 sizeof(ElementIterObject), /* tp_basicsize */
2293 0, /* tp_itemsize */
2294 /* methods */
2295 (destructor)elementiter_dealloc, /* tp_dealloc */
2296 0, /* tp_print */
2297 0, /* tp_getattr */
2298 0, /* tp_setattr */
2299 0, /* tp_reserved */
2300 0, /* tp_repr */
2301 0, /* tp_as_number */
2302 0, /* tp_as_sequence */
2303 0, /* tp_as_mapping */
2304 0, /* tp_hash */
2305 0, /* tp_call */
2306 0, /* tp_str */
2307 0, /* tp_getattro */
2308 0, /* tp_setattro */
2309 0, /* tp_as_buffer */
2310 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2311 0, /* tp_doc */
2312 (traverseproc)elementiter_traverse, /* tp_traverse */
2313 0, /* tp_clear */
2314 0, /* tp_richcompare */
2315 0, /* tp_weaklistoffset */
2316 PyObject_SelfIter, /* tp_iter */
2317 (iternextfunc)elementiter_next, /* tp_iternext */
2318 0, /* tp_methods */
2319 0, /* tp_members */
2320 0, /* tp_getset */
2321 0, /* tp_base */
2322 0, /* tp_dict */
2323 0, /* tp_descr_get */
2324 0, /* tp_descr_set */
2325 0, /* tp_dictoffset */
2326 0, /* tp_init */
2327 0, /* tp_alloc */
2328 0, /* tp_new */
2329};
2330
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002331#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002332
2333static PyObject *
2334create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2335{
2336 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002337
2338 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2339 if (!it)
2340 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002341
Victor Stinner4d463432013-07-11 23:05:03 +02002342 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002343 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002344 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002345 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002346 it->root_element = self;
2347
Eli Bendersky64d11e62012-06-15 07:42:50 +03002348 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002349
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002350 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002351 if (it->parent_stack == NULL) {
2352 Py_DECREF(it);
2353 PyErr_NoMemory();
2354 return NULL;
2355 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002356 it->parent_stack_used = 0;
2357 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002358
Eli Bendersky64d11e62012-06-15 07:42:50 +03002359 return (PyObject *)it;
2360}
2361
2362
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002363/* ==================================================================== */
2364/* the tree builder type */
2365
2366typedef struct {
2367 PyObject_HEAD
2368
Eli Bendersky58d548d2012-05-29 15:45:16 +03002369 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002370
Antoine Pitrouee329312012-10-04 19:53:29 +02002371 PyObject *this; /* current node */
2372 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002373
Eli Bendersky58d548d2012-05-29 15:45:16 +03002374 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002375
Eli Bendersky58d548d2012-05-29 15:45:16 +03002376 PyObject *stack; /* element stack */
2377 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002378
Eli Bendersky48d358b2012-05-30 17:57:50 +03002379 PyObject *element_factory;
2380
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002381 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002382 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002383 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2384 PyObject *end_event_obj;
2385 PyObject *start_ns_event_obj;
2386 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002387} TreeBuilderObject;
2388
Christian Heimes90aa7642007-12-19 02:45:37 +00002389#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002390
2391/* -------------------------------------------------------------------- */
2392/* constructor and destructor */
2393
Eli Bendersky58d548d2012-05-29 15:45:16 +03002394static PyObject *
2395treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002396{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002397 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2398 if (t != NULL) {
2399 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002400
Eli Bendersky58d548d2012-05-29 15:45:16 +03002401 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002402 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002403 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002404 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002405
Eli Bendersky58d548d2012-05-29 15:45:16 +03002406 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002407 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002408 t->stack = PyList_New(20);
2409 if (!t->stack) {
2410 Py_DECREF(t->this);
2411 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002412 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002413 return NULL;
2414 }
2415 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002416
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002417 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002418 t->start_event_obj = t->end_event_obj = NULL;
2419 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2420 }
2421 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002422}
2423
Serhiy Storchakacb985562015-05-04 15:32:48 +03002424/*[clinic input]
2425_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002426
Serhiy Storchakacb985562015-05-04 15:32:48 +03002427 element_factory: object = NULL
2428
2429[clinic start generated code]*/
2430
2431static int
2432_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2433 PyObject *element_factory)
2434/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2435{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002436 if (element_factory) {
2437 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002438 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002439 }
2440
Eli Bendersky58d548d2012-05-29 15:45:16 +03002441 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002442}
2443
Eli Bendersky48d358b2012-05-30 17:57:50 +03002444static int
2445treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2446{
2447 Py_VISIT(self->root);
2448 Py_VISIT(self->this);
2449 Py_VISIT(self->last);
2450 Py_VISIT(self->data);
2451 Py_VISIT(self->stack);
2452 Py_VISIT(self->element_factory);
2453 return 0;
2454}
2455
2456static int
2457treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002458{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002459 Py_CLEAR(self->end_ns_event_obj);
2460 Py_CLEAR(self->start_ns_event_obj);
2461 Py_CLEAR(self->end_event_obj);
2462 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002463 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002464 Py_CLEAR(self->stack);
2465 Py_CLEAR(self->data);
2466 Py_CLEAR(self->last);
2467 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002468 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002469 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002470 return 0;
2471}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002472
Eli Bendersky48d358b2012-05-30 17:57:50 +03002473static void
2474treebuilder_dealloc(TreeBuilderObject *self)
2475{
2476 PyObject_GC_UnTrack(self);
2477 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002478 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002479}
2480
2481/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002482/* helpers for handling of arbitrary element-like objects */
2483
2484static int
Serhiy Storchaka576def02017-03-30 09:47:31 +03002485treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
Antoine Pitrouee329312012-10-04 19:53:29 +02002486 PyObject **dest, _Py_Identifier *name)
2487{
2488 if (Element_CheckExact(element)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002489 PyObject *tmp = JOIN_OBJ(*dest);
2490 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2491 *data = NULL;
2492 Py_DECREF(tmp);
Antoine Pitrouee329312012-10-04 19:53:29 +02002493 return 0;
2494 }
2495 else {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002496 PyObject *joined = list_join(*data);
Antoine Pitrouee329312012-10-04 19:53:29 +02002497 int r;
2498 if (joined == NULL)
2499 return -1;
2500 r = _PyObject_SetAttrId(element, name, joined);
2501 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002502 if (r < 0)
2503 return -1;
2504 Py_CLEAR(*data);
2505 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002506 }
2507}
2508
Serhiy Storchaka576def02017-03-30 09:47:31 +03002509LOCAL(int)
2510treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002511{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002512 PyObject *element = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002513
Serhiy Storchaka576def02017-03-30 09:47:31 +03002514 if (!self->data) {
2515 return 0;
2516 }
2517
2518 if (self->this == element) {
2519 _Py_IDENTIFIER(text);
2520 return treebuilder_set_element_text_or_tail(
2521 element, &self->data,
2522 &((ElementObject *) element)->text, &PyId_text);
2523 }
2524 else {
2525 _Py_IDENTIFIER(tail);
2526 return treebuilder_set_element_text_or_tail(
2527 element, &self->data,
2528 &((ElementObject *) element)->tail, &PyId_tail);
2529 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002530}
2531
2532static int
2533treebuilder_add_subelement(PyObject *element, PyObject *child)
2534{
2535 _Py_IDENTIFIER(append);
2536 if (Element_CheckExact(element)) {
2537 ElementObject *elem = (ElementObject *) element;
2538 return element_add_subelement(elem, child);
2539 }
2540 else {
2541 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002542 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002543 if (res == NULL)
2544 return -1;
2545 Py_DECREF(res);
2546 return 0;
2547 }
2548}
2549
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002550LOCAL(int)
2551treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2552 PyObject *node)
2553{
2554 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002555 PyObject *res;
2556 PyObject *event = PyTuple_Pack(2, action, node);
2557 if (event == NULL)
2558 return -1;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002559 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002560 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002561 if (res == NULL)
2562 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002563 Py_DECREF(res);
2564 }
2565 return 0;
2566}
2567
Antoine Pitrouee329312012-10-04 19:53:29 +02002568/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002569/* handlers */
2570
2571LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002572treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2573 PyObject* attrib)
2574{
2575 PyObject* node;
2576 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002577 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002578
Serhiy Storchaka576def02017-03-30 09:47:31 +03002579 if (treebuilder_flush_data(self) < 0) {
2580 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002581 }
2582
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002583 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002584 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002585 } else if (attrib == Py_None) {
2586 attrib = PyDict_New();
2587 if (!attrib)
2588 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002589 node = PyObject_CallFunctionObjArgs(self->element_factory,
2590 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002591 Py_DECREF(attrib);
2592 }
2593 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002594 node = PyObject_CallFunctionObjArgs(self->element_factory,
2595 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002596 }
2597 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002598 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002599 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002600
Antoine Pitrouee329312012-10-04 19:53:29 +02002601 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002602
2603 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002604 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002605 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002606 } else {
2607 if (self->root) {
2608 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002609 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002610 "multiple elements on top level"
2611 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002612 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002613 }
2614 Py_INCREF(node);
2615 self->root = node;
2616 }
2617
2618 if (self->index < PyList_GET_SIZE(self->stack)) {
2619 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002620 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002621 Py_INCREF(this);
2622 } else {
2623 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002624 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002625 }
2626 self->index++;
2627
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002628 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002629 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002630 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002631 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002632
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002633 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2634 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002635
2636 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002637
2638 error:
2639 Py_DECREF(node);
2640 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002641}
2642
2643LOCAL(PyObject*)
2644treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2645{
2646 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002647 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002648 /* ignore calls to data before the first call to start */
2649 Py_RETURN_NONE;
2650 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002651 /* store the first item as is */
2652 Py_INCREF(data); self->data = data;
2653 } else {
2654 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002655 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2656 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002657 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002658 /* expat often generates single character data sections; handle
2659 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002660 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2661 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002662 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002663 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002664 } else if (PyList_CheckExact(self->data)) {
2665 if (PyList_Append(self->data, data) < 0)
2666 return NULL;
2667 } else {
2668 PyObject* list = PyList_New(2);
2669 if (!list)
2670 return NULL;
2671 PyList_SET_ITEM(list, 0, self->data);
2672 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2673 self->data = list;
2674 }
2675 }
2676
2677 Py_RETURN_NONE;
2678}
2679
2680LOCAL(PyObject*)
2681treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2682{
2683 PyObject* item;
2684
Serhiy Storchaka576def02017-03-30 09:47:31 +03002685 if (treebuilder_flush_data(self) < 0) {
2686 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002687 }
2688
2689 if (self->index == 0) {
2690 PyErr_SetString(
2691 PyExc_IndexError,
2692 "pop from empty stack"
2693 );
2694 return NULL;
2695 }
2696
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002697 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002698 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002699 self->index--;
2700 self->this = PyList_GET_ITEM(self->stack, self->index);
2701 Py_INCREF(self->this);
2702 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002703
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002704 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2705 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002706
2707 Py_INCREF(self->last);
2708 return (PyObject*) self->last;
2709}
2710
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002711/* -------------------------------------------------------------------- */
2712/* methods (in alphabetical order) */
2713
Serhiy Storchakacb985562015-05-04 15:32:48 +03002714/*[clinic input]
2715_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002716
Serhiy Storchakacb985562015-05-04 15:32:48 +03002717 data: object
2718 /
2719
2720[clinic start generated code]*/
2721
2722static PyObject *
2723_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2724/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2725{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002726 return treebuilder_handle_data(self, data);
2727}
2728
Serhiy Storchakacb985562015-05-04 15:32:48 +03002729/*[clinic input]
2730_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002731
Serhiy Storchakacb985562015-05-04 15:32:48 +03002732 tag: object
2733 /
2734
2735[clinic start generated code]*/
2736
2737static PyObject *
2738_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2739/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2740{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002741 return treebuilder_handle_end(self, tag);
2742}
2743
2744LOCAL(PyObject*)
2745treebuilder_done(TreeBuilderObject* self)
2746{
2747 PyObject* res;
2748
2749 /* FIXME: check stack size? */
2750
2751 if (self->root)
2752 res = self->root;
2753 else
2754 res = Py_None;
2755
2756 Py_INCREF(res);
2757 return res;
2758}
2759
Serhiy Storchakacb985562015-05-04 15:32:48 +03002760/*[clinic input]
2761_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002762
Serhiy Storchakacb985562015-05-04 15:32:48 +03002763[clinic start generated code]*/
2764
2765static PyObject *
2766_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2767/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2768{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002769 return treebuilder_done(self);
2770}
2771
Serhiy Storchakacb985562015-05-04 15:32:48 +03002772/*[clinic input]
2773_elementtree.TreeBuilder.start
2774
2775 tag: object
2776 attrs: object = None
2777 /
2778
2779[clinic start generated code]*/
2780
2781static PyObject *
2782_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2783 PyObject *attrs)
2784/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002785{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002786 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002787}
2788
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002789/* ==================================================================== */
2790/* the expat interface */
2791
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002792#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002793#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002794
2795/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2796 * cached globally without being in per-module state.
2797 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002798static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002799#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002800
Eli Bendersky52467b12012-06-01 07:13:08 +03002801static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2802 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2803
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002804typedef struct {
2805 PyObject_HEAD
2806
2807 XML_Parser parser;
2808
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002809 PyObject *target;
2810 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002811
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002812 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002813
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002814 PyObject *handle_start;
2815 PyObject *handle_data;
2816 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002817
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002818 PyObject *handle_comment;
2819 PyObject *handle_pi;
2820 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002821
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002822 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002823
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002824} XMLParserObject;
2825
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002826/* helpers */
2827
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002828LOCAL(PyObject*)
2829makeuniversal(XMLParserObject* self, const char* string)
2830{
2831 /* convert a UTF-8 tag/attribute name from the expat parser
2832 to a universal name string */
2833
Antoine Pitrouc1948842012-10-01 23:40:37 +02002834 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002835 PyObject* key;
2836 PyObject* value;
2837
2838 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002839 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002840 if (!key)
2841 return NULL;
2842
2843 value = PyDict_GetItem(self->names, key);
2844
2845 if (value) {
2846 Py_INCREF(value);
2847 } else {
2848 /* new name. convert to universal name, and decode as
2849 necessary */
2850
2851 PyObject* tag;
2852 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002853 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002854
2855 /* look for namespace separator */
2856 for (i = 0; i < size; i++)
2857 if (string[i] == '}')
2858 break;
2859 if (i != size) {
2860 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002861 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002862 if (tag == NULL) {
2863 Py_DECREF(key);
2864 return NULL;
2865 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002866 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002867 p[0] = '{';
2868 memcpy(p+1, string, size);
2869 size++;
2870 } else {
2871 /* plain name; use key as tag */
2872 Py_INCREF(key);
2873 tag = key;
2874 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002875
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002876 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002877 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002878 value = PyUnicode_DecodeUTF8(p, size, "strict");
2879 Py_DECREF(tag);
2880 if (!value) {
2881 Py_DECREF(key);
2882 return NULL;
2883 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002884
2885 /* add to names dictionary */
2886 if (PyDict_SetItem(self->names, key, value) < 0) {
2887 Py_DECREF(key);
2888 Py_DECREF(value);
2889 return NULL;
2890 }
2891 }
2892
2893 Py_DECREF(key);
2894 return value;
2895}
2896
Eli Bendersky5b77d812012-03-16 08:20:05 +02002897/* Set the ParseError exception with the given parameters.
2898 * If message is not NULL, it's used as the error string. Otherwise, the
2899 * message string is the default for the given error_code.
2900*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002901static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002902expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2903 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002904{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002905 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002906 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002907
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002908 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002909 message ? message : EXPAT(ErrorString)(error_code),
2910 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002911 if (errmsg == NULL)
2912 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002913
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002914 error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002915 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002916 if (!error)
2917 return;
2918
Eli Bendersky5b77d812012-03-16 08:20:05 +02002919 /* Add code and position attributes */
2920 code = PyLong_FromLong((long)error_code);
2921 if (!code) {
2922 Py_DECREF(error);
2923 return;
2924 }
2925 if (PyObject_SetAttrString(error, "code", code) == -1) {
2926 Py_DECREF(error);
2927 Py_DECREF(code);
2928 return;
2929 }
2930 Py_DECREF(code);
2931
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002932 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002933 if (!position) {
2934 Py_DECREF(error);
2935 return;
2936 }
2937 if (PyObject_SetAttrString(error, "position", position) == -1) {
2938 Py_DECREF(error);
2939 Py_DECREF(position);
2940 return;
2941 }
2942 Py_DECREF(position);
2943
Eli Bendersky532d03e2013-08-10 08:00:39 -07002944 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002945 Py_DECREF(error);
2946}
2947
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002948/* -------------------------------------------------------------------- */
2949/* handlers */
2950
2951static void
2952expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2953 int data_len)
2954{
2955 PyObject* key;
2956 PyObject* value;
2957 PyObject* res;
2958
2959 if (data_len < 2 || data_in[0] != '&')
2960 return;
2961
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002962 if (PyErr_Occurred())
2963 return;
2964
Neal Norwitz0269b912007-08-08 06:56:02 +00002965 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002966 if (!key)
2967 return;
2968
2969 value = PyDict_GetItem(self->entity, key);
2970
2971 if (value) {
2972 if (TreeBuilder_CheckExact(self->target))
2973 res = treebuilder_handle_data(
2974 (TreeBuilderObject*) self->target, value
2975 );
2976 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002977 res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002978 else
2979 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002980 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002981 } else if (!PyErr_Occurred()) {
2982 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002983 char message[128] = "undefined entity ";
2984 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002985 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002986 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002987 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002988 EXPAT(GetErrorColumnNumber)(self->parser),
2989 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002990 );
2991 }
2992
2993 Py_DECREF(key);
2994}
2995
2996static void
2997expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2998 const XML_Char **attrib_in)
2999{
3000 PyObject* res;
3001 PyObject* tag;
3002 PyObject* attrib;
3003 int ok;
3004
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003005 if (PyErr_Occurred())
3006 return;
3007
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003008 /* tag name */
3009 tag = makeuniversal(self, tag_in);
3010 if (!tag)
3011 return; /* parser will look for errors */
3012
3013 /* attributes */
3014 if (attrib_in[0]) {
3015 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003016 if (!attrib) {
3017 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003018 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003019 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003020 while (attrib_in[0] && attrib_in[1]) {
3021 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00003022 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003023 if (!key || !value) {
3024 Py_XDECREF(value);
3025 Py_XDECREF(key);
3026 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003027 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003028 return;
3029 }
3030 ok = PyDict_SetItem(attrib, key, value);
3031 Py_DECREF(value);
3032 Py_DECREF(key);
3033 if (ok < 0) {
3034 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003035 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003036 return;
3037 }
3038 attrib_in += 2;
3039 }
3040 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003041 Py_INCREF(Py_None);
3042 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03003043 }
3044
3045 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003046 /* shortcut */
3047 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3048 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003049 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003050 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003051 if (attrib == Py_None) {
3052 Py_DECREF(attrib);
3053 attrib = PyDict_New();
3054 if (!attrib) {
3055 Py_DECREF(tag);
3056 return;
3057 }
3058 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003059 res = PyObject_CallFunctionObjArgs(self->handle_start,
3060 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003061 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003062 res = NULL;
3063
3064 Py_DECREF(tag);
3065 Py_DECREF(attrib);
3066
3067 Py_XDECREF(res);
3068}
3069
3070static void
3071expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3072 int data_len)
3073{
3074 PyObject* data;
3075 PyObject* res;
3076
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003077 if (PyErr_Occurred())
3078 return;
3079
Neal Norwitz0269b912007-08-08 06:56:02 +00003080 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003081 if (!data)
3082 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003083
3084 if (TreeBuilder_CheckExact(self->target))
3085 /* shortcut */
3086 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3087 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003088 res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003089 else
3090 res = NULL;
3091
3092 Py_DECREF(data);
3093
3094 Py_XDECREF(res);
3095}
3096
3097static void
3098expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3099{
3100 PyObject* tag;
3101 PyObject* res = NULL;
3102
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003103 if (PyErr_Occurred())
3104 return;
3105
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003106 if (TreeBuilder_CheckExact(self->target))
3107 /* shortcut */
3108 /* the standard tree builder doesn't look at the end tag */
3109 res = treebuilder_handle_end(
3110 (TreeBuilderObject*) self->target, Py_None
3111 );
3112 else if (self->handle_end) {
3113 tag = makeuniversal(self, tag_in);
3114 if (tag) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003115 res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003116 Py_DECREF(tag);
3117 }
3118 }
3119
3120 Py_XDECREF(res);
3121}
3122
3123static void
3124expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3125 const XML_Char *uri)
3126{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003127 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3128 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003129
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003130 if (PyErr_Occurred())
3131 return;
3132
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003133 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003134 return;
3135
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003136 if (!uri)
3137 uri = "";
3138 if (!prefix)
3139 prefix = "";
3140
3141 parcel = Py_BuildValue("ss", prefix, uri);
3142 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003143 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003144 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3145 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003146}
3147
3148static void
3149expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3150{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003151 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3152
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003153 if (PyErr_Occurred())
3154 return;
3155
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003156 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003157 return;
3158
3159 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003160}
3161
3162static void
3163expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3164{
3165 PyObject* comment;
3166 PyObject* res;
3167
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003168 if (PyErr_Occurred())
3169 return;
3170
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003171 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003172 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003173 if (comment) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003174 res = PyObject_CallFunctionObjArgs(self->handle_comment,
3175 comment, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003176 Py_XDECREF(res);
3177 Py_DECREF(comment);
3178 }
3179 }
3180}
3181
Eli Bendersky45839902013-01-13 05:14:47 -08003182static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003183expat_start_doctype_handler(XMLParserObject *self,
3184 const XML_Char *doctype_name,
3185 const XML_Char *sysid,
3186 const XML_Char *pubid,
3187 int has_internal_subset)
3188{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003189 _Py_IDENTIFIER(doctype);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003190 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003191 PyObject *res;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003192
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003193 if (PyErr_Occurred())
3194 return;
3195
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003196 doctype_name_obj = makeuniversal(self, doctype_name);
3197 if (!doctype_name_obj)
3198 return;
3199
3200 if (sysid) {
3201 sysid_obj = makeuniversal(self, sysid);
3202 if (!sysid_obj) {
3203 Py_DECREF(doctype_name_obj);
3204 return;
3205 }
3206 } else {
3207 Py_INCREF(Py_None);
3208 sysid_obj = Py_None;
3209 }
3210
3211 if (pubid) {
3212 pubid_obj = makeuniversal(self, pubid);
3213 if (!pubid_obj) {
3214 Py_DECREF(doctype_name_obj);
3215 Py_DECREF(sysid_obj);
3216 return;
3217 }
3218 } else {
3219 Py_INCREF(Py_None);
3220 pubid_obj = Py_None;
3221 }
3222
3223 /* If the target has a handler for doctype, call it. */
3224 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003225 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3226 doctype_name_obj, pubid_obj,
3227 sysid_obj, NULL);
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003228 Py_XDECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003229 }
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003230 else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3231 (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3232 "The doctype() method of XMLParser is ignored. "
3233 "Define doctype() method on the TreeBuilder target.",
3234 1);
Serhiy Storchakaee98e7b2018-07-25 14:52:45 +03003235 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003236 }
3237
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003238 Py_DECREF(doctype_name_obj);
3239 Py_DECREF(pubid_obj);
3240 Py_DECREF(sysid_obj);
3241}
3242
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003243static void
3244expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3245 const XML_Char* data_in)
3246{
3247 PyObject* target;
3248 PyObject* data;
3249 PyObject* res;
3250
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003251 if (PyErr_Occurred())
3252 return;
3253
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003254 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003255 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3256 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003257 if (target && data) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003258 res = PyObject_CallFunctionObjArgs(self->handle_pi,
3259 target, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003260 Py_XDECREF(res);
3261 Py_DECREF(data);
3262 Py_DECREF(target);
3263 } else {
3264 Py_XDECREF(data);
3265 Py_XDECREF(target);
3266 }
3267 }
3268}
3269
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003270/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003271
Eli Bendersky52467b12012-06-01 07:13:08 +03003272static PyObject *
3273xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003274{
Eli Bendersky52467b12012-06-01 07:13:08 +03003275 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3276 if (self) {
3277 self->parser = NULL;
3278 self->target = self->entity = self->names = NULL;
3279 self->handle_start = self->handle_data = self->handle_end = NULL;
3280 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003281 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003282 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003283 return (PyObject *)self;
3284}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003285
scoderc8d8e152017-09-14 22:00:03 +02003286static int
3287ignore_attribute_error(PyObject *value)
3288{
3289 if (value == NULL) {
3290 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3291 return -1;
3292 }
3293 PyErr_Clear();
3294 }
3295 return 0;
3296}
3297
Serhiy Storchakacb985562015-05-04 15:32:48 +03003298/*[clinic input]
3299_elementtree.XMLParser.__init__
3300
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003301 *
Serhiy Storchakacb985562015-05-04 15:32:48 +03003302 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003303 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003304
3305[clinic start generated code]*/
3306
Eli Bendersky52467b12012-06-01 07:13:08 +03003307static int
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003308_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3309 const char *encoding)
3310/*[clinic end generated code: output=3ae45ec6cdf344e4 input=96288fcba916cfce]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003311{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003312 self->entity = PyDict_New();
3313 if (!self->entity)
3314 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003315
Serhiy Storchakacb985562015-05-04 15:32:48 +03003316 self->names = PyDict_New();
3317 if (!self->names) {
3318 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003319 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003320 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003321
Serhiy Storchakacb985562015-05-04 15:32:48 +03003322 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3323 if (!self->parser) {
3324 Py_CLEAR(self->entity);
3325 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003326 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003327 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003328 }
Christian Heimescb5778f2018-09-18 14:38:58 +02003329 /* expat < 2.1.0 has no XML_SetHashSalt() */
3330 if (EXPAT(SetHashSalt) != NULL) {
3331 EXPAT(SetHashSalt)(self->parser,
3332 (unsigned long)_Py_HashSecret.expat.hashsalt);
3333 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003334
Eli Bendersky52467b12012-06-01 07:13:08 +03003335 if (target) {
3336 Py_INCREF(target);
3337 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003338 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003339 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003340 Py_CLEAR(self->entity);
3341 Py_CLEAR(self->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003342 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003343 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003344 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003345 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003346
Serhiy Storchakacb985562015-05-04 15:32:48 +03003347 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003348 if (ignore_attribute_error(self->handle_start)) {
3349 return -1;
3350 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003351 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003352 if (ignore_attribute_error(self->handle_data)) {
3353 return -1;
3354 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003355 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003356 if (ignore_attribute_error(self->handle_end)) {
3357 return -1;
3358 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003359 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003360 if (ignore_attribute_error(self->handle_comment)) {
3361 return -1;
3362 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003363 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003364 if (ignore_attribute_error(self->handle_pi)) {
3365 return -1;
3366 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003367 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003368 if (ignore_attribute_error(self->handle_close)) {
3369 return -1;
3370 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003371 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003372 if (ignore_attribute_error(self->handle_doctype)) {
3373 return -1;
3374 }
Eli Bendersky45839902013-01-13 05:14:47 -08003375
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003376 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003377 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003378 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003379 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003380 (XML_StartElementHandler) expat_start_handler,
3381 (XML_EndElementHandler) expat_end_handler
3382 );
3383 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003384 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003385 (XML_DefaultHandler) expat_default_handler
3386 );
3387 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003388 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003389 (XML_CharacterDataHandler) expat_data_handler
3390 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003391 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003392 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003393 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003394 (XML_CommentHandler) expat_comment_handler
3395 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003396 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003397 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003398 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003399 (XML_ProcessingInstructionHandler) expat_pi_handler
3400 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003401 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003402 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003403 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3404 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003405 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003406 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003407 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003408 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003409
Eli Bendersky52467b12012-06-01 07:13:08 +03003410 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003411}
3412
Eli Bendersky52467b12012-06-01 07:13:08 +03003413static int
3414xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3415{
3416 Py_VISIT(self->handle_close);
3417 Py_VISIT(self->handle_pi);
3418 Py_VISIT(self->handle_comment);
3419 Py_VISIT(self->handle_end);
3420 Py_VISIT(self->handle_data);
3421 Py_VISIT(self->handle_start);
3422
3423 Py_VISIT(self->target);
3424 Py_VISIT(self->entity);
3425 Py_VISIT(self->names);
3426
3427 return 0;
3428}
3429
3430static int
3431xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003432{
Victor Stinnere727d412017-09-18 05:29:37 -07003433 if (self->parser != NULL) {
3434 XML_Parser parser = self->parser;
3435 self->parser = NULL;
3436 EXPAT(ParserFree)(parser);
3437 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003438
Antoine Pitrouc1948842012-10-01 23:40:37 +02003439 Py_CLEAR(self->handle_close);
3440 Py_CLEAR(self->handle_pi);
3441 Py_CLEAR(self->handle_comment);
3442 Py_CLEAR(self->handle_end);
3443 Py_CLEAR(self->handle_data);
3444 Py_CLEAR(self->handle_start);
3445 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003446
Antoine Pitrouc1948842012-10-01 23:40:37 +02003447 Py_CLEAR(self->target);
3448 Py_CLEAR(self->entity);
3449 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003450
Eli Bendersky52467b12012-06-01 07:13:08 +03003451 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003452}
3453
Eli Bendersky52467b12012-06-01 07:13:08 +03003454static void
3455xmlparser_dealloc(XMLParserObject* self)
3456{
3457 PyObject_GC_UnTrack(self);
3458 xmlparser_gc_clear(self);
3459 Py_TYPE(self)->tp_free((PyObject *)self);
3460}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003461
3462LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003463expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003464{
3465 int ok;
3466
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003467 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003468 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3469
3470 if (PyErr_Occurred())
3471 return NULL;
3472
3473 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003474 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003475 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003476 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003477 EXPAT(GetErrorColumnNumber)(self->parser),
3478 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003479 );
3480 return NULL;
3481 }
3482
3483 Py_RETURN_NONE;
3484}
3485
Serhiy Storchakacb985562015-05-04 15:32:48 +03003486/*[clinic input]
3487_elementtree.XMLParser.close
3488
3489[clinic start generated code]*/
3490
3491static PyObject *
3492_elementtree_XMLParser_close_impl(XMLParserObject *self)
3493/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003494{
3495 /* end feeding data to parser */
3496
3497 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003498 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003499 if (!res)
3500 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003501
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003502 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003503 Py_DECREF(res);
3504 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003505 }
3506 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003507 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003508 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003509 }
3510 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003511 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003512 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003513}
3514
Serhiy Storchakacb985562015-05-04 15:32:48 +03003515/*[clinic input]
3516_elementtree.XMLParser.feed
3517
3518 data: object
3519 /
3520
3521[clinic start generated code]*/
3522
3523static PyObject *
3524_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3525/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003526{
3527 /* feed data to parser */
3528
Serhiy Storchakacb985562015-05-04 15:32:48 +03003529 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003530 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003531 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3532 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003533 return NULL;
3534 if (data_len > INT_MAX) {
3535 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3536 return NULL;
3537 }
3538 /* Explicitly set UTF-8 encoding. Return code ignored. */
3539 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003540 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003541 }
3542 else {
3543 Py_buffer view;
3544 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003545 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003546 return NULL;
3547 if (view.len > INT_MAX) {
3548 PyBuffer_Release(&view);
3549 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3550 return NULL;
3551 }
3552 res = expat_parse(self, view.buf, (int)view.len, 0);
3553 PyBuffer_Release(&view);
3554 return res;
3555 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003556}
3557
Serhiy Storchakacb985562015-05-04 15:32:48 +03003558/*[clinic input]
3559_elementtree.XMLParser._parse_whole
3560
3561 file: object
3562 /
3563
3564[clinic start generated code]*/
3565
3566static PyObject *
3567_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3568/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003569{
Eli Benderskya3699232013-05-19 18:47:23 -07003570 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003571 PyObject* reader;
3572 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003573 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003574 PyObject* res;
3575
Serhiy Storchakacb985562015-05-04 15:32:48 +03003576 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003577 if (!reader)
3578 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003579
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003580 /* read from open file object */
3581 for (;;) {
3582
3583 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3584
3585 if (!buffer) {
3586 /* read failed (e.g. due to KeyboardInterrupt) */
3587 Py_DECREF(reader);
3588 return NULL;
3589 }
3590
Eli Benderskyf996e772012-03-16 05:53:30 +02003591 if (PyUnicode_CheckExact(buffer)) {
3592 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003593 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003594 Py_DECREF(buffer);
3595 break;
3596 }
3597 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003598 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003599 if (!temp) {
3600 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003601 Py_DECREF(reader);
3602 return NULL;
3603 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003604 buffer = temp;
3605 }
3606 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003607 Py_DECREF(buffer);
3608 break;
3609 }
3610
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003611 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3612 Py_DECREF(buffer);
3613 Py_DECREF(reader);
3614 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3615 return NULL;
3616 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003617 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003618 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003619 );
3620
3621 Py_DECREF(buffer);
3622
3623 if (!res) {
3624 Py_DECREF(reader);
3625 return NULL;
3626 }
3627 Py_DECREF(res);
3628
3629 }
3630
3631 Py_DECREF(reader);
3632
3633 res = expat_parse(self, "", 0, 1);
3634
3635 if (res && TreeBuilder_CheckExact(self->target)) {
3636 Py_DECREF(res);
3637 return treebuilder_done((TreeBuilderObject*) self->target);
3638 }
3639
3640 return res;
3641}
3642
Serhiy Storchakacb985562015-05-04 15:32:48 +03003643/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03003644_elementtree.XMLParser._setevents
3645
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003646 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003647 events_to_report: object = None
3648 /
3649
3650[clinic start generated code]*/
3651
3652static PyObject *
3653_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3654 PyObject *events_queue,
3655 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003656/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003657{
3658 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003659 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003660 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003661 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003662
3663 if (!TreeBuilder_CheckExact(self->target)) {
3664 PyErr_SetString(
3665 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003666 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003667 "targets"
3668 );
3669 return NULL;
3670 }
3671
3672 target = (TreeBuilderObject*) self->target;
3673
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003674 events_append = PyObject_GetAttrString(events_queue, "append");
3675 if (events_append == NULL)
3676 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003677 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003678
3679 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003680 Py_CLEAR(target->start_event_obj);
3681 Py_CLEAR(target->end_event_obj);
3682 Py_CLEAR(target->start_ns_event_obj);
3683 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003684
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003685 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003686 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003687 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003688 Py_RETURN_NONE;
3689 }
3690
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003691 if (!(events_seq = PySequence_Fast(events_to_report,
3692 "events must be a sequence"))) {
3693 return NULL;
3694 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003695
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03003696 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003697 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02003698 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003699 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003700 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003701 } else if (PyBytes_Check(event_name_obj)) {
3702 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003703 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003704 if (event_name == NULL) {
3705 Py_DECREF(events_seq);
3706 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3707 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003708 }
3709
3710 Py_INCREF(event_name_obj);
3711 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003712 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003713 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003714 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003715 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003716 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003717 EXPAT(SetNamespaceDeclHandler)(
3718 self->parser,
3719 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3720 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3721 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003722 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003723 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003724 EXPAT(SetNamespaceDeclHandler)(
3725 self->parser,
3726 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3727 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3728 );
3729 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003730 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003731 Py_DECREF(events_seq);
3732 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003733 return NULL;
3734 }
3735 }
3736
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003737 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003738 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003739}
3740
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003741static PyMemberDef xmlparser_members[] = {
3742 {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
3743 {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
3744 {NULL}
3745};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003746
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003747static PyObject*
3748xmlparser_version_getter(XMLParserObject *self, void *closure)
3749{
3750 return PyUnicode_FromFormat(
3751 "Expat %d.%d.%d", XML_MAJOR_VERSION,
3752 XML_MINOR_VERSION, XML_MICRO_VERSION);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003753}
3754
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003755static PyGetSetDef xmlparser_getsetlist[] = {
3756 {"version", (getter)xmlparser_version_getter, NULL, NULL},
3757 {NULL},
3758};
3759
Serhiy Storchakacb985562015-05-04 15:32:48 +03003760#include "clinic/_elementtree.c.h"
3761
3762static PyMethodDef element_methods[] = {
3763
3764 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3765
3766 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3767 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3768
3769 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3770 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3771 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3772
3773 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3774 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3775 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3776 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3777
3778 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3779 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3780 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3781
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003782 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003783 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3784
3785 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3786 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3787
3788 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3789
3790 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3791 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3792 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3793 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3794 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3795
3796 {NULL, NULL}
3797};
3798
3799static PyMappingMethods element_as_mapping = {
3800 (lenfunc) element_length,
3801 (binaryfunc) element_subscr,
3802 (objobjargproc) element_ass_subscr,
3803};
3804
Serhiy Storchakadde08152015-11-25 15:28:13 +02003805static PyGetSetDef element_getsetlist[] = {
3806 {"tag",
3807 (getter)element_tag_getter,
3808 (setter)element_tag_setter,
3809 "A string identifying what kind of data this element represents"},
3810 {"text",
3811 (getter)element_text_getter,
3812 (setter)element_text_setter,
3813 "A string of text directly after the start tag, or None"},
3814 {"tail",
3815 (getter)element_tail_getter,
3816 (setter)element_tail_setter,
3817 "A string of text directly after the end tag, or None"},
3818 {"attrib",
3819 (getter)element_attrib_getter,
3820 (setter)element_attrib_setter,
3821 "A dictionary containing the element's attributes"},
3822 {NULL},
3823};
3824
Serhiy Storchakacb985562015-05-04 15:32:48 +03003825static PyTypeObject Element_Type = {
3826 PyVarObject_HEAD_INIT(NULL, 0)
3827 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3828 /* methods */
3829 (destructor)element_dealloc, /* tp_dealloc */
3830 0, /* tp_print */
3831 0, /* tp_getattr */
3832 0, /* tp_setattr */
3833 0, /* tp_reserved */
3834 (reprfunc)element_repr, /* tp_repr */
3835 0, /* tp_as_number */
3836 &element_as_sequence, /* tp_as_sequence */
3837 &element_as_mapping, /* tp_as_mapping */
3838 0, /* tp_hash */
3839 0, /* tp_call */
3840 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003841 PyObject_GenericGetAttr, /* tp_getattro */
3842 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003843 0, /* tp_as_buffer */
3844 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3845 /* tp_flags */
3846 0, /* tp_doc */
3847 (traverseproc)element_gc_traverse, /* tp_traverse */
3848 (inquiry)element_gc_clear, /* tp_clear */
3849 0, /* tp_richcompare */
3850 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3851 0, /* tp_iter */
3852 0, /* tp_iternext */
3853 element_methods, /* tp_methods */
3854 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003855 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003856 0, /* tp_base */
3857 0, /* tp_dict */
3858 0, /* tp_descr_get */
3859 0, /* tp_descr_set */
3860 0, /* tp_dictoffset */
3861 (initproc)element_init, /* tp_init */
3862 PyType_GenericAlloc, /* tp_alloc */
3863 element_new, /* tp_new */
3864 0, /* tp_free */
3865};
3866
3867static PyMethodDef treebuilder_methods[] = {
3868 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3869 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3870 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3871 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3872 {NULL, NULL}
3873};
3874
3875static PyTypeObject TreeBuilder_Type = {
3876 PyVarObject_HEAD_INIT(NULL, 0)
3877 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3878 /* methods */
3879 (destructor)treebuilder_dealloc, /* tp_dealloc */
3880 0, /* tp_print */
3881 0, /* tp_getattr */
3882 0, /* tp_setattr */
3883 0, /* tp_reserved */
3884 0, /* tp_repr */
3885 0, /* tp_as_number */
3886 0, /* tp_as_sequence */
3887 0, /* tp_as_mapping */
3888 0, /* tp_hash */
3889 0, /* tp_call */
3890 0, /* tp_str */
3891 0, /* tp_getattro */
3892 0, /* tp_setattro */
3893 0, /* tp_as_buffer */
3894 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3895 /* tp_flags */
3896 0, /* tp_doc */
3897 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3898 (inquiry)treebuilder_gc_clear, /* tp_clear */
3899 0, /* tp_richcompare */
3900 0, /* tp_weaklistoffset */
3901 0, /* tp_iter */
3902 0, /* tp_iternext */
3903 treebuilder_methods, /* tp_methods */
3904 0, /* tp_members */
3905 0, /* tp_getset */
3906 0, /* tp_base */
3907 0, /* tp_dict */
3908 0, /* tp_descr_get */
3909 0, /* tp_descr_set */
3910 0, /* tp_dictoffset */
3911 _elementtree_TreeBuilder___init__, /* tp_init */
3912 PyType_GenericAlloc, /* tp_alloc */
3913 treebuilder_new, /* tp_new */
3914 0, /* tp_free */
3915};
3916
3917static PyMethodDef xmlparser_methods[] = {
3918 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3919 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3920 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3921 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003922 {NULL, NULL}
3923};
3924
Neal Norwitz227b5332006-03-22 09:28:35 +00003925static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003926 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003927 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003928 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003929 (destructor)xmlparser_dealloc, /* tp_dealloc */
3930 0, /* tp_print */
3931 0, /* tp_getattr */
3932 0, /* tp_setattr */
3933 0, /* tp_reserved */
3934 0, /* tp_repr */
3935 0, /* tp_as_number */
3936 0, /* tp_as_sequence */
3937 0, /* tp_as_mapping */
3938 0, /* tp_hash */
3939 0, /* tp_call */
3940 0, /* tp_str */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003941 0, /* tp_getattro */
Eli Bendersky52467b12012-06-01 07:13:08 +03003942 0, /* tp_setattro */
3943 0, /* tp_as_buffer */
3944 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3945 /* tp_flags */
3946 0, /* tp_doc */
3947 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3948 (inquiry)xmlparser_gc_clear, /* tp_clear */
3949 0, /* tp_richcompare */
3950 0, /* tp_weaklistoffset */
3951 0, /* tp_iter */
3952 0, /* tp_iternext */
3953 xmlparser_methods, /* tp_methods */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003954 xmlparser_members, /* tp_members */
3955 xmlparser_getsetlist, /* tp_getset */
Eli Bendersky52467b12012-06-01 07:13:08 +03003956 0, /* tp_base */
3957 0, /* tp_dict */
3958 0, /* tp_descr_get */
3959 0, /* tp_descr_set */
3960 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003961 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003962 PyType_GenericAlloc, /* tp_alloc */
3963 xmlparser_new, /* tp_new */
3964 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003965};
3966
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003967/* ==================================================================== */
3968/* python module interface */
3969
3970static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003971 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003972 {NULL, NULL}
3973};
3974
Martin v. Löwis1a214512008-06-11 05:26:20 +00003975
Eli Bendersky532d03e2013-08-10 08:00:39 -07003976static struct PyModuleDef elementtreemodule = {
3977 PyModuleDef_HEAD_INIT,
3978 "_elementtree",
3979 NULL,
3980 sizeof(elementtreestate),
3981 _functions,
3982 NULL,
3983 elementtree_traverse,
3984 elementtree_clear,
3985 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003986};
3987
Neal Norwitzf6657e62006-12-28 04:47:50 +00003988PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003989PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003990{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003991 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003992 elementtreestate *st;
3993
3994 m = PyState_FindModule(&elementtreemodule);
3995 if (m) {
3996 Py_INCREF(m);
3997 return m;
3998 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003999
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004000 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02004001 if (PyType_Ready(&ElementIter_Type) < 0)
4002 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004003 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004004 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004005 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004006 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004007 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004008 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004009
Eli Bendersky532d03e2013-08-10 08:00:39 -07004010 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00004011 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00004012 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004013 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00004014
Eli Bendersky828efde2012-04-05 05:40:58 +03004015 if (!(temp = PyImport_ImportModule("copy")))
4016 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004017 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03004018 Py_XDECREF(temp);
4019
Victor Stinnerb136f112017-07-10 22:28:02 +02004020 if (st->deepcopy_obj == NULL) {
4021 return NULL;
4022 }
4023
4024 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004025 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004026 return NULL;
4027
Eli Bendersky20d41742012-06-01 09:48:37 +03004028 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004029 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4030 if (expat_capi) {
4031 /* check that it's usable */
4032 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004033 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004034 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4035 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004036 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004037 PyErr_SetString(PyExc_ImportError,
4038 "pyexpat version is incompatible");
4039 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004040 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004041 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004042 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004043 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004044
Eli Bendersky532d03e2013-08-10 08:00:39 -07004045 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004046 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004047 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004048 Py_INCREF(st->parseerror_obj);
4049 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004050
Eli Bendersky092af1f2012-03-04 07:14:03 +02004051 Py_INCREF((PyObject *)&Element_Type);
4052 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4053
Eli Bendersky58d548d2012-05-29 15:45:16 +03004054 Py_INCREF((PyObject *)&TreeBuilder_Type);
4055 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4056
Eli Bendersky52467b12012-06-01 07:13:08 +03004057 Py_INCREF((PyObject *)&XMLParser_Type);
4058 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004059
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004060 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004061}