blob: d13c6dd4db649755ea8b3595dae263702d4d93d9 [file] [log] [blame]
/*--------------------------------------------------------------------
 * Licensed to PSF under a Contributor Agreement.
 * See http://www.python.org/psf/license for licensing details.
 *
 * _elementtree - C accelerator for xml.etree.ElementTree
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 *--------------------------------------------------------------------
 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* Number of child slots embedded directly in the element's extra
   structure; an element can hold this many children without extra
   memory allocations. */
#define STATIC_CHILDREN 4

/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  With the "#if 0" branch enabled, ALLOC and
   RELEASE maintain a running byte counter and print it together with the
   given comment; in the default configuration both expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment) \
    do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) introduces a file-private declaration of
   the given type; MSVC builds additionally ask for inlining and the
   __fastcall calling convention */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55
/* macros used to store 'join' flags in string object pointers.  note
   that all use of text and tail as object pointers must be wrapped in
   JOIN_OBJ.  see comments in the ElementObject definition for more
   info.

   JOIN_GET(p)       - the flag, i.e. the lowest bit of the pointer value
   JOIN_OBJ(p)       - the pointer with the flag bit masked off
   JOIN_SET(p, flag) - the pointer with its flag bit replaced by 'flag' */
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
Ronald Oussoren138d0802013-07-19 11:11:25 +020083/* Types defined by this extension */
84static PyTypeObject Element_Type;
85static PyTypeObject ElementIter_Type;
86static PyTypeObject TreeBuilder_Type;
87static PyTypeObject XMLParser_Type;
88
89
Eli Bendersky532d03e2013-08-10 08:00:39 -070090/* Per-module state; PEP 3121 */
91typedef struct {
92 PyObject *parseerror_obj;
93 PyObject *deepcopy_obj;
94 PyObject *elementpath_obj;
95} elementtreestate;
96
97static struct PyModuleDef elementtreemodule;
98
99/* Given a module object (assumed to be _elementtree), get its per-module
100 * state.
101 */
102#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
103
104/* Find the module instance imported in the currently running sub-interpreter
105 * and get its state.
106 */
107#define ET_STATE_GLOBAL \
108 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
109
110static int
111elementtree_clear(PyObject *m)
112{
113 elementtreestate *st = ET_STATE(m);
114 Py_CLEAR(st->parseerror_obj);
115 Py_CLEAR(st->deepcopy_obj);
116 Py_CLEAR(st->elementpath_obj);
117 return 0;
118}
119
120static int
121elementtree_traverse(PyObject *m, visitproc visit, void *arg)
122{
123 elementtreestate *st = ET_STATE(m);
124 Py_VISIT(st->parseerror_obj);
125 Py_VISIT(st->deepcopy_obj);
126 Py_VISIT(st->elementpath_obj);
127 return 0;
128}
129
130static void
131elementtree_free(void *m)
132{
133 elementtree_clear((PyObject *)m);
134}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135
136/* helpers */
137
138LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139list_join(PyObject* list)
140{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300141 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000143 PyObject* result;
144
Antoine Pitrouc1948842012-10-01 23:40:37 +0200145 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000146 if (!joiner)
147 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200148 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000150 return result;
151}
152
Eli Bendersky48d358b2012-05-30 17:57:50 +0300153/* Is the given object an empty dictionary?
154*/
155static int
156is_empty_dict(PyObject *obj)
157{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200158 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300159}
160
161
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000162/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200163/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000164
165typedef struct {
166
167 /* attributes (a dictionary object), or None if no attributes */
168 PyObject* attrib;
169
170 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200171 Py_ssize_t length; /* actual number of items */
172 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000173
174 /* this either points to _children or to a malloced buffer */
175 PyObject* *children;
176
177 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179} ElementObjectExtra;
180
181typedef struct {
182 PyObject_HEAD
183
184 /* element tag (a string). */
185 PyObject* tag;
186
187 /* text before first child. note that this is a tagged pointer;
188 use JOIN_OBJ to get the object pointer. the join flag is used
189 to distinguish lists created by the tree builder from lists
190 assigned to the attribute by application code; the former
191 should be joined before being returned to the user, the latter
192 should be left intact. */
193 PyObject* text;
194
195 /* text after this element, in parent. note that this is a tagged
196 pointer; use JOIN_OBJ to get the object pointer. */
197 PyObject* tail;
198
199 ElementObjectExtra* extra;
200
Eli Benderskyebf37a22012-04-03 22:02:37 +0300201 PyObject *weakreflist; /* For tp_weaklistoffset */
202
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000203} ElementObject;
204
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
/* Element_CheckExact: true only if op's type is exactly Element_Type.
   Element_Check: the PyObject_TypeCheck test against Element_Type. */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
208
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209
210/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200211/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212
213LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200214create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215{
216 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200217 if (!self->extra) {
218 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000219 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200220 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000221
222 if (!attrib)
223 attrib = Py_None;
224
225 Py_INCREF(attrib);
226 self->extra->attrib = attrib;
227
228 self->extra->length = 0;
229 self->extra->allocated = STATIC_CHILDREN;
230 self->extra->children = self->extra->_children;
231
232 return 0;
233}
234
235LOCAL(void)
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700236dealloc_extra(ElementObjectExtra *extra)
237{
238 Py_ssize_t i;
239
240 if (!extra)
241 return;
242
243 Py_DECREF(extra->attrib);
244
245 for (i = 0; i < extra->length; i++)
246 Py_DECREF(extra->children[i]);
247
248 if (extra->children != extra->_children)
249 PyObject_Free(extra->children);
250
251 PyObject_Free(extra);
252}
253
254LOCAL(void)
255clear_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256{
Eli Bendersky08b85292012-04-04 15:55:07 +0300257 ElementObjectExtra *myextra;
Eli Bendersky08b85292012-04-04 15:55:07 +0300258
Eli Benderskyebf37a22012-04-03 22:02:37 +0300259 if (!self->extra)
260 return;
261
262 /* Avoid DECREFs calling into this code again (cycles, etc.)
263 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300264 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300265 self->extra = NULL;
266
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700267 dealloc_extra(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000268}
269
Eli Bendersky092af1f2012-03-04 07:14:03 +0200270/* Convenience internal function to create new Element objects with the given
271 * tag and attributes.
272*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200274create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000275{
276 ElementObject* self;
277
Eli Bendersky0192ba32012-03-30 16:38:33 +0300278 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000279 if (self == NULL)
280 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000281 self->extra = NULL;
282
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000283 Py_INCREF(tag);
284 self->tag = tag;
285
286 Py_INCREF(Py_None);
287 self->text = Py_None;
288
289 Py_INCREF(Py_None);
290 self->tail = Py_None;
291
Eli Benderskyebf37a22012-04-03 22:02:37 +0300292 self->weakreflist = NULL;
293
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200294 ALLOC(sizeof(ElementObject), "create element");
295 PyObject_GC_Track(self);
296
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200297 if (attrib != Py_None && !is_empty_dict(attrib)) {
298 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200299 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200300 return NULL;
301 }
302 }
303
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000304 return (PyObject*) self;
305}
306
Eli Bendersky092af1f2012-03-04 07:14:03 +0200307static PyObject *
308element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
309{
310 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
311 if (e != NULL) {
312 Py_INCREF(Py_None);
313 e->tag = Py_None;
314
315 Py_INCREF(Py_None);
316 e->text = Py_None;
317
318 Py_INCREF(Py_None);
319 e->tail = Py_None;
320
321 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300322 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200323 }
324 return (PyObject *)e;
325}
326
Eli Bendersky737b1732012-05-29 06:02:56 +0300327/* Helper function for extracting the attrib dictionary from a keywords dict.
328 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800329 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300330 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700331 *
332 * Return a dictionary with the content of kwds merged into the content of
333 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300334 */
335static PyObject*
336get_attrib_from_keywords(PyObject *kwds)
337{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700338 PyObject *attrib_str = PyUnicode_FromString("attrib");
339 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300340
341 if (attrib) {
342 /* If attrib was found in kwds, copy its value and remove it from
343 * kwds
344 */
345 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700346 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300347 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
348 Py_TYPE(attrib)->tp_name);
349 return NULL;
350 }
351 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700352 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300353 } else {
354 attrib = PyDict_New();
355 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700356
357 Py_DECREF(attrib_str);
358
359 /* attrib can be NULL if PyDict_New failed */
360 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200361 if (PyDict_Update(attrib, kwds) < 0)
362 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300363 return attrib;
364}
365
/*[clinic input]
module _elementtree
class _elementtree.Element "ElementObject *" "&Element_Type"
class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
373
Eli Bendersky092af1f2012-03-04 07:14:03 +0200374static int
375element_init(PyObject *self, PyObject *args, PyObject *kwds)
376{
377 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200378 PyObject *attrib = NULL;
379 ElementObject *self_elem;
380
381 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
382 return -1;
383
Eli Bendersky737b1732012-05-29 06:02:56 +0300384 if (attrib) {
385 /* attrib passed as positional arg */
386 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200387 if (!attrib)
388 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300389 if (kwds) {
390 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200391 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300392 return -1;
393 }
394 }
395 } else if (kwds) {
396 /* have keywords args */
397 attrib = get_attrib_from_keywords(kwds);
398 if (!attrib)
399 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200400 }
401
402 self_elem = (ElementObject *)self;
403
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200406 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200407 return -1;
408 }
409 }
410
Eli Bendersky48d358b2012-05-30 17:57:50 +0300411 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200412 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413
414 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200415 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300416 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200417
Eli Bendersky092af1f2012-03-04 07:14:03 +0200418 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300419 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200420
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300422 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200423
424 return 0;
425}
426
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000427LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200428element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000429{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200430 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000431 PyObject* *children;
432
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700433 assert(extra >= 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000434 /* make sure self->children can hold the given number of extra
435 elements. set an exception and return -1 if allocation failed */
436
Victor Stinner5f0af232013-07-11 23:01:36 +0200437 if (!self->extra) {
438 if (create_extra(self, NULL) < 0)
439 return -1;
440 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000441
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200442 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000443
444 if (size > self->extra->allocated) {
445 /* use Python 2.4's list growth strategy */
446 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000447 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100448 * which needs at least 4 bytes.
449 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000450 * be safe.
451 */
452 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200453 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
454 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000455 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000456 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100457 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000458 * false alarm always assume at least one child to be safe.
459 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000460 children = PyObject_Realloc(self->extra->children,
461 size * sizeof(PyObject*));
462 if (!children)
463 goto nomemory;
464 } else {
465 children = PyObject_Malloc(size * sizeof(PyObject*));
466 if (!children)
467 goto nomemory;
468 /* copy existing children from static area to malloc buffer */
469 memcpy(children, self->extra->children,
470 self->extra->length * sizeof(PyObject*));
471 }
472 self->extra->children = children;
473 self->extra->allocated = size;
474 }
475
476 return 0;
477
478 nomemory:
479 PyErr_NoMemory();
480 return -1;
481}
482
483LOCAL(int)
484element_add_subelement(ElementObject* self, PyObject* element)
485{
486 /* add a child element to a parent */
487
488 if (element_resize(self, 1) < 0)
489 return -1;
490
491 Py_INCREF(element);
492 self->extra->children[self->extra->length] = element;
493
494 self->extra->length++;
495
496 return 0;
497}
498
499LOCAL(PyObject*)
500element_get_attrib(ElementObject* self)
501{
502 /* return borrowed reference to attrib dictionary */
503 /* note: this function assumes that the extra section exists */
504
505 PyObject* res = self->extra->attrib;
506
507 if (res == Py_None) {
508 /* create missing dictionary */
509 res = PyDict_New();
510 if (!res)
511 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200512 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000513 self->extra->attrib = res;
514 }
515
516 return res;
517}
518
519LOCAL(PyObject*)
520element_get_text(ElementObject* self)
521{
522 /* return borrowed reference to text attribute */
523
Serhiy Storchaka576def02017-03-30 09:47:31 +0300524 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000525
526 if (JOIN_GET(res)) {
527 res = JOIN_OBJ(res);
528 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300529 PyObject *tmp = list_join(res);
530 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000531 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300532 self->text = tmp;
533 Py_DECREF(res);
534 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000535 }
536 }
537
538 return res;
539}
540
541LOCAL(PyObject*)
542element_get_tail(ElementObject* self)
543{
544 /* return borrowed reference to text attribute */
545
Serhiy Storchaka576def02017-03-30 09:47:31 +0300546 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000547
548 if (JOIN_GET(res)) {
549 res = JOIN_OBJ(res);
550 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300551 PyObject *tmp = list_join(res);
552 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000553 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300554 self->tail = tmp;
555 Py_DECREF(res);
556 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000557 }
558 }
559
560 return res;
561}
562
563static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300564subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000565{
566 PyObject* elem;
567
568 ElementObject* parent;
569 PyObject* tag;
570 PyObject* attrib = NULL;
571 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
572 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800573 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000574 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800575 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000576
Eli Bendersky737b1732012-05-29 06:02:56 +0300577 if (attrib) {
578 /* attrib passed as positional arg */
579 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000580 if (!attrib)
581 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300582 if (kwds) {
583 if (PyDict_Update(attrib, kwds) < 0) {
584 return NULL;
585 }
586 }
587 } else if (kwds) {
588 /* have keyword args */
589 attrib = get_attrib_from_keywords(kwds);
590 if (!attrib)
591 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000592 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300593 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000594 Py_INCREF(Py_None);
595 attrib = Py_None;
596 }
597
Eli Bendersky092af1f2012-03-04 07:14:03 +0200598 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000599 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200600 if (elem == NULL)
601 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000602
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000603 if (element_add_subelement(parent, elem) < 0) {
604 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000605 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000606 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000607
608 return elem;
609}
610
Eli Bendersky0192ba32012-03-30 16:38:33 +0300611static int
612element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
613{
614 Py_VISIT(self->tag);
615 Py_VISIT(JOIN_OBJ(self->text));
616 Py_VISIT(JOIN_OBJ(self->tail));
617
618 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200619 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620 Py_VISIT(self->extra->attrib);
621
622 for (i = 0; i < self->extra->length; ++i)
623 Py_VISIT(self->extra->children[i]);
624 }
625 return 0;
626}
627
628static int
629element_gc_clear(ElementObject *self)
630{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300631 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700632 _clear_joined_ptr(&self->text);
633 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300634
635 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300636 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300637 */
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700638 clear_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300639 return 0;
640}
641
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000642static void
643element_dealloc(ElementObject* self)
644{
INADA Naokia6296d32017-08-24 14:55:17 +0900645 /* bpo-31095: UnTrack is needed before calling any callbacks */
Eli Bendersky0192ba32012-03-30 16:38:33 +0300646 PyObject_GC_UnTrack(self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200647 Py_TRASHCAN_SAFE_BEGIN(self)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300648
649 if (self->weakreflist != NULL)
650 PyObject_ClearWeakRefs((PyObject *) self);
651
Eli Bendersky0192ba32012-03-30 16:38:33 +0300652 /* element_gc_clear clears all references and deallocates extra
653 */
654 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000655
656 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200657 Py_TYPE(self)->tp_free((PyObject *)self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200658 Py_TRASHCAN_SAFE_END(self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000659}
660
661/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000662
Serhiy Storchakacb985562015-05-04 15:32:48 +0300663/*[clinic input]
664_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665
Serhiy Storchakacb985562015-05-04 15:32:48 +0300666 subelement: object(subclass_of='&Element_Type')
667 /
668
669[clinic start generated code]*/
670
671static PyObject *
672_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
673/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
674{
675 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000676 return NULL;
677
678 Py_RETURN_NONE;
679}
680
Serhiy Storchakacb985562015-05-04 15:32:48 +0300681/*[clinic input]
682_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000683
Serhiy Storchakacb985562015-05-04 15:32:48 +0300684[clinic start generated code]*/
685
686static PyObject *
687_elementtree_Element_clear_impl(ElementObject *self)
688/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
689{
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700690 clear_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000691
692 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300693 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000694
695 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300696 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000697
698 Py_RETURN_NONE;
699}
700
Serhiy Storchakacb985562015-05-04 15:32:48 +0300701/*[clinic input]
702_elementtree.Element.__copy__
703
704[clinic start generated code]*/
705
706static PyObject *
707_elementtree_Element___copy___impl(ElementObject *self)
708/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000709{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200710 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000711 ElementObject* element;
712
Eli Bendersky092af1f2012-03-04 07:14:03 +0200713 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800714 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000715 if (!element)
716 return NULL;
717
Oren Milman39ecb9c2017-10-10 23:26:24 +0300718 Py_INCREF(JOIN_OBJ(self->text));
719 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000720
Oren Milman39ecb9c2017-10-10 23:26:24 +0300721 Py_INCREF(JOIN_OBJ(self->tail));
722 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000723
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700724 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000725 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000726 if (element_resize(element, self->extra->length) < 0) {
727 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000728 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000729 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000730
731 for (i = 0; i < self->extra->length; i++) {
732 Py_INCREF(self->extra->children[i]);
733 element->extra->children[i] = self->extra->children[i];
734 }
735
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700736 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000737 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000738 }
739
740 return (PyObject*) element;
741}
742
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200743/* Helper for a deep copy. */
744LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
745
Serhiy Storchakacb985562015-05-04 15:32:48 +0300746/*[clinic input]
747_elementtree.Element.__deepcopy__
748
Oren Milmand0568182017-09-12 17:39:15 +0300749 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300750 /
751
752[clinic start generated code]*/
753
754static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300755_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
756/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000757{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200758 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000759 ElementObject* element;
760 PyObject* tag;
761 PyObject* attrib;
762 PyObject* text;
763 PyObject* tail;
764 PyObject* id;
765
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000766 tag = deepcopy(self->tag, memo);
767 if (!tag)
768 return NULL;
769
770 if (self->extra) {
771 attrib = deepcopy(self->extra->attrib, memo);
772 if (!attrib) {
773 Py_DECREF(tag);
774 return NULL;
775 }
776 } else {
777 Py_INCREF(Py_None);
778 attrib = Py_None;
779 }
780
Eli Bendersky092af1f2012-03-04 07:14:03 +0200781 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000782
783 Py_DECREF(tag);
784 Py_DECREF(attrib);
785
786 if (!element)
787 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100788
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000789 text = deepcopy(JOIN_OBJ(self->text), memo);
790 if (!text)
791 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300792 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000793
794 tail = deepcopy(JOIN_OBJ(self->tail), memo);
795 if (!tail)
796 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300797 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000798
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700799 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000800 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000801 if (element_resize(element, self->extra->length) < 0)
802 goto error;
803
804 for (i = 0; i < self->extra->length; i++) {
805 PyObject* child = deepcopy(self->extra->children[i], memo);
806 if (!child) {
807 element->extra->length = i;
808 goto error;
809 }
810 element->extra->children[i] = child;
811 }
812
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700813 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000814 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000815 }
816
817 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700818 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000819 if (!id)
820 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000821
822 i = PyDict_SetItem(memo, id, (PyObject*) element);
823
824 Py_DECREF(id);
825
826 if (i < 0)
827 goto error;
828
829 return (PyObject*) element;
830
831 error:
832 Py_DECREF(element);
833 return NULL;
834}
835
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200836LOCAL(PyObject *)
837deepcopy(PyObject *object, PyObject *memo)
838{
839 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200840 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200841 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200842
843 /* Fast paths */
844 if (object == Py_None || PyUnicode_CheckExact(object)) {
845 Py_INCREF(object);
846 return object;
847 }
848
849 if (Py_REFCNT(object) == 1) {
850 if (PyDict_CheckExact(object)) {
851 PyObject *key, *value;
852 Py_ssize_t pos = 0;
853 int simple = 1;
854 while (PyDict_Next(object, &pos, &key, &value)) {
855 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
856 simple = 0;
857 break;
858 }
859 }
860 if (simple)
861 return PyDict_Copy(object);
862 /* Fall through to general case */
863 }
864 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300865 return _elementtree_Element___deepcopy___impl(
866 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200867 }
868 }
869
870 /* General case */
871 st = ET_STATE_GLOBAL;
872 if (!st->deepcopy_obj) {
873 PyErr_SetString(PyExc_RuntimeError,
874 "deepcopy helper not found");
875 return NULL;
876 }
877
Victor Stinner7fbac452016-08-20 01:34:44 +0200878 stack[0] = object;
879 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200880 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200881}
882
883
Serhiy Storchakacb985562015-05-04 15:32:48 +0300884/*[clinic input]
885_elementtree.Element.__sizeof__ -> Py_ssize_t
886
887[clinic start generated code]*/
888
889static Py_ssize_t
890_elementtree_Element___sizeof___impl(ElementObject *self)
891/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200892{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200893 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200894 if (self->extra) {
895 result += sizeof(ElementObjectExtra);
896 if (self->extra->children != self->extra->_children)
897 result += sizeof(PyObject*) * self->extra->allocated;
898 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300899 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200900}
901
Eli Bendersky698bdb22013-01-10 06:01:06 -0800902/* dict keys for getstate/setstate. */
903#define PICKLED_TAG "tag"
904#define PICKLED_CHILDREN "_children"
905#define PICKLED_ATTRIB "attrib"
906#define PICKLED_TAIL "tail"
907#define PICKLED_TEXT "text"
908
909/* __getstate__ returns a fabricated instance dict as in the pure-Python
910 * Element implementation, for interoperability/interchangeability. This
911 * makes the pure-Python implementation details an API, but (a) there aren't
912 * any unnecessary structures there; and (b) it buys compatibility with 3.2
913 * pickles. See issue #16076.
914 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300915/*[clinic input]
916_elementtree.Element.__getstate__
917
918[clinic start generated code]*/
919
Eli Bendersky698bdb22013-01-10 06:01:06 -0800920static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300921_elementtree_Element___getstate___impl(ElementObject *self)
922/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800923{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200924 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800925 PyObject *instancedict = NULL, *children;
926
927 /* Build a list of children. */
928 children = PyList_New(self->extra ? self->extra->length : 0);
929 if (!children)
930 return NULL;
931 for (i = 0; i < PyList_GET_SIZE(children); i++) {
932 PyObject *child = self->extra->children[i];
933 Py_INCREF(child);
934 PyList_SET_ITEM(children, i, child);
935 }
936
937 /* Construct the state object. */
938 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
939 if (noattrib)
940 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
941 PICKLED_TAG, self->tag,
942 PICKLED_CHILDREN, children,
943 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700944 PICKLED_TEXT, JOIN_OBJ(self->text),
945 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800946 else
947 instancedict = Py_BuildValue("{sOsOsOsOsO}",
948 PICKLED_TAG, self->tag,
949 PICKLED_CHILDREN, children,
950 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700951 PICKLED_TEXT, JOIN_OBJ(self->text),
952 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800953 if (instancedict) {
954 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800955 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800956 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800957 else {
958 for (i = 0; i < PyList_GET_SIZE(children); i++)
959 Py_DECREF(PyList_GET_ITEM(children, i));
960 Py_DECREF(children);
961
962 return NULL;
963 }
964}
965
966static PyObject *
967element_setstate_from_attributes(ElementObject *self,
968 PyObject *tag,
969 PyObject *attrib,
970 PyObject *text,
971 PyObject *tail,
972 PyObject *children)
973{
974 Py_ssize_t i, nchildren;
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700975 ElementObjectExtra *oldextra = NULL;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800976
977 if (!tag) {
978 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
979 return NULL;
980 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800981
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200982 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300983 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800984
Oren Milman39ecb9c2017-10-10 23:26:24 +0300985 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
986 Py_INCREF(JOIN_OBJ(text));
987 _set_joined_ptr(&self->text, text);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800988
Oren Milman39ecb9c2017-10-10 23:26:24 +0300989 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
990 Py_INCREF(JOIN_OBJ(tail));
991 _set_joined_ptr(&self->tail, tail);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800992
993 /* Handle ATTRIB and CHILDREN. */
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700994 if (!children && !attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -0800995 Py_RETURN_NONE;
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700996 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800997
998 /* Compute 'nchildren'. */
999 if (children) {
1000 if (!PyList_Check(children)) {
1001 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1002 return NULL;
1003 }
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -07001004 nchildren = PyList_GET_SIZE(children);
1005
1006 /* (Re-)allocate 'extra'.
1007 Avoid DECREFs calling into this code again (cycles, etc.)
1008 */
1009 oldextra = self->extra;
1010 self->extra = NULL;
1011 if (element_resize(self, nchildren)) {
1012 assert(!self->extra || !self->extra->length);
1013 clear_extra(self);
1014 self->extra = oldextra;
1015 return NULL;
1016 }
1017 assert(self->extra);
1018 assert(self->extra->allocated >= nchildren);
1019 if (oldextra) {
1020 assert(self->extra->attrib == Py_None);
1021 self->extra->attrib = oldextra->attrib;
1022 oldextra->attrib = Py_None;
1023 }
1024
1025 /* Copy children */
1026 for (i = 0; i < nchildren; i++) {
1027 self->extra->children[i] = PyList_GET_ITEM(children, i);
1028 Py_INCREF(self->extra->children[i]);
1029 }
1030
1031 assert(!self->extra->length);
1032 self->extra->length = nchildren;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001033 }
1034 else {
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -07001035 if (element_resize(self, 0)) {
1036 return NULL;
1037 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001038 }
1039
Eli Bendersky698bdb22013-01-10 06:01:06 -08001040 /* Stash attrib. */
1041 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001042 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001043 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001044 }
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -07001045 dealloc_extra(oldextra);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001046
1047 Py_RETURN_NONE;
1048}
1049
1050/* __setstate__ for Element instance from the Python implementation.
1051 * 'state' should be the instance dict.
1052 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001053
Eli Bendersky698bdb22013-01-10 06:01:06 -08001054static PyObject *
1055element_setstate_from_Python(ElementObject *self, PyObject *state)
1056{
1057 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1058 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1059 PyObject *args;
1060 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001061 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001062
Eli Bendersky698bdb22013-01-10 06:01:06 -08001063 tag = attrib = text = tail = children = NULL;
1064 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001065 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001066 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001067
1068 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1069 &attrib, &text, &tail, &children))
1070 retval = element_setstate_from_attributes(self, tag, attrib, text,
1071 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001072 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001073 retval = NULL;
1074
1075 Py_DECREF(args);
1076 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001077}
1078
Serhiy Storchakacb985562015-05-04 15:32:48 +03001079/*[clinic input]
1080_elementtree.Element.__setstate__
1081
1082 state: object
1083 /
1084
1085[clinic start generated code]*/
1086
Eli Bendersky698bdb22013-01-10 06:01:06 -08001087static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001088_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1089/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001090{
1091 if (!PyDict_CheckExact(state)) {
1092 PyErr_Format(PyExc_TypeError,
1093 "Don't know how to unpickle \"%.200R\" as an Element",
1094 state);
1095 return NULL;
1096 }
1097 else
1098 return element_setstate_from_Python(self, state);
1099}
1100
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001101LOCAL(int)
1102checkpath(PyObject* tag)
1103{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001104 Py_ssize_t i;
1105 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001106
1107 /* check if a tag contains an xpath character */
1108
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001109#define PATHCHAR(ch) \
1110 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001111
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001112 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001113 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1114 void *data = PyUnicode_DATA(tag);
1115 unsigned int kind = PyUnicode_KIND(tag);
1116 for (i = 0; i < len; i++) {
1117 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1118 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001119 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001120 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001121 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001122 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001123 return 1;
1124 }
1125 return 0;
1126 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001127 if (PyBytes_Check(tag)) {
1128 char *p = PyBytes_AS_STRING(tag);
1129 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001130 if (p[i] == '{')
1131 check = 0;
1132 else if (p[i] == '}')
1133 check = 1;
1134 else if (check && PATHCHAR(p[i]))
1135 return 1;
1136 }
1137 return 0;
1138 }
1139
1140 return 1; /* unknown type; might be path expression */
1141}
1142
Serhiy Storchakacb985562015-05-04 15:32:48 +03001143/*[clinic input]
1144_elementtree.Element.extend
1145
1146 elements: object
1147 /
1148
1149[clinic start generated code]*/
1150
1151static PyObject *
1152_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1153/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001154{
1155 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001156 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001157
Serhiy Storchakacb985562015-05-04 15:32:48 +03001158 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001159 if (!seq) {
1160 PyErr_Format(
1161 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001162 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001163 );
1164 return NULL;
1165 }
1166
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001167 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001168 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001169 Py_INCREF(element);
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001170 if (!Element_Check(element)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001171 PyErr_Format(
1172 PyExc_TypeError,
1173 "expected an Element, not \"%.200s\"",
1174 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001175 Py_DECREF(seq);
1176 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001177 return NULL;
1178 }
1179
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001180 if (element_add_subelement(self, element) < 0) {
1181 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001182 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001183 return NULL;
1184 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001185 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001186 }
1187
1188 Py_DECREF(seq);
1189
1190 Py_RETURN_NONE;
1191}
1192
Serhiy Storchakacb985562015-05-04 15:32:48 +03001193/*[clinic input]
1194_elementtree.Element.find
1195
1196 path: object
1197 namespaces: object = None
1198
1199[clinic start generated code]*/
1200
1201static PyObject *
1202_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1203 PyObject *namespaces)
1204/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001205{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001206 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001207 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001208
Serhiy Storchakacb985562015-05-04 15:32:48 +03001209 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001210 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001211 return _PyObject_CallMethodIdObjArgs(
1212 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001213 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001214 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001215
1216 if (!self->extra)
1217 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001218
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001219 for (i = 0; i < self->extra->length; i++) {
1220 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001221 int rc;
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001222 if (!Element_Check(item))
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001223 continue;
1224 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001225 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001226 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001227 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001228 Py_DECREF(item);
1229 if (rc < 0)
1230 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001231 }
1232
1233 Py_RETURN_NONE;
1234}
1235
Serhiy Storchakacb985562015-05-04 15:32:48 +03001236/*[clinic input]
1237_elementtree.Element.findtext
1238
1239 path: object
1240 default: object = None
1241 namespaces: object = None
1242
1243[clinic start generated code]*/
1244
1245static PyObject *
1246_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1247 PyObject *default_value,
1248 PyObject *namespaces)
1249/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001250{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001251 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001252 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001253 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001254
Serhiy Storchakacb985562015-05-04 15:32:48 +03001255 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001256 return _PyObject_CallMethodIdObjArgs(
1257 st->elementpath_obj, &PyId_findtext,
1258 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001259 );
1260
1261 if (!self->extra) {
1262 Py_INCREF(default_value);
1263 return default_value;
1264 }
1265
1266 for (i = 0; i < self->extra->length; i++) {
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001267 PyObject *item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001268 int rc;
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001269 if (!Element_Check(item))
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001270 continue;
1271 Py_INCREF(item);
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001272 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001273 if (rc > 0) {
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001274 PyObject* text = element_get_text((ElementObject*)item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001275 if (text == Py_None) {
1276 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001277 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001278 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001279 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001280 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001281 return text;
1282 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001283 Py_DECREF(item);
1284 if (rc < 0)
1285 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001286 }
1287
1288 Py_INCREF(default_value);
1289 return default_value;
1290}
1291
Serhiy Storchakacb985562015-05-04 15:32:48 +03001292/*[clinic input]
1293_elementtree.Element.findall
1294
1295 path: object
1296 namespaces: object = None
1297
1298[clinic start generated code]*/
1299
1300static PyObject *
1301_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1302 PyObject *namespaces)
1303/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001304{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001305 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001306 PyObject* out;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001307 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001308
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001309 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001310 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001311 return _PyObject_CallMethodIdObjArgs(
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001312 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001313 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001314 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001315
1316 out = PyList_New(0);
1317 if (!out)
1318 return NULL;
1319
1320 if (!self->extra)
1321 return out;
1322
1323 for (i = 0; i < self->extra->length; i++) {
1324 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001325 int rc;
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001326 if (!Element_Check(item))
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001327 continue;
1328 Py_INCREF(item);
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001329 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001330 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1331 Py_DECREF(item);
1332 Py_DECREF(out);
1333 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001334 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001335 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001336 }
1337
1338 return out;
1339}
1340
Serhiy Storchakacb985562015-05-04 15:32:48 +03001341/*[clinic input]
1342_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001343
Serhiy Storchakacb985562015-05-04 15:32:48 +03001344 path: object
1345 namespaces: object = None
1346
1347[clinic start generated code]*/
1348
1349static PyObject *
1350_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1351 PyObject *namespaces)
1352/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1353{
1354 PyObject* tag = path;
1355 _Py_IDENTIFIER(iterfind);
1356 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001357
Victor Stinnerf5616342016-12-09 15:26:00 +01001358 return _PyObject_CallMethodIdObjArgs(
1359 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001360}
1361
Serhiy Storchakacb985562015-05-04 15:32:48 +03001362/*[clinic input]
1363_elementtree.Element.get
1364
1365 key: object
1366 default: object = None
1367
1368[clinic start generated code]*/
1369
1370static PyObject *
1371_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1372 PyObject *default_value)
1373/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001374{
1375 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001376
1377 if (!self->extra || self->extra->attrib == Py_None)
1378 value = default_value;
1379 else {
1380 value = PyDict_GetItem(self->extra->attrib, key);
1381 if (!value)
1382 value = default_value;
1383 }
1384
1385 Py_INCREF(value);
1386 return value;
1387}
1388
Serhiy Storchakacb985562015-05-04 15:32:48 +03001389/*[clinic input]
1390_elementtree.Element.getchildren
1391
1392[clinic start generated code]*/
1393
1394static PyObject *
1395_elementtree_Element_getchildren_impl(ElementObject *self)
1396/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001397{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001398 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001399 PyObject* list;
1400
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001401 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1402 "This method will be removed in future versions. "
1403 "Use 'list(elem)' or iteration over elem instead.",
1404 1) < 0) {
1405 return NULL;
1406 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001407
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001408 if (!self->extra)
1409 return PyList_New(0);
1410
1411 list = PyList_New(self->extra->length);
1412 if (!list)
1413 return NULL;
1414
1415 for (i = 0; i < self->extra->length; i++) {
1416 PyObject* item = self->extra->children[i];
1417 Py_INCREF(item);
1418 PyList_SET_ITEM(list, i, item);
1419 }
1420
1421 return list;
1422}
1423
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001424
Eli Bendersky64d11e62012-06-15 07:42:50 +03001425static PyObject *
1426create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1427
1428
Serhiy Storchakacb985562015-05-04 15:32:48 +03001429/*[clinic input]
1430_elementtree.Element.iter
1431
1432 tag: object = None
1433
1434[clinic start generated code]*/
1435
Eli Bendersky64d11e62012-06-15 07:42:50 +03001436static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001437_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1438/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001439{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001440 if (PyUnicode_Check(tag)) {
1441 if (PyUnicode_READY(tag) < 0)
1442 return NULL;
1443 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1444 tag = Py_None;
1445 }
1446 else if (PyBytes_Check(tag)) {
1447 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1448 tag = Py_None;
1449 }
1450
Eli Bendersky64d11e62012-06-15 07:42:50 +03001451 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001452}
1453
1454
Serhiy Storchakacb985562015-05-04 15:32:48 +03001455/*[clinic input]
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001456_elementtree.Element.getiterator
1457
1458 tag: object = None
1459
1460[clinic start generated code]*/
1461
1462static PyObject *
1463_elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1464/*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1465{
1466 /* Change for a DeprecationWarning in 1.4 */
1467 if (PyErr_WarnEx(PyExc_PendingDeprecationWarning,
1468 "This method will be removed in future versions. "
1469 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1470 1) < 0) {
1471 return NULL;
1472 }
1473 return _elementtree_Element_iter_impl(self, tag);
1474}
1475
1476
1477/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03001478_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001479
Serhiy Storchakacb985562015-05-04 15:32:48 +03001480[clinic start generated code]*/
1481
1482static PyObject *
1483_elementtree_Element_itertext_impl(ElementObject *self)
1484/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1485{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001486 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001487}
1488
Eli Bendersky64d11e62012-06-15 07:42:50 +03001489
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001490static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001491element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001492{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001493 ElementObject* self = (ElementObject*) self_;
1494
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001495 if (!self->extra || index < 0 || index >= self->extra->length) {
1496 PyErr_SetString(
1497 PyExc_IndexError,
1498 "child index out of range"
1499 );
1500 return NULL;
1501 }
1502
1503 Py_INCREF(self->extra->children[index]);
1504 return self->extra->children[index];
1505}
1506
Serhiy Storchakacb985562015-05-04 15:32:48 +03001507/*[clinic input]
1508_elementtree.Element.insert
1509
1510 index: Py_ssize_t
1511 subelement: object(subclass_of='&Element_Type')
1512 /
1513
1514[clinic start generated code]*/
1515
1516static PyObject *
1517_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1518 PyObject *subelement)
1519/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001520{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001521 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001522
Victor Stinner5f0af232013-07-11 23:01:36 +02001523 if (!self->extra) {
1524 if (create_extra(self, NULL) < 0)
1525 return NULL;
1526 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001527
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001528 if (index < 0) {
1529 index += self->extra->length;
1530 if (index < 0)
1531 index = 0;
1532 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001533 if (index > self->extra->length)
1534 index = self->extra->length;
1535
1536 if (element_resize(self, 1) < 0)
1537 return NULL;
1538
1539 for (i = self->extra->length; i > index; i--)
1540 self->extra->children[i] = self->extra->children[i-1];
1541
Serhiy Storchakacb985562015-05-04 15:32:48 +03001542 Py_INCREF(subelement);
1543 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001544
1545 self->extra->length++;
1546
1547 Py_RETURN_NONE;
1548}
1549
Serhiy Storchakacb985562015-05-04 15:32:48 +03001550/*[clinic input]
1551_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001552
Serhiy Storchakacb985562015-05-04 15:32:48 +03001553[clinic start generated code]*/
1554
1555static PyObject *
1556_elementtree_Element_items_impl(ElementObject *self)
1557/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1558{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001559 if (!self->extra || self->extra->attrib == Py_None)
1560 return PyList_New(0);
1561
1562 return PyDict_Items(self->extra->attrib);
1563}
1564
Serhiy Storchakacb985562015-05-04 15:32:48 +03001565/*[clinic input]
1566_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001567
Serhiy Storchakacb985562015-05-04 15:32:48 +03001568[clinic start generated code]*/
1569
1570static PyObject *
1571_elementtree_Element_keys_impl(ElementObject *self)
1572/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1573{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001574 if (!self->extra || self->extra->attrib == Py_None)
1575 return PyList_New(0);
1576
1577 return PyDict_Keys(self->extra->attrib);
1578}
1579
Martin v. Löwis18e16552006-02-15 17:27:45 +00001580static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001581element_length(ElementObject* self)
1582{
1583 if (!self->extra)
1584 return 0;
1585
1586 return self->extra->length;
1587}
1588
Serhiy Storchakacb985562015-05-04 15:32:48 +03001589/*[clinic input]
1590_elementtree.Element.makeelement
1591
1592 tag: object
1593 attrib: object
1594 /
1595
1596[clinic start generated code]*/
1597
1598static PyObject *
1599_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1600 PyObject *attrib)
1601/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001602{
1603 PyObject* elem;
1604
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001605 attrib = PyDict_Copy(attrib);
1606 if (!attrib)
1607 return NULL;
1608
Eli Bendersky092af1f2012-03-04 07:14:03 +02001609 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001610
1611 Py_DECREF(attrib);
1612
1613 return elem;
1614}
1615
Serhiy Storchakacb985562015-05-04 15:32:48 +03001616/*[clinic input]
1617_elementtree.Element.remove
1618
1619 subelement: object(subclass_of='&Element_Type')
1620 /
1621
1622[clinic start generated code]*/
1623
1624static PyObject *
1625_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1626/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001627{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001628 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001629 int rc;
1630 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001631
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001632 if (!self->extra) {
1633 /* element has no children, so raise exception */
1634 PyErr_SetString(
1635 PyExc_ValueError,
1636 "list.remove(x): x not in list"
1637 );
1638 return NULL;
1639 }
1640
1641 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001642 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001643 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001644 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001645 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001646 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001647 if (rc < 0)
1648 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001649 }
1650
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001651 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001652 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001653 PyErr_SetString(
1654 PyExc_ValueError,
1655 "list.remove(x): x not in list"
1656 );
1657 return NULL;
1658 }
1659
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001660 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001661
1662 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001663 for (; i < self->extra->length; i++)
1664 self->extra->children[i] = self->extra->children[i+1];
1665
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001666 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001667 Py_RETURN_NONE;
1668}
1669
1670static PyObject*
1671element_repr(ElementObject* self)
1672{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001673 int status;
1674
1675 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001676 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001677
1678 status = Py_ReprEnter((PyObject *)self);
1679 if (status == 0) {
1680 PyObject *res;
1681 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1682 Py_ReprLeave((PyObject *)self);
1683 return res;
1684 }
1685 if (status > 0)
1686 PyErr_Format(PyExc_RuntimeError,
1687 "reentrant call inside %s.__repr__",
1688 Py_TYPE(self)->tp_name);
1689 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001690}
1691
Serhiy Storchakacb985562015-05-04 15:32:48 +03001692/*[clinic input]
1693_elementtree.Element.set
1694
1695 key: object
1696 value: object
1697 /
1698
1699[clinic start generated code]*/
1700
1701static PyObject *
1702_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1703 PyObject *value)
1704/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001705{
1706 PyObject* attrib;
1707
Victor Stinner5f0af232013-07-11 23:01:36 +02001708 if (!self->extra) {
1709 if (create_extra(self, NULL) < 0)
1710 return NULL;
1711 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001712
1713 attrib = element_get_attrib(self);
1714 if (!attrib)
1715 return NULL;
1716
1717 if (PyDict_SetItem(attrib, key, value) < 0)
1718 return NULL;
1719
1720 Py_RETURN_NONE;
1721}
1722
1723static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001724element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001725{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001726 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001727 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001728 PyObject* old;
1729
1730 if (!self->extra || index < 0 || index >= self->extra->length) {
1731 PyErr_SetString(
1732 PyExc_IndexError,
1733 "child assignment index out of range");
1734 return -1;
1735 }
1736
1737 old = self->extra->children[index];
1738
1739 if (item) {
1740 Py_INCREF(item);
1741 self->extra->children[index] = item;
1742 } else {
1743 self->extra->length--;
1744 for (i = index; i < self->extra->length; i++)
1745 self->extra->children[i] = self->extra->children[i+1];
1746 }
1747
1748 Py_DECREF(old);
1749
1750 return 0;
1751}
1752
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001753static PyObject*
1754element_subscr(PyObject* self_, PyObject* item)
1755{
1756 ElementObject* self = (ElementObject*) self_;
1757
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001758 if (PyIndex_Check(item)) {
1759 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001760
1761 if (i == -1 && PyErr_Occurred()) {
1762 return NULL;
1763 }
1764 if (i < 0 && self->extra)
1765 i += self->extra->length;
1766 return element_getitem(self_, i);
1767 }
1768 else if (PySlice_Check(item)) {
1769 Py_ssize_t start, stop, step, slicelen, cur, i;
1770 PyObject* list;
1771
1772 if (!self->extra)
1773 return PyList_New(0);
1774
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001775 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001776 return NULL;
1777 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001778 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1779 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001780
1781 if (slicelen <= 0)
1782 return PyList_New(0);
1783 else {
1784 list = PyList_New(slicelen);
1785 if (!list)
1786 return NULL;
1787
1788 for (cur = start, i = 0; i < slicelen;
1789 cur += step, i++) {
1790 PyObject* item = self->extra->children[cur];
1791 Py_INCREF(item);
1792 PyList_SET_ITEM(list, i, item);
1793 }
1794
1795 return list;
1796 }
1797 }
1798 else {
1799 PyErr_SetString(PyExc_TypeError,
1800 "element indices must be integers");
1801 return NULL;
1802 }
1803}
1804
1805static int
1806element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1807{
1808 ElementObject* self = (ElementObject*) self_;
1809
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001810 if (PyIndex_Check(item)) {
1811 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001812
1813 if (i == -1 && PyErr_Occurred()) {
1814 return -1;
1815 }
1816 if (i < 0 && self->extra)
1817 i += self->extra->length;
1818 return element_setitem(self_, i, value);
1819 }
1820 else if (PySlice_Check(item)) {
1821 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1822
1823 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001824 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001825
Victor Stinner5f0af232013-07-11 23:01:36 +02001826 if (!self->extra) {
1827 if (create_extra(self, NULL) < 0)
1828 return -1;
1829 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001830
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001831 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001832 return -1;
1833 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001834 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1835 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001836
Eli Bendersky865756a2012-03-09 13:38:15 +02001837 if (value == NULL) {
1838 /* Delete slice */
1839 size_t cur;
1840 Py_ssize_t i;
1841
1842 if (slicelen <= 0)
1843 return 0;
1844
1845 /* Since we're deleting, the direction of the range doesn't matter,
1846 * so for simplicity make it always ascending.
1847 */
1848 if (step < 0) {
1849 stop = start + 1;
1850 start = stop + step * (slicelen - 1) - 1;
1851 step = -step;
1852 }
1853
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001854 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001855
1856 /* recycle is a list that will contain all the children
1857 * scheduled for removal.
1858 */
1859 if (!(recycle = PyList_New(slicelen))) {
1860 PyErr_NoMemory();
1861 return -1;
1862 }
1863
1864 /* This loop walks over all the children that have to be deleted,
1865 * with cur pointing at them. num_moved is the amount of children
1866 * until the next deleted child that have to be "shifted down" to
1867 * occupy the deleted's places.
1868 * Note that in the ith iteration, shifting is done i+i places down
1869 * because i children were already removed.
1870 */
1871 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1872 /* Compute how many children have to be moved, clipping at the
1873 * list end.
1874 */
1875 Py_ssize_t num_moved = step - 1;
1876 if (cur + step >= (size_t)self->extra->length) {
1877 num_moved = self->extra->length - cur - 1;
1878 }
1879
1880 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1881
1882 memmove(
1883 self->extra->children + cur - i,
1884 self->extra->children + cur + 1,
1885 num_moved * sizeof(PyObject *));
1886 }
1887
1888 /* Leftover "tail" after the last removed child */
1889 cur = start + (size_t)slicelen * step;
1890 if (cur < (size_t)self->extra->length) {
1891 memmove(
1892 self->extra->children + cur - slicelen,
1893 self->extra->children + cur,
1894 (self->extra->length - cur) * sizeof(PyObject *));
1895 }
1896
1897 self->extra->length -= slicelen;
1898
1899 /* Discard the recycle list with all the deleted sub-elements */
1900 Py_XDECREF(recycle);
1901 return 0;
1902 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001903
1904 /* A new slice is actually being assigned */
1905 seq = PySequence_Fast(value, "");
1906 if (!seq) {
1907 PyErr_Format(
1908 PyExc_TypeError,
1909 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1910 );
1911 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001912 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001913 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001914
1915 if (step != 1 && newlen != slicelen)
1916 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001917 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001918 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001919 "attempt to assign sequence of size %zd "
1920 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001921 newlen, slicelen
1922 );
1923 return -1;
1924 }
1925
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001926 /* Resize before creating the recycle bin, to prevent refleaks. */
1927 if (newlen > slicelen) {
1928 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001929 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001930 return -1;
1931 }
1932 }
1933
1934 if (slicelen > 0) {
1935 /* to avoid recursive calls to this method (via decref), move
1936 old items to the recycle bin here, and get rid of them when
1937 we're done modifying the element */
1938 recycle = PyList_New(slicelen);
1939 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001940 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001941 return -1;
1942 }
1943 for (cur = start, i = 0; i < slicelen;
1944 cur += step, i++)
1945 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1946 }
1947
1948 if (newlen < slicelen) {
1949 /* delete slice */
1950 for (i = stop; i < self->extra->length; i++)
1951 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1952 } else if (newlen > slicelen) {
1953 /* insert slice */
1954 for (i = self->extra->length-1; i >= stop; i--)
1955 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1956 }
1957
1958 /* replace the slice */
1959 for (cur = start, i = 0; i < newlen;
1960 cur += step, i++) {
1961 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1962 Py_INCREF(element);
1963 self->extra->children[cur] = element;
1964 }
1965
1966 self->extra->length += newlen - slicelen;
1967
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001968 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001969
1970 /* discard the recycle bin, and everything in it */
1971 Py_XDECREF(recycle);
1972
1973 return 0;
1974 }
1975 else {
1976 PyErr_SetString(PyExc_TypeError,
1977 "element indices must be integers");
1978 return -1;
1979 }
1980}
1981
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001982static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001983element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001984{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001985 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001986 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001987 return res;
1988}
1989
Serhiy Storchakadde08152015-11-25 15:28:13 +02001990static PyObject*
1991element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001992{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001993 PyObject *res = element_get_text(self);
1994 Py_XINCREF(res);
1995 return res;
1996}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001997
Serhiy Storchakadde08152015-11-25 15:28:13 +02001998static PyObject*
1999element_tail_getter(ElementObject *self, void *closure)
2000{
2001 PyObject *res = element_get_tail(self);
2002 Py_XINCREF(res);
2003 return res;
2004}
2005
2006static PyObject*
2007element_attrib_getter(ElementObject *self, void *closure)
2008{
2009 PyObject *res;
2010 if (!self->extra) {
2011 if (create_extra(self, NULL) < 0)
2012 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002013 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02002014 res = element_get_attrib(self);
2015 Py_XINCREF(res);
2016 return res;
2017}
Victor Stinner4d463432013-07-11 23:05:03 +02002018
/* macro for setter validation: a NULL value means the attribute is being
 * deleted, which is refused with AttributeError by returning -1 from the
 * enclosing setter.  Wrapped in do { } while (0) so the macro expands to a
 * single statement and stays safe inside an unbraced if/else.
 */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2027
Serhiy Storchakadde08152015-11-25 15:28:13 +02002028static int
2029element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2030{
2031 _VALIDATE_ATTR_VALUE(value);
2032 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002033 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002034 return 0;
2035}
2036
2037static int
2038element_text_setter(ElementObject *self, PyObject *value, void *closure)
2039{
2040 _VALIDATE_ATTR_VALUE(value);
2041 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002042 _set_joined_ptr(&self->text, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002043 return 0;
2044}
2045
2046static int
2047element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2048{
2049 _VALIDATE_ATTR_VALUE(value);
2050 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002051 _set_joined_ptr(&self->tail, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002052 return 0;
2053}
2054
2055static int
2056element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2057{
2058 _VALIDATE_ATTR_VALUE(value);
2059 if (!self->extra) {
2060 if (create_extra(self, NULL) < 0)
2061 return -1;
2062 }
2063 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002064 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002065 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002066}
2067
2068static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002069 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002070 0, /* sq_concat */
2071 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002072 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002073 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002074 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002075 0,
2076};
2077
Eli Bendersky64d11e62012-06-15 07:42:50 +03002078/******************************* Element iterator ****************************/
2079
2080/* ElementIterObject represents the iteration state over an XML element in
2081 * pre-order traversal. To keep track of which sub-element should be returned
2082 * next, a stack of parents is maintained. This is a standard stack-based
2083 * iterative pre-order traversal of a tree.
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002084 * The stack is managed using a continuous array.
2085 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002086 * the current one is exhausted, and the next child to examine in that parent.
2087 */
2088typedef struct ParentLocator_t {
2089 ElementObject *parent;
2090 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002091} ParentLocator;
2092
2093typedef struct {
2094 PyObject_HEAD
2095 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002096 Py_ssize_t parent_stack_used;
2097 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002098 ElementObject *root_element;
2099 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002100 int gettext;
2101} ElementIterObject;
2102
2103
2104static void
2105elementiter_dealloc(ElementIterObject *it)
2106{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002107 Py_ssize_t i = it->parent_stack_used;
2108 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002109 /* bpo-31095: UnTrack is needed before calling any callbacks */
2110 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002111 while (i--)
2112 Py_XDECREF(it->parent_stack[i].parent);
2113 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002114
2115 Py_XDECREF(it->sought_tag);
2116 Py_XDECREF(it->root_element);
2117
Eli Bendersky64d11e62012-06-15 07:42:50 +03002118 PyObject_GC_Del(it);
2119}
2120
2121static int
2122elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2123{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002124 Py_ssize_t i = it->parent_stack_used;
2125 while (i--)
2126 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002127
2128 Py_VISIT(it->root_element);
2129 Py_VISIT(it->sought_tag);
2130 return 0;
2131}
2132
2133/* Helper function for elementiter_next. Add a new parent to the parent stack.
2134 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002135static int
2136parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002137{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002138 ParentLocator *item;
2139
2140 if (it->parent_stack_used >= it->parent_stack_size) {
2141 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2142 ParentLocator *parent_stack = it->parent_stack;
2143 PyMem_Resize(parent_stack, ParentLocator, new_size);
2144 if (parent_stack == NULL)
2145 return -1;
2146 it->parent_stack = parent_stack;
2147 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002148 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002149 item = it->parent_stack + it->parent_stack_used++;
2150 Py_INCREF(parent);
2151 item->parent = parent;
2152 item->child_index = 0;
2153 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002154}
2155
2156static PyObject *
2157elementiter_next(ElementIterObject *it)
2158{
2159 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002160 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002161 * A short note on gettext: this function serves both the iter() and
2162 * itertext() methods to avoid code duplication. However, there are a few
2163 * small differences in the way these iterations work. Namely:
2164 * - itertext() only yields text from nodes that have it, and continues
2165 * iterating when a node doesn't have text (so it doesn't return any
2166 * node like iter())
2167 * - itertext() also has to handle tail, after finishing with all the
2168 * children of a node.
2169 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002170 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002171 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002172 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002173
2174 while (1) {
2175 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002176 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002177 * iterator is exhausted.
2178 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002179 if (!it->parent_stack_used) {
2180 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002181 PyErr_SetNone(PyExc_StopIteration);
2182 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002183 }
2184
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002185 elem = it->root_element; /* steals a reference */
2186 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002187 }
2188 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002189 /* See if there are children left to traverse in the current parent. If
2190 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002191 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002192 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2193 Py_ssize_t child_index = item->child_index;
2194 ElementObjectExtra *extra;
2195 elem = item->parent;
2196 extra = elem->extra;
2197 if (!extra || child_index >= extra->length) {
2198 it->parent_stack_used--;
2199 /* Note that extra condition on it->parent_stack_used here;
2200 * this is because itertext() is supposed to only return *inner*
2201 * text, not text following the element it began iteration with.
2202 */
2203 if (it->gettext && it->parent_stack_used) {
2204 text = element_get_tail(elem);
2205 goto gettext;
2206 }
2207 Py_DECREF(elem);
2208 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002209 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002210
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07002211 if (!Element_Check(extra->children[child_index])) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002212 PyErr_Format(PyExc_AttributeError,
2213 "'%.100s' object has no attribute 'iter'",
2214 Py_TYPE(extra->children[child_index])->tp_name);
2215 return NULL;
2216 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002217 elem = (ElementObject *)extra->children[child_index];
2218 item->child_index++;
2219 Py_INCREF(elem);
2220 }
2221
2222 if (parent_stack_push_new(it, elem) < 0) {
2223 Py_DECREF(elem);
2224 PyErr_NoMemory();
2225 return NULL;
2226 }
2227 if (it->gettext) {
2228 text = element_get_text(elem);
2229 goto gettext;
2230 }
2231
2232 if (it->sought_tag == Py_None)
2233 return (PyObject *)elem;
2234
2235 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2236 if (rc > 0)
2237 return (PyObject *)elem;
2238
2239 Py_DECREF(elem);
2240 if (rc < 0)
2241 return NULL;
2242 continue;
2243
2244gettext:
2245 if (!text) {
2246 Py_DECREF(elem);
2247 return NULL;
2248 }
2249 if (text == Py_None) {
2250 Py_DECREF(elem);
2251 }
2252 else {
2253 Py_INCREF(text);
2254 Py_DECREF(elem);
2255 rc = PyObject_IsTrue(text);
2256 if (rc > 0)
2257 return text;
2258 Py_DECREF(text);
2259 if (rc < 0)
2260 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002261 }
2262 }
2263
2264 return NULL;
2265}
2266
2267
2268static PyTypeObject ElementIter_Type = {
2269 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002270 /* Using the module's name since the pure-Python implementation does not
2271 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002272 "_elementtree._element_iterator", /* tp_name */
2273 sizeof(ElementIterObject), /* tp_basicsize */
2274 0, /* tp_itemsize */
2275 /* methods */
2276 (destructor)elementiter_dealloc, /* tp_dealloc */
2277 0, /* tp_print */
2278 0, /* tp_getattr */
2279 0, /* tp_setattr */
2280 0, /* tp_reserved */
2281 0, /* tp_repr */
2282 0, /* tp_as_number */
2283 0, /* tp_as_sequence */
2284 0, /* tp_as_mapping */
2285 0, /* tp_hash */
2286 0, /* tp_call */
2287 0, /* tp_str */
2288 0, /* tp_getattro */
2289 0, /* tp_setattro */
2290 0, /* tp_as_buffer */
2291 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2292 0, /* tp_doc */
2293 (traverseproc)elementiter_traverse, /* tp_traverse */
2294 0, /* tp_clear */
2295 0, /* tp_richcompare */
2296 0, /* tp_weaklistoffset */
2297 PyObject_SelfIter, /* tp_iter */
2298 (iternextfunc)elementiter_next, /* tp_iternext */
2299 0, /* tp_methods */
2300 0, /* tp_members */
2301 0, /* tp_getset */
2302 0, /* tp_base */
2303 0, /* tp_dict */
2304 0, /* tp_descr_get */
2305 0, /* tp_descr_set */
2306 0, /* tp_dictoffset */
2307 0, /* tp_init */
2308 0, /* tp_alloc */
2309 0, /* tp_new */
2310};
2311
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002312#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002313
2314static PyObject *
2315create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2316{
2317 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002318
2319 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2320 if (!it)
2321 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002322
Victor Stinner4d463432013-07-11 23:05:03 +02002323 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002324 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002325 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002326 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002327 it->root_element = self;
2328
Eli Bendersky64d11e62012-06-15 07:42:50 +03002329 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002330
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002331 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002332 if (it->parent_stack == NULL) {
2333 Py_DECREF(it);
2334 PyErr_NoMemory();
2335 return NULL;
2336 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002337 it->parent_stack_used = 0;
2338 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002339
Eli Bendersky64d11e62012-06-15 07:42:50 +03002340 return (PyObject *)it;
2341}
2342
2343
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002344/* ==================================================================== */
2345/* the tree builder type */
2346
2347typedef struct {
2348 PyObject_HEAD
2349
Eli Bendersky58d548d2012-05-29 15:45:16 +03002350 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002351
Antoine Pitrouee329312012-10-04 19:53:29 +02002352 PyObject *this; /* current node */
2353 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002354
Eli Bendersky58d548d2012-05-29 15:45:16 +03002355 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002356
Eli Bendersky58d548d2012-05-29 15:45:16 +03002357 PyObject *stack; /* element stack */
2358 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002359
Eli Bendersky48d358b2012-05-30 17:57:50 +03002360 PyObject *element_factory;
2361
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002362 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002363 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002364 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2365 PyObject *end_event_obj;
2366 PyObject *start_ns_event_obj;
2367 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002368} TreeBuilderObject;
2369
Christian Heimes90aa7642007-12-19 02:45:37 +00002370#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002371
2372/* -------------------------------------------------------------------- */
2373/* constructor and destructor */
2374
Eli Bendersky58d548d2012-05-29 15:45:16 +03002375static PyObject *
2376treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002377{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002378 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2379 if (t != NULL) {
2380 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002381
Eli Bendersky58d548d2012-05-29 15:45:16 +03002382 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002383 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002384 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002385 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002386
Eli Bendersky58d548d2012-05-29 15:45:16 +03002387 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002388 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002389 t->stack = PyList_New(20);
2390 if (!t->stack) {
2391 Py_DECREF(t->this);
2392 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002393 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002394 return NULL;
2395 }
2396 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002397
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002398 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002399 t->start_event_obj = t->end_event_obj = NULL;
2400 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2401 }
2402 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002403}
2404
Serhiy Storchakacb985562015-05-04 15:32:48 +03002405/*[clinic input]
2406_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002407
Serhiy Storchakacb985562015-05-04 15:32:48 +03002408 element_factory: object = NULL
2409
2410[clinic start generated code]*/
2411
2412static int
2413_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2414 PyObject *element_factory)
2415/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2416{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002417 if (element_factory) {
2418 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002419 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002420 }
2421
Eli Bendersky58d548d2012-05-29 15:45:16 +03002422 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002423}
2424
Eli Bendersky48d358b2012-05-30 17:57:50 +03002425static int
2426treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2427{
2428 Py_VISIT(self->root);
2429 Py_VISIT(self->this);
2430 Py_VISIT(self->last);
2431 Py_VISIT(self->data);
2432 Py_VISIT(self->stack);
2433 Py_VISIT(self->element_factory);
2434 return 0;
2435}
2436
2437static int
2438treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002439{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002440 Py_CLEAR(self->end_ns_event_obj);
2441 Py_CLEAR(self->start_ns_event_obj);
2442 Py_CLEAR(self->end_event_obj);
2443 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002444 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002445 Py_CLEAR(self->stack);
2446 Py_CLEAR(self->data);
2447 Py_CLEAR(self->last);
2448 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002449 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002450 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002451 return 0;
2452}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002453
Eli Bendersky48d358b2012-05-30 17:57:50 +03002454static void
2455treebuilder_dealloc(TreeBuilderObject *self)
2456{
2457 PyObject_GC_UnTrack(self);
2458 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002459 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002460}
2461
2462/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002463/* helpers for handling of arbitrary element-like objects */
2464
2465static int
Serhiy Storchaka576def02017-03-30 09:47:31 +03002466treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
Antoine Pitrouee329312012-10-04 19:53:29 +02002467 PyObject **dest, _Py_Identifier *name)
2468{
2469 if (Element_CheckExact(element)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002470 PyObject *tmp = JOIN_OBJ(*dest);
2471 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2472 *data = NULL;
2473 Py_DECREF(tmp);
Antoine Pitrouee329312012-10-04 19:53:29 +02002474 return 0;
2475 }
2476 else {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002477 PyObject *joined = list_join(*data);
Antoine Pitrouee329312012-10-04 19:53:29 +02002478 int r;
2479 if (joined == NULL)
2480 return -1;
2481 r = _PyObject_SetAttrId(element, name, joined);
2482 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002483 if (r < 0)
2484 return -1;
2485 Py_CLEAR(*data);
2486 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002487 }
2488}
2489
Serhiy Storchaka576def02017-03-30 09:47:31 +03002490LOCAL(int)
2491treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002492{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002493 PyObject *element = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002494
Serhiy Storchaka576def02017-03-30 09:47:31 +03002495 if (!self->data) {
2496 return 0;
2497 }
2498
2499 if (self->this == element) {
2500 _Py_IDENTIFIER(text);
2501 return treebuilder_set_element_text_or_tail(
2502 element, &self->data,
2503 &((ElementObject *) element)->text, &PyId_text);
2504 }
2505 else {
2506 _Py_IDENTIFIER(tail);
2507 return treebuilder_set_element_text_or_tail(
2508 element, &self->data,
2509 &((ElementObject *) element)->tail, &PyId_tail);
2510 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002511}
2512
2513static int
2514treebuilder_add_subelement(PyObject *element, PyObject *child)
2515{
2516 _Py_IDENTIFIER(append);
2517 if (Element_CheckExact(element)) {
2518 ElementObject *elem = (ElementObject *) element;
2519 return element_add_subelement(elem, child);
2520 }
2521 else {
2522 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002523 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002524 if (res == NULL)
2525 return -1;
2526 Py_DECREF(res);
2527 return 0;
2528 }
2529}
2530
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002531LOCAL(int)
2532treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2533 PyObject *node)
2534{
2535 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002536 PyObject *res;
2537 PyObject *event = PyTuple_Pack(2, action, node);
2538 if (event == NULL)
2539 return -1;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002540 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002541 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002542 if (res == NULL)
2543 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002544 Py_DECREF(res);
2545 }
2546 return 0;
2547}
2548
Antoine Pitrouee329312012-10-04 19:53:29 +02002549/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002550/* handlers */
2551
2552LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002553treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2554 PyObject* attrib)
2555{
2556 PyObject* node;
2557 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002558 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002559
Serhiy Storchaka576def02017-03-30 09:47:31 +03002560 if (treebuilder_flush_data(self) < 0) {
2561 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002562 }
2563
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002564 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002565 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002566 } else if (attrib == Py_None) {
2567 attrib = PyDict_New();
2568 if (!attrib)
2569 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002570 node = PyObject_CallFunctionObjArgs(self->element_factory,
2571 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002572 Py_DECREF(attrib);
2573 }
2574 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002575 node = PyObject_CallFunctionObjArgs(self->element_factory,
2576 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002577 }
2578 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002579 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002580 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002581
Antoine Pitrouee329312012-10-04 19:53:29 +02002582 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002583
2584 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002585 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002586 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002587 } else {
2588 if (self->root) {
2589 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002590 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002591 "multiple elements on top level"
2592 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002593 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002594 }
2595 Py_INCREF(node);
2596 self->root = node;
2597 }
2598
2599 if (self->index < PyList_GET_SIZE(self->stack)) {
2600 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002601 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002602 Py_INCREF(this);
2603 } else {
2604 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002605 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002606 }
2607 self->index++;
2608
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002609 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002610 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002611 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002612 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002613
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002614 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2615 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002616
2617 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002618
2619 error:
2620 Py_DECREF(node);
2621 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002622}
2623
2624LOCAL(PyObject*)
2625treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2626{
2627 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002628 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002629 /* ignore calls to data before the first call to start */
2630 Py_RETURN_NONE;
2631 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002632 /* store the first item as is */
2633 Py_INCREF(data); self->data = data;
2634 } else {
2635 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002636 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2637 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002638 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002639 /* expat often generates single character data sections; handle
2640 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002641 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2642 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002643 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002644 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002645 } else if (PyList_CheckExact(self->data)) {
2646 if (PyList_Append(self->data, data) < 0)
2647 return NULL;
2648 } else {
2649 PyObject* list = PyList_New(2);
2650 if (!list)
2651 return NULL;
2652 PyList_SET_ITEM(list, 0, self->data);
2653 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2654 self->data = list;
2655 }
2656 }
2657
2658 Py_RETURN_NONE;
2659}
2660
2661LOCAL(PyObject*)
2662treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2663{
2664 PyObject* item;
2665
Serhiy Storchaka576def02017-03-30 09:47:31 +03002666 if (treebuilder_flush_data(self) < 0) {
2667 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002668 }
2669
2670 if (self->index == 0) {
2671 PyErr_SetString(
2672 PyExc_IndexError,
2673 "pop from empty stack"
2674 );
2675 return NULL;
2676 }
2677
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002678 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002679 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002680 self->index--;
2681 self->this = PyList_GET_ITEM(self->stack, self->index);
2682 Py_INCREF(self->this);
2683 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002684
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002685 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2686 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002687
2688 Py_INCREF(self->last);
2689 return (PyObject*) self->last;
2690}
2691
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002692/* -------------------------------------------------------------------- */
2693/* methods (in alphabetical order) */
2694
Serhiy Storchakacb985562015-05-04 15:32:48 +03002695/*[clinic input]
2696_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002697
Serhiy Storchakacb985562015-05-04 15:32:48 +03002698 data: object
2699 /
2700
2701[clinic start generated code]*/
2702
2703static PyObject *
2704_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2705/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2706{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002707 return treebuilder_handle_data(self, data);
2708}
2709
Serhiy Storchakacb985562015-05-04 15:32:48 +03002710/*[clinic input]
2711_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002712
Serhiy Storchakacb985562015-05-04 15:32:48 +03002713 tag: object
2714 /
2715
2716[clinic start generated code]*/
2717
2718static PyObject *
2719_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2720/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2721{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002722 return treebuilder_handle_end(self, tag);
2723}
2724
2725LOCAL(PyObject*)
2726treebuilder_done(TreeBuilderObject* self)
2727{
2728 PyObject* res;
2729
2730 /* FIXME: check stack size? */
2731
2732 if (self->root)
2733 res = self->root;
2734 else
2735 res = Py_None;
2736
2737 Py_INCREF(res);
2738 return res;
2739}
2740
Serhiy Storchakacb985562015-05-04 15:32:48 +03002741/*[clinic input]
2742_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002743
Serhiy Storchakacb985562015-05-04 15:32:48 +03002744[clinic start generated code]*/
2745
2746static PyObject *
2747_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2748/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2749{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002750 return treebuilder_done(self);
2751}
2752
Serhiy Storchakacb985562015-05-04 15:32:48 +03002753/*[clinic input]
2754_elementtree.TreeBuilder.start
2755
2756 tag: object
2757 attrs: object = None
2758 /
2759
2760[clinic start generated code]*/
2761
2762static PyObject *
2763_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2764 PyObject *attrs)
2765/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002766{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002767 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002768}
2769
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002770/* ==================================================================== */
2771/* the expat interface */
2772
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002773#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002774#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002775
2776/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2777 * cached globally without being in per-module state.
2778 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002779static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002780#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002781
Eli Bendersky52467b12012-06-01 07:13:08 +03002782static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2783 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2784
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002785typedef struct {
2786 PyObject_HEAD
2787
2788 XML_Parser parser;
2789
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002790 PyObject *target;
2791 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002792
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002793 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002794
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002795 PyObject *handle_start;
2796 PyObject *handle_data;
2797 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002798
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002799 PyObject *handle_comment;
2800 PyObject *handle_pi;
2801 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002802
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002803 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002804
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002805} XMLParserObject;
2806
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002807static PyObject*
Serhiy Storchakaa5552f02017-12-15 13:11:11 +02002808_elementtree_XMLParser_doctype(XMLParserObject *self, PyObject *const *args, Py_ssize_t nargs);
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002809static PyObject *
2810_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
2811 PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002812
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002813/* helpers */
2814
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002815LOCAL(PyObject*)
2816makeuniversal(XMLParserObject* self, const char* string)
2817{
2818 /* convert a UTF-8 tag/attribute name from the expat parser
2819 to a universal name string */
2820
Antoine Pitrouc1948842012-10-01 23:40:37 +02002821 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002822 PyObject* key;
2823 PyObject* value;
2824
2825 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002826 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002827 if (!key)
2828 return NULL;
2829
2830 value = PyDict_GetItem(self->names, key);
2831
2832 if (value) {
2833 Py_INCREF(value);
2834 } else {
2835 /* new name. convert to universal name, and decode as
2836 necessary */
2837
2838 PyObject* tag;
2839 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002840 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002841
2842 /* look for namespace separator */
2843 for (i = 0; i < size; i++)
2844 if (string[i] == '}')
2845 break;
2846 if (i != size) {
2847 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002848 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002849 if (tag == NULL) {
2850 Py_DECREF(key);
2851 return NULL;
2852 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002853 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002854 p[0] = '{';
2855 memcpy(p+1, string, size);
2856 size++;
2857 } else {
2858 /* plain name; use key as tag */
2859 Py_INCREF(key);
2860 tag = key;
2861 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002862
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002863 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002864 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002865 value = PyUnicode_DecodeUTF8(p, size, "strict");
2866 Py_DECREF(tag);
2867 if (!value) {
2868 Py_DECREF(key);
2869 return NULL;
2870 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002871
2872 /* add to names dictionary */
2873 if (PyDict_SetItem(self->names, key, value) < 0) {
2874 Py_DECREF(key);
2875 Py_DECREF(value);
2876 return NULL;
2877 }
2878 }
2879
2880 Py_DECREF(key);
2881 return value;
2882}
2883
Eli Bendersky5b77d812012-03-16 08:20:05 +02002884/* Set the ParseError exception with the given parameters.
2885 * If message is not NULL, it's used as the error string. Otherwise, the
2886 * message string is the default for the given error_code.
2887*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002888static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002889expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2890 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002891{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002892 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002893 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002894
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002895 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002896 message ? message : EXPAT(ErrorString)(error_code),
2897 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002898 if (errmsg == NULL)
2899 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002900
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002901 error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002902 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002903 if (!error)
2904 return;
2905
Eli Bendersky5b77d812012-03-16 08:20:05 +02002906 /* Add code and position attributes */
2907 code = PyLong_FromLong((long)error_code);
2908 if (!code) {
2909 Py_DECREF(error);
2910 return;
2911 }
2912 if (PyObject_SetAttrString(error, "code", code) == -1) {
2913 Py_DECREF(error);
2914 Py_DECREF(code);
2915 return;
2916 }
2917 Py_DECREF(code);
2918
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002919 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002920 if (!position) {
2921 Py_DECREF(error);
2922 return;
2923 }
2924 if (PyObject_SetAttrString(error, "position", position) == -1) {
2925 Py_DECREF(error);
2926 Py_DECREF(position);
2927 return;
2928 }
2929 Py_DECREF(position);
2930
Eli Bendersky532d03e2013-08-10 08:00:39 -07002931 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002932 Py_DECREF(error);
2933}
2934
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002935/* -------------------------------------------------------------------- */
2936/* handlers */
2937
2938static void
2939expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2940 int data_len)
2941{
2942 PyObject* key;
2943 PyObject* value;
2944 PyObject* res;
2945
2946 if (data_len < 2 || data_in[0] != '&')
2947 return;
2948
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002949 if (PyErr_Occurred())
2950 return;
2951
Neal Norwitz0269b912007-08-08 06:56:02 +00002952 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002953 if (!key)
2954 return;
2955
2956 value = PyDict_GetItem(self->entity, key);
2957
2958 if (value) {
2959 if (TreeBuilder_CheckExact(self->target))
2960 res = treebuilder_handle_data(
2961 (TreeBuilderObject*) self->target, value
2962 );
2963 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002964 res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002965 else
2966 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002967 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002968 } else if (!PyErr_Occurred()) {
2969 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002970 char message[128] = "undefined entity ";
2971 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002972 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002973 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002974 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002975 EXPAT(GetErrorColumnNumber)(self->parser),
2976 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002977 );
2978 }
2979
2980 Py_DECREF(key);
2981}
2982
2983static void
2984expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2985 const XML_Char **attrib_in)
2986{
2987 PyObject* res;
2988 PyObject* tag;
2989 PyObject* attrib;
2990 int ok;
2991
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002992 if (PyErr_Occurred())
2993 return;
2994
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002995 /* tag name */
2996 tag = makeuniversal(self, tag_in);
2997 if (!tag)
2998 return; /* parser will look for errors */
2999
3000 /* attributes */
3001 if (attrib_in[0]) {
3002 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003003 if (!attrib) {
3004 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003005 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003006 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003007 while (attrib_in[0] && attrib_in[1]) {
3008 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00003009 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003010 if (!key || !value) {
3011 Py_XDECREF(value);
3012 Py_XDECREF(key);
3013 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003014 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003015 return;
3016 }
3017 ok = PyDict_SetItem(attrib, key, value);
3018 Py_DECREF(value);
3019 Py_DECREF(key);
3020 if (ok < 0) {
3021 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003022 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003023 return;
3024 }
3025 attrib_in += 2;
3026 }
3027 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003028 Py_INCREF(Py_None);
3029 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03003030 }
3031
3032 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003033 /* shortcut */
3034 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3035 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003036 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003037 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003038 if (attrib == Py_None) {
3039 Py_DECREF(attrib);
3040 attrib = PyDict_New();
3041 if (!attrib) {
3042 Py_DECREF(tag);
3043 return;
3044 }
3045 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003046 res = PyObject_CallFunctionObjArgs(self->handle_start,
3047 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003048 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003049 res = NULL;
3050
3051 Py_DECREF(tag);
3052 Py_DECREF(attrib);
3053
3054 Py_XDECREF(res);
3055}
3056
3057static void
3058expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3059 int data_len)
3060{
3061 PyObject* data;
3062 PyObject* res;
3063
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003064 if (PyErr_Occurred())
3065 return;
3066
Neal Norwitz0269b912007-08-08 06:56:02 +00003067 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003068 if (!data)
3069 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003070
3071 if (TreeBuilder_CheckExact(self->target))
3072 /* shortcut */
3073 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3074 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003075 res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003076 else
3077 res = NULL;
3078
3079 Py_DECREF(data);
3080
3081 Py_XDECREF(res);
3082}
3083
3084static void
3085expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3086{
3087 PyObject* tag;
3088 PyObject* res = NULL;
3089
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003090 if (PyErr_Occurred())
3091 return;
3092
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003093 if (TreeBuilder_CheckExact(self->target))
3094 /* shortcut */
3095 /* the standard tree builder doesn't look at the end tag */
3096 res = treebuilder_handle_end(
3097 (TreeBuilderObject*) self->target, Py_None
3098 );
3099 else if (self->handle_end) {
3100 tag = makeuniversal(self, tag_in);
3101 if (tag) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003102 res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003103 Py_DECREF(tag);
3104 }
3105 }
3106
3107 Py_XDECREF(res);
3108}
3109
3110static void
3111expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3112 const XML_Char *uri)
3113{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003114 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3115 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003116
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003117 if (PyErr_Occurred())
3118 return;
3119
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003120 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003121 return;
3122
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003123 if (!uri)
3124 uri = "";
3125 if (!prefix)
3126 prefix = "";
3127
3128 parcel = Py_BuildValue("ss", prefix, uri);
3129 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003130 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003131 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3132 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003133}
3134
3135static void
3136expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3137{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003138 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3139
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003140 if (PyErr_Occurred())
3141 return;
3142
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003143 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003144 return;
3145
3146 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003147}
3148
3149static void
3150expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3151{
3152 PyObject* comment;
3153 PyObject* res;
3154
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003155 if (PyErr_Occurred())
3156 return;
3157
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003158 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003159 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003160 if (comment) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003161 res = PyObject_CallFunctionObjArgs(self->handle_comment,
3162 comment, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003163 Py_XDECREF(res);
3164 Py_DECREF(comment);
3165 }
3166 }
3167}
3168
Eli Bendersky45839902013-01-13 05:14:47 -08003169static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003170expat_start_doctype_handler(XMLParserObject *self,
3171 const XML_Char *doctype_name,
3172 const XML_Char *sysid,
3173 const XML_Char *pubid,
3174 int has_internal_subset)
3175{
3176 PyObject *self_pyobj = (PyObject *)self;
3177 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3178 PyObject *parser_doctype = NULL;
3179 PyObject *res = NULL;
3180
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003181 if (PyErr_Occurred())
3182 return;
3183
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003184 doctype_name_obj = makeuniversal(self, doctype_name);
3185 if (!doctype_name_obj)
3186 return;
3187
3188 if (sysid) {
3189 sysid_obj = makeuniversal(self, sysid);
3190 if (!sysid_obj) {
3191 Py_DECREF(doctype_name_obj);
3192 return;
3193 }
3194 } else {
3195 Py_INCREF(Py_None);
3196 sysid_obj = Py_None;
3197 }
3198
3199 if (pubid) {
3200 pubid_obj = makeuniversal(self, pubid);
3201 if (!pubid_obj) {
3202 Py_DECREF(doctype_name_obj);
3203 Py_DECREF(sysid_obj);
3204 return;
3205 }
3206 } else {
3207 Py_INCREF(Py_None);
3208 pubid_obj = Py_None;
3209 }
3210
3211 /* If the target has a handler for doctype, call it. */
3212 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003213 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3214 doctype_name_obj, pubid_obj,
3215 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003216 Py_CLEAR(res);
3217 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003218 else {
3219 /* Now see if the parser itself has a doctype method. If yes and it's
3220 * a custom method, call it but warn about deprecation. If it's only
3221 * the vanilla XMLParser method, do nothing.
3222 */
3223 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3224 if (parser_doctype &&
3225 !(PyCFunction_Check(parser_doctype) &&
3226 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3227 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003228 (PyCFunction) _elementtree_XMLParser_doctype)) {
3229 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3230 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003231 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003232 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003233 Py_DECREF(res);
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003234 res = PyObject_CallFunctionObjArgs(parser_doctype,
3235 doctype_name_obj, pubid_obj,
3236 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003237 Py_CLEAR(res);
3238 }
3239 }
3240
3241clear:
3242 Py_XDECREF(parser_doctype);
3243 Py_DECREF(doctype_name_obj);
3244 Py_DECREF(pubid_obj);
3245 Py_DECREF(sysid_obj);
3246}
3247
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003248static void
3249expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3250 const XML_Char* data_in)
3251{
3252 PyObject* target;
3253 PyObject* data;
3254 PyObject* res;
3255
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003256 if (PyErr_Occurred())
3257 return;
3258
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003259 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003260 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3261 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003262 if (target && data) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003263 res = PyObject_CallFunctionObjArgs(self->handle_pi,
3264 target, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003265 Py_XDECREF(res);
3266 Py_DECREF(data);
3267 Py_DECREF(target);
3268 } else {
3269 Py_XDECREF(data);
3270 Py_XDECREF(target);
3271 }
3272 }
3273}
3274
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003275/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003276
Eli Bendersky52467b12012-06-01 07:13:08 +03003277static PyObject *
3278xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003279{
Eli Bendersky52467b12012-06-01 07:13:08 +03003280 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3281 if (self) {
3282 self->parser = NULL;
3283 self->target = self->entity = self->names = NULL;
3284 self->handle_start = self->handle_data = self->handle_end = NULL;
3285 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003286 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003287 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003288 return (PyObject *)self;
3289}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003290
scoderc8d8e152017-09-14 22:00:03 +02003291static int
3292ignore_attribute_error(PyObject *value)
3293{
3294 if (value == NULL) {
3295 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3296 return -1;
3297 }
3298 PyErr_Clear();
3299 }
3300 return 0;
3301}
3302
Serhiy Storchakacb985562015-05-04 15:32:48 +03003303/*[clinic input]
3304_elementtree.XMLParser.__init__
3305
3306 html: object = NULL
3307 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003308 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003309
3310[clinic start generated code]*/
3311
Eli Bendersky52467b12012-06-01 07:13:08 +03003312static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003313_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3314 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003315/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003316{
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003317 if (html != NULL) {
3318 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3319 "The html argument of XMLParser() is deprecated",
3320 1) < 0) {
3321 return -1;
3322 }
3323 }
3324
Serhiy Storchakacb985562015-05-04 15:32:48 +03003325 self->entity = PyDict_New();
3326 if (!self->entity)
3327 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003328
Serhiy Storchakacb985562015-05-04 15:32:48 +03003329 self->names = PyDict_New();
3330 if (!self->names) {
3331 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003332 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003333 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003334
Serhiy Storchakacb985562015-05-04 15:32:48 +03003335 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3336 if (!self->parser) {
3337 Py_CLEAR(self->entity);
3338 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003339 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003340 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003341 }
Miss Islington (bot)470a4352018-09-18 06:11:09 -07003342 /* expat < 2.1.0 has no XML_SetHashSalt() */
3343 if (EXPAT(SetHashSalt) != NULL) {
3344 EXPAT(SetHashSalt)(self->parser,
3345 (unsigned long)_Py_HashSecret.expat.hashsalt);
3346 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003347
Eli Bendersky52467b12012-06-01 07:13:08 +03003348 if (target) {
3349 Py_INCREF(target);
3350 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003351 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003352 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003353 Py_CLEAR(self->entity);
3354 Py_CLEAR(self->names);
3355 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003356 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003357 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003358 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003359 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003360
Serhiy Storchakacb985562015-05-04 15:32:48 +03003361 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003362 if (ignore_attribute_error(self->handle_start)) {
3363 return -1;
3364 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003365 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003366 if (ignore_attribute_error(self->handle_data)) {
3367 return -1;
3368 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003369 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003370 if (ignore_attribute_error(self->handle_end)) {
3371 return -1;
3372 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003373 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003374 if (ignore_attribute_error(self->handle_comment)) {
3375 return -1;
3376 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003377 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003378 if (ignore_attribute_error(self->handle_pi)) {
3379 return -1;
3380 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003381 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003382 if (ignore_attribute_error(self->handle_close)) {
3383 return -1;
3384 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003385 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003386 if (ignore_attribute_error(self->handle_doctype)) {
3387 return -1;
3388 }
Eli Bendersky45839902013-01-13 05:14:47 -08003389
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003390 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003391 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003392 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003393 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003394 (XML_StartElementHandler) expat_start_handler,
3395 (XML_EndElementHandler) expat_end_handler
3396 );
3397 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003398 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003399 (XML_DefaultHandler) expat_default_handler
3400 );
3401 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003402 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003403 (XML_CharacterDataHandler) expat_data_handler
3404 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003405 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003406 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003407 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003408 (XML_CommentHandler) expat_comment_handler
3409 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003410 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003411 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003412 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003413 (XML_ProcessingInstructionHandler) expat_pi_handler
3414 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003415 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003416 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003417 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3418 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003419 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003420 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003421 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003422 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003423
Eli Bendersky52467b12012-06-01 07:13:08 +03003424 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003425}
3426
Eli Bendersky52467b12012-06-01 07:13:08 +03003427static int
3428xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3429{
3430 Py_VISIT(self->handle_close);
3431 Py_VISIT(self->handle_pi);
3432 Py_VISIT(self->handle_comment);
3433 Py_VISIT(self->handle_end);
3434 Py_VISIT(self->handle_data);
3435 Py_VISIT(self->handle_start);
3436
3437 Py_VISIT(self->target);
3438 Py_VISIT(self->entity);
3439 Py_VISIT(self->names);
3440
3441 return 0;
3442}
3443
3444static int
3445xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003446{
Victor Stinnere727d412017-09-18 05:29:37 -07003447 if (self->parser != NULL) {
3448 XML_Parser parser = self->parser;
3449 self->parser = NULL;
3450 EXPAT(ParserFree)(parser);
3451 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003452
Antoine Pitrouc1948842012-10-01 23:40:37 +02003453 Py_CLEAR(self->handle_close);
3454 Py_CLEAR(self->handle_pi);
3455 Py_CLEAR(self->handle_comment);
3456 Py_CLEAR(self->handle_end);
3457 Py_CLEAR(self->handle_data);
3458 Py_CLEAR(self->handle_start);
3459 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003460
Antoine Pitrouc1948842012-10-01 23:40:37 +02003461 Py_CLEAR(self->target);
3462 Py_CLEAR(self->entity);
3463 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003464
Eli Bendersky52467b12012-06-01 07:13:08 +03003465 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003466}
3467
Eli Bendersky52467b12012-06-01 07:13:08 +03003468static void
3469xmlparser_dealloc(XMLParserObject* self)
3470{
3471 PyObject_GC_UnTrack(self);
3472 xmlparser_gc_clear(self);
3473 Py_TYPE(self)->tp_free((PyObject *)self);
3474}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003475
3476LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003477expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003478{
3479 int ok;
3480
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003481 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003482 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3483
3484 if (PyErr_Occurred())
3485 return NULL;
3486
3487 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003488 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003489 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003490 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003491 EXPAT(GetErrorColumnNumber)(self->parser),
3492 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003493 );
3494 return NULL;
3495 }
3496
3497 Py_RETURN_NONE;
3498}
3499
Serhiy Storchakacb985562015-05-04 15:32:48 +03003500/*[clinic input]
3501_elementtree.XMLParser.close
3502
3503[clinic start generated code]*/
3504
3505static PyObject *
3506_elementtree_XMLParser_close_impl(XMLParserObject *self)
3507/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003508{
3509 /* end feeding data to parser */
3510
3511 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003512 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003513 if (!res)
3514 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003515
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003516 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003517 Py_DECREF(res);
3518 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003519 }
3520 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003521 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003522 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003523 }
3524 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003525 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003526 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003527}
3528
Serhiy Storchakacb985562015-05-04 15:32:48 +03003529/*[clinic input]
3530_elementtree.XMLParser.feed
3531
3532 data: object
3533 /
3534
3535[clinic start generated code]*/
3536
3537static PyObject *
3538_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3539/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003540{
3541 /* feed data to parser */
3542
Serhiy Storchakacb985562015-05-04 15:32:48 +03003543 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003544 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003545 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3546 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003547 return NULL;
3548 if (data_len > INT_MAX) {
3549 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3550 return NULL;
3551 }
3552 /* Explicitly set UTF-8 encoding. Return code ignored. */
3553 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003554 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003555 }
3556 else {
3557 Py_buffer view;
3558 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003559 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003560 return NULL;
3561 if (view.len > INT_MAX) {
3562 PyBuffer_Release(&view);
3563 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3564 return NULL;
3565 }
3566 res = expat_parse(self, view.buf, (int)view.len, 0);
3567 PyBuffer_Release(&view);
3568 return res;
3569 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003570}
3571
Serhiy Storchakacb985562015-05-04 15:32:48 +03003572/*[clinic input]
3573_elementtree.XMLParser._parse_whole
3574
3575 file: object
3576 /
3577
3578[clinic start generated code]*/
3579
3580static PyObject *
3581_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3582/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003583{
Eli Benderskya3699232013-05-19 18:47:23 -07003584 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003585 PyObject* reader;
3586 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003587 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003588 PyObject* res;
3589
Serhiy Storchakacb985562015-05-04 15:32:48 +03003590 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003591 if (!reader)
3592 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003593
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003594 /* read from open file object */
3595 for (;;) {
3596
3597 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3598
3599 if (!buffer) {
3600 /* read failed (e.g. due to KeyboardInterrupt) */
3601 Py_DECREF(reader);
3602 return NULL;
3603 }
3604
Eli Benderskyf996e772012-03-16 05:53:30 +02003605 if (PyUnicode_CheckExact(buffer)) {
3606 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003607 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003608 Py_DECREF(buffer);
3609 break;
3610 }
3611 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003612 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003613 if (!temp) {
3614 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003615 Py_DECREF(reader);
3616 return NULL;
3617 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003618 buffer = temp;
3619 }
3620 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003621 Py_DECREF(buffer);
3622 break;
3623 }
3624
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003625 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3626 Py_DECREF(buffer);
3627 Py_DECREF(reader);
3628 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3629 return NULL;
3630 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003631 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003632 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003633 );
3634
3635 Py_DECREF(buffer);
3636
3637 if (!res) {
3638 Py_DECREF(reader);
3639 return NULL;
3640 }
3641 Py_DECREF(res);
3642
3643 }
3644
3645 Py_DECREF(reader);
3646
3647 res = expat_parse(self, "", 0, 1);
3648
3649 if (res && TreeBuilder_CheckExact(self->target)) {
3650 Py_DECREF(res);
3651 return treebuilder_done((TreeBuilderObject*) self->target);
3652 }
3653
3654 return res;
3655}
3656
Serhiy Storchakacb985562015-05-04 15:32:48 +03003657/*[clinic input]
3658_elementtree.XMLParser.doctype
3659
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003660 name: object
3661 pubid: object
3662 system: object
3663 /
3664
Serhiy Storchakacb985562015-05-04 15:32:48 +03003665[clinic start generated code]*/
3666
3667static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003668_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3669 PyObject *pubid, PyObject *system)
3670/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003671{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003672 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3673 "This method of XMLParser is deprecated. Define"
3674 " doctype() method on the TreeBuilder target.",
3675 1) < 0) {
3676 return NULL;
3677 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003678 Py_RETURN_NONE;
3679}
3680
Serhiy Storchakacb985562015-05-04 15:32:48 +03003681/*[clinic input]
3682_elementtree.XMLParser._setevents
3683
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003684 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003685 events_to_report: object = None
3686 /
3687
3688[clinic start generated code]*/
3689
3690static PyObject *
3691_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3692 PyObject *events_queue,
3693 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003694/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003695{
3696 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003697 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003698 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003699 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003700
3701 if (!TreeBuilder_CheckExact(self->target)) {
3702 PyErr_SetString(
3703 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003704 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003705 "targets"
3706 );
3707 return NULL;
3708 }
3709
3710 target = (TreeBuilderObject*) self->target;
3711
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003712 events_append = PyObject_GetAttrString(events_queue, "append");
3713 if (events_append == NULL)
3714 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003715 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003716
3717 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003718 Py_CLEAR(target->start_event_obj);
3719 Py_CLEAR(target->end_event_obj);
3720 Py_CLEAR(target->start_ns_event_obj);
3721 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003722
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003723 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003724 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003725 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003726 Py_RETURN_NONE;
3727 }
3728
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003729 if (!(events_seq = PySequence_Fast(events_to_report,
3730 "events must be a sequence"))) {
3731 return NULL;
3732 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003733
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03003734 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003735 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02003736 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003737 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003738 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003739 } else if (PyBytes_Check(event_name_obj)) {
3740 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003741 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003742 if (event_name == NULL) {
3743 Py_DECREF(events_seq);
3744 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3745 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003746 }
3747
3748 Py_INCREF(event_name_obj);
3749 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003750 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003751 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003752 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003753 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003754 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003755 EXPAT(SetNamespaceDeclHandler)(
3756 self->parser,
3757 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3758 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3759 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003760 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003761 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003762 EXPAT(SetNamespaceDeclHandler)(
3763 self->parser,
3764 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3765 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3766 );
3767 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003768 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003769 Py_DECREF(events_seq);
3770 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003771 return NULL;
3772 }
3773 }
3774
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003775 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003776 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003777}
3778
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003779static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003780xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003781{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003782 if (PyUnicode_Check(nameobj)) {
3783 PyObject* res;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003784 if (_PyUnicode_EqualToASCIIString(nameobj, "entity"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003785 res = self->entity;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003786 else if (_PyUnicode_EqualToASCIIString(nameobj, "target"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003787 res = self->target;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003788 else if (_PyUnicode_EqualToASCIIString(nameobj, "version")) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00003789 return PyUnicode_FromFormat(
3790 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003791 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003792 }
3793 else
3794 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003795
Alexander Belopolskye239d232010-12-08 23:31:48 +00003796 Py_INCREF(res);
3797 return res;
3798 }
3799 generic:
3800 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003801}
3802
Serhiy Storchakacb985562015-05-04 15:32:48 +03003803#include "clinic/_elementtree.c.h"
3804
3805static PyMethodDef element_methods[] = {
3806
3807 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3808
3809 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3810 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3811
3812 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3813 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3814 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3815
3816 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3817 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3818 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3819 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3820
3821 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3822 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3823 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3824
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003825 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003826 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3827
3828 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3829 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3830
3831 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3832
3833 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3834 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3835 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3836 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3837 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3838
3839 {NULL, NULL}
3840};
3841
3842static PyMappingMethods element_as_mapping = {
3843 (lenfunc) element_length,
3844 (binaryfunc) element_subscr,
3845 (objobjargproc) element_ass_subscr,
3846};
3847
Serhiy Storchakadde08152015-11-25 15:28:13 +02003848static PyGetSetDef element_getsetlist[] = {
3849 {"tag",
3850 (getter)element_tag_getter,
3851 (setter)element_tag_setter,
3852 "A string identifying what kind of data this element represents"},
3853 {"text",
3854 (getter)element_text_getter,
3855 (setter)element_text_setter,
3856 "A string of text directly after the start tag, or None"},
3857 {"tail",
3858 (getter)element_tail_getter,
3859 (setter)element_tail_setter,
3860 "A string of text directly after the end tag, or None"},
3861 {"attrib",
3862 (getter)element_attrib_getter,
3863 (setter)element_attrib_setter,
3864 "A dictionary containing the element's attributes"},
3865 {NULL},
3866};
3867
Serhiy Storchakacb985562015-05-04 15:32:48 +03003868static PyTypeObject Element_Type = {
3869 PyVarObject_HEAD_INIT(NULL, 0)
3870 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3871 /* methods */
3872 (destructor)element_dealloc, /* tp_dealloc */
3873 0, /* tp_print */
3874 0, /* tp_getattr */
3875 0, /* tp_setattr */
3876 0, /* tp_reserved */
3877 (reprfunc)element_repr, /* tp_repr */
3878 0, /* tp_as_number */
3879 &element_as_sequence, /* tp_as_sequence */
3880 &element_as_mapping, /* tp_as_mapping */
3881 0, /* tp_hash */
3882 0, /* tp_call */
3883 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003884 PyObject_GenericGetAttr, /* tp_getattro */
3885 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003886 0, /* tp_as_buffer */
3887 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3888 /* tp_flags */
3889 0, /* tp_doc */
3890 (traverseproc)element_gc_traverse, /* tp_traverse */
3891 (inquiry)element_gc_clear, /* tp_clear */
3892 0, /* tp_richcompare */
3893 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3894 0, /* tp_iter */
3895 0, /* tp_iternext */
3896 element_methods, /* tp_methods */
3897 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003898 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003899 0, /* tp_base */
3900 0, /* tp_dict */
3901 0, /* tp_descr_get */
3902 0, /* tp_descr_set */
3903 0, /* tp_dictoffset */
3904 (initproc)element_init, /* tp_init */
3905 PyType_GenericAlloc, /* tp_alloc */
3906 element_new, /* tp_new */
3907 0, /* tp_free */
3908};
3909
3910static PyMethodDef treebuilder_methods[] = {
3911 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3912 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3913 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3914 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3915 {NULL, NULL}
3916};
3917
3918static PyTypeObject TreeBuilder_Type = {
3919 PyVarObject_HEAD_INIT(NULL, 0)
3920 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3921 /* methods */
3922 (destructor)treebuilder_dealloc, /* tp_dealloc */
3923 0, /* tp_print */
3924 0, /* tp_getattr */
3925 0, /* tp_setattr */
3926 0, /* tp_reserved */
3927 0, /* tp_repr */
3928 0, /* tp_as_number */
3929 0, /* tp_as_sequence */
3930 0, /* tp_as_mapping */
3931 0, /* tp_hash */
3932 0, /* tp_call */
3933 0, /* tp_str */
3934 0, /* tp_getattro */
3935 0, /* tp_setattro */
3936 0, /* tp_as_buffer */
3937 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3938 /* tp_flags */
3939 0, /* tp_doc */
3940 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3941 (inquiry)treebuilder_gc_clear, /* tp_clear */
3942 0, /* tp_richcompare */
3943 0, /* tp_weaklistoffset */
3944 0, /* tp_iter */
3945 0, /* tp_iternext */
3946 treebuilder_methods, /* tp_methods */
3947 0, /* tp_members */
3948 0, /* tp_getset */
3949 0, /* tp_base */
3950 0, /* tp_dict */
3951 0, /* tp_descr_get */
3952 0, /* tp_descr_set */
3953 0, /* tp_dictoffset */
3954 _elementtree_TreeBuilder___init__, /* tp_init */
3955 PyType_GenericAlloc, /* tp_alloc */
3956 treebuilder_new, /* tp_new */
3957 0, /* tp_free */
3958};
3959
3960static PyMethodDef xmlparser_methods[] = {
3961 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3962 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3963 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3964 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3965 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3966 {NULL, NULL}
3967};
3968
Neal Norwitz227b5332006-03-22 09:28:35 +00003969static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003970 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003971 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003972 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003973 (destructor)xmlparser_dealloc, /* tp_dealloc */
3974 0, /* tp_print */
3975 0, /* tp_getattr */
3976 0, /* tp_setattr */
3977 0, /* tp_reserved */
3978 0, /* tp_repr */
3979 0, /* tp_as_number */
3980 0, /* tp_as_sequence */
3981 0, /* tp_as_mapping */
3982 0, /* tp_hash */
3983 0, /* tp_call */
3984 0, /* tp_str */
3985 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3986 0, /* tp_setattro */
3987 0, /* tp_as_buffer */
3988 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3989 /* tp_flags */
3990 0, /* tp_doc */
3991 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3992 (inquiry)xmlparser_gc_clear, /* tp_clear */
3993 0, /* tp_richcompare */
3994 0, /* tp_weaklistoffset */
3995 0, /* tp_iter */
3996 0, /* tp_iternext */
3997 xmlparser_methods, /* tp_methods */
3998 0, /* tp_members */
3999 0, /* tp_getset */
4000 0, /* tp_base */
4001 0, /* tp_dict */
4002 0, /* tp_descr_get */
4003 0, /* tp_descr_set */
4004 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004005 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03004006 PyType_GenericAlloc, /* tp_alloc */
4007 xmlparser_new, /* tp_new */
4008 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004009};
4010
/* ==================================================================== */
/* python module interface */
4013
4014static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08004015 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004016 {NULL, NULL}
4017};
4018
Martin v. Löwis1a214512008-06-11 05:26:20 +00004019
Eli Bendersky532d03e2013-08-10 08:00:39 -07004020static struct PyModuleDef elementtreemodule = {
4021 PyModuleDef_HEAD_INIT,
4022 "_elementtree",
4023 NULL,
4024 sizeof(elementtreestate),
4025 _functions,
4026 NULL,
4027 elementtree_traverse,
4028 elementtree_clear,
4029 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00004030};
4031
Neal Norwitzf6657e62006-12-28 04:47:50 +00004032PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00004033PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004034{
Eli Bendersky64d11e62012-06-15 07:42:50 +03004035 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004036 elementtreestate *st;
4037
4038 m = PyState_FindModule(&elementtreemodule);
4039 if (m) {
4040 Py_INCREF(m);
4041 return m;
4042 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004043
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004044 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02004045 if (PyType_Ready(&ElementIter_Type) < 0)
4046 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004047 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004048 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004049 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004050 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004051 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004052 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004053
Eli Bendersky532d03e2013-08-10 08:00:39 -07004054 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00004055 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00004056 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004057 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00004058
Eli Bendersky828efde2012-04-05 05:40:58 +03004059 if (!(temp = PyImport_ImportModule("copy")))
4060 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004061 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03004062 Py_XDECREF(temp);
4063
Victor Stinnerb136f112017-07-10 22:28:02 +02004064 if (st->deepcopy_obj == NULL) {
4065 return NULL;
4066 }
4067
4068 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004069 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004070 return NULL;
4071
Eli Bendersky20d41742012-06-01 09:48:37 +03004072 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004073 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4074 if (expat_capi) {
4075 /* check that it's usable */
4076 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004077 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004078 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4079 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004080 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004081 PyErr_SetString(PyExc_ImportError,
4082 "pyexpat version is incompatible");
4083 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004084 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004085 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004086 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004087 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004088
Eli Bendersky532d03e2013-08-10 08:00:39 -07004089 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004090 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004091 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004092 Py_INCREF(st->parseerror_obj);
4093 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004094
Eli Bendersky092af1f2012-03-04 07:14:03 +02004095 Py_INCREF((PyObject *)&Element_Type);
4096 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4097
Eli Bendersky58d548d2012-05-29 15:45:16 +03004098 Py_INCREF((PyObject *)&TreeBuilder_Type);
4099 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4100
Eli Bendersky52467b12012-06-01 07:13:08 +03004101 Py_INCREF((PyObject *)&XMLParser_Type);
4102 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004103
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004104 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004105}