/*--------------------------------------------------------------------
 * Licensed to PSF under a Contributor Agreement.
 * See http://www.python.org/psf/license for licensing details.
 *
 * _elementtree - C accelerator for xml.etree.ElementTree
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 *--------------------------------------------------------------------
 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
/* -------------------------------------------------------------------- */
/* configuration */

/* Number of child slots embedded directly in an element's extra block;
   an element can hold this many children without an additional memory
   allocation.

   For best performance, choose a value so that 80-90% of all nodes have
   no more than the given number of children.  Set this to zero to
   minimize the size of the element structure itself (this only helps if
   you have lots of leaf nodes with attributes).

   Also note that pymalloc always allocates blocks in multiples of eight
   bytes.  For the current C version of ElementTree, this means that the
   number of children should be an even number, at least on 32-bit
   platforms. */
#define STATIC_CHILDREN 4

/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  Flip the condition below to 1 to keep a
   running byte count and print it on every ALLOC/RELEASE; in the default
   configuration both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) declares a file-local function returning
   `type`; under MSVC it additionally requests inlining and the fastcall
   calling convention. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55
/* Macros used to store a 'join' flag in the lowest bit of a string object
   pointer.  Every use of text and tail as object pointers must go through
   JOIN_OBJ.  See the comments in the ElementObject definition for more
   info.

     JOIN_GET(p)       -> the flag bit (0 or 1) stored in p
     JOIN_SET(p, flag) -> p's object pointer with the flag bit OR-ed in
     JOIN_OBJ(p)       -> p with the flag bit masked off, as PyObject*  */
#define JOIN_GET(p) ((uintptr_t)(p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t)(JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t)(p) & ~(uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
Ronald Oussoren138d0802013-07-19 11:11:25 +020083/* Types defined by this extension */
84static PyTypeObject Element_Type;
85static PyTypeObject ElementIter_Type;
86static PyTypeObject TreeBuilder_Type;
87static PyTypeObject XMLParser_Type;
88
89
Eli Bendersky532d03e2013-08-10 08:00:39 -070090/* Per-module state; PEP 3121 */
91typedef struct {
92 PyObject *parseerror_obj;
93 PyObject *deepcopy_obj;
94 PyObject *elementpath_obj;
95} elementtreestate;
96
97static struct PyModuleDef elementtreemodule;
98
99/* Given a module object (assumed to be _elementtree), get its per-module
100 * state.
101 */
102#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
103
104/* Find the module instance imported in the currently running sub-interpreter
105 * and get its state.
106 */
107#define ET_STATE_GLOBAL \
108 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
109
110static int
111elementtree_clear(PyObject *m)
112{
113 elementtreestate *st = ET_STATE(m);
114 Py_CLEAR(st->parseerror_obj);
115 Py_CLEAR(st->deepcopy_obj);
116 Py_CLEAR(st->elementpath_obj);
117 return 0;
118}
119
120static int
121elementtree_traverse(PyObject *m, visitproc visit, void *arg)
122{
123 elementtreestate *st = ET_STATE(m);
124 Py_VISIT(st->parseerror_obj);
125 Py_VISIT(st->deepcopy_obj);
126 Py_VISIT(st->elementpath_obj);
127 return 0;
128}
129
130static void
131elementtree_free(void *m)
132{
133 elementtree_clear((PyObject *)m);
134}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135
136/* helpers */
137
138LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139list_join(PyObject* list)
140{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300141 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000143 PyObject* result;
144
Antoine Pitrouc1948842012-10-01 23:40:37 +0200145 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000146 if (!joiner)
147 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200148 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000150 return result;
151}
152
Eli Bendersky48d358b2012-05-30 17:57:50 +0300153/* Is the given object an empty dictionary?
154*/
155static int
156is_empty_dict(PyObject *obj)
157{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200158 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300159}
160
161
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000162/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200163/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000164
165typedef struct {
166
167 /* attributes (a dictionary object), or None if no attributes */
168 PyObject* attrib;
169
170 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200171 Py_ssize_t length; /* actual number of items */
172 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000173
174 /* this either points to _children or to a malloced buffer */
175 PyObject* *children;
176
177 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179} ElementObjectExtra;
180
181typedef struct {
182 PyObject_HEAD
183
184 /* element tag (a string). */
185 PyObject* tag;
186
187 /* text before first child. note that this is a tagged pointer;
188 use JOIN_OBJ to get the object pointer. the join flag is used
189 to distinguish lists created by the tree builder from lists
190 assigned to the attribute by application code; the former
191 should be joined before being returned to the user, the latter
192 should be left intact. */
193 PyObject* text;
194
195 /* text after this element, in parent. note that this is a tagged
196 pointer; use JOIN_OBJ to get the object pointer. */
197 PyObject* tail;
198
199 ElementObjectExtra* extra;
200
Eli Benderskyebf37a22012-04-03 22:02:37 +0300201 PyObject *weakreflist; /* For tp_weaklistoffset */
202
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000203} ElementObject;
204
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
/* True when op's type is exactly Element_Type (subclasses do not match). */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000207
208/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200209/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000210
211LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200212create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000213{
214 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200215 if (!self->extra) {
216 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000217 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200218 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000219
220 if (!attrib)
221 attrib = Py_None;
222
223 Py_INCREF(attrib);
224 self->extra->attrib = attrib;
225
226 self->extra->length = 0;
227 self->extra->allocated = STATIC_CHILDREN;
228 self->extra->children = self->extra->_children;
229
230 return 0;
231}
232
233LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200234dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000235{
Eli Bendersky08b85292012-04-04 15:55:07 +0300236 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200237 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300238
Eli Benderskyebf37a22012-04-03 22:02:37 +0300239 if (!self->extra)
240 return;
241
242 /* Avoid DECREFs calling into this code again (cycles, etc.)
243 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300244 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300245 self->extra = NULL;
246
247 Py_DECREF(myextra->attrib);
248
Eli Benderskyebf37a22012-04-03 22:02:37 +0300249 for (i = 0; i < myextra->length; i++)
250 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000251
Eli Benderskyebf37a22012-04-03 22:02:37 +0300252 if (myextra->children != myextra->_children)
253 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000254
Eli Benderskyebf37a22012-04-03 22:02:37 +0300255 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256}
257
Eli Bendersky092af1f2012-03-04 07:14:03 +0200258/* Convenience internal function to create new Element objects with the given
259 * tag and attributes.
260*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000261LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200262create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000263{
264 ElementObject* self;
265
Eli Bendersky0192ba32012-03-30 16:38:33 +0300266 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000267 if (self == NULL)
268 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000269 self->extra = NULL;
270
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000271 Py_INCREF(tag);
272 self->tag = tag;
273
274 Py_INCREF(Py_None);
275 self->text = Py_None;
276
277 Py_INCREF(Py_None);
278 self->tail = Py_None;
279
Eli Benderskyebf37a22012-04-03 22:02:37 +0300280 self->weakreflist = NULL;
281
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200282 ALLOC(sizeof(ElementObject), "create element");
283 PyObject_GC_Track(self);
284
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200285 if (attrib != Py_None && !is_empty_dict(attrib)) {
286 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200287 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200288 return NULL;
289 }
290 }
291
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000292 return (PyObject*) self;
293}
294
Eli Bendersky092af1f2012-03-04 07:14:03 +0200295static PyObject *
296element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
297{
298 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
299 if (e != NULL) {
300 Py_INCREF(Py_None);
301 e->tag = Py_None;
302
303 Py_INCREF(Py_None);
304 e->text = Py_None;
305
306 Py_INCREF(Py_None);
307 e->tail = Py_None;
308
309 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300310 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200311 }
312 return (PyObject *)e;
313}
314
Eli Bendersky737b1732012-05-29 06:02:56 +0300315/* Helper function for extracting the attrib dictionary from a keywords dict.
316 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800317 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300318 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700319 *
320 * Return a dictionary with the content of kwds merged into the content of
321 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300322 */
323static PyObject*
324get_attrib_from_keywords(PyObject *kwds)
325{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700326 PyObject *attrib_str = PyUnicode_FromString("attrib");
327 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300328
329 if (attrib) {
330 /* If attrib was found in kwds, copy its value and remove it from
331 * kwds
332 */
333 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700334 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300335 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
336 Py_TYPE(attrib)->tp_name);
337 return NULL;
338 }
339 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700340 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300341 } else {
342 attrib = PyDict_New();
343 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700344
345 Py_DECREF(attrib_str);
346
347 /* attrib can be NULL if PyDict_New failed */
348 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200349 if (PyDict_Update(attrib, kwds) < 0)
350 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300351 return attrib;
352}
353
Serhiy Storchakacb985562015-05-04 15:32:48 +0300354/*[clinic input]
355module _elementtree
356class _elementtree.Element "ElementObject *" "&Element_Type"
357class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
358class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
359[clinic start generated code]*/
360/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
361
Eli Bendersky092af1f2012-03-04 07:14:03 +0200362static int
363element_init(PyObject *self, PyObject *args, PyObject *kwds)
364{
365 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200366 PyObject *attrib = NULL;
367 ElementObject *self_elem;
368
369 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
370 return -1;
371
Eli Bendersky737b1732012-05-29 06:02:56 +0300372 if (attrib) {
373 /* attrib passed as positional arg */
374 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200375 if (!attrib)
376 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300377 if (kwds) {
378 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200379 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300380 return -1;
381 }
382 }
383 } else if (kwds) {
384 /* have keywords args */
385 attrib = get_attrib_from_keywords(kwds);
386 if (!attrib)
387 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200388 }
389
390 self_elem = (ElementObject *)self;
391
Antoine Pitrouc1948842012-10-01 23:40:37 +0200392 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200393 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200394 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200395 return -1;
396 }
397 }
398
Eli Bendersky48d358b2012-05-30 17:57:50 +0300399 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200400 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200401
402 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300404 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405
Eli Bendersky092af1f2012-03-04 07:14:03 +0200406 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300407 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408
Eli Bendersky092af1f2012-03-04 07:14:03 +0200409 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300410 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200411
412 return 0;
413}
414
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000415LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200416element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200418 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000419 PyObject* *children;
420
421 /* make sure self->children can hold the given number of extra
422 elements. set an exception and return -1 if allocation failed */
423
Victor Stinner5f0af232013-07-11 23:01:36 +0200424 if (!self->extra) {
425 if (create_extra(self, NULL) < 0)
426 return -1;
427 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000428
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200429 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000430
431 if (size > self->extra->allocated) {
432 /* use Python 2.4's list growth strategy */
433 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000434 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100435 * which needs at least 4 bytes.
436 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000437 * be safe.
438 */
439 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200440 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
441 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000442 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000443 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100444 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000445 * false alarm always assume at least one child to be safe.
446 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447 children = PyObject_Realloc(self->extra->children,
448 size * sizeof(PyObject*));
449 if (!children)
450 goto nomemory;
451 } else {
452 children = PyObject_Malloc(size * sizeof(PyObject*));
453 if (!children)
454 goto nomemory;
455 /* copy existing children from static area to malloc buffer */
456 memcpy(children, self->extra->children,
457 self->extra->length * sizeof(PyObject*));
458 }
459 self->extra->children = children;
460 self->extra->allocated = size;
461 }
462
463 return 0;
464
465 nomemory:
466 PyErr_NoMemory();
467 return -1;
468}
469
470LOCAL(int)
471element_add_subelement(ElementObject* self, PyObject* element)
472{
473 /* add a child element to a parent */
474
475 if (element_resize(self, 1) < 0)
476 return -1;
477
478 Py_INCREF(element);
479 self->extra->children[self->extra->length] = element;
480
481 self->extra->length++;
482
483 return 0;
484}
485
486LOCAL(PyObject*)
487element_get_attrib(ElementObject* self)
488{
489 /* return borrowed reference to attrib dictionary */
490 /* note: this function assumes that the extra section exists */
491
492 PyObject* res = self->extra->attrib;
493
494 if (res == Py_None) {
495 /* create missing dictionary */
496 res = PyDict_New();
497 if (!res)
498 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200499 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000500 self->extra->attrib = res;
501 }
502
503 return res;
504}
505
506LOCAL(PyObject*)
507element_get_text(ElementObject* self)
508{
509 /* return borrowed reference to text attribute */
510
Serhiy Storchaka576def02017-03-30 09:47:31 +0300511 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000512
513 if (JOIN_GET(res)) {
514 res = JOIN_OBJ(res);
515 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300516 PyObject *tmp = list_join(res);
517 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000518 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300519 self->text = tmp;
520 Py_DECREF(res);
521 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000522 }
523 }
524
525 return res;
526}
527
528LOCAL(PyObject*)
529element_get_tail(ElementObject* self)
530{
531 /* return borrowed reference to text attribute */
532
Serhiy Storchaka576def02017-03-30 09:47:31 +0300533 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000534
535 if (JOIN_GET(res)) {
536 res = JOIN_OBJ(res);
537 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300538 PyObject *tmp = list_join(res);
539 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000540 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300541 self->tail = tmp;
542 Py_DECREF(res);
543 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000544 }
545 }
546
547 return res;
548}
549
550static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300551subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000552{
553 PyObject* elem;
554
555 ElementObject* parent;
556 PyObject* tag;
557 PyObject* attrib = NULL;
558 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
559 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800560 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800562 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563
Eli Bendersky737b1732012-05-29 06:02:56 +0300564 if (attrib) {
565 /* attrib passed as positional arg */
566 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000567 if (!attrib)
568 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300569 if (kwds) {
570 if (PyDict_Update(attrib, kwds) < 0) {
571 return NULL;
572 }
573 }
574 } else if (kwds) {
575 /* have keyword args */
576 attrib = get_attrib_from_keywords(kwds);
577 if (!attrib)
578 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000579 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300580 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000581 Py_INCREF(Py_None);
582 attrib = Py_None;
583 }
584
Eli Bendersky092af1f2012-03-04 07:14:03 +0200585 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000586 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200587 if (elem == NULL)
588 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000589
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000590 if (element_add_subelement(parent, elem) < 0) {
591 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000592 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000593 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000594
595 return elem;
596}
597
Eli Bendersky0192ba32012-03-30 16:38:33 +0300598static int
599element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
600{
601 Py_VISIT(self->tag);
602 Py_VISIT(JOIN_OBJ(self->text));
603 Py_VISIT(JOIN_OBJ(self->tail));
604
605 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200606 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300607 Py_VISIT(self->extra->attrib);
608
609 for (i = 0; i < self->extra->length; ++i)
610 Py_VISIT(self->extra->children[i]);
611 }
612 return 0;
613}
614
615static int
616element_gc_clear(ElementObject *self)
617{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300618 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700619 _clear_joined_ptr(&self->text);
620 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300621
622 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300623 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300624 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300625 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300626 return 0;
627}
628
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000629static void
630element_dealloc(ElementObject* self)
631{
INADA Naokia6296d32017-08-24 14:55:17 +0900632 /* bpo-31095: UnTrack is needed before calling any callbacks */
Eli Bendersky0192ba32012-03-30 16:38:33 +0300633 PyObject_GC_UnTrack(self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200634 Py_TRASHCAN_SAFE_BEGIN(self)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300635
636 if (self->weakreflist != NULL)
637 PyObject_ClearWeakRefs((PyObject *) self);
638
Eli Bendersky0192ba32012-03-30 16:38:33 +0300639 /* element_gc_clear clears all references and deallocates extra
640 */
641 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000642
643 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200644 Py_TYPE(self)->tp_free((PyObject *)self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200645 Py_TRASHCAN_SAFE_END(self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000646}
647
648/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000649
Serhiy Storchakacb985562015-05-04 15:32:48 +0300650/*[clinic input]
651_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000652
Serhiy Storchakacb985562015-05-04 15:32:48 +0300653 subelement: object(subclass_of='&Element_Type')
654 /
655
656[clinic start generated code]*/
657
658static PyObject *
659_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
660/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
661{
662 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000663 return NULL;
664
665 Py_RETURN_NONE;
666}
667
Serhiy Storchakacb985562015-05-04 15:32:48 +0300668/*[clinic input]
669_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000670
Serhiy Storchakacb985562015-05-04 15:32:48 +0300671[clinic start generated code]*/
672
673static PyObject *
674_elementtree_Element_clear_impl(ElementObject *self)
675/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
676{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300677 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000678
679 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300680 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000681
682 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300683 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000684
685 Py_RETURN_NONE;
686}
687
Serhiy Storchakacb985562015-05-04 15:32:48 +0300688/*[clinic input]
689_elementtree.Element.__copy__
690
691[clinic start generated code]*/
692
693static PyObject *
694_elementtree_Element___copy___impl(ElementObject *self)
695/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000696{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200697 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000698 ElementObject* element;
699
Eli Bendersky092af1f2012-03-04 07:14:03 +0200700 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800701 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000702 if (!element)
703 return NULL;
704
Oren Milman39ecb9c2017-10-10 23:26:24 +0300705 Py_INCREF(JOIN_OBJ(self->text));
706 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000707
Oren Milman39ecb9c2017-10-10 23:26:24 +0300708 Py_INCREF(JOIN_OBJ(self->tail));
709 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000710
711 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000712 if (element_resize(element, self->extra->length) < 0) {
713 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000714 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000715 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716
717 for (i = 0; i < self->extra->length; i++) {
718 Py_INCREF(self->extra->children[i]);
719 element->extra->children[i] = self->extra->children[i];
720 }
721
722 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000723 }
724
725 return (PyObject*) element;
726}
727
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200728/* Helper for a deep copy. */
729LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
730
Serhiy Storchakacb985562015-05-04 15:32:48 +0300731/*[clinic input]
732_elementtree.Element.__deepcopy__
733
Oren Milmand0568182017-09-12 17:39:15 +0300734 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300735 /
736
737[clinic start generated code]*/
738
739static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300740_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
741/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000742{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200743 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000744 ElementObject* element;
745 PyObject* tag;
746 PyObject* attrib;
747 PyObject* text;
748 PyObject* tail;
749 PyObject* id;
750
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000751 tag = deepcopy(self->tag, memo);
752 if (!tag)
753 return NULL;
754
755 if (self->extra) {
756 attrib = deepcopy(self->extra->attrib, memo);
757 if (!attrib) {
758 Py_DECREF(tag);
759 return NULL;
760 }
761 } else {
762 Py_INCREF(Py_None);
763 attrib = Py_None;
764 }
765
Eli Bendersky092af1f2012-03-04 07:14:03 +0200766 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000767
768 Py_DECREF(tag);
769 Py_DECREF(attrib);
770
771 if (!element)
772 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100773
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000774 text = deepcopy(JOIN_OBJ(self->text), memo);
775 if (!text)
776 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300777 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000778
779 tail = deepcopy(JOIN_OBJ(self->tail), memo);
780 if (!tail)
781 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300782 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000783
784 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000785 if (element_resize(element, self->extra->length) < 0)
786 goto error;
787
788 for (i = 0; i < self->extra->length; i++) {
789 PyObject* child = deepcopy(self->extra->children[i], memo);
790 if (!child) {
791 element->extra->length = i;
792 goto error;
793 }
794 element->extra->children[i] = child;
795 }
796
797 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000798 }
799
800 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700801 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000802 if (!id)
803 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000804
805 i = PyDict_SetItem(memo, id, (PyObject*) element);
806
807 Py_DECREF(id);
808
809 if (i < 0)
810 goto error;
811
812 return (PyObject*) element;
813
814 error:
815 Py_DECREF(element);
816 return NULL;
817}
818
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200819LOCAL(PyObject *)
820deepcopy(PyObject *object, PyObject *memo)
821{
822 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200823 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200824 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200825
826 /* Fast paths */
827 if (object == Py_None || PyUnicode_CheckExact(object)) {
828 Py_INCREF(object);
829 return object;
830 }
831
832 if (Py_REFCNT(object) == 1) {
833 if (PyDict_CheckExact(object)) {
834 PyObject *key, *value;
835 Py_ssize_t pos = 0;
836 int simple = 1;
837 while (PyDict_Next(object, &pos, &key, &value)) {
838 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
839 simple = 0;
840 break;
841 }
842 }
843 if (simple)
844 return PyDict_Copy(object);
845 /* Fall through to general case */
846 }
847 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300848 return _elementtree_Element___deepcopy___impl(
849 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200850 }
851 }
852
853 /* General case */
854 st = ET_STATE_GLOBAL;
855 if (!st->deepcopy_obj) {
856 PyErr_SetString(PyExc_RuntimeError,
857 "deepcopy helper not found");
858 return NULL;
859 }
860
Victor Stinner7fbac452016-08-20 01:34:44 +0200861 stack[0] = object;
862 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200863 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200864}
865
866
Serhiy Storchakacb985562015-05-04 15:32:48 +0300867/*[clinic input]
868_elementtree.Element.__sizeof__ -> Py_ssize_t
869
870[clinic start generated code]*/
871
872static Py_ssize_t
873_elementtree_Element___sizeof___impl(ElementObject *self)
874/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200875{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200876 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200877 if (self->extra) {
878 result += sizeof(ElementObjectExtra);
879 if (self->extra->children != self->extra->_children)
880 result += sizeof(PyObject*) * self->extra->allocated;
881 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300882 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200883}
884
Eli Bendersky698bdb22013-01-10 06:01:06 -0800885/* dict keys for getstate/setstate. */
886#define PICKLED_TAG "tag"
887#define PICKLED_CHILDREN "_children"
888#define PICKLED_ATTRIB "attrib"
889#define PICKLED_TAIL "tail"
890#define PICKLED_TEXT "text"
891
892/* __getstate__ returns a fabricated instance dict as in the pure-Python
893 * Element implementation, for interoperability/interchangeability. This
894 * makes the pure-Python implementation details an API, but (a) there aren't
895 * any unnecessary structures there; and (b) it buys compatibility with 3.2
896 * pickles. See issue #16076.
897 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300898/*[clinic input]
899_elementtree.Element.__getstate__
900
901[clinic start generated code]*/
902
Eli Bendersky698bdb22013-01-10 06:01:06 -0800903static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300904_elementtree_Element___getstate___impl(ElementObject *self)
905/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800906{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200907 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800908 PyObject *instancedict = NULL, *children;
909
910 /* Build a list of children. */
911 children = PyList_New(self->extra ? self->extra->length : 0);
912 if (!children)
913 return NULL;
914 for (i = 0; i < PyList_GET_SIZE(children); i++) {
915 PyObject *child = self->extra->children[i];
916 Py_INCREF(child);
917 PyList_SET_ITEM(children, i, child);
918 }
919
920 /* Construct the state object. */
921 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
922 if (noattrib)
923 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
924 PICKLED_TAG, self->tag,
925 PICKLED_CHILDREN, children,
926 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700927 PICKLED_TEXT, JOIN_OBJ(self->text),
928 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800929 else
930 instancedict = Py_BuildValue("{sOsOsOsOsO}",
931 PICKLED_TAG, self->tag,
932 PICKLED_CHILDREN, children,
933 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700934 PICKLED_TEXT, JOIN_OBJ(self->text),
935 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800936 if (instancedict) {
937 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800938 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800939 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800940 else {
941 for (i = 0; i < PyList_GET_SIZE(children); i++)
942 Py_DECREF(PyList_GET_ITEM(children, i));
943 Py_DECREF(children);
944
945 return NULL;
946 }
947}
948
949static PyObject *
950element_setstate_from_attributes(ElementObject *self,
951 PyObject *tag,
952 PyObject *attrib,
953 PyObject *text,
954 PyObject *tail,
955 PyObject *children)
956{
957 Py_ssize_t i, nchildren;
958
959 if (!tag) {
960 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
961 return NULL;
962 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800963
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200964 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300965 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800966
Oren Milman39ecb9c2017-10-10 23:26:24 +0300967 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
968 Py_INCREF(JOIN_OBJ(text));
969 _set_joined_ptr(&self->text, text);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800970
Oren Milman39ecb9c2017-10-10 23:26:24 +0300971 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
972 Py_INCREF(JOIN_OBJ(tail));
973 _set_joined_ptr(&self->tail, tail);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800974
975 /* Handle ATTRIB and CHILDREN. */
976 if (!children && !attrib)
977 Py_RETURN_NONE;
978
979 /* Compute 'nchildren'. */
980 if (children) {
981 if (!PyList_Check(children)) {
982 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
983 return NULL;
984 }
985 nchildren = PyList_Size(children);
986 }
987 else {
988 nchildren = 0;
989 }
990
991 /* Allocate 'extra'. */
992 if (element_resize(self, nchildren)) {
993 return NULL;
994 }
995 assert(self->extra && self->extra->allocated >= nchildren);
996
997 /* Copy children */
998 for (i = 0; i < nchildren; i++) {
999 self->extra->children[i] = PyList_GET_ITEM(children, i);
1000 Py_INCREF(self->extra->children[i]);
1001 }
1002
1003 self->extra->length = nchildren;
1004 self->extra->allocated = nchildren;
1005
1006 /* Stash attrib. */
1007 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001008 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001009 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001010 }
1011
1012 Py_RETURN_NONE;
1013}
1014
1015/* __setstate__ for Element instance from the Python implementation.
1016 * 'state' should be the instance dict.
1017 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001018
Eli Bendersky698bdb22013-01-10 06:01:06 -08001019static PyObject *
1020element_setstate_from_Python(ElementObject *self, PyObject *state)
1021{
1022 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1023 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1024 PyObject *args;
1025 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001026 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001027
Eli Bendersky698bdb22013-01-10 06:01:06 -08001028 tag = attrib = text = tail = children = NULL;
1029 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001030 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001031 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001032
1033 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1034 &attrib, &text, &tail, &children))
1035 retval = element_setstate_from_attributes(self, tag, attrib, text,
1036 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001037 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001038 retval = NULL;
1039
1040 Py_DECREF(args);
1041 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001042}
1043
Serhiy Storchakacb985562015-05-04 15:32:48 +03001044/*[clinic input]
1045_elementtree.Element.__setstate__
1046
1047 state: object
1048 /
1049
1050[clinic start generated code]*/
1051
Eli Bendersky698bdb22013-01-10 06:01:06 -08001052static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001053_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1054/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001055{
1056 if (!PyDict_CheckExact(state)) {
1057 PyErr_Format(PyExc_TypeError,
1058 "Don't know how to unpickle \"%.200R\" as an Element",
1059 state);
1060 return NULL;
1061 }
1062 else
1063 return element_setstate_from_Python(self, state);
1064}
1065
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001066LOCAL(int)
1067checkpath(PyObject* tag)
1068{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001069 Py_ssize_t i;
1070 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001071
1072 /* check if a tag contains an xpath character */
1073
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001074#define PATHCHAR(ch) \
1075 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001076
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001077 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001078 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1079 void *data = PyUnicode_DATA(tag);
1080 unsigned int kind = PyUnicode_KIND(tag);
1081 for (i = 0; i < len; i++) {
1082 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1083 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001084 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001085 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001086 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001087 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001088 return 1;
1089 }
1090 return 0;
1091 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001092 if (PyBytes_Check(tag)) {
1093 char *p = PyBytes_AS_STRING(tag);
1094 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001095 if (p[i] == '{')
1096 check = 0;
1097 else if (p[i] == '}')
1098 check = 1;
1099 else if (check && PATHCHAR(p[i]))
1100 return 1;
1101 }
1102 return 0;
1103 }
1104
1105 return 1; /* unknown type; might be path expression */
1106}
1107
Serhiy Storchakacb985562015-05-04 15:32:48 +03001108/*[clinic input]
1109_elementtree.Element.extend
1110
1111 elements: object
1112 /
1113
1114[clinic start generated code]*/
1115
1116static PyObject *
1117_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1118/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001119{
1120 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001121 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001122
Serhiy Storchakacb985562015-05-04 15:32:48 +03001123 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001124 if (!seq) {
1125 PyErr_Format(
1126 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001127 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001128 );
1129 return NULL;
1130 }
1131
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001132 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001133 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001134 Py_INCREF(element);
1135 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001136 PyErr_Format(
1137 PyExc_TypeError,
1138 "expected an Element, not \"%.200s\"",
1139 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001140 Py_DECREF(seq);
1141 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001142 return NULL;
1143 }
1144
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001145 if (element_add_subelement(self, element) < 0) {
1146 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001147 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001148 return NULL;
1149 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001150 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001151 }
1152
1153 Py_DECREF(seq);
1154
1155 Py_RETURN_NONE;
1156}
1157
Serhiy Storchakacb985562015-05-04 15:32:48 +03001158/*[clinic input]
1159_elementtree.Element.find
1160
1161 path: object
1162 namespaces: object = None
1163
1164[clinic start generated code]*/
1165
1166static PyObject *
1167_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1168 PyObject *namespaces)
1169/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001170{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001171 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001172 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001173
Serhiy Storchakacb985562015-05-04 15:32:48 +03001174 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001175 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001176 return _PyObject_CallMethodIdObjArgs(
1177 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001178 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001179 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001180
1181 if (!self->extra)
1182 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001183
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001184 for (i = 0; i < self->extra->length; i++) {
1185 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001186 int rc;
1187 if (!Element_CheckExact(item))
1188 continue;
1189 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001190 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001191 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001192 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001193 Py_DECREF(item);
1194 if (rc < 0)
1195 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001196 }
1197
1198 Py_RETURN_NONE;
1199}
1200
Serhiy Storchakacb985562015-05-04 15:32:48 +03001201/*[clinic input]
1202_elementtree.Element.findtext
1203
1204 path: object
1205 default: object = None
1206 namespaces: object = None
1207
1208[clinic start generated code]*/
1209
1210static PyObject *
1211_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1212 PyObject *default_value,
1213 PyObject *namespaces)
1214/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001215{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001216 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001217 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001218 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001219
Serhiy Storchakacb985562015-05-04 15:32:48 +03001220 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001221 return _PyObject_CallMethodIdObjArgs(
1222 st->elementpath_obj, &PyId_findtext,
1223 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001224 );
1225
1226 if (!self->extra) {
1227 Py_INCREF(default_value);
1228 return default_value;
1229 }
1230
1231 for (i = 0; i < self->extra->length; i++) {
1232 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001233 int rc;
1234 if (!Element_CheckExact(item))
1235 continue;
1236 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001237 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001238 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001239 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001240 if (text == Py_None) {
1241 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001242 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001243 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001244 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001245 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001246 return text;
1247 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001248 Py_DECREF(item);
1249 if (rc < 0)
1250 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001251 }
1252
1253 Py_INCREF(default_value);
1254 return default_value;
1255}
1256
Serhiy Storchakacb985562015-05-04 15:32:48 +03001257/*[clinic input]
1258_elementtree.Element.findall
1259
1260 path: object
1261 namespaces: object = None
1262
1263[clinic start generated code]*/
1264
1265static PyObject *
1266_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1267 PyObject *namespaces)
1268/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001269{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001270 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001271 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001272 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001273 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001274
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001275 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001276 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001277 return _PyObject_CallMethodIdObjArgs(
1278 st->elementpath_obj, &PyId_findall, self, tag, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001279 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001280 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001281
1282 out = PyList_New(0);
1283 if (!out)
1284 return NULL;
1285
1286 if (!self->extra)
1287 return out;
1288
1289 for (i = 0; i < self->extra->length; i++) {
1290 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001291 int rc;
1292 if (!Element_CheckExact(item))
1293 continue;
1294 Py_INCREF(item);
1295 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1296 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1297 Py_DECREF(item);
1298 Py_DECREF(out);
1299 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001300 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001301 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001302 }
1303
1304 return out;
1305}
1306
Serhiy Storchakacb985562015-05-04 15:32:48 +03001307/*[clinic input]
1308_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001309
Serhiy Storchakacb985562015-05-04 15:32:48 +03001310 path: object
1311 namespaces: object = None
1312
1313[clinic start generated code]*/
1314
1315static PyObject *
1316_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1317 PyObject *namespaces)
1318/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1319{
1320 PyObject* tag = path;
1321 _Py_IDENTIFIER(iterfind);
1322 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001323
Victor Stinnerf5616342016-12-09 15:26:00 +01001324 return _PyObject_CallMethodIdObjArgs(
1325 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001326}
1327
Serhiy Storchakacb985562015-05-04 15:32:48 +03001328/*[clinic input]
1329_elementtree.Element.get
1330
1331 key: object
1332 default: object = None
1333
1334[clinic start generated code]*/
1335
1336static PyObject *
1337_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1338 PyObject *default_value)
1339/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001340{
1341 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001342
1343 if (!self->extra || self->extra->attrib == Py_None)
1344 value = default_value;
1345 else {
1346 value = PyDict_GetItem(self->extra->attrib, key);
1347 if (!value)
1348 value = default_value;
1349 }
1350
1351 Py_INCREF(value);
1352 return value;
1353}
1354
Serhiy Storchakacb985562015-05-04 15:32:48 +03001355/*[clinic input]
1356_elementtree.Element.getchildren
1357
1358[clinic start generated code]*/
1359
1360static PyObject *
1361_elementtree_Element_getchildren_impl(ElementObject *self)
1362/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001363{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001364 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001365 PyObject* list;
1366
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001367 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1368 "This method will be removed in future versions. "
1369 "Use 'list(elem)' or iteration over elem instead.",
1370 1) < 0) {
1371 return NULL;
1372 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001373
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001374 if (!self->extra)
1375 return PyList_New(0);
1376
1377 list = PyList_New(self->extra->length);
1378 if (!list)
1379 return NULL;
1380
1381 for (i = 0; i < self->extra->length; i++) {
1382 PyObject* item = self->extra->children[i];
1383 Py_INCREF(item);
1384 PyList_SET_ITEM(list, i, item);
1385 }
1386
1387 return list;
1388}
1389
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001390
Eli Bendersky64d11e62012-06-15 07:42:50 +03001391static PyObject *
1392create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1393
1394
Serhiy Storchakacb985562015-05-04 15:32:48 +03001395/*[clinic input]
1396_elementtree.Element.iter
1397
1398 tag: object = None
1399
1400[clinic start generated code]*/
1401
Eli Bendersky64d11e62012-06-15 07:42:50 +03001402static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001403_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1404/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001405{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001406 if (PyUnicode_Check(tag)) {
1407 if (PyUnicode_READY(tag) < 0)
1408 return NULL;
1409 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1410 tag = Py_None;
1411 }
1412 else if (PyBytes_Check(tag)) {
1413 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1414 tag = Py_None;
1415 }
1416
Eli Bendersky64d11e62012-06-15 07:42:50 +03001417 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001418}
1419
1420
Serhiy Storchakacb985562015-05-04 15:32:48 +03001421/*[clinic input]
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001422_elementtree.Element.getiterator
1423
1424 tag: object = None
1425
1426[clinic start generated code]*/
1427
1428static PyObject *
1429_elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1430/*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1431{
1432 /* Change for a DeprecationWarning in 1.4 */
1433 if (PyErr_WarnEx(PyExc_PendingDeprecationWarning,
1434 "This method will be removed in future versions. "
1435 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1436 1) < 0) {
1437 return NULL;
1438 }
1439 return _elementtree_Element_iter_impl(self, tag);
1440}
1441
1442
1443/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03001444_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001445
Serhiy Storchakacb985562015-05-04 15:32:48 +03001446[clinic start generated code]*/
1447
1448static PyObject *
1449_elementtree_Element_itertext_impl(ElementObject *self)
1450/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1451{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001452 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001453}
1454
Eli Bendersky64d11e62012-06-15 07:42:50 +03001455
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001456static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001457element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001458{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001459 ElementObject* self = (ElementObject*) self_;
1460
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001461 if (!self->extra || index < 0 || index >= self->extra->length) {
1462 PyErr_SetString(
1463 PyExc_IndexError,
1464 "child index out of range"
1465 );
1466 return NULL;
1467 }
1468
1469 Py_INCREF(self->extra->children[index]);
1470 return self->extra->children[index];
1471}
1472
Serhiy Storchakacb985562015-05-04 15:32:48 +03001473/*[clinic input]
1474_elementtree.Element.insert
1475
1476 index: Py_ssize_t
1477 subelement: object(subclass_of='&Element_Type')
1478 /
1479
1480[clinic start generated code]*/
1481
1482static PyObject *
1483_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1484 PyObject *subelement)
1485/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001486{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001487 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001488
Victor Stinner5f0af232013-07-11 23:01:36 +02001489 if (!self->extra) {
1490 if (create_extra(self, NULL) < 0)
1491 return NULL;
1492 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001493
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001494 if (index < 0) {
1495 index += self->extra->length;
1496 if (index < 0)
1497 index = 0;
1498 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001499 if (index > self->extra->length)
1500 index = self->extra->length;
1501
1502 if (element_resize(self, 1) < 0)
1503 return NULL;
1504
1505 for (i = self->extra->length; i > index; i--)
1506 self->extra->children[i] = self->extra->children[i-1];
1507
Serhiy Storchakacb985562015-05-04 15:32:48 +03001508 Py_INCREF(subelement);
1509 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001510
1511 self->extra->length++;
1512
1513 Py_RETURN_NONE;
1514}
1515
Serhiy Storchakacb985562015-05-04 15:32:48 +03001516/*[clinic input]
1517_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001518
Serhiy Storchakacb985562015-05-04 15:32:48 +03001519[clinic start generated code]*/
1520
1521static PyObject *
1522_elementtree_Element_items_impl(ElementObject *self)
1523/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1524{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001525 if (!self->extra || self->extra->attrib == Py_None)
1526 return PyList_New(0);
1527
1528 return PyDict_Items(self->extra->attrib);
1529}
1530
Serhiy Storchakacb985562015-05-04 15:32:48 +03001531/*[clinic input]
1532_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001533
Serhiy Storchakacb985562015-05-04 15:32:48 +03001534[clinic start generated code]*/
1535
1536static PyObject *
1537_elementtree_Element_keys_impl(ElementObject *self)
1538/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1539{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001540 if (!self->extra || self->extra->attrib == Py_None)
1541 return PyList_New(0);
1542
1543 return PyDict_Keys(self->extra->attrib);
1544}
1545
Martin v. Löwis18e16552006-02-15 17:27:45 +00001546static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001547element_length(ElementObject* self)
1548{
1549 if (!self->extra)
1550 return 0;
1551
1552 return self->extra->length;
1553}
1554
Serhiy Storchakacb985562015-05-04 15:32:48 +03001555/*[clinic input]
1556_elementtree.Element.makeelement
1557
1558 tag: object
1559 attrib: object
1560 /
1561
1562[clinic start generated code]*/
1563
1564static PyObject *
1565_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1566 PyObject *attrib)
1567/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001568{
1569 PyObject* elem;
1570
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001571 attrib = PyDict_Copy(attrib);
1572 if (!attrib)
1573 return NULL;
1574
Eli Bendersky092af1f2012-03-04 07:14:03 +02001575 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001576
1577 Py_DECREF(attrib);
1578
1579 return elem;
1580}
1581
Serhiy Storchakacb985562015-05-04 15:32:48 +03001582/*[clinic input]
1583_elementtree.Element.remove
1584
1585 subelement: object(subclass_of='&Element_Type')
1586 /
1587
1588[clinic start generated code]*/
1589
1590static PyObject *
1591_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1592/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001593{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001594 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001595 int rc;
1596 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001597
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001598 if (!self->extra) {
1599 /* element has no children, so raise exception */
1600 PyErr_SetString(
1601 PyExc_ValueError,
1602 "list.remove(x): x not in list"
1603 );
1604 return NULL;
1605 }
1606
1607 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001608 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001609 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001610 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001611 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001612 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001613 if (rc < 0)
1614 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001615 }
1616
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001617 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001618 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001619 PyErr_SetString(
1620 PyExc_ValueError,
1621 "list.remove(x): x not in list"
1622 );
1623 return NULL;
1624 }
1625
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001626 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001627
1628 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001629 for (; i < self->extra->length; i++)
1630 self->extra->children[i] = self->extra->children[i+1];
1631
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001632 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001633 Py_RETURN_NONE;
1634}
1635
1636static PyObject*
1637element_repr(ElementObject* self)
1638{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001639 int status;
1640
1641 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001642 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001643
1644 status = Py_ReprEnter((PyObject *)self);
1645 if (status == 0) {
1646 PyObject *res;
1647 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1648 Py_ReprLeave((PyObject *)self);
1649 return res;
1650 }
1651 if (status > 0)
1652 PyErr_Format(PyExc_RuntimeError,
1653 "reentrant call inside %s.__repr__",
1654 Py_TYPE(self)->tp_name);
1655 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001656}
1657
Serhiy Storchakacb985562015-05-04 15:32:48 +03001658/*[clinic input]
1659_elementtree.Element.set
1660
1661 key: object
1662 value: object
1663 /
1664
1665[clinic start generated code]*/
1666
1667static PyObject *
1668_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1669 PyObject *value)
1670/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001671{
1672 PyObject* attrib;
1673
Victor Stinner5f0af232013-07-11 23:01:36 +02001674 if (!self->extra) {
1675 if (create_extra(self, NULL) < 0)
1676 return NULL;
1677 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001678
1679 attrib = element_get_attrib(self);
1680 if (!attrib)
1681 return NULL;
1682
1683 if (PyDict_SetItem(attrib, key, value) < 0)
1684 return NULL;
1685
1686 Py_RETURN_NONE;
1687}
1688
1689static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001690element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001691{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001692 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001693 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001694 PyObject* old;
1695
1696 if (!self->extra || index < 0 || index >= self->extra->length) {
1697 PyErr_SetString(
1698 PyExc_IndexError,
1699 "child assignment index out of range");
1700 return -1;
1701 }
1702
1703 old = self->extra->children[index];
1704
1705 if (item) {
1706 Py_INCREF(item);
1707 self->extra->children[index] = item;
1708 } else {
1709 self->extra->length--;
1710 for (i = index; i < self->extra->length; i++)
1711 self->extra->children[i] = self->extra->children[i+1];
1712 }
1713
1714 Py_DECREF(old);
1715
1716 return 0;
1717}
1718
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001719static PyObject*
1720element_subscr(PyObject* self_, PyObject* item)
1721{
1722 ElementObject* self = (ElementObject*) self_;
1723
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001724 if (PyIndex_Check(item)) {
1725 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001726
1727 if (i == -1 && PyErr_Occurred()) {
1728 return NULL;
1729 }
1730 if (i < 0 && self->extra)
1731 i += self->extra->length;
1732 return element_getitem(self_, i);
1733 }
1734 else if (PySlice_Check(item)) {
1735 Py_ssize_t start, stop, step, slicelen, cur, i;
1736 PyObject* list;
1737
1738 if (!self->extra)
1739 return PyList_New(0);
1740
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001741 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001742 return NULL;
1743 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001744 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1745 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001746
1747 if (slicelen <= 0)
1748 return PyList_New(0);
1749 else {
1750 list = PyList_New(slicelen);
1751 if (!list)
1752 return NULL;
1753
1754 for (cur = start, i = 0; i < slicelen;
1755 cur += step, i++) {
1756 PyObject* item = self->extra->children[cur];
1757 Py_INCREF(item);
1758 PyList_SET_ITEM(list, i, item);
1759 }
1760
1761 return list;
1762 }
1763 }
1764 else {
1765 PyErr_SetString(PyExc_TypeError,
1766 "element indices must be integers");
1767 return NULL;
1768 }
1769}
1770
1771static int
1772element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1773{
1774 ElementObject* self = (ElementObject*) self_;
1775
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001776 if (PyIndex_Check(item)) {
1777 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001778
1779 if (i == -1 && PyErr_Occurred()) {
1780 return -1;
1781 }
1782 if (i < 0 && self->extra)
1783 i += self->extra->length;
1784 return element_setitem(self_, i, value);
1785 }
1786 else if (PySlice_Check(item)) {
1787 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1788
1789 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001790 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001791
Victor Stinner5f0af232013-07-11 23:01:36 +02001792 if (!self->extra) {
1793 if (create_extra(self, NULL) < 0)
1794 return -1;
1795 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001796
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001797 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001798 return -1;
1799 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001800 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1801 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001802
Eli Bendersky865756a2012-03-09 13:38:15 +02001803 if (value == NULL) {
1804 /* Delete slice */
1805 size_t cur;
1806 Py_ssize_t i;
1807
1808 if (slicelen <= 0)
1809 return 0;
1810
1811 /* Since we're deleting, the direction of the range doesn't matter,
1812 * so for simplicity make it always ascending.
1813 */
1814 if (step < 0) {
1815 stop = start + 1;
1816 start = stop + step * (slicelen - 1) - 1;
1817 step = -step;
1818 }
1819
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001820 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001821
1822 /* recycle is a list that will contain all the children
1823 * scheduled for removal.
1824 */
1825 if (!(recycle = PyList_New(slicelen))) {
1826 PyErr_NoMemory();
1827 return -1;
1828 }
1829
1830 /* This loop walks over all the children that have to be deleted,
1831 * with cur pointing at them. num_moved is the amount of children
1832 * until the next deleted child that have to be "shifted down" to
1833 * occupy the deleted's places.
1834 * Note that in the ith iteration, shifting is done i+i places down
1835 * because i children were already removed.
1836 */
1837 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1838 /* Compute how many children have to be moved, clipping at the
1839 * list end.
1840 */
1841 Py_ssize_t num_moved = step - 1;
1842 if (cur + step >= (size_t)self->extra->length) {
1843 num_moved = self->extra->length - cur - 1;
1844 }
1845
1846 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1847
1848 memmove(
1849 self->extra->children + cur - i,
1850 self->extra->children + cur + 1,
1851 num_moved * sizeof(PyObject *));
1852 }
1853
1854 /* Leftover "tail" after the last removed child */
1855 cur = start + (size_t)slicelen * step;
1856 if (cur < (size_t)self->extra->length) {
1857 memmove(
1858 self->extra->children + cur - slicelen,
1859 self->extra->children + cur,
1860 (self->extra->length - cur) * sizeof(PyObject *));
1861 }
1862
1863 self->extra->length -= slicelen;
1864
1865 /* Discard the recycle list with all the deleted sub-elements */
1866 Py_XDECREF(recycle);
1867 return 0;
1868 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001869
1870 /* A new slice is actually being assigned */
1871 seq = PySequence_Fast(value, "");
1872 if (!seq) {
1873 PyErr_Format(
1874 PyExc_TypeError,
1875 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1876 );
1877 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001878 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001879 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001880
1881 if (step != 1 && newlen != slicelen)
1882 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001883 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001884 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001885 "attempt to assign sequence of size %zd "
1886 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001887 newlen, slicelen
1888 );
1889 return -1;
1890 }
1891
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001892 /* Resize before creating the recycle bin, to prevent refleaks. */
1893 if (newlen > slicelen) {
1894 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001895 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001896 return -1;
1897 }
1898 }
1899
1900 if (slicelen > 0) {
1901 /* to avoid recursive calls to this method (via decref), move
1902 old items to the recycle bin here, and get rid of them when
1903 we're done modifying the element */
1904 recycle = PyList_New(slicelen);
1905 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001906 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001907 return -1;
1908 }
1909 for (cur = start, i = 0; i < slicelen;
1910 cur += step, i++)
1911 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1912 }
1913
1914 if (newlen < slicelen) {
1915 /* delete slice */
1916 for (i = stop; i < self->extra->length; i++)
1917 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1918 } else if (newlen > slicelen) {
1919 /* insert slice */
1920 for (i = self->extra->length-1; i >= stop; i--)
1921 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1922 }
1923
1924 /* replace the slice */
1925 for (cur = start, i = 0; i < newlen;
1926 cur += step, i++) {
1927 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1928 Py_INCREF(element);
1929 self->extra->children[cur] = element;
1930 }
1931
1932 self->extra->length += newlen - slicelen;
1933
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001934 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001935
1936 /* discard the recycle bin, and everything in it */
1937 Py_XDECREF(recycle);
1938
1939 return 0;
1940 }
1941 else {
1942 PyErr_SetString(PyExc_TypeError,
1943 "element indices must be integers");
1944 return -1;
1945 }
1946}
1947
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001948static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001949element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001950{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001951 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001952 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001953 return res;
1954}
1955
Serhiy Storchakadde08152015-11-25 15:28:13 +02001956static PyObject*
1957element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001958{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001959 PyObject *res = element_get_text(self);
1960 Py_XINCREF(res);
1961 return res;
1962}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001963
Serhiy Storchakadde08152015-11-25 15:28:13 +02001964static PyObject*
1965element_tail_getter(ElementObject *self, void *closure)
1966{
1967 PyObject *res = element_get_tail(self);
1968 Py_XINCREF(res);
1969 return res;
1970}
1971
1972static PyObject*
1973element_attrib_getter(ElementObject *self, void *closure)
1974{
1975 PyObject *res;
1976 if (!self->extra) {
1977 if (create_extra(self, NULL) < 0)
1978 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001979 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001980 res = element_get_attrib(self);
1981 Py_XINCREF(res);
1982 return res;
1983}
Victor Stinner4d463432013-07-11 23:05:03 +02001984
/* macro for setter validation: a NULL value means "delete the attribute",
   which is not supported, so raise AttributeError and make the enclosing
   setter return -1.  Wrapped in do { } while (0) so that the expansion is
   exactly one statement and cannot capture a following `else`. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
1993
Serhiy Storchakadde08152015-11-25 15:28:13 +02001994static int
1995element_tag_setter(ElementObject *self, PyObject *value, void *closure)
1996{
1997 _VALIDATE_ATTR_VALUE(value);
1998 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03001999 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002000 return 0;
2001}
2002
2003static int
2004element_text_setter(ElementObject *self, PyObject *value, void *closure)
2005{
2006 _VALIDATE_ATTR_VALUE(value);
2007 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002008 _set_joined_ptr(&self->text, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002009 return 0;
2010}
2011
2012static int
2013element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2014{
2015 _VALIDATE_ATTR_VALUE(value);
2016 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002017 _set_joined_ptr(&self->tail, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002018 return 0;
2019}
2020
2021static int
2022element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2023{
2024 _VALIDATE_ATTR_VALUE(value);
2025 if (!self->extra) {
2026 if (create_extra(self, NULL) < 0)
2027 return -1;
2028 }
2029 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002030 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002031 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002032}
2033
2034static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002035 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002036 0, /* sq_concat */
2037 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002038 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002039 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002040 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002041 0,
2042};
2043
Eli Bendersky64d11e62012-06-15 07:42:50 +03002044/******************************* Element iterator ****************************/
2045
2046/* ElementIterObject represents the iteration state over an XML element in
2047 * pre-order traversal. To keep track of which sub-element should be returned
2048 * next, a stack of parents is maintained. This is a standard stack-based
2049 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
2051 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002052 * the current one is exhausted, and the next child to examine in that parent.
2053 */
2054typedef struct ParentLocator_t {
2055 ElementObject *parent;
2056 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002057} ParentLocator;
2058
2059typedef struct {
2060 PyObject_HEAD
2061 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002062 Py_ssize_t parent_stack_used;
2063 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002064 ElementObject *root_element;
2065 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002066 int gettext;
2067} ElementIterObject;
2068
2069
2070static void
2071elementiter_dealloc(ElementIterObject *it)
2072{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002073 Py_ssize_t i = it->parent_stack_used;
2074 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002075 /* bpo-31095: UnTrack is needed before calling any callbacks */
2076 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002077 while (i--)
2078 Py_XDECREF(it->parent_stack[i].parent);
2079 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002080
2081 Py_XDECREF(it->sought_tag);
2082 Py_XDECREF(it->root_element);
2083
Eli Bendersky64d11e62012-06-15 07:42:50 +03002084 PyObject_GC_Del(it);
2085}
2086
2087static int
2088elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2089{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002090 Py_ssize_t i = it->parent_stack_used;
2091 while (i--)
2092 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002093
2094 Py_VISIT(it->root_element);
2095 Py_VISIT(it->sought_tag);
2096 return 0;
2097}
2098
2099/* Helper function for elementiter_next. Add a new parent to the parent stack.
2100 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002101static int
2102parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002103{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002104 ParentLocator *item;
2105
2106 if (it->parent_stack_used >= it->parent_stack_size) {
2107 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2108 ParentLocator *parent_stack = it->parent_stack;
2109 PyMem_Resize(parent_stack, ParentLocator, new_size);
2110 if (parent_stack == NULL)
2111 return -1;
2112 it->parent_stack = parent_stack;
2113 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002114 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002115 item = it->parent_stack + it->parent_stack_used++;
2116 Py_INCREF(parent);
2117 item->parent = parent;
2118 item->child_index = 0;
2119 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002120}
2121
2122static PyObject *
2123elementiter_next(ElementIterObject *it)
2124{
2125 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002126 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002127 * A short note on gettext: this function serves both the iter() and
2128 * itertext() methods to avoid code duplication. However, there are a few
2129 * small differences in the way these iterations work. Namely:
2130 * - itertext() only yields text from nodes that have it, and continues
2131 * iterating when a node doesn't have text (so it doesn't return any
2132 * node like iter())
2133 * - itertext() also has to handle tail, after finishing with all the
2134 * children of a node.
2135 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002136 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002137 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002138 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002139
2140 while (1) {
2141 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002142 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002143 * iterator is exhausted.
2144 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002145 if (!it->parent_stack_used) {
2146 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002147 PyErr_SetNone(PyExc_StopIteration);
2148 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002149 }
2150
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002151 elem = it->root_element; /* steals a reference */
2152 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002153 }
2154 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002155 /* See if there are children left to traverse in the current parent. If
2156 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002157 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002158 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2159 Py_ssize_t child_index = item->child_index;
2160 ElementObjectExtra *extra;
2161 elem = item->parent;
2162 extra = elem->extra;
2163 if (!extra || child_index >= extra->length) {
2164 it->parent_stack_used--;
2165 /* Note that extra condition on it->parent_stack_used here;
2166 * this is because itertext() is supposed to only return *inner*
2167 * text, not text following the element it began iteration with.
2168 */
2169 if (it->gettext && it->parent_stack_used) {
2170 text = element_get_tail(elem);
2171 goto gettext;
2172 }
2173 Py_DECREF(elem);
2174 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002175 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002176
Serhiy Storchaka576def02017-03-30 09:47:31 +03002177 if (!PyObject_TypeCheck(extra->children[child_index], &Element_Type)) {
2178 PyErr_Format(PyExc_AttributeError,
2179 "'%.100s' object has no attribute 'iter'",
2180 Py_TYPE(extra->children[child_index])->tp_name);
2181 return NULL;
2182 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002183 elem = (ElementObject *)extra->children[child_index];
2184 item->child_index++;
2185 Py_INCREF(elem);
2186 }
2187
2188 if (parent_stack_push_new(it, elem) < 0) {
2189 Py_DECREF(elem);
2190 PyErr_NoMemory();
2191 return NULL;
2192 }
2193 if (it->gettext) {
2194 text = element_get_text(elem);
2195 goto gettext;
2196 }
2197
2198 if (it->sought_tag == Py_None)
2199 return (PyObject *)elem;
2200
2201 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2202 if (rc > 0)
2203 return (PyObject *)elem;
2204
2205 Py_DECREF(elem);
2206 if (rc < 0)
2207 return NULL;
2208 continue;
2209
2210gettext:
2211 if (!text) {
2212 Py_DECREF(elem);
2213 return NULL;
2214 }
2215 if (text == Py_None) {
2216 Py_DECREF(elem);
2217 }
2218 else {
2219 Py_INCREF(text);
2220 Py_DECREF(elem);
2221 rc = PyObject_IsTrue(text);
2222 if (rc > 0)
2223 return text;
2224 Py_DECREF(text);
2225 if (rc < 0)
2226 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002227 }
2228 }
2229
2230 return NULL;
2231}
2232
2233
2234static PyTypeObject ElementIter_Type = {
2235 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002236 /* Using the module's name since the pure-Python implementation does not
2237 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002238 "_elementtree._element_iterator", /* tp_name */
2239 sizeof(ElementIterObject), /* tp_basicsize */
2240 0, /* tp_itemsize */
2241 /* methods */
2242 (destructor)elementiter_dealloc, /* tp_dealloc */
2243 0, /* tp_print */
2244 0, /* tp_getattr */
2245 0, /* tp_setattr */
2246 0, /* tp_reserved */
2247 0, /* tp_repr */
2248 0, /* tp_as_number */
2249 0, /* tp_as_sequence */
2250 0, /* tp_as_mapping */
2251 0, /* tp_hash */
2252 0, /* tp_call */
2253 0, /* tp_str */
2254 0, /* tp_getattro */
2255 0, /* tp_setattro */
2256 0, /* tp_as_buffer */
2257 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2258 0, /* tp_doc */
2259 (traverseproc)elementiter_traverse, /* tp_traverse */
2260 0, /* tp_clear */
2261 0, /* tp_richcompare */
2262 0, /* tp_weaklistoffset */
2263 PyObject_SelfIter, /* tp_iter */
2264 (iternextfunc)elementiter_next, /* tp_iternext */
2265 0, /* tp_methods */
2266 0, /* tp_members */
2267 0, /* tp_getset */
2268 0, /* tp_base */
2269 0, /* tp_dict */
2270 0, /* tp_descr_get */
2271 0, /* tp_descr_set */
2272 0, /* tp_dictoffset */
2273 0, /* tp_init */
2274 0, /* tp_alloc */
2275 0, /* tp_new */
2276};
2277
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002278#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002279
2280static PyObject *
2281create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2282{
2283 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002284
2285 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2286 if (!it)
2287 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002288
Victor Stinner4d463432013-07-11 23:05:03 +02002289 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002290 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002291 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002292 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002293 it->root_element = self;
2294
Eli Bendersky64d11e62012-06-15 07:42:50 +03002295 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002296
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002297 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002298 if (it->parent_stack == NULL) {
2299 Py_DECREF(it);
2300 PyErr_NoMemory();
2301 return NULL;
2302 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002303 it->parent_stack_used = 0;
2304 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002305
Eli Bendersky64d11e62012-06-15 07:42:50 +03002306 return (PyObject *)it;
2307}
2308
2309
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002310/* ==================================================================== */
2311/* the tree builder type */
2312
2313typedef struct {
2314 PyObject_HEAD
2315
Eli Bendersky58d548d2012-05-29 15:45:16 +03002316 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002317
Antoine Pitrouee329312012-10-04 19:53:29 +02002318 PyObject *this; /* current node */
2319 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002320
Eli Bendersky58d548d2012-05-29 15:45:16 +03002321 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002322
Eli Bendersky58d548d2012-05-29 15:45:16 +03002323 PyObject *stack; /* element stack */
2324 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002325
Eli Bendersky48d358b2012-05-30 17:57:50 +03002326 PyObject *element_factory;
2327
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002328 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002329 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002330 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2331 PyObject *end_event_obj;
2332 PyObject *start_ns_event_obj;
2333 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002334} TreeBuilderObject;
2335
/* True when op's type is exactly TreeBuilder_Type (subclasses excluded). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002337
2338/* -------------------------------------------------------------------- */
2339/* constructor and destructor */
2340
Eli Bendersky58d548d2012-05-29 15:45:16 +03002341static PyObject *
2342treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002343{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002344 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2345 if (t != NULL) {
2346 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002347
Eli Bendersky58d548d2012-05-29 15:45:16 +03002348 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002349 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002350 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002351 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002352
Eli Bendersky58d548d2012-05-29 15:45:16 +03002353 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002354 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002355 t->stack = PyList_New(20);
2356 if (!t->stack) {
2357 Py_DECREF(t->this);
2358 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002359 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002360 return NULL;
2361 }
2362 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002363
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002364 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002365 t->start_event_obj = t->end_event_obj = NULL;
2366 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2367 }
2368 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002369}
2370
Serhiy Storchakacb985562015-05-04 15:32:48 +03002371/*[clinic input]
2372_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002373
Serhiy Storchakacb985562015-05-04 15:32:48 +03002374 element_factory: object = NULL
2375
2376[clinic start generated code]*/
2377
2378static int
2379_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2380 PyObject *element_factory)
2381/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2382{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002383 if (element_factory) {
2384 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002385 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002386 }
2387
Eli Bendersky58d548d2012-05-29 15:45:16 +03002388 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002389}
2390
Eli Bendersky48d358b2012-05-30 17:57:50 +03002391static int
2392treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2393{
2394 Py_VISIT(self->root);
2395 Py_VISIT(self->this);
2396 Py_VISIT(self->last);
2397 Py_VISIT(self->data);
2398 Py_VISIT(self->stack);
2399 Py_VISIT(self->element_factory);
2400 return 0;
2401}
2402
2403static int
2404treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002405{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002406 Py_CLEAR(self->end_ns_event_obj);
2407 Py_CLEAR(self->start_ns_event_obj);
2408 Py_CLEAR(self->end_event_obj);
2409 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002410 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002411 Py_CLEAR(self->stack);
2412 Py_CLEAR(self->data);
2413 Py_CLEAR(self->last);
2414 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002415 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002416 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002417 return 0;
2418}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002419
Eli Bendersky48d358b2012-05-30 17:57:50 +03002420static void
2421treebuilder_dealloc(TreeBuilderObject *self)
2422{
2423 PyObject_GC_UnTrack(self);
2424 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002425 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002426}
2427
2428/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002429/* helpers for handling of arbitrary element-like objects */
2430
2431static int
Serhiy Storchaka576def02017-03-30 09:47:31 +03002432treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
Antoine Pitrouee329312012-10-04 19:53:29 +02002433 PyObject **dest, _Py_Identifier *name)
2434{
2435 if (Element_CheckExact(element)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002436 PyObject *tmp = JOIN_OBJ(*dest);
2437 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2438 *data = NULL;
2439 Py_DECREF(tmp);
Antoine Pitrouee329312012-10-04 19:53:29 +02002440 return 0;
2441 }
2442 else {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002443 PyObject *joined = list_join(*data);
Antoine Pitrouee329312012-10-04 19:53:29 +02002444 int r;
2445 if (joined == NULL)
2446 return -1;
2447 r = _PyObject_SetAttrId(element, name, joined);
2448 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002449 if (r < 0)
2450 return -1;
2451 Py_CLEAR(*data);
2452 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002453 }
2454}
2455
Serhiy Storchaka576def02017-03-30 09:47:31 +03002456LOCAL(int)
2457treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002458{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002459 PyObject *element = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002460
Serhiy Storchaka576def02017-03-30 09:47:31 +03002461 if (!self->data) {
2462 return 0;
2463 }
2464
2465 if (self->this == element) {
2466 _Py_IDENTIFIER(text);
2467 return treebuilder_set_element_text_or_tail(
2468 element, &self->data,
2469 &((ElementObject *) element)->text, &PyId_text);
2470 }
2471 else {
2472 _Py_IDENTIFIER(tail);
2473 return treebuilder_set_element_text_or_tail(
2474 element, &self->data,
2475 &((ElementObject *) element)->tail, &PyId_tail);
2476 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002477}
2478
2479static int
2480treebuilder_add_subelement(PyObject *element, PyObject *child)
2481{
2482 _Py_IDENTIFIER(append);
2483 if (Element_CheckExact(element)) {
2484 ElementObject *elem = (ElementObject *) element;
2485 return element_add_subelement(elem, child);
2486 }
2487 else {
2488 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002489 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002490 if (res == NULL)
2491 return -1;
2492 Py_DECREF(res);
2493 return 0;
2494 }
2495}
2496
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002497LOCAL(int)
2498treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2499 PyObject *node)
2500{
2501 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002502 PyObject *res;
2503 PyObject *event = PyTuple_Pack(2, action, node);
2504 if (event == NULL)
2505 return -1;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002506 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002507 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002508 if (res == NULL)
2509 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002510 Py_DECREF(res);
2511 }
2512 return 0;
2513}
2514
Antoine Pitrouee329312012-10-04 19:53:29 +02002515/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002516/* handlers */
2517
2518LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002519treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2520 PyObject* attrib)
2521{
2522 PyObject* node;
2523 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002524 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002525
Serhiy Storchaka576def02017-03-30 09:47:31 +03002526 if (treebuilder_flush_data(self) < 0) {
2527 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002528 }
2529
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002530 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002531 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002532 } else if (attrib == Py_None) {
2533 attrib = PyDict_New();
2534 if (!attrib)
2535 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002536 node = PyObject_CallFunctionObjArgs(self->element_factory,
2537 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002538 Py_DECREF(attrib);
2539 }
2540 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002541 node = PyObject_CallFunctionObjArgs(self->element_factory,
2542 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002543 }
2544 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002545 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002546 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002547
Antoine Pitrouee329312012-10-04 19:53:29 +02002548 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002549
2550 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002551 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002552 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002553 } else {
2554 if (self->root) {
2555 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002556 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002557 "multiple elements on top level"
2558 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002559 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002560 }
2561 Py_INCREF(node);
2562 self->root = node;
2563 }
2564
2565 if (self->index < PyList_GET_SIZE(self->stack)) {
2566 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002567 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002568 Py_INCREF(this);
2569 } else {
2570 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002571 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002572 }
2573 self->index++;
2574
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002575 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002576 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002577 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002578 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002579
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002580 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2581 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002582
2583 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002584
2585 error:
2586 Py_DECREF(node);
2587 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002588}
2589
2590LOCAL(PyObject*)
2591treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2592{
2593 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002594 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002595 /* ignore calls to data before the first call to start */
2596 Py_RETURN_NONE;
2597 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002598 /* store the first item as is */
2599 Py_INCREF(data); self->data = data;
2600 } else {
2601 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002602 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2603 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002604 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002605 /* expat often generates single character data sections; handle
2606 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002607 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2608 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002609 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002610 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002611 } else if (PyList_CheckExact(self->data)) {
2612 if (PyList_Append(self->data, data) < 0)
2613 return NULL;
2614 } else {
2615 PyObject* list = PyList_New(2);
2616 if (!list)
2617 return NULL;
2618 PyList_SET_ITEM(list, 0, self->data);
2619 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2620 self->data = list;
2621 }
2622 }
2623
2624 Py_RETURN_NONE;
2625}
2626
2627LOCAL(PyObject*)
2628treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2629{
2630 PyObject* item;
2631
Serhiy Storchaka576def02017-03-30 09:47:31 +03002632 if (treebuilder_flush_data(self) < 0) {
2633 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002634 }
2635
2636 if (self->index == 0) {
2637 PyErr_SetString(
2638 PyExc_IndexError,
2639 "pop from empty stack"
2640 );
2641 return NULL;
2642 }
2643
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002644 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002645 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002646 self->index--;
2647 self->this = PyList_GET_ITEM(self->stack, self->index);
2648 Py_INCREF(self->this);
2649 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002650
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002651 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2652 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002653
2654 Py_INCREF(self->last);
2655 return (PyObject*) self->last;
2656}
2657
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002658/* -------------------------------------------------------------------- */
2659/* methods (in alphabetical order) */
2660
Serhiy Storchakacb985562015-05-04 15:32:48 +03002661/*[clinic input]
2662_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002663
Serhiy Storchakacb985562015-05-04 15:32:48 +03002664 data: object
2665 /
2666
2667[clinic start generated code]*/
2668
2669static PyObject *
2670_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2671/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2672{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002673 return treebuilder_handle_data(self, data);
2674}
2675
Serhiy Storchakacb985562015-05-04 15:32:48 +03002676/*[clinic input]
2677_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002678
Serhiy Storchakacb985562015-05-04 15:32:48 +03002679 tag: object
2680 /
2681
2682[clinic start generated code]*/
2683
2684static PyObject *
2685_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2686/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2687{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002688 return treebuilder_handle_end(self, tag);
2689}
2690
2691LOCAL(PyObject*)
2692treebuilder_done(TreeBuilderObject* self)
2693{
2694 PyObject* res;
2695
2696 /* FIXME: check stack size? */
2697
2698 if (self->root)
2699 res = self->root;
2700 else
2701 res = Py_None;
2702
2703 Py_INCREF(res);
2704 return res;
2705}
2706
Serhiy Storchakacb985562015-05-04 15:32:48 +03002707/*[clinic input]
2708_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002709
Serhiy Storchakacb985562015-05-04 15:32:48 +03002710[clinic start generated code]*/
2711
2712static PyObject *
2713_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2714/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2715{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002716 return treebuilder_done(self);
2717}
2718
Serhiy Storchakacb985562015-05-04 15:32:48 +03002719/*[clinic input]
2720_elementtree.TreeBuilder.start
2721
2722 tag: object
2723 attrs: object = None
2724 /
2725
2726[clinic start generated code]*/
2727
2728static PyObject *
2729_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2730 PyObject *attrs)
2731/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002732{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002733 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002734}
2735
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002736/* ==================================================================== */
2737/* the expat interface */
2738
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002739#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002740#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002741
2742/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2743 * cached globally without being in per-module state.
2744 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002745static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002746#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002747
Eli Bendersky52467b12012-06-01 07:13:08 +03002748static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2749 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2750
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002751typedef struct {
2752 PyObject_HEAD
2753
2754 XML_Parser parser;
2755
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002756 PyObject *target;
2757 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002758
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002759 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002760
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002761 PyObject *handle_start;
2762 PyObject *handle_data;
2763 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002764
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002765 PyObject *handle_comment;
2766 PyObject *handle_pi;
2767 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002768
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002769 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002770
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002771} XMLParserObject;
2772
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002773static PyObject*
Serhiy Storchaka6969eaf2017-07-03 21:20:15 +03002774_elementtree_XMLParser_doctype(XMLParserObject *self, PyObject **args, Py_ssize_t nargs);
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002775static PyObject *
2776_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
2777 PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002778
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002779/* helpers */
2780
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002781LOCAL(PyObject*)
2782makeuniversal(XMLParserObject* self, const char* string)
2783{
2784 /* convert a UTF-8 tag/attribute name from the expat parser
2785 to a universal name string */
2786
Antoine Pitrouc1948842012-10-01 23:40:37 +02002787 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002788 PyObject* key;
2789 PyObject* value;
2790
2791 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002792 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002793 if (!key)
2794 return NULL;
2795
2796 value = PyDict_GetItem(self->names, key);
2797
2798 if (value) {
2799 Py_INCREF(value);
2800 } else {
2801 /* new name. convert to universal name, and decode as
2802 necessary */
2803
2804 PyObject* tag;
2805 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002806 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002807
2808 /* look for namespace separator */
2809 for (i = 0; i < size; i++)
2810 if (string[i] == '}')
2811 break;
2812 if (i != size) {
2813 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002814 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002815 if (tag == NULL) {
2816 Py_DECREF(key);
2817 return NULL;
2818 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002819 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002820 p[0] = '{';
2821 memcpy(p+1, string, size);
2822 size++;
2823 } else {
2824 /* plain name; use key as tag */
2825 Py_INCREF(key);
2826 tag = key;
2827 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002828
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002829 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002830 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002831 value = PyUnicode_DecodeUTF8(p, size, "strict");
2832 Py_DECREF(tag);
2833 if (!value) {
2834 Py_DECREF(key);
2835 return NULL;
2836 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002837
2838 /* add to names dictionary */
2839 if (PyDict_SetItem(self->names, key, value) < 0) {
2840 Py_DECREF(key);
2841 Py_DECREF(value);
2842 return NULL;
2843 }
2844 }
2845
2846 Py_DECREF(key);
2847 return value;
2848}
2849
Eli Bendersky5b77d812012-03-16 08:20:05 +02002850/* Set the ParseError exception with the given parameters.
2851 * If message is not NULL, it's used as the error string. Otherwise, the
2852 * message string is the default for the given error_code.
2853*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002854static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002855expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2856 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002857{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002858 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002859 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002860
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002861 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002862 message ? message : EXPAT(ErrorString)(error_code),
2863 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002864 if (errmsg == NULL)
2865 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002866
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002867 error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002868 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002869 if (!error)
2870 return;
2871
Eli Bendersky5b77d812012-03-16 08:20:05 +02002872 /* Add code and position attributes */
2873 code = PyLong_FromLong((long)error_code);
2874 if (!code) {
2875 Py_DECREF(error);
2876 return;
2877 }
2878 if (PyObject_SetAttrString(error, "code", code) == -1) {
2879 Py_DECREF(error);
2880 Py_DECREF(code);
2881 return;
2882 }
2883 Py_DECREF(code);
2884
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002885 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002886 if (!position) {
2887 Py_DECREF(error);
2888 return;
2889 }
2890 if (PyObject_SetAttrString(error, "position", position) == -1) {
2891 Py_DECREF(error);
2892 Py_DECREF(position);
2893 return;
2894 }
2895 Py_DECREF(position);
2896
Eli Bendersky532d03e2013-08-10 08:00:39 -07002897 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002898 Py_DECREF(error);
2899}
2900
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002901/* -------------------------------------------------------------------- */
2902/* handlers */
2903
2904static void
2905expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2906 int data_len)
2907{
2908 PyObject* key;
2909 PyObject* value;
2910 PyObject* res;
2911
2912 if (data_len < 2 || data_in[0] != '&')
2913 return;
2914
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002915 if (PyErr_Occurred())
2916 return;
2917
Neal Norwitz0269b912007-08-08 06:56:02 +00002918 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002919 if (!key)
2920 return;
2921
2922 value = PyDict_GetItem(self->entity, key);
2923
2924 if (value) {
2925 if (TreeBuilder_CheckExact(self->target))
2926 res = treebuilder_handle_data(
2927 (TreeBuilderObject*) self->target, value
2928 );
2929 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002930 res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002931 else
2932 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002933 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002934 } else if (!PyErr_Occurred()) {
2935 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002936 char message[128] = "undefined entity ";
2937 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002938 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002939 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002940 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002941 EXPAT(GetErrorColumnNumber)(self->parser),
2942 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002943 );
2944 }
2945
2946 Py_DECREF(key);
2947}
2948
2949static void
2950expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2951 const XML_Char **attrib_in)
2952{
2953 PyObject* res;
2954 PyObject* tag;
2955 PyObject* attrib;
2956 int ok;
2957
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002958 if (PyErr_Occurred())
2959 return;
2960
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002961 /* tag name */
2962 tag = makeuniversal(self, tag_in);
2963 if (!tag)
2964 return; /* parser will look for errors */
2965
2966 /* attributes */
2967 if (attrib_in[0]) {
2968 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002969 if (!attrib) {
2970 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002971 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002972 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002973 while (attrib_in[0] && attrib_in[1]) {
2974 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002975 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002976 if (!key || !value) {
2977 Py_XDECREF(value);
2978 Py_XDECREF(key);
2979 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002980 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002981 return;
2982 }
2983 ok = PyDict_SetItem(attrib, key, value);
2984 Py_DECREF(value);
2985 Py_DECREF(key);
2986 if (ok < 0) {
2987 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002988 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002989 return;
2990 }
2991 attrib_in += 2;
2992 }
2993 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002994 Py_INCREF(Py_None);
2995 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002996 }
2997
2998 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002999 /* shortcut */
3000 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3001 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003002 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003003 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003004 if (attrib == Py_None) {
3005 Py_DECREF(attrib);
3006 attrib = PyDict_New();
3007 if (!attrib) {
3008 Py_DECREF(tag);
3009 return;
3010 }
3011 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003012 res = PyObject_CallFunctionObjArgs(self->handle_start,
3013 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003014 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003015 res = NULL;
3016
3017 Py_DECREF(tag);
3018 Py_DECREF(attrib);
3019
3020 Py_XDECREF(res);
3021}
3022
3023static void
3024expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3025 int data_len)
3026{
3027 PyObject* data;
3028 PyObject* res;
3029
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003030 if (PyErr_Occurred())
3031 return;
3032
Neal Norwitz0269b912007-08-08 06:56:02 +00003033 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003034 if (!data)
3035 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003036
3037 if (TreeBuilder_CheckExact(self->target))
3038 /* shortcut */
3039 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3040 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003041 res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003042 else
3043 res = NULL;
3044
3045 Py_DECREF(data);
3046
3047 Py_XDECREF(res);
3048}
3049
3050static void
3051expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3052{
3053 PyObject* tag;
3054 PyObject* res = NULL;
3055
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003056 if (PyErr_Occurred())
3057 return;
3058
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003059 if (TreeBuilder_CheckExact(self->target))
3060 /* shortcut */
3061 /* the standard tree builder doesn't look at the end tag */
3062 res = treebuilder_handle_end(
3063 (TreeBuilderObject*) self->target, Py_None
3064 );
3065 else if (self->handle_end) {
3066 tag = makeuniversal(self, tag_in);
3067 if (tag) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003068 res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003069 Py_DECREF(tag);
3070 }
3071 }
3072
3073 Py_XDECREF(res);
3074}
3075
3076static void
3077expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3078 const XML_Char *uri)
3079{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003080 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3081 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003082
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003083 if (PyErr_Occurred())
3084 return;
3085
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003086 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003087 return;
3088
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003089 if (!uri)
3090 uri = "";
3091 if (!prefix)
3092 prefix = "";
3093
3094 parcel = Py_BuildValue("ss", prefix, uri);
3095 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003096 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003097 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3098 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003099}
3100
3101static void
3102expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3103{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003104 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3105
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003106 if (PyErr_Occurred())
3107 return;
3108
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003109 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003110 return;
3111
3112 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003113}
3114
3115static void
3116expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3117{
3118 PyObject* comment;
3119 PyObject* res;
3120
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003121 if (PyErr_Occurred())
3122 return;
3123
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003124 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003125 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003126 if (comment) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003127 res = PyObject_CallFunctionObjArgs(self->handle_comment,
3128 comment, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003129 Py_XDECREF(res);
3130 Py_DECREF(comment);
3131 }
3132 }
3133}
3134
Eli Bendersky45839902013-01-13 05:14:47 -08003135static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003136expat_start_doctype_handler(XMLParserObject *self,
3137 const XML_Char *doctype_name,
3138 const XML_Char *sysid,
3139 const XML_Char *pubid,
3140 int has_internal_subset)
3141{
3142 PyObject *self_pyobj = (PyObject *)self;
3143 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3144 PyObject *parser_doctype = NULL;
3145 PyObject *res = NULL;
3146
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003147 if (PyErr_Occurred())
3148 return;
3149
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003150 doctype_name_obj = makeuniversal(self, doctype_name);
3151 if (!doctype_name_obj)
3152 return;
3153
3154 if (sysid) {
3155 sysid_obj = makeuniversal(self, sysid);
3156 if (!sysid_obj) {
3157 Py_DECREF(doctype_name_obj);
3158 return;
3159 }
3160 } else {
3161 Py_INCREF(Py_None);
3162 sysid_obj = Py_None;
3163 }
3164
3165 if (pubid) {
3166 pubid_obj = makeuniversal(self, pubid);
3167 if (!pubid_obj) {
3168 Py_DECREF(doctype_name_obj);
3169 Py_DECREF(sysid_obj);
3170 return;
3171 }
3172 } else {
3173 Py_INCREF(Py_None);
3174 pubid_obj = Py_None;
3175 }
3176
3177 /* If the target has a handler for doctype, call it. */
3178 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003179 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3180 doctype_name_obj, pubid_obj,
3181 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003182 Py_CLEAR(res);
3183 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003184 else {
3185 /* Now see if the parser itself has a doctype method. If yes and it's
3186 * a custom method, call it but warn about deprecation. If it's only
3187 * the vanilla XMLParser method, do nothing.
3188 */
3189 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3190 if (parser_doctype &&
3191 !(PyCFunction_Check(parser_doctype) &&
3192 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3193 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003194 (PyCFunction) _elementtree_XMLParser_doctype)) {
3195 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3196 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003197 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003198 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003199 Py_DECREF(res);
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003200 res = PyObject_CallFunctionObjArgs(parser_doctype,
3201 doctype_name_obj, pubid_obj,
3202 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003203 Py_CLEAR(res);
3204 }
3205 }
3206
3207clear:
3208 Py_XDECREF(parser_doctype);
3209 Py_DECREF(doctype_name_obj);
3210 Py_DECREF(pubid_obj);
3211 Py_DECREF(sysid_obj);
3212}
3213
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003214static void
3215expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3216 const XML_Char* data_in)
3217{
3218 PyObject* target;
3219 PyObject* data;
3220 PyObject* res;
3221
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003222 if (PyErr_Occurred())
3223 return;
3224
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003225 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003226 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3227 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003228 if (target && data) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003229 res = PyObject_CallFunctionObjArgs(self->handle_pi,
3230 target, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003231 Py_XDECREF(res);
3232 Py_DECREF(data);
3233 Py_DECREF(target);
3234 } else {
3235 Py_XDECREF(data);
3236 Py_XDECREF(target);
3237 }
3238 }
3239}
3240
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003241/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003242
Eli Bendersky52467b12012-06-01 07:13:08 +03003243static PyObject *
3244xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003245{
Eli Bendersky52467b12012-06-01 07:13:08 +03003246 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3247 if (self) {
3248 self->parser = NULL;
3249 self->target = self->entity = self->names = NULL;
3250 self->handle_start = self->handle_data = self->handle_end = NULL;
3251 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003252 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003253 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003254 return (PyObject *)self;
3255}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003256
scoderc8d8e152017-09-14 22:00:03 +02003257static int
3258ignore_attribute_error(PyObject *value)
3259{
3260 if (value == NULL) {
3261 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3262 return -1;
3263 }
3264 PyErr_Clear();
3265 }
3266 return 0;
3267}
3268
Serhiy Storchakacb985562015-05-04 15:32:48 +03003269/*[clinic input]
3270_elementtree.XMLParser.__init__
3271
3272 html: object = NULL
3273 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003274 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003275
3276[clinic start generated code]*/
3277
Eli Bendersky52467b12012-06-01 07:13:08 +03003278static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003279_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3280 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003281/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003282{
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003283 if (html != NULL) {
3284 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3285 "The html argument of XMLParser() is deprecated",
3286 1) < 0) {
3287 return -1;
3288 }
3289 }
3290
Serhiy Storchakacb985562015-05-04 15:32:48 +03003291 self->entity = PyDict_New();
3292 if (!self->entity)
3293 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003294
Serhiy Storchakacb985562015-05-04 15:32:48 +03003295 self->names = PyDict_New();
3296 if (!self->names) {
3297 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003298 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003299 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003300
Serhiy Storchakacb985562015-05-04 15:32:48 +03003301 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3302 if (!self->parser) {
3303 Py_CLEAR(self->entity);
3304 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003305 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003306 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003307 }
3308
Eli Bendersky52467b12012-06-01 07:13:08 +03003309 if (target) {
3310 Py_INCREF(target);
3311 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003312 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003313 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003314 Py_CLEAR(self->entity);
3315 Py_CLEAR(self->names);
3316 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003317 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003318 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003319 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003320 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003321
Serhiy Storchakacb985562015-05-04 15:32:48 +03003322 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003323 if (ignore_attribute_error(self->handle_start)) {
3324 return -1;
3325 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003326 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003327 if (ignore_attribute_error(self->handle_data)) {
3328 return -1;
3329 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003330 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003331 if (ignore_attribute_error(self->handle_end)) {
3332 return -1;
3333 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003334 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003335 if (ignore_attribute_error(self->handle_comment)) {
3336 return -1;
3337 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003338 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003339 if (ignore_attribute_error(self->handle_pi)) {
3340 return -1;
3341 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003342 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003343 if (ignore_attribute_error(self->handle_close)) {
3344 return -1;
3345 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003346 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003347 if (ignore_attribute_error(self->handle_doctype)) {
3348 return -1;
3349 }
Eli Bendersky45839902013-01-13 05:14:47 -08003350
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003351 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003352 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003353 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003354 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003355 (XML_StartElementHandler) expat_start_handler,
3356 (XML_EndElementHandler) expat_end_handler
3357 );
3358 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003359 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003360 (XML_DefaultHandler) expat_default_handler
3361 );
3362 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003363 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003364 (XML_CharacterDataHandler) expat_data_handler
3365 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003366 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003367 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003368 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003369 (XML_CommentHandler) expat_comment_handler
3370 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003371 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003372 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003373 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003374 (XML_ProcessingInstructionHandler) expat_pi_handler
3375 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003376 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003377 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003378 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3379 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003380 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003381 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003382 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003383 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003384
Eli Bendersky52467b12012-06-01 07:13:08 +03003385 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003386}
3387
Eli Bendersky52467b12012-06-01 07:13:08 +03003388static int
3389xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3390{
3391 Py_VISIT(self->handle_close);
3392 Py_VISIT(self->handle_pi);
3393 Py_VISIT(self->handle_comment);
3394 Py_VISIT(self->handle_end);
3395 Py_VISIT(self->handle_data);
3396 Py_VISIT(self->handle_start);
3397
3398 Py_VISIT(self->target);
3399 Py_VISIT(self->entity);
3400 Py_VISIT(self->names);
3401
3402 return 0;
3403}
3404
3405static int
3406xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003407{
Victor Stinnere727d412017-09-18 05:29:37 -07003408 if (self->parser != NULL) {
3409 XML_Parser parser = self->parser;
3410 self->parser = NULL;
3411 EXPAT(ParserFree)(parser);
3412 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003413
Antoine Pitrouc1948842012-10-01 23:40:37 +02003414 Py_CLEAR(self->handle_close);
3415 Py_CLEAR(self->handle_pi);
3416 Py_CLEAR(self->handle_comment);
3417 Py_CLEAR(self->handle_end);
3418 Py_CLEAR(self->handle_data);
3419 Py_CLEAR(self->handle_start);
3420 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003421
Antoine Pitrouc1948842012-10-01 23:40:37 +02003422 Py_CLEAR(self->target);
3423 Py_CLEAR(self->entity);
3424 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003425
Eli Bendersky52467b12012-06-01 07:13:08 +03003426 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003427}
3428
Eli Bendersky52467b12012-06-01 07:13:08 +03003429static void
3430xmlparser_dealloc(XMLParserObject* self)
3431{
3432 PyObject_GC_UnTrack(self);
3433 xmlparser_gc_clear(self);
3434 Py_TYPE(self)->tp_free((PyObject *)self);
3435}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003436
3437LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003438expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003439{
3440 int ok;
3441
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003442 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003443 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3444
3445 if (PyErr_Occurred())
3446 return NULL;
3447
3448 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003449 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003450 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003451 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003452 EXPAT(GetErrorColumnNumber)(self->parser),
3453 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003454 );
3455 return NULL;
3456 }
3457
3458 Py_RETURN_NONE;
3459}
3460
Serhiy Storchakacb985562015-05-04 15:32:48 +03003461/*[clinic input]
3462_elementtree.XMLParser.close
3463
3464[clinic start generated code]*/
3465
3466static PyObject *
3467_elementtree_XMLParser_close_impl(XMLParserObject *self)
3468/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003469{
3470 /* end feeding data to parser */
3471
3472 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003473 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003474 if (!res)
3475 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003476
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003477 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003478 Py_DECREF(res);
3479 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003480 }
3481 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003482 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003483 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003484 }
3485 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003486 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003487 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003488}
3489
Serhiy Storchakacb985562015-05-04 15:32:48 +03003490/*[clinic input]
3491_elementtree.XMLParser.feed
3492
3493 data: object
3494 /
3495
3496[clinic start generated code]*/
3497
3498static PyObject *
3499_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3500/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003501{
3502 /* feed data to parser */
3503
Serhiy Storchakacb985562015-05-04 15:32:48 +03003504 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003505 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003506 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3507 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003508 return NULL;
3509 if (data_len > INT_MAX) {
3510 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3511 return NULL;
3512 }
3513 /* Explicitly set UTF-8 encoding. Return code ignored. */
3514 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003515 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003516 }
3517 else {
3518 Py_buffer view;
3519 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003520 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003521 return NULL;
3522 if (view.len > INT_MAX) {
3523 PyBuffer_Release(&view);
3524 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3525 return NULL;
3526 }
3527 res = expat_parse(self, view.buf, (int)view.len, 0);
3528 PyBuffer_Release(&view);
3529 return res;
3530 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003531}
3532
Serhiy Storchakacb985562015-05-04 15:32:48 +03003533/*[clinic input]
3534_elementtree.XMLParser._parse_whole
3535
3536 file: object
3537 /
3538
3539[clinic start generated code]*/
3540
3541static PyObject *
3542_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3543/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003544{
Eli Benderskya3699232013-05-19 18:47:23 -07003545 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003546 PyObject* reader;
3547 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003548 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003549 PyObject* res;
3550
Serhiy Storchakacb985562015-05-04 15:32:48 +03003551 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003552 if (!reader)
3553 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003554
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003555 /* read from open file object */
3556 for (;;) {
3557
3558 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3559
3560 if (!buffer) {
3561 /* read failed (e.g. due to KeyboardInterrupt) */
3562 Py_DECREF(reader);
3563 return NULL;
3564 }
3565
Eli Benderskyf996e772012-03-16 05:53:30 +02003566 if (PyUnicode_CheckExact(buffer)) {
3567 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003568 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003569 Py_DECREF(buffer);
3570 break;
3571 }
3572 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003573 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003574 if (!temp) {
3575 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003576 Py_DECREF(reader);
3577 return NULL;
3578 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003579 buffer = temp;
3580 }
3581 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003582 Py_DECREF(buffer);
3583 break;
3584 }
3585
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003586 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3587 Py_DECREF(buffer);
3588 Py_DECREF(reader);
3589 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3590 return NULL;
3591 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003592 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003593 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003594 );
3595
3596 Py_DECREF(buffer);
3597
3598 if (!res) {
3599 Py_DECREF(reader);
3600 return NULL;
3601 }
3602 Py_DECREF(res);
3603
3604 }
3605
3606 Py_DECREF(reader);
3607
3608 res = expat_parse(self, "", 0, 1);
3609
3610 if (res && TreeBuilder_CheckExact(self->target)) {
3611 Py_DECREF(res);
3612 return treebuilder_done((TreeBuilderObject*) self->target);
3613 }
3614
3615 return res;
3616}
3617
Serhiy Storchakacb985562015-05-04 15:32:48 +03003618/*[clinic input]
3619_elementtree.XMLParser.doctype
3620
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003621 name: object
3622 pubid: object
3623 system: object
3624 /
3625
Serhiy Storchakacb985562015-05-04 15:32:48 +03003626[clinic start generated code]*/
3627
3628static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003629_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3630 PyObject *pubid, PyObject *system)
3631/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003632{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003633 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3634 "This method of XMLParser is deprecated. Define"
3635 " doctype() method on the TreeBuilder target.",
3636 1) < 0) {
3637 return NULL;
3638 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003639 Py_RETURN_NONE;
3640}
3641
Serhiy Storchakacb985562015-05-04 15:32:48 +03003642/*[clinic input]
3643_elementtree.XMLParser._setevents
3644
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003645 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003646 events_to_report: object = None
3647 /
3648
3649[clinic start generated code]*/
3650
3651static PyObject *
3652_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3653 PyObject *events_queue,
3654 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003655/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003656{
3657 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003658 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003659 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003660 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003661
3662 if (!TreeBuilder_CheckExact(self->target)) {
3663 PyErr_SetString(
3664 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003665 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003666 "targets"
3667 );
3668 return NULL;
3669 }
3670
3671 target = (TreeBuilderObject*) self->target;
3672
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003673 events_append = PyObject_GetAttrString(events_queue, "append");
3674 if (events_append == NULL)
3675 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003676 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003677
3678 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003679 Py_CLEAR(target->start_event_obj);
3680 Py_CLEAR(target->end_event_obj);
3681 Py_CLEAR(target->start_ns_event_obj);
3682 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003683
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003684 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003685 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003686 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003687 Py_RETURN_NONE;
3688 }
3689
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003690 if (!(events_seq = PySequence_Fast(events_to_report,
3691 "events must be a sequence"))) {
3692 return NULL;
3693 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003694
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03003695 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003696 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02003697 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003698 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003699 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003700 } else if (PyBytes_Check(event_name_obj)) {
3701 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003702 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003703 if (event_name == NULL) {
3704 Py_DECREF(events_seq);
3705 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3706 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003707 }
3708
3709 Py_INCREF(event_name_obj);
3710 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003711 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003712 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003713 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003714 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003715 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003716 EXPAT(SetNamespaceDeclHandler)(
3717 self->parser,
3718 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3719 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3720 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003721 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003722 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003723 EXPAT(SetNamespaceDeclHandler)(
3724 self->parser,
3725 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3726 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3727 );
3728 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003729 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003730 Py_DECREF(events_seq);
3731 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003732 return NULL;
3733 }
3734 }
3735
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003736 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003737 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003738}
3739
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003740static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003741xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003742{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003743 if (PyUnicode_Check(nameobj)) {
3744 PyObject* res;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003745 if (_PyUnicode_EqualToASCIIString(nameobj, "entity"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003746 res = self->entity;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003747 else if (_PyUnicode_EqualToASCIIString(nameobj, "target"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003748 res = self->target;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003749 else if (_PyUnicode_EqualToASCIIString(nameobj, "version")) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00003750 return PyUnicode_FromFormat(
3751 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003752 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003753 }
3754 else
3755 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003756
Alexander Belopolskye239d232010-12-08 23:31:48 +00003757 Py_INCREF(res);
3758 return res;
3759 }
3760 generic:
3761 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003762}
3763
Serhiy Storchakacb985562015-05-04 15:32:48 +03003764#include "clinic/_elementtree.c.h"
3765
3766static PyMethodDef element_methods[] = {
3767
3768 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3769
3770 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3771 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3772
3773 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3774 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3775 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3776
3777 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3778 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3779 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3780 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3781
3782 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3783 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3784 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3785
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003786 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003787 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3788
3789 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3790 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3791
3792 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3793
3794 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3795 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3796 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3797 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3798 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3799
3800 {NULL, NULL}
3801};
3802
3803static PyMappingMethods element_as_mapping = {
3804 (lenfunc) element_length,
3805 (binaryfunc) element_subscr,
3806 (objobjargproc) element_ass_subscr,
3807};
3808
Serhiy Storchakadde08152015-11-25 15:28:13 +02003809static PyGetSetDef element_getsetlist[] = {
3810 {"tag",
3811 (getter)element_tag_getter,
3812 (setter)element_tag_setter,
3813 "A string identifying what kind of data this element represents"},
3814 {"text",
3815 (getter)element_text_getter,
3816 (setter)element_text_setter,
3817 "A string of text directly after the start tag, or None"},
3818 {"tail",
3819 (getter)element_tail_getter,
3820 (setter)element_tail_setter,
3821 "A string of text directly after the end tag, or None"},
3822 {"attrib",
3823 (getter)element_attrib_getter,
3824 (setter)element_attrib_setter,
3825 "A dictionary containing the element's attributes"},
3826 {NULL},
3827};
3828
Serhiy Storchakacb985562015-05-04 15:32:48 +03003829static PyTypeObject Element_Type = {
3830 PyVarObject_HEAD_INIT(NULL, 0)
3831 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3832 /* methods */
3833 (destructor)element_dealloc, /* tp_dealloc */
3834 0, /* tp_print */
3835 0, /* tp_getattr */
3836 0, /* tp_setattr */
3837 0, /* tp_reserved */
3838 (reprfunc)element_repr, /* tp_repr */
3839 0, /* tp_as_number */
3840 &element_as_sequence, /* tp_as_sequence */
3841 &element_as_mapping, /* tp_as_mapping */
3842 0, /* tp_hash */
3843 0, /* tp_call */
3844 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003845 PyObject_GenericGetAttr, /* tp_getattro */
3846 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003847 0, /* tp_as_buffer */
3848 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3849 /* tp_flags */
3850 0, /* tp_doc */
3851 (traverseproc)element_gc_traverse, /* tp_traverse */
3852 (inquiry)element_gc_clear, /* tp_clear */
3853 0, /* tp_richcompare */
3854 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3855 0, /* tp_iter */
3856 0, /* tp_iternext */
3857 element_methods, /* tp_methods */
3858 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003859 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003860 0, /* tp_base */
3861 0, /* tp_dict */
3862 0, /* tp_descr_get */
3863 0, /* tp_descr_set */
3864 0, /* tp_dictoffset */
3865 (initproc)element_init, /* tp_init */
3866 PyType_GenericAlloc, /* tp_alloc */
3867 element_new, /* tp_new */
3868 0, /* tp_free */
3869};
3870
3871static PyMethodDef treebuilder_methods[] = {
3872 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3873 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3874 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3875 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3876 {NULL, NULL}
3877};
3878
3879static PyTypeObject TreeBuilder_Type = {
3880 PyVarObject_HEAD_INIT(NULL, 0)
3881 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3882 /* methods */
3883 (destructor)treebuilder_dealloc, /* tp_dealloc */
3884 0, /* tp_print */
3885 0, /* tp_getattr */
3886 0, /* tp_setattr */
3887 0, /* tp_reserved */
3888 0, /* tp_repr */
3889 0, /* tp_as_number */
3890 0, /* tp_as_sequence */
3891 0, /* tp_as_mapping */
3892 0, /* tp_hash */
3893 0, /* tp_call */
3894 0, /* tp_str */
3895 0, /* tp_getattro */
3896 0, /* tp_setattro */
3897 0, /* tp_as_buffer */
3898 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3899 /* tp_flags */
3900 0, /* tp_doc */
3901 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3902 (inquiry)treebuilder_gc_clear, /* tp_clear */
3903 0, /* tp_richcompare */
3904 0, /* tp_weaklistoffset */
3905 0, /* tp_iter */
3906 0, /* tp_iternext */
3907 treebuilder_methods, /* tp_methods */
3908 0, /* tp_members */
3909 0, /* tp_getset */
3910 0, /* tp_base */
3911 0, /* tp_dict */
3912 0, /* tp_descr_get */
3913 0, /* tp_descr_set */
3914 0, /* tp_dictoffset */
3915 _elementtree_TreeBuilder___init__, /* tp_init */
3916 PyType_GenericAlloc, /* tp_alloc */
3917 treebuilder_new, /* tp_new */
3918 0, /* tp_free */
3919};
3920
3921static PyMethodDef xmlparser_methods[] = {
3922 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3923 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3924 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3925 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3926 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3927 {NULL, NULL}
3928};
3929
Neal Norwitz227b5332006-03-22 09:28:35 +00003930static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003931 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003932 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003933 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003934 (destructor)xmlparser_dealloc, /* tp_dealloc */
3935 0, /* tp_print */
3936 0, /* tp_getattr */
3937 0, /* tp_setattr */
3938 0, /* tp_reserved */
3939 0, /* tp_repr */
3940 0, /* tp_as_number */
3941 0, /* tp_as_sequence */
3942 0, /* tp_as_mapping */
3943 0, /* tp_hash */
3944 0, /* tp_call */
3945 0, /* tp_str */
3946 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3947 0, /* tp_setattro */
3948 0, /* tp_as_buffer */
3949 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3950 /* tp_flags */
3951 0, /* tp_doc */
3952 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3953 (inquiry)xmlparser_gc_clear, /* tp_clear */
3954 0, /* tp_richcompare */
3955 0, /* tp_weaklistoffset */
3956 0, /* tp_iter */
3957 0, /* tp_iternext */
3958 xmlparser_methods, /* tp_methods */
3959 0, /* tp_members */
3960 0, /* tp_getset */
3961 0, /* tp_base */
3962 0, /* tp_dict */
3963 0, /* tp_descr_get */
3964 0, /* tp_descr_set */
3965 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003966 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003967 PyType_GenericAlloc, /* tp_alloc */
3968 xmlparser_new, /* tp_new */
3969 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003970};
3971
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003972/* ==================================================================== */
3973/* python module interface */
3974
3975static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003976 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003977 {NULL, NULL}
3978};
3979
Martin v. Löwis1a214512008-06-11 05:26:20 +00003980
Eli Bendersky532d03e2013-08-10 08:00:39 -07003981static struct PyModuleDef elementtreemodule = {
3982 PyModuleDef_HEAD_INIT,
3983 "_elementtree",
3984 NULL,
3985 sizeof(elementtreestate),
3986 _functions,
3987 NULL,
3988 elementtree_traverse,
3989 elementtree_clear,
3990 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003991};
3992
Neal Norwitzf6657e62006-12-28 04:47:50 +00003993PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003994PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003995{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003996 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003997 elementtreestate *st;
3998
3999 m = PyState_FindModule(&elementtreemodule);
4000 if (m) {
4001 Py_INCREF(m);
4002 return m;
4003 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004004
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004005 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02004006 if (PyType_Ready(&ElementIter_Type) < 0)
4007 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004008 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004009 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004010 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004011 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004012 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004013 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004014
Eli Bendersky532d03e2013-08-10 08:00:39 -07004015 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00004016 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00004017 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004018 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00004019
Eli Bendersky828efde2012-04-05 05:40:58 +03004020 if (!(temp = PyImport_ImportModule("copy")))
4021 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004022 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03004023 Py_XDECREF(temp);
4024
Victor Stinnerb136f112017-07-10 22:28:02 +02004025 if (st->deepcopy_obj == NULL) {
4026 return NULL;
4027 }
4028
4029 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004030 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004031 return NULL;
4032
Eli Bendersky20d41742012-06-01 09:48:37 +03004033 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004034 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4035 if (expat_capi) {
4036 /* check that it's usable */
4037 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004038 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004039 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4040 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004041 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004042 PyErr_SetString(PyExc_ImportError,
4043 "pyexpat version is incompatible");
4044 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004045 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004046 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004047 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004048 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004049
Eli Bendersky532d03e2013-08-10 08:00:39 -07004050 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004051 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004052 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004053 Py_INCREF(st->parseerror_obj);
4054 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004055
Eli Bendersky092af1f2012-03-04 07:14:03 +02004056 Py_INCREF((PyObject *)&Element_Type);
4057 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4058
Eli Bendersky58d548d2012-05-29 15:45:16 +03004059 Py_INCREF((PyObject *)&TreeBuilder_Type);
4060 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4061
Eli Bendersky52467b12012-06-01 07:13:08 +03004062 Py_INCREF((PyObject *)&XMLParser_Type);
4063 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004064
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004065 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004066}