blob: 1c72c653832690525efad6f715729f165d0870ae [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000022/* An element can hold this many children without extra memory
23 allocations. */
24#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
27 have no more than the given number of children. Set this to zero
28 to minimize the size of the element structure itself (this only
29 helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  Flip the condition to 1 to keep a running
   byte count and print it on every ALLOC/RELEASE; by default both macros
   expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += (size); \
        printf("%8d - %s\n", memory, (comment)); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= (size); \
        printf("%8d - %s\n", memory, (comment)); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) introduces a file-local helper returning
   `type`; MSVC builds additionally ask for inlining and __fastcall */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55
/* The 'join' flag lives in the lowest bit of the text/tail object
   pointers, so every use of text and tail as object pointers must be
   wrapped in JOIN_OBJ. see comments in the ElementObject definition for
   more info.

   JOIN_OBJ(p)       - the object pointer with the flag bit masked off
   JOIN_GET(p)       - the flag bit (0 or 1)
   JOIN_SET(p, flag) - the object pointer of p combined with the given flag */
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
Ronald Oussoren138d0802013-07-19 11:11:25 +020083/* Types defined by this extension */
84static PyTypeObject Element_Type;
85static PyTypeObject ElementIter_Type;
86static PyTypeObject TreeBuilder_Type;
87static PyTypeObject XMLParser_Type;
88
89
/* Per-module state; PEP 3121.
 *
 * All three members are object references owned by the module: they are
 * visited by elementtree_traverse() and released by elementtree_clear().
 */
typedef struct {
    /* NOTE(review): presumably the ParseError exception object; it is
       assigned outside the code visible here - confirm */
    PyObject *parseerror_obj;
    /* callable used by deepcopy() for objects that have no fast path; it
       is invoked with (object, memo) */
    PyObject *deepcopy_obj;
    /* NOTE(review): presumably the ElementPath helper module; it is
       assigned outside the code visible here - confirm */
    PyObject *elementpath_obj;
} elementtreestate;
96
97static struct PyModuleDef elementtreemodule;
98
99/* Given a module object (assumed to be _elementtree), get its per-module
100 * state.
101 */
102#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
103
104/* Find the module instance imported in the currently running sub-interpreter
105 * and get its state.
106 */
107#define ET_STATE_GLOBAL \
108 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
109
110static int
111elementtree_clear(PyObject *m)
112{
113 elementtreestate *st = ET_STATE(m);
114 Py_CLEAR(st->parseerror_obj);
115 Py_CLEAR(st->deepcopy_obj);
116 Py_CLEAR(st->elementpath_obj);
117 return 0;
118}
119
120static int
121elementtree_traverse(PyObject *m, visitproc visit, void *arg)
122{
123 elementtreestate *st = ET_STATE(m);
124 Py_VISIT(st->parseerror_obj);
125 Py_VISIT(st->deepcopy_obj);
126 Py_VISIT(st->elementpath_obj);
127 return 0;
128}
129
130static void
131elementtree_free(void *m)
132{
133 elementtree_clear((PyObject *)m);
134}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135
136/* helpers */
137
138LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139list_join(PyObject* list)
140{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300141 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000143 PyObject* result;
144
Antoine Pitrouc1948842012-10-01 23:40:37 +0200145 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000146 if (!joiner)
147 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200148 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000150 return result;
151}
152
Eli Bendersky48d358b2012-05-30 17:57:50 +0300153/* Is the given object an empty dictionary?
154*/
155static int
156is_empty_dict(PyObject *obj)
157{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200158 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300159}
160
161
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000162/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200163/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000164
165typedef struct {
166
167 /* attributes (a dictionary object), or None if no attributes */
168 PyObject* attrib;
169
170 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200171 Py_ssize_t length; /* actual number of items */
172 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000173
174 /* this either points to _children or to a malloced buffer */
175 PyObject* *children;
176
177 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179} ElementObjectExtra;
180
181typedef struct {
182 PyObject_HEAD
183
184 /* element tag (a string). */
185 PyObject* tag;
186
187 /* text before first child. note that this is a tagged pointer;
188 use JOIN_OBJ to get the object pointer. the join flag is used
189 to distinguish lists created by the tree builder from lists
190 assigned to the attribute by application code; the former
191 should be joined before being returned to the user, the latter
192 should be left intact. */
193 PyObject* text;
194
195 /* text after this element, in parent. note that this is a tagged
196 pointer; use JOIN_OBJ to get the object pointer. */
197 PyObject* tail;
198
199 ElementObjectExtra* extra;
200
Eli Benderskyebf37a22012-04-03 22:02:37 +0300201 PyObject *weakreflist; /* For tp_weaklistoffset */
202
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000203} ElementObject;
204
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
Christian Heimes90aa7642007-12-19 02:45:37 +0000206#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Serhiy Storchakab11c5662018-10-14 10:32:19 +0300207#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
208
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209
210/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200211/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212
213LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200214create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215{
216 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200217 if (!self->extra) {
218 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000219 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200220 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000221
222 if (!attrib)
223 attrib = Py_None;
224
225 Py_INCREF(attrib);
226 self->extra->attrib = attrib;
227
228 self->extra->length = 0;
229 self->extra->allocated = STATIC_CHILDREN;
230 self->extra->children = self->extra->_children;
231
232 return 0;
233}
234
235LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200236dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000237{
Eli Bendersky08b85292012-04-04 15:55:07 +0300238 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200239 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300240
Eli Benderskyebf37a22012-04-03 22:02:37 +0300241 if (!self->extra)
242 return;
243
244 /* Avoid DECREFs calling into this code again (cycles, etc.)
245 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300246 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300247 self->extra = NULL;
248
249 Py_DECREF(myextra->attrib);
250
Eli Benderskyebf37a22012-04-03 22:02:37 +0300251 for (i = 0; i < myextra->length; i++)
252 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000253
Eli Benderskyebf37a22012-04-03 22:02:37 +0300254 if (myextra->children != myextra->_children)
255 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256
Eli Benderskyebf37a22012-04-03 22:02:37 +0300257 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000258}
259
Eli Bendersky092af1f2012-03-04 07:14:03 +0200260/* Convenience internal function to create new Element objects with the given
261 * tag and attributes.
262*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000263LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200264create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000265{
266 ElementObject* self;
267
Eli Bendersky0192ba32012-03-30 16:38:33 +0300268 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000269 if (self == NULL)
270 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000271 self->extra = NULL;
272
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273 Py_INCREF(tag);
274 self->tag = tag;
275
276 Py_INCREF(Py_None);
277 self->text = Py_None;
278
279 Py_INCREF(Py_None);
280 self->tail = Py_None;
281
Eli Benderskyebf37a22012-04-03 22:02:37 +0300282 self->weakreflist = NULL;
283
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200284 ALLOC(sizeof(ElementObject), "create element");
285 PyObject_GC_Track(self);
286
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200287 if (attrib != Py_None && !is_empty_dict(attrib)) {
288 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200289 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200290 return NULL;
291 }
292 }
293
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000294 return (PyObject*) self;
295}
296
Eli Bendersky092af1f2012-03-04 07:14:03 +0200297static PyObject *
298element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
299{
300 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
301 if (e != NULL) {
302 Py_INCREF(Py_None);
303 e->tag = Py_None;
304
305 Py_INCREF(Py_None);
306 e->text = Py_None;
307
308 Py_INCREF(Py_None);
309 e->tail = Py_None;
310
311 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300312 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200313 }
314 return (PyObject *)e;
315}
316
Eli Bendersky737b1732012-05-29 06:02:56 +0300317/* Helper function for extracting the attrib dictionary from a keywords dict.
318 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800319 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300320 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700321 *
322 * Return a dictionary with the content of kwds merged into the content of
323 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300324 */
325static PyObject*
326get_attrib_from_keywords(PyObject *kwds)
327{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700328 PyObject *attrib_str = PyUnicode_FromString("attrib");
329 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300330
331 if (attrib) {
332 /* If attrib was found in kwds, copy its value and remove it from
333 * kwds
334 */
335 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700336 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300337 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
338 Py_TYPE(attrib)->tp_name);
339 return NULL;
340 }
341 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700342 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300343 } else {
344 attrib = PyDict_New();
345 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700346
347 Py_DECREF(attrib_str);
348
349 /* attrib can be NULL if PyDict_New failed */
350 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200351 if (PyDict_Update(attrib, kwds) < 0)
352 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300353 return attrib;
354}
355
Serhiy Storchakacb985562015-05-04 15:32:48 +0300356/*[clinic input]
357module _elementtree
358class _elementtree.Element "ElementObject *" "&Element_Type"
359class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
360class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
361[clinic start generated code]*/
362/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
363
Eli Bendersky092af1f2012-03-04 07:14:03 +0200364static int
365element_init(PyObject *self, PyObject *args, PyObject *kwds)
366{
367 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200368 PyObject *attrib = NULL;
369 ElementObject *self_elem;
370
371 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
372 return -1;
373
Eli Bendersky737b1732012-05-29 06:02:56 +0300374 if (attrib) {
375 /* attrib passed as positional arg */
376 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200377 if (!attrib)
378 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300379 if (kwds) {
380 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200381 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300382 return -1;
383 }
384 }
385 } else if (kwds) {
386 /* have keywords args */
387 attrib = get_attrib_from_keywords(kwds);
388 if (!attrib)
389 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200390 }
391
392 self_elem = (ElementObject *)self;
393
Antoine Pitrouc1948842012-10-01 23:40:37 +0200394 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200395 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200396 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200397 return -1;
398 }
399 }
400
Eli Bendersky48d358b2012-05-30 17:57:50 +0300401 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200402 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403
404 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300406 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200407
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300409 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200410
Eli Bendersky092af1f2012-03-04 07:14:03 +0200411 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300412 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413
414 return 0;
415}
416
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200418element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000419{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200420 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000421 PyObject* *children;
422
423 /* make sure self->children can hold the given number of extra
424 elements. set an exception and return -1 if allocation failed */
425
Victor Stinner5f0af232013-07-11 23:01:36 +0200426 if (!self->extra) {
427 if (create_extra(self, NULL) < 0)
428 return -1;
429 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000430
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200431 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000432
433 if (size > self->extra->allocated) {
434 /* use Python 2.4's list growth strategy */
435 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000436 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100437 * which needs at least 4 bytes.
438 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000439 * be safe.
440 */
441 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200442 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
443 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000444 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000445 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100446 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000447 * false alarm always assume at least one child to be safe.
448 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000449 children = PyObject_Realloc(self->extra->children,
450 size * sizeof(PyObject*));
451 if (!children)
452 goto nomemory;
453 } else {
454 children = PyObject_Malloc(size * sizeof(PyObject*));
455 if (!children)
456 goto nomemory;
457 /* copy existing children from static area to malloc buffer */
458 memcpy(children, self->extra->children,
459 self->extra->length * sizeof(PyObject*));
460 }
461 self->extra->children = children;
462 self->extra->allocated = size;
463 }
464
465 return 0;
466
467 nomemory:
468 PyErr_NoMemory();
469 return -1;
470}
471
472LOCAL(int)
473element_add_subelement(ElementObject* self, PyObject* element)
474{
475 /* add a child element to a parent */
476
477 if (element_resize(self, 1) < 0)
478 return -1;
479
480 Py_INCREF(element);
481 self->extra->children[self->extra->length] = element;
482
483 self->extra->length++;
484
485 return 0;
486}
487
488LOCAL(PyObject*)
489element_get_attrib(ElementObject* self)
490{
491 /* return borrowed reference to attrib dictionary */
492 /* note: this function assumes that the extra section exists */
493
494 PyObject* res = self->extra->attrib;
495
496 if (res == Py_None) {
497 /* create missing dictionary */
498 res = PyDict_New();
499 if (!res)
500 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200501 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000502 self->extra->attrib = res;
503 }
504
505 return res;
506}
507
508LOCAL(PyObject*)
509element_get_text(ElementObject* self)
510{
511 /* return borrowed reference to text attribute */
512
Serhiy Storchaka576def02017-03-30 09:47:31 +0300513 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000514
515 if (JOIN_GET(res)) {
516 res = JOIN_OBJ(res);
517 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300518 PyObject *tmp = list_join(res);
519 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000520 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300521 self->text = tmp;
522 Py_DECREF(res);
523 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000524 }
525 }
526
527 return res;
528}
529
530LOCAL(PyObject*)
531element_get_tail(ElementObject* self)
532{
533 /* return borrowed reference to text attribute */
534
Serhiy Storchaka576def02017-03-30 09:47:31 +0300535 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000536
537 if (JOIN_GET(res)) {
538 res = JOIN_OBJ(res);
539 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300540 PyObject *tmp = list_join(res);
541 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000542 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300543 self->tail = tmp;
544 Py_DECREF(res);
545 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000546 }
547 }
548
549 return res;
550}
551
552static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300553subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000554{
555 PyObject* elem;
556
557 ElementObject* parent;
558 PyObject* tag;
559 PyObject* attrib = NULL;
560 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
561 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800562 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800564 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000565
Eli Bendersky737b1732012-05-29 06:02:56 +0300566 if (attrib) {
567 /* attrib passed as positional arg */
568 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000569 if (!attrib)
570 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300571 if (kwds) {
572 if (PyDict_Update(attrib, kwds) < 0) {
573 return NULL;
574 }
575 }
576 } else if (kwds) {
577 /* have keyword args */
578 attrib = get_attrib_from_keywords(kwds);
579 if (!attrib)
580 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000581 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300582 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000583 Py_INCREF(Py_None);
584 attrib = Py_None;
585 }
586
Eli Bendersky092af1f2012-03-04 07:14:03 +0200587 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000588 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200589 if (elem == NULL)
590 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000591
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000592 if (element_add_subelement(parent, elem) < 0) {
593 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000594 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000595 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000596
597 return elem;
598}
599
Eli Bendersky0192ba32012-03-30 16:38:33 +0300600static int
601element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
602{
603 Py_VISIT(self->tag);
604 Py_VISIT(JOIN_OBJ(self->text));
605 Py_VISIT(JOIN_OBJ(self->tail));
606
607 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200608 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300609 Py_VISIT(self->extra->attrib);
610
611 for (i = 0; i < self->extra->length; ++i)
612 Py_VISIT(self->extra->children[i]);
613 }
614 return 0;
615}
616
617static int
618element_gc_clear(ElementObject *self)
619{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700621 _clear_joined_ptr(&self->text);
622 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300623
624 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300625 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300626 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300627 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300628 return 0;
629}
630
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000631static void
632element_dealloc(ElementObject* self)
633{
INADA Naokia6296d32017-08-24 14:55:17 +0900634 /* bpo-31095: UnTrack is needed before calling any callbacks */
Eli Bendersky0192ba32012-03-30 16:38:33 +0300635 PyObject_GC_UnTrack(self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200636 Py_TRASHCAN_SAFE_BEGIN(self)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300637
638 if (self->weakreflist != NULL)
639 PyObject_ClearWeakRefs((PyObject *) self);
640
Eli Bendersky0192ba32012-03-30 16:38:33 +0300641 /* element_gc_clear clears all references and deallocates extra
642 */
643 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000644
645 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200646 Py_TYPE(self)->tp_free((PyObject *)self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200647 Py_TRASHCAN_SAFE_END(self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000648}
649
650/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000651
Serhiy Storchakacb985562015-05-04 15:32:48 +0300652/*[clinic input]
653_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000654
Serhiy Storchakacb985562015-05-04 15:32:48 +0300655 subelement: object(subclass_of='&Element_Type')
656 /
657
658[clinic start generated code]*/
659
660static PyObject *
661_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
662/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
663{
664 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665 return NULL;
666
667 Py_RETURN_NONE;
668}
669
Serhiy Storchakacb985562015-05-04 15:32:48 +0300670/*[clinic input]
671_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000672
Serhiy Storchakacb985562015-05-04 15:32:48 +0300673[clinic start generated code]*/
674
675static PyObject *
676_elementtree_Element_clear_impl(ElementObject *self)
677/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
678{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300679 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000680
681 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300682 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000683
684 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300685 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000686
687 Py_RETURN_NONE;
688}
689
Serhiy Storchakacb985562015-05-04 15:32:48 +0300690/*[clinic input]
691_elementtree.Element.__copy__
692
693[clinic start generated code]*/
694
695static PyObject *
696_elementtree_Element___copy___impl(ElementObject *self)
697/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000698{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200699 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000700 ElementObject* element;
701
Eli Bendersky092af1f2012-03-04 07:14:03 +0200702 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800703 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000704 if (!element)
705 return NULL;
706
Oren Milman39ecb9c2017-10-10 23:26:24 +0300707 Py_INCREF(JOIN_OBJ(self->text));
708 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000709
Oren Milman39ecb9c2017-10-10 23:26:24 +0300710 Py_INCREF(JOIN_OBJ(self->tail));
711 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000712
713 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000714 if (element_resize(element, self->extra->length) < 0) {
715 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000717 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000718
719 for (i = 0; i < self->extra->length; i++) {
720 Py_INCREF(self->extra->children[i]);
721 element->extra->children[i] = self->extra->children[i];
722 }
723
724 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000725 }
726
727 return (PyObject*) element;
728}
729
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200730/* Helper for a deep copy. */
731LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
732
Serhiy Storchakacb985562015-05-04 15:32:48 +0300733/*[clinic input]
734_elementtree.Element.__deepcopy__
735
Oren Milmand0568182017-09-12 17:39:15 +0300736 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300737 /
738
739[clinic start generated code]*/
740
741static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300742_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
743/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000744{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200745 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000746 ElementObject* element;
747 PyObject* tag;
748 PyObject* attrib;
749 PyObject* text;
750 PyObject* tail;
751 PyObject* id;
752
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000753 tag = deepcopy(self->tag, memo);
754 if (!tag)
755 return NULL;
756
757 if (self->extra) {
758 attrib = deepcopy(self->extra->attrib, memo);
759 if (!attrib) {
760 Py_DECREF(tag);
761 return NULL;
762 }
763 } else {
764 Py_INCREF(Py_None);
765 attrib = Py_None;
766 }
767
Eli Bendersky092af1f2012-03-04 07:14:03 +0200768 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000769
770 Py_DECREF(tag);
771 Py_DECREF(attrib);
772
773 if (!element)
774 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100775
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000776 text = deepcopy(JOIN_OBJ(self->text), memo);
777 if (!text)
778 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300779 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000780
781 tail = deepcopy(JOIN_OBJ(self->tail), memo);
782 if (!tail)
783 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300784 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000785
786 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000787 if (element_resize(element, self->extra->length) < 0)
788 goto error;
789
790 for (i = 0; i < self->extra->length; i++) {
791 PyObject* child = deepcopy(self->extra->children[i], memo);
792 if (!child) {
793 element->extra->length = i;
794 goto error;
795 }
796 element->extra->children[i] = child;
797 }
798
799 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000800 }
801
802 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700803 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000804 if (!id)
805 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000806
807 i = PyDict_SetItem(memo, id, (PyObject*) element);
808
809 Py_DECREF(id);
810
811 if (i < 0)
812 goto error;
813
814 return (PyObject*) element;
815
816 error:
817 Py_DECREF(element);
818 return NULL;
819}
820
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200821LOCAL(PyObject *)
822deepcopy(PyObject *object, PyObject *memo)
823{
824 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200825 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200826 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200827
828 /* Fast paths */
829 if (object == Py_None || PyUnicode_CheckExact(object)) {
830 Py_INCREF(object);
831 return object;
832 }
833
834 if (Py_REFCNT(object) == 1) {
835 if (PyDict_CheckExact(object)) {
836 PyObject *key, *value;
837 Py_ssize_t pos = 0;
838 int simple = 1;
839 while (PyDict_Next(object, &pos, &key, &value)) {
840 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
841 simple = 0;
842 break;
843 }
844 }
845 if (simple)
846 return PyDict_Copy(object);
847 /* Fall through to general case */
848 }
849 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300850 return _elementtree_Element___deepcopy___impl(
851 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200852 }
853 }
854
855 /* General case */
856 st = ET_STATE_GLOBAL;
857 if (!st->deepcopy_obj) {
858 PyErr_SetString(PyExc_RuntimeError,
859 "deepcopy helper not found");
860 return NULL;
861 }
862
Victor Stinner7fbac452016-08-20 01:34:44 +0200863 stack[0] = object;
864 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200865 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200866}
867
868
Serhiy Storchakacb985562015-05-04 15:32:48 +0300869/*[clinic input]
870_elementtree.Element.__sizeof__ -> Py_ssize_t
871
872[clinic start generated code]*/
873
874static Py_ssize_t
875_elementtree_Element___sizeof___impl(ElementObject *self)
876/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200877{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200878 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200879 if (self->extra) {
880 result += sizeof(ElementObjectExtra);
881 if (self->extra->children != self->extra->_children)
882 result += sizeof(PyObject*) * self->extra->allocated;
883 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300884 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200885}
886
Eli Bendersky698bdb22013-01-10 06:01:06 -0800887/* dict keys for getstate/setstate. */
888#define PICKLED_TAG "tag"
889#define PICKLED_CHILDREN "_children"
890#define PICKLED_ATTRIB "attrib"
891#define PICKLED_TAIL "tail"
892#define PICKLED_TEXT "text"
893
894/* __getstate__ returns a fabricated instance dict as in the pure-Python
895 * Element implementation, for interoperability/interchangeability. This
896 * makes the pure-Python implementation details an API, but (a) there aren't
897 * any unnecessary structures there; and (b) it buys compatibility with 3.2
898 * pickles. See issue #16076.
899 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300900/*[clinic input]
901_elementtree.Element.__getstate__
902
903[clinic start generated code]*/
904
Eli Bendersky698bdb22013-01-10 06:01:06 -0800905static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300906_elementtree_Element___getstate___impl(ElementObject *self)
907/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800908{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200909 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800910 PyObject *instancedict = NULL, *children;
911
912 /* Build a list of children. */
913 children = PyList_New(self->extra ? self->extra->length : 0);
914 if (!children)
915 return NULL;
916 for (i = 0; i < PyList_GET_SIZE(children); i++) {
917 PyObject *child = self->extra->children[i];
918 Py_INCREF(child);
919 PyList_SET_ITEM(children, i, child);
920 }
921
922 /* Construct the state object. */
923 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
924 if (noattrib)
925 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
926 PICKLED_TAG, self->tag,
927 PICKLED_CHILDREN, children,
928 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700929 PICKLED_TEXT, JOIN_OBJ(self->text),
930 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800931 else
932 instancedict = Py_BuildValue("{sOsOsOsOsO}",
933 PICKLED_TAG, self->tag,
934 PICKLED_CHILDREN, children,
935 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700936 PICKLED_TEXT, JOIN_OBJ(self->text),
937 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800938 if (instancedict) {
939 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800940 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800941 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800942 else {
943 for (i = 0; i < PyList_GET_SIZE(children); i++)
944 Py_DECREF(PyList_GET_ITEM(children, i));
945 Py_DECREF(children);
946
947 return NULL;
948 }
949}
950
951static PyObject *
952element_setstate_from_attributes(ElementObject *self,
953 PyObject *tag,
954 PyObject *attrib,
955 PyObject *text,
956 PyObject *tail,
957 PyObject *children)
958{
959 Py_ssize_t i, nchildren;
960
961 if (!tag) {
962 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
963 return NULL;
964 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800965
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200966 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300967 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800968
Oren Milman39ecb9c2017-10-10 23:26:24 +0300969 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
970 Py_INCREF(JOIN_OBJ(text));
971 _set_joined_ptr(&self->text, text);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800972
Oren Milman39ecb9c2017-10-10 23:26:24 +0300973 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
974 Py_INCREF(JOIN_OBJ(tail));
975 _set_joined_ptr(&self->tail, tail);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800976
977 /* Handle ATTRIB and CHILDREN. */
978 if (!children && !attrib)
979 Py_RETURN_NONE;
980
981 /* Compute 'nchildren'. */
982 if (children) {
983 if (!PyList_Check(children)) {
984 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
985 return NULL;
986 }
987 nchildren = PyList_Size(children);
988 }
989 else {
990 nchildren = 0;
991 }
992
993 /* Allocate 'extra'. */
994 if (element_resize(self, nchildren)) {
995 return NULL;
996 }
997 assert(self->extra && self->extra->allocated >= nchildren);
998
999 /* Copy children */
1000 for (i = 0; i < nchildren; i++) {
1001 self->extra->children[i] = PyList_GET_ITEM(children, i);
1002 Py_INCREF(self->extra->children[i]);
1003 }
1004
1005 self->extra->length = nchildren;
1006 self->extra->allocated = nchildren;
1007
1008 /* Stash attrib. */
1009 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001010 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001011 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001012 }
1013
1014 Py_RETURN_NONE;
1015}
1016
1017/* __setstate__ for Element instance from the Python implementation.
1018 * 'state' should be the instance dict.
1019 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001020
Eli Bendersky698bdb22013-01-10 06:01:06 -08001021static PyObject *
1022element_setstate_from_Python(ElementObject *self, PyObject *state)
1023{
1024 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1025 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1026 PyObject *args;
1027 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001028 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001029
Eli Bendersky698bdb22013-01-10 06:01:06 -08001030 tag = attrib = text = tail = children = NULL;
1031 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001032 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001033 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001034
1035 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1036 &attrib, &text, &tail, &children))
1037 retval = element_setstate_from_attributes(self, tag, attrib, text,
1038 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001039 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001040 retval = NULL;
1041
1042 Py_DECREF(args);
1043 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001044}
1045
Serhiy Storchakacb985562015-05-04 15:32:48 +03001046/*[clinic input]
1047_elementtree.Element.__setstate__
1048
1049 state: object
1050 /
1051
1052[clinic start generated code]*/
1053
Eli Bendersky698bdb22013-01-10 06:01:06 -08001054static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001055_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1056/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001057{
1058 if (!PyDict_CheckExact(state)) {
1059 PyErr_Format(PyExc_TypeError,
1060 "Don't know how to unpickle \"%.200R\" as an Element",
1061 state);
1062 return NULL;
1063 }
1064 else
1065 return element_setstate_from_Python(self, state);
1066}
1067
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001068LOCAL(int)
1069checkpath(PyObject* tag)
1070{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001071 Py_ssize_t i;
1072 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001073
1074 /* check if a tag contains an xpath character */
1075
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001076#define PATHCHAR(ch) \
1077 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001078
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001079 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001080 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1081 void *data = PyUnicode_DATA(tag);
1082 unsigned int kind = PyUnicode_KIND(tag);
1083 for (i = 0; i < len; i++) {
1084 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1085 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001086 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001087 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001088 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001089 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001090 return 1;
1091 }
1092 return 0;
1093 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001094 if (PyBytes_Check(tag)) {
1095 char *p = PyBytes_AS_STRING(tag);
1096 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001097 if (p[i] == '{')
1098 check = 0;
1099 else if (p[i] == '}')
1100 check = 1;
1101 else if (check && PATHCHAR(p[i]))
1102 return 1;
1103 }
1104 return 0;
1105 }
1106
1107 return 1; /* unknown type; might be path expression */
1108}
1109
Serhiy Storchakacb985562015-05-04 15:32:48 +03001110/*[clinic input]
1111_elementtree.Element.extend
1112
1113 elements: object
1114 /
1115
1116[clinic start generated code]*/
1117
1118static PyObject *
1119_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1120/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001121{
1122 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001123 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001124
Serhiy Storchakacb985562015-05-04 15:32:48 +03001125 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001126 if (!seq) {
1127 PyErr_Format(
1128 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001129 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001130 );
1131 return NULL;
1132 }
1133
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001134 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001135 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001136 Py_INCREF(element);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001137 if (!Element_Check(element)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001138 PyErr_Format(
1139 PyExc_TypeError,
1140 "expected an Element, not \"%.200s\"",
1141 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001142 Py_DECREF(seq);
1143 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001144 return NULL;
1145 }
1146
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001147 if (element_add_subelement(self, element) < 0) {
1148 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001149 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001150 return NULL;
1151 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001152 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001153 }
1154
1155 Py_DECREF(seq);
1156
1157 Py_RETURN_NONE;
1158}
1159
Serhiy Storchakacb985562015-05-04 15:32:48 +03001160/*[clinic input]
1161_elementtree.Element.find
1162
1163 path: object
1164 namespaces: object = None
1165
1166[clinic start generated code]*/
1167
1168static PyObject *
1169_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1170 PyObject *namespaces)
1171/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001172{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001173 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001174 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001175
Serhiy Storchakacb985562015-05-04 15:32:48 +03001176 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001177 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001178 return _PyObject_CallMethodIdObjArgs(
1179 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001180 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001181 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001182
1183 if (!self->extra)
1184 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001185
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001186 for (i = 0; i < self->extra->length; i++) {
1187 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001188 int rc;
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001189 if (!Element_Check(item))
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001190 continue;
1191 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001192 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001193 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001194 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001195 Py_DECREF(item);
1196 if (rc < 0)
1197 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001198 }
1199
1200 Py_RETURN_NONE;
1201}
1202
Serhiy Storchakacb985562015-05-04 15:32:48 +03001203/*[clinic input]
1204_elementtree.Element.findtext
1205
1206 path: object
1207 default: object = None
1208 namespaces: object = None
1209
1210[clinic start generated code]*/
1211
1212static PyObject *
1213_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1214 PyObject *default_value,
1215 PyObject *namespaces)
1216/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001217{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001218 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001219 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001220 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001221
Serhiy Storchakacb985562015-05-04 15:32:48 +03001222 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001223 return _PyObject_CallMethodIdObjArgs(
1224 st->elementpath_obj, &PyId_findtext,
1225 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001226 );
1227
1228 if (!self->extra) {
1229 Py_INCREF(default_value);
1230 return default_value;
1231 }
1232
1233 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001234 PyObject *item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001235 int rc;
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001236 if (!Element_Check(item))
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001237 continue;
1238 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001239 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001240 if (rc > 0) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001241 PyObject* text = element_get_text((ElementObject*)item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001242 if (text == Py_None) {
1243 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001244 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001245 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001246 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001247 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001248 return text;
1249 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001250 Py_DECREF(item);
1251 if (rc < 0)
1252 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001253 }
1254
1255 Py_INCREF(default_value);
1256 return default_value;
1257}
1258
Serhiy Storchakacb985562015-05-04 15:32:48 +03001259/*[clinic input]
1260_elementtree.Element.findall
1261
1262 path: object
1263 namespaces: object = None
1264
1265[clinic start generated code]*/
1266
1267static PyObject *
1268_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1269 PyObject *namespaces)
1270/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001271{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001272 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001273 PyObject* out;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001274 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001275
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001276 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001277 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001278 return _PyObject_CallMethodIdObjArgs(
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001279 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001280 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001281 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001282
1283 out = PyList_New(0);
1284 if (!out)
1285 return NULL;
1286
1287 if (!self->extra)
1288 return out;
1289
1290 for (i = 0; i < self->extra->length; i++) {
1291 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001292 int rc;
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001293 if (!Element_Check(item))
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001294 continue;
1295 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001296 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001297 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1298 Py_DECREF(item);
1299 Py_DECREF(out);
1300 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001301 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001302 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001303 }
1304
1305 return out;
1306}
1307
Serhiy Storchakacb985562015-05-04 15:32:48 +03001308/*[clinic input]
1309_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001310
Serhiy Storchakacb985562015-05-04 15:32:48 +03001311 path: object
1312 namespaces: object = None
1313
1314[clinic start generated code]*/
1315
1316static PyObject *
1317_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1318 PyObject *namespaces)
1319/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1320{
1321 PyObject* tag = path;
1322 _Py_IDENTIFIER(iterfind);
1323 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001324
Victor Stinnerf5616342016-12-09 15:26:00 +01001325 return _PyObject_CallMethodIdObjArgs(
1326 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001327}
1328
Serhiy Storchakacb985562015-05-04 15:32:48 +03001329/*[clinic input]
1330_elementtree.Element.get
1331
1332 key: object
1333 default: object = None
1334
1335[clinic start generated code]*/
1336
1337static PyObject *
1338_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1339 PyObject *default_value)
1340/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001341{
1342 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001343
1344 if (!self->extra || self->extra->attrib == Py_None)
1345 value = default_value;
1346 else {
1347 value = PyDict_GetItem(self->extra->attrib, key);
1348 if (!value)
1349 value = default_value;
1350 }
1351
1352 Py_INCREF(value);
1353 return value;
1354}
1355
Serhiy Storchakacb985562015-05-04 15:32:48 +03001356/*[clinic input]
1357_elementtree.Element.getchildren
1358
1359[clinic start generated code]*/
1360
1361static PyObject *
1362_elementtree_Element_getchildren_impl(ElementObject *self)
1363/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001364{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001365 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001366 PyObject* list;
1367
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001368 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1369 "This method will be removed in future versions. "
1370 "Use 'list(elem)' or iteration over elem instead.",
1371 1) < 0) {
1372 return NULL;
1373 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001374
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001375 if (!self->extra)
1376 return PyList_New(0);
1377
1378 list = PyList_New(self->extra->length);
1379 if (!list)
1380 return NULL;
1381
1382 for (i = 0; i < self->extra->length; i++) {
1383 PyObject* item = self->extra->children[i];
1384 Py_INCREF(item);
1385 PyList_SET_ITEM(list, i, item);
1386 }
1387
1388 return list;
1389}
1390
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001391
Eli Bendersky64d11e62012-06-15 07:42:50 +03001392static PyObject *
1393create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1394
1395
Serhiy Storchakacb985562015-05-04 15:32:48 +03001396/*[clinic input]
1397_elementtree.Element.iter
1398
1399 tag: object = None
1400
1401[clinic start generated code]*/
1402
Eli Bendersky64d11e62012-06-15 07:42:50 +03001403static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001404_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1405/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001406{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001407 if (PyUnicode_Check(tag)) {
1408 if (PyUnicode_READY(tag) < 0)
1409 return NULL;
1410 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1411 tag = Py_None;
1412 }
1413 else if (PyBytes_Check(tag)) {
1414 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1415 tag = Py_None;
1416 }
1417
Eli Bendersky64d11e62012-06-15 07:42:50 +03001418 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001419}
1420
1421
Serhiy Storchakacb985562015-05-04 15:32:48 +03001422/*[clinic input]
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001423_elementtree.Element.getiterator
1424
1425 tag: object = None
1426
1427[clinic start generated code]*/
1428
1429static PyObject *
1430_elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1431/*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1432{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03001433 if (PyErr_WarnEx(PyExc_DeprecationWarning,
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001434 "This method will be removed in future versions. "
1435 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1436 1) < 0) {
1437 return NULL;
1438 }
1439 return _elementtree_Element_iter_impl(self, tag);
1440}
1441
1442
1443/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03001444_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001445
Serhiy Storchakacb985562015-05-04 15:32:48 +03001446[clinic start generated code]*/
1447
1448static PyObject *
1449_elementtree_Element_itertext_impl(ElementObject *self)
1450/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1451{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001452 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001453}
1454
Eli Bendersky64d11e62012-06-15 07:42:50 +03001455
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001456static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001457element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001458{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001459 ElementObject* self = (ElementObject*) self_;
1460
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001461 if (!self->extra || index < 0 || index >= self->extra->length) {
1462 PyErr_SetString(
1463 PyExc_IndexError,
1464 "child index out of range"
1465 );
1466 return NULL;
1467 }
1468
1469 Py_INCREF(self->extra->children[index]);
1470 return self->extra->children[index];
1471}
1472
Serhiy Storchakacb985562015-05-04 15:32:48 +03001473/*[clinic input]
1474_elementtree.Element.insert
1475
1476 index: Py_ssize_t
1477 subelement: object(subclass_of='&Element_Type')
1478 /
1479
1480[clinic start generated code]*/
1481
1482static PyObject *
1483_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1484 PyObject *subelement)
1485/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001486{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001487 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001488
Victor Stinner5f0af232013-07-11 23:01:36 +02001489 if (!self->extra) {
1490 if (create_extra(self, NULL) < 0)
1491 return NULL;
1492 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001493
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001494 if (index < 0) {
1495 index += self->extra->length;
1496 if (index < 0)
1497 index = 0;
1498 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001499 if (index > self->extra->length)
1500 index = self->extra->length;
1501
1502 if (element_resize(self, 1) < 0)
1503 return NULL;
1504
1505 for (i = self->extra->length; i > index; i--)
1506 self->extra->children[i] = self->extra->children[i-1];
1507
Serhiy Storchakacb985562015-05-04 15:32:48 +03001508 Py_INCREF(subelement);
1509 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001510
1511 self->extra->length++;
1512
1513 Py_RETURN_NONE;
1514}
1515
Serhiy Storchakacb985562015-05-04 15:32:48 +03001516/*[clinic input]
1517_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001518
Serhiy Storchakacb985562015-05-04 15:32:48 +03001519[clinic start generated code]*/
1520
1521static PyObject *
1522_elementtree_Element_items_impl(ElementObject *self)
1523/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1524{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001525 if (!self->extra || self->extra->attrib == Py_None)
1526 return PyList_New(0);
1527
1528 return PyDict_Items(self->extra->attrib);
1529}
1530
Serhiy Storchakacb985562015-05-04 15:32:48 +03001531/*[clinic input]
1532_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001533
Serhiy Storchakacb985562015-05-04 15:32:48 +03001534[clinic start generated code]*/
1535
1536static PyObject *
1537_elementtree_Element_keys_impl(ElementObject *self)
1538/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1539{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001540 if (!self->extra || self->extra->attrib == Py_None)
1541 return PyList_New(0);
1542
1543 return PyDict_Keys(self->extra->attrib);
1544}
1545
Martin v. Löwis18e16552006-02-15 17:27:45 +00001546static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001547element_length(ElementObject* self)
1548{
1549 if (!self->extra)
1550 return 0;
1551
1552 return self->extra->length;
1553}
1554
Serhiy Storchakacb985562015-05-04 15:32:48 +03001555/*[clinic input]
1556_elementtree.Element.makeelement
1557
1558 tag: object
1559 attrib: object
1560 /
1561
1562[clinic start generated code]*/
1563
1564static PyObject *
1565_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1566 PyObject *attrib)
1567/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001568{
1569 PyObject* elem;
1570
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001571 attrib = PyDict_Copy(attrib);
1572 if (!attrib)
1573 return NULL;
1574
Eli Bendersky092af1f2012-03-04 07:14:03 +02001575 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001576
1577 Py_DECREF(attrib);
1578
1579 return elem;
1580}
1581
Serhiy Storchakacb985562015-05-04 15:32:48 +03001582/*[clinic input]
1583_elementtree.Element.remove
1584
1585 subelement: object(subclass_of='&Element_Type')
1586 /
1587
1588[clinic start generated code]*/
1589
1590static PyObject *
1591_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1592/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001593{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001594 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001595 int rc;
1596 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001597
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001598 if (!self->extra) {
1599 /* element has no children, so raise exception */
1600 PyErr_SetString(
1601 PyExc_ValueError,
1602 "list.remove(x): x not in list"
1603 );
1604 return NULL;
1605 }
1606
1607 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001608 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001609 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001610 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001611 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001612 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001613 if (rc < 0)
1614 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001615 }
1616
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001617 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001618 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001619 PyErr_SetString(
1620 PyExc_ValueError,
1621 "list.remove(x): x not in list"
1622 );
1623 return NULL;
1624 }
1625
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001626 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001627
1628 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001629 for (; i < self->extra->length; i++)
1630 self->extra->children[i] = self->extra->children[i+1];
1631
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001632 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001633 Py_RETURN_NONE;
1634}
1635
1636static PyObject*
1637element_repr(ElementObject* self)
1638{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001639 int status;
1640
1641 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001642 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001643
1644 status = Py_ReprEnter((PyObject *)self);
1645 if (status == 0) {
1646 PyObject *res;
1647 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1648 Py_ReprLeave((PyObject *)self);
1649 return res;
1650 }
1651 if (status > 0)
1652 PyErr_Format(PyExc_RuntimeError,
1653 "reentrant call inside %s.__repr__",
1654 Py_TYPE(self)->tp_name);
1655 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001656}
1657
Serhiy Storchakacb985562015-05-04 15:32:48 +03001658/*[clinic input]
1659_elementtree.Element.set
1660
1661 key: object
1662 value: object
1663 /
1664
1665[clinic start generated code]*/
1666
1667static PyObject *
1668_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1669 PyObject *value)
1670/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001671{
1672 PyObject* attrib;
1673
Victor Stinner5f0af232013-07-11 23:01:36 +02001674 if (!self->extra) {
1675 if (create_extra(self, NULL) < 0)
1676 return NULL;
1677 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001678
1679 attrib = element_get_attrib(self);
1680 if (!attrib)
1681 return NULL;
1682
1683 if (PyDict_SetItem(attrib, key, value) < 0)
1684 return NULL;
1685
1686 Py_RETURN_NONE;
1687}
1688
1689static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001690element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001691{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001692 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001693 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001694 PyObject* old;
1695
1696 if (!self->extra || index < 0 || index >= self->extra->length) {
1697 PyErr_SetString(
1698 PyExc_IndexError,
1699 "child assignment index out of range");
1700 return -1;
1701 }
1702
1703 old = self->extra->children[index];
1704
1705 if (item) {
1706 Py_INCREF(item);
1707 self->extra->children[index] = item;
1708 } else {
1709 self->extra->length--;
1710 for (i = index; i < self->extra->length; i++)
1711 self->extra->children[i] = self->extra->children[i+1];
1712 }
1713
1714 Py_DECREF(old);
1715
1716 return 0;
1717}
1718
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001719static PyObject*
1720element_subscr(PyObject* self_, PyObject* item)
1721{
1722 ElementObject* self = (ElementObject*) self_;
1723
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001724 if (PyIndex_Check(item)) {
1725 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001726
1727 if (i == -1 && PyErr_Occurred()) {
1728 return NULL;
1729 }
1730 if (i < 0 && self->extra)
1731 i += self->extra->length;
1732 return element_getitem(self_, i);
1733 }
1734 else if (PySlice_Check(item)) {
1735 Py_ssize_t start, stop, step, slicelen, cur, i;
1736 PyObject* list;
1737
1738 if (!self->extra)
1739 return PyList_New(0);
1740
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001741 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001742 return NULL;
1743 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001744 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1745 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001746
1747 if (slicelen <= 0)
1748 return PyList_New(0);
1749 else {
1750 list = PyList_New(slicelen);
1751 if (!list)
1752 return NULL;
1753
1754 for (cur = start, i = 0; i < slicelen;
1755 cur += step, i++) {
1756 PyObject* item = self->extra->children[cur];
1757 Py_INCREF(item);
1758 PyList_SET_ITEM(list, i, item);
1759 }
1760
1761 return list;
1762 }
1763 }
1764 else {
1765 PyErr_SetString(PyExc_TypeError,
1766 "element indices must be integers");
1767 return NULL;
1768 }
1769}
1770
1771static int
1772element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1773{
1774 ElementObject* self = (ElementObject*) self_;
1775
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001776 if (PyIndex_Check(item)) {
1777 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001778
1779 if (i == -1 && PyErr_Occurred()) {
1780 return -1;
1781 }
1782 if (i < 0 && self->extra)
1783 i += self->extra->length;
1784 return element_setitem(self_, i, value);
1785 }
1786 else if (PySlice_Check(item)) {
1787 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1788
1789 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001790 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001791
Victor Stinner5f0af232013-07-11 23:01:36 +02001792 if (!self->extra) {
1793 if (create_extra(self, NULL) < 0)
1794 return -1;
1795 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001796
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001797 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001798 return -1;
1799 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001800 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1801 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001802
Eli Bendersky865756a2012-03-09 13:38:15 +02001803 if (value == NULL) {
1804 /* Delete slice */
1805 size_t cur;
1806 Py_ssize_t i;
1807
1808 if (slicelen <= 0)
1809 return 0;
1810
1811 /* Since we're deleting, the direction of the range doesn't matter,
1812 * so for simplicity make it always ascending.
1813 */
1814 if (step < 0) {
1815 stop = start + 1;
1816 start = stop + step * (slicelen - 1) - 1;
1817 step = -step;
1818 }
1819
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001820 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001821
1822 /* recycle is a list that will contain all the children
1823 * scheduled for removal.
1824 */
1825 if (!(recycle = PyList_New(slicelen))) {
1826 PyErr_NoMemory();
1827 return -1;
1828 }
1829
1830 /* This loop walks over all the children that have to be deleted,
1831 * with cur pointing at them. num_moved is the amount of children
1832 * until the next deleted child that have to be "shifted down" to
1833 * occupy the deleted's places.
1834 * Note that in the ith iteration, shifting is done i+i places down
1835 * because i children were already removed.
1836 */
1837 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1838 /* Compute how many children have to be moved, clipping at the
1839 * list end.
1840 */
1841 Py_ssize_t num_moved = step - 1;
1842 if (cur + step >= (size_t)self->extra->length) {
1843 num_moved = self->extra->length - cur - 1;
1844 }
1845
1846 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1847
1848 memmove(
1849 self->extra->children + cur - i,
1850 self->extra->children + cur + 1,
1851 num_moved * sizeof(PyObject *));
1852 }
1853
1854 /* Leftover "tail" after the last removed child */
1855 cur = start + (size_t)slicelen * step;
1856 if (cur < (size_t)self->extra->length) {
1857 memmove(
1858 self->extra->children + cur - slicelen,
1859 self->extra->children + cur,
1860 (self->extra->length - cur) * sizeof(PyObject *));
1861 }
1862
1863 self->extra->length -= slicelen;
1864
1865 /* Discard the recycle list with all the deleted sub-elements */
1866 Py_XDECREF(recycle);
1867 return 0;
1868 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001869
1870 /* A new slice is actually being assigned */
1871 seq = PySequence_Fast(value, "");
1872 if (!seq) {
1873 PyErr_Format(
1874 PyExc_TypeError,
1875 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1876 );
1877 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001878 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001879 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001880
1881 if (step != 1 && newlen != slicelen)
1882 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001883 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001884 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001885 "attempt to assign sequence of size %zd "
1886 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001887 newlen, slicelen
1888 );
1889 return -1;
1890 }
1891
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001892 /* Resize before creating the recycle bin, to prevent refleaks. */
1893 if (newlen > slicelen) {
1894 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001895 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001896 return -1;
1897 }
1898 }
1899
1900 if (slicelen > 0) {
1901 /* to avoid recursive calls to this method (via decref), move
1902 old items to the recycle bin here, and get rid of them when
1903 we're done modifying the element */
1904 recycle = PyList_New(slicelen);
1905 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001906 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001907 return -1;
1908 }
1909 for (cur = start, i = 0; i < slicelen;
1910 cur += step, i++)
1911 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1912 }
1913
1914 if (newlen < slicelen) {
1915 /* delete slice */
1916 for (i = stop; i < self->extra->length; i++)
1917 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1918 } else if (newlen > slicelen) {
1919 /* insert slice */
1920 for (i = self->extra->length-1; i >= stop; i--)
1921 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1922 }
1923
1924 /* replace the slice */
1925 for (cur = start, i = 0; i < newlen;
1926 cur += step, i++) {
1927 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1928 Py_INCREF(element);
1929 self->extra->children[cur] = element;
1930 }
1931
1932 self->extra->length += newlen - slicelen;
1933
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001934 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001935
1936 /* discard the recycle bin, and everything in it */
1937 Py_XDECREF(recycle);
1938
1939 return 0;
1940 }
1941 else {
1942 PyErr_SetString(PyExc_TypeError,
1943 "element indices must be integers");
1944 return -1;
1945 }
1946}
1947
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001948static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001949element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001950{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001951 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001952 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001953 return res;
1954}
1955
Serhiy Storchakadde08152015-11-25 15:28:13 +02001956static PyObject*
1957element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001958{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001959 PyObject *res = element_get_text(self);
1960 Py_XINCREF(res);
1961 return res;
1962}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001963
Serhiy Storchakadde08152015-11-25 15:28:13 +02001964static PyObject*
1965element_tail_getter(ElementObject *self, void *closure)
1966{
1967 PyObject *res = element_get_tail(self);
1968 Py_XINCREF(res);
1969 return res;
1970}
1971
1972static PyObject*
1973element_attrib_getter(ElementObject *self, void *closure)
1974{
1975 PyObject *res;
1976 if (!self->extra) {
1977 if (create_extra(self, NULL) < 0)
1978 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001979 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001980 res = element_get_attrib(self);
1981 Py_XINCREF(res);
1982 return res;
1983}
Victor Stinner4d463432013-07-11 23:05:03 +02001984
/* macro for setter validation: a NULL value means "delete attribute",
 * which the element setters refuse with AttributeError.  The macro
 * returns -1 from the enclosing function in that case.  It is wrapped
 * in do { } while (0) so that it behaves as a single statement and must
 * be followed by a semicolon.
 */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
1993
Serhiy Storchakadde08152015-11-25 15:28:13 +02001994static int
1995element_tag_setter(ElementObject *self, PyObject *value, void *closure)
1996{
1997 _VALIDATE_ATTR_VALUE(value);
1998 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03001999 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002000 return 0;
2001}
2002
2003static int
2004element_text_setter(ElementObject *self, PyObject *value, void *closure)
2005{
2006 _VALIDATE_ATTR_VALUE(value);
2007 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002008 _set_joined_ptr(&self->text, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002009 return 0;
2010}
2011
2012static int
2013element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2014{
2015 _VALIDATE_ATTR_VALUE(value);
2016 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002017 _set_joined_ptr(&self->tail, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002018 return 0;
2019}
2020
2021static int
2022element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2023{
2024 _VALIDATE_ATTR_VALUE(value);
2025 if (!self->extra) {
2026 if (create_extra(self, NULL) < 0)
2027 return -1;
2028 }
2029 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002030 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002031 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002032}
2033
2034static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002035 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002036 0, /* sq_concat */
2037 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002038 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002039 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002040 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002041 0,
2042};
2043
Eli Bendersky64d11e62012-06-15 07:42:50 +03002044/******************************* Element iterator ****************************/
2045
/* ElementIterObject represents the iteration state over an XML element in
 * pre-order traversal. To keep track of which sub-element should be returned
 * next, a stack of parents is maintained. This is a standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
 * Each stack item contains the saved parent to which we should return after
 * the current one is exhausted, and the next child to examine in that parent.
 */
typedef struct ParentLocator_t {
    ElementObject *parent;      /* element whose children are being walked */
    Py_ssize_t child_index;     /* index of the next child to examine */
} ParentLocator;
2058
/* Iterator object shared by the element and text iterations (see the
 * gettext flag).
 */
typedef struct {
    PyObject_HEAD
    ParentLocator *parent_stack;    /* array holding the stack of parents */
    Py_ssize_t parent_stack_used;   /* number of stack entries in use */
    Py_ssize_t parent_stack_size;   /* number of entries allocated */
    ElementObject *root_element;    /* start element; NULL once consumed */
    PyObject *sought_tag;           /* tag to match; Py_None matches all */
    int gettext;                    /* nonzero: yield text, not elements */
} ElementIterObject;
2068
2069
2070static void
2071elementiter_dealloc(ElementIterObject *it)
2072{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002073 Py_ssize_t i = it->parent_stack_used;
2074 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002075 /* bpo-31095: UnTrack is needed before calling any callbacks */
2076 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002077 while (i--)
2078 Py_XDECREF(it->parent_stack[i].parent);
2079 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002080
2081 Py_XDECREF(it->sought_tag);
2082 Py_XDECREF(it->root_element);
2083
Eli Bendersky64d11e62012-06-15 07:42:50 +03002084 PyObject_GC_Del(it);
2085}
2086
2087static int
2088elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2089{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002090 Py_ssize_t i = it->parent_stack_used;
2091 while (i--)
2092 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002093
2094 Py_VISIT(it->root_element);
2095 Py_VISIT(it->sought_tag);
2096 return 0;
2097}
2098
2099/* Helper function for elementiter_next. Add a new parent to the parent stack.
2100 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002101static int
2102parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002103{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002104 ParentLocator *item;
2105
2106 if (it->parent_stack_used >= it->parent_stack_size) {
2107 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2108 ParentLocator *parent_stack = it->parent_stack;
2109 PyMem_Resize(parent_stack, ParentLocator, new_size);
2110 if (parent_stack == NULL)
2111 return -1;
2112 it->parent_stack = parent_stack;
2113 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002114 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002115 item = it->parent_stack + it->parent_stack_used++;
2116 Py_INCREF(parent);
2117 item->parent = parent;
2118 item->child_index = 0;
2119 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002120}
2121
2122static PyObject *
2123elementiter_next(ElementIterObject *it)
2124{
2125 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002126 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002127 * A short note on gettext: this function serves both the iter() and
2128 * itertext() methods to avoid code duplication. However, there are a few
2129 * small differences in the way these iterations work. Namely:
2130 * - itertext() only yields text from nodes that have it, and continues
2131 * iterating when a node doesn't have text (so it doesn't return any
2132 * node like iter())
2133 * - itertext() also has to handle tail, after finishing with all the
2134 * children of a node.
2135 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002136 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002137 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002138 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002139
2140 while (1) {
2141 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002142 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002143 * iterator is exhausted.
2144 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002145 if (!it->parent_stack_used) {
2146 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002147 PyErr_SetNone(PyExc_StopIteration);
2148 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002149 }
2150
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002151 elem = it->root_element; /* steals a reference */
2152 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002153 }
2154 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002155 /* See if there are children left to traverse in the current parent. If
2156 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002157 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002158 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2159 Py_ssize_t child_index = item->child_index;
2160 ElementObjectExtra *extra;
2161 elem = item->parent;
2162 extra = elem->extra;
2163 if (!extra || child_index >= extra->length) {
2164 it->parent_stack_used--;
2165 /* Note that extra condition on it->parent_stack_used here;
2166 * this is because itertext() is supposed to only return *inner*
2167 * text, not text following the element it began iteration with.
2168 */
2169 if (it->gettext && it->parent_stack_used) {
2170 text = element_get_tail(elem);
2171 goto gettext;
2172 }
2173 Py_DECREF(elem);
2174 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002175 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002176
Serhiy Storchakab11c5662018-10-14 10:32:19 +03002177 if (!Element_Check(extra->children[child_index])) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002178 PyErr_Format(PyExc_AttributeError,
2179 "'%.100s' object has no attribute 'iter'",
2180 Py_TYPE(extra->children[child_index])->tp_name);
2181 return NULL;
2182 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002183 elem = (ElementObject *)extra->children[child_index];
2184 item->child_index++;
2185 Py_INCREF(elem);
2186 }
2187
2188 if (parent_stack_push_new(it, elem) < 0) {
2189 Py_DECREF(elem);
2190 PyErr_NoMemory();
2191 return NULL;
2192 }
2193 if (it->gettext) {
2194 text = element_get_text(elem);
2195 goto gettext;
2196 }
2197
2198 if (it->sought_tag == Py_None)
2199 return (PyObject *)elem;
2200
2201 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2202 if (rc > 0)
2203 return (PyObject *)elem;
2204
2205 Py_DECREF(elem);
2206 if (rc < 0)
2207 return NULL;
2208 continue;
2209
2210gettext:
2211 if (!text) {
2212 Py_DECREF(elem);
2213 return NULL;
2214 }
2215 if (text == Py_None) {
2216 Py_DECREF(elem);
2217 }
2218 else {
2219 Py_INCREF(text);
2220 Py_DECREF(elem);
2221 rc = PyObject_IsTrue(text);
2222 if (rc > 0)
2223 return text;
2224 Py_DECREF(text);
2225 if (rc < 0)
2226 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002227 }
2228 }
2229
2230 return NULL;
2231}
2232
2233
2234static PyTypeObject ElementIter_Type = {
2235 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002236 /* Using the module's name since the pure-Python implementation does not
2237 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002238 "_elementtree._element_iterator", /* tp_name */
2239 sizeof(ElementIterObject), /* tp_basicsize */
2240 0, /* tp_itemsize */
2241 /* methods */
2242 (destructor)elementiter_dealloc, /* tp_dealloc */
2243 0, /* tp_print */
2244 0, /* tp_getattr */
2245 0, /* tp_setattr */
2246 0, /* tp_reserved */
2247 0, /* tp_repr */
2248 0, /* tp_as_number */
2249 0, /* tp_as_sequence */
2250 0, /* tp_as_mapping */
2251 0, /* tp_hash */
2252 0, /* tp_call */
2253 0, /* tp_str */
2254 0, /* tp_getattro */
2255 0, /* tp_setattro */
2256 0, /* tp_as_buffer */
2257 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2258 0, /* tp_doc */
2259 (traverseproc)elementiter_traverse, /* tp_traverse */
2260 0, /* tp_clear */
2261 0, /* tp_richcompare */
2262 0, /* tp_weaklistoffset */
2263 PyObject_SelfIter, /* tp_iter */
2264 (iternextfunc)elementiter_next, /* tp_iternext */
2265 0, /* tp_methods */
2266 0, /* tp_members */
2267 0, /* tp_getset */
2268 0, /* tp_base */
2269 0, /* tp_dict */
2270 0, /* tp_descr_get */
2271 0, /* tp_descr_set */
2272 0, /* tp_dictoffset */
2273 0, /* tp_init */
2274 0, /* tp_alloc */
2275 0, /* tp_new */
2276};
2277
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002278#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002279
2280static PyObject *
2281create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2282{
2283 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002284
2285 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2286 if (!it)
2287 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002288
Victor Stinner4d463432013-07-11 23:05:03 +02002289 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002290 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002291 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002292 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002293 it->root_element = self;
2294
Eli Bendersky64d11e62012-06-15 07:42:50 +03002295 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002296
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002297 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002298 if (it->parent_stack == NULL) {
2299 Py_DECREF(it);
2300 PyErr_NoMemory();
2301 return NULL;
2302 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002303 it->parent_stack_used = 0;
2304 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002305
Eli Bendersky64d11e62012-06-15 07:42:50 +03002306 return (PyObject *)it;
2307}
2308
2309
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002310/* ==================================================================== */
2311/* the tree builder type */
2312
/* State of a tree builder object. */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    PyObject *element_factory; /* set by __init__ when one is passed; NULL otherwise */

    /* element tracing */
    PyObject *events_append; /* the append method of the list of events, or NULL */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
} TreeBuilderObject;

/* True if op's type is exactly TreeBuilder_Type (subclasses excluded). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002337
2338/* -------------------------------------------------------------------- */
2339/* constructor and destructor */
2340
Eli Bendersky58d548d2012-05-29 15:45:16 +03002341static PyObject *
2342treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002343{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002344 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2345 if (t != NULL) {
2346 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002347
Eli Bendersky58d548d2012-05-29 15:45:16 +03002348 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002349 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002350 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002351 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002352
Eli Bendersky58d548d2012-05-29 15:45:16 +03002353 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002354 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002355 t->stack = PyList_New(20);
2356 if (!t->stack) {
2357 Py_DECREF(t->this);
2358 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002359 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002360 return NULL;
2361 }
2362 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002363
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002364 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002365 t->start_event_obj = t->end_event_obj = NULL;
2366 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2367 }
2368 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002369}
2370
Serhiy Storchakacb985562015-05-04 15:32:48 +03002371/*[clinic input]
2372_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002373
Serhiy Storchakacb985562015-05-04 15:32:48 +03002374 element_factory: object = NULL
2375
2376[clinic start generated code]*/
2377
2378static int
2379_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2380 PyObject *element_factory)
2381/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2382{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002383 if (element_factory) {
2384 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002385 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002386 }
2387
Eli Bendersky58d548d2012-05-29 15:45:16 +03002388 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002389}
2390
Eli Bendersky48d358b2012-05-30 17:57:50 +03002391static int
2392treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2393{
2394 Py_VISIT(self->root);
2395 Py_VISIT(self->this);
2396 Py_VISIT(self->last);
2397 Py_VISIT(self->data);
2398 Py_VISIT(self->stack);
2399 Py_VISIT(self->element_factory);
2400 return 0;
2401}
2402
/* tp_clear: release every object reference held by the builder and reset
 * the fields to NULL.  Also called from treebuilder_dealloc.
 * Always returns 0.
 */
static int
treebuilder_gc_clear(TreeBuilderObject *self)
{
    Py_CLEAR(self->end_ns_event_obj);
    Py_CLEAR(self->start_ns_event_obj);
    Py_CLEAR(self->end_event_obj);
    Py_CLEAR(self->start_event_obj);
    Py_CLEAR(self->events_append);
    Py_CLEAR(self->stack);
    Py_CLEAR(self->data);
    Py_CLEAR(self->last);
    Py_CLEAR(self->this);
    Py_CLEAR(self->element_factory);
    Py_CLEAR(self->root);
    return 0;
}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002419
Eli Bendersky48d358b2012-05-30 17:57:50 +03002420static void
2421treebuilder_dealloc(TreeBuilderObject *self)
2422{
2423 PyObject_GC_UnTrack(self);
2424 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002425 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002426}
2427
2428/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002429/* helpers for handling of arbitrary element-like objects */
2430
2431static int
Serhiy Storchaka576def02017-03-30 09:47:31 +03002432treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
Antoine Pitrouee329312012-10-04 19:53:29 +02002433 PyObject **dest, _Py_Identifier *name)
2434{
2435 if (Element_CheckExact(element)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002436 PyObject *tmp = JOIN_OBJ(*dest);
2437 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2438 *data = NULL;
2439 Py_DECREF(tmp);
Antoine Pitrouee329312012-10-04 19:53:29 +02002440 return 0;
2441 }
2442 else {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002443 PyObject *joined = list_join(*data);
Antoine Pitrouee329312012-10-04 19:53:29 +02002444 int r;
2445 if (joined == NULL)
2446 return -1;
2447 r = _PyObject_SetAttrId(element, name, joined);
2448 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002449 if (r < 0)
2450 return -1;
2451 Py_CLEAR(*data);
2452 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002453 }
2454}
2455
Serhiy Storchaka576def02017-03-30 09:47:31 +03002456LOCAL(int)
2457treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002458{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002459 PyObject *element = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002460
Serhiy Storchaka576def02017-03-30 09:47:31 +03002461 if (!self->data) {
2462 return 0;
2463 }
2464
2465 if (self->this == element) {
2466 _Py_IDENTIFIER(text);
2467 return treebuilder_set_element_text_or_tail(
2468 element, &self->data,
2469 &((ElementObject *) element)->text, &PyId_text);
2470 }
2471 else {
2472 _Py_IDENTIFIER(tail);
2473 return treebuilder_set_element_text_or_tail(
2474 element, &self->data,
2475 &((ElementObject *) element)->tail, &PyId_tail);
2476 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002477}
2478
2479static int
2480treebuilder_add_subelement(PyObject *element, PyObject *child)
2481{
2482 _Py_IDENTIFIER(append);
2483 if (Element_CheckExact(element)) {
2484 ElementObject *elem = (ElementObject *) element;
2485 return element_add_subelement(elem, child);
2486 }
2487 else {
2488 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002489 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002490 if (res == NULL)
2491 return -1;
2492 Py_DECREF(res);
2493 return 0;
2494 }
2495}
2496
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002497LOCAL(int)
2498treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2499 PyObject *node)
2500{
2501 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002502 PyObject *res;
2503 PyObject *event = PyTuple_Pack(2, action, node);
2504 if (event == NULL)
2505 return -1;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002506 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002507 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002508 if (res == NULL)
2509 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002510 Py_DECREF(res);
2511 }
2512 return 0;
2513}
2514
Antoine Pitrouee329312012-10-04 19:53:29 +02002515/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002516/* handlers */
2517
2518LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002519treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2520 PyObject* attrib)
2521{
2522 PyObject* node;
2523 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002524 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002525
Serhiy Storchaka576def02017-03-30 09:47:31 +03002526 if (treebuilder_flush_data(self) < 0) {
2527 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002528 }
2529
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002530 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002531 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002532 } else if (attrib == Py_None) {
2533 attrib = PyDict_New();
2534 if (!attrib)
2535 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002536 node = PyObject_CallFunctionObjArgs(self->element_factory,
2537 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002538 Py_DECREF(attrib);
2539 }
2540 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002541 node = PyObject_CallFunctionObjArgs(self->element_factory,
2542 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002543 }
2544 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002545 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002546 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002547
Antoine Pitrouee329312012-10-04 19:53:29 +02002548 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002549
2550 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002551 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002552 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002553 } else {
2554 if (self->root) {
2555 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002556 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002557 "multiple elements on top level"
2558 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002559 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002560 }
2561 Py_INCREF(node);
2562 self->root = node;
2563 }
2564
2565 if (self->index < PyList_GET_SIZE(self->stack)) {
2566 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002567 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002568 Py_INCREF(this);
2569 } else {
2570 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002571 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002572 }
2573 self->index++;
2574
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002575 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002576 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002577 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002578 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002579
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002580 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2581 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002582
2583 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002584
2585 error:
2586 Py_DECREF(node);
2587 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002588}
2589
2590LOCAL(PyObject*)
2591treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2592{
2593 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002594 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002595 /* ignore calls to data before the first call to start */
2596 Py_RETURN_NONE;
2597 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002598 /* store the first item as is */
2599 Py_INCREF(data); self->data = data;
2600 } else {
2601 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002602 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2603 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002604 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002605 /* expat often generates single character data sections; handle
2606 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002607 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2608 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002609 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002610 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002611 } else if (PyList_CheckExact(self->data)) {
2612 if (PyList_Append(self->data, data) < 0)
2613 return NULL;
2614 } else {
2615 PyObject* list = PyList_New(2);
2616 if (!list)
2617 return NULL;
2618 PyList_SET_ITEM(list, 0, self->data);
2619 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2620 self->data = list;
2621 }
2622 }
2623
2624 Py_RETURN_NONE;
2625}
2626
2627LOCAL(PyObject*)
2628treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2629{
2630 PyObject* item;
2631
Serhiy Storchaka576def02017-03-30 09:47:31 +03002632 if (treebuilder_flush_data(self) < 0) {
2633 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002634 }
2635
2636 if (self->index == 0) {
2637 PyErr_SetString(
2638 PyExc_IndexError,
2639 "pop from empty stack"
2640 );
2641 return NULL;
2642 }
2643
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002644 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002645 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002646 self->index--;
2647 self->this = PyList_GET_ITEM(self->stack, self->index);
2648 Py_INCREF(self->this);
2649 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002650
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002651 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2652 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002653
2654 Py_INCREF(self->last);
2655 return (PyObject*) self->last;
2656}
2657
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002658/* -------------------------------------------------------------------- */
2659/* methods (in alphabetical order) */
2660
Serhiy Storchakacb985562015-05-04 15:32:48 +03002661/*[clinic input]
2662_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002663
Serhiy Storchakacb985562015-05-04 15:32:48 +03002664 data: object
2665 /
2666
2667[clinic start generated code]*/
2668
2669static PyObject *
2670_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2671/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2672{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002673 return treebuilder_handle_data(self, data);
2674}
2675
Serhiy Storchakacb985562015-05-04 15:32:48 +03002676/*[clinic input]
2677_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002678
Serhiy Storchakacb985562015-05-04 15:32:48 +03002679 tag: object
2680 /
2681
2682[clinic start generated code]*/
2683
2684static PyObject *
2685_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2686/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2687{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002688 return treebuilder_handle_end(self, tag);
2689}
2690
2691LOCAL(PyObject*)
2692treebuilder_done(TreeBuilderObject* self)
2693{
2694 PyObject* res;
2695
2696 /* FIXME: check stack size? */
2697
2698 if (self->root)
2699 res = self->root;
2700 else
2701 res = Py_None;
2702
2703 Py_INCREF(res);
2704 return res;
2705}
2706
Serhiy Storchakacb985562015-05-04 15:32:48 +03002707/*[clinic input]
2708_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002709
Serhiy Storchakacb985562015-05-04 15:32:48 +03002710[clinic start generated code]*/
2711
2712static PyObject *
2713_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2714/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2715{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002716 return treebuilder_done(self);
2717}
2718
Serhiy Storchakacb985562015-05-04 15:32:48 +03002719/*[clinic input]
2720_elementtree.TreeBuilder.start
2721
2722 tag: object
2723 attrs: object = None
2724 /
2725
2726[clinic start generated code]*/
2727
2728static PyObject *
2729_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2730 PyObject *attrs)
2731/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002732{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002733 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002734}
2735
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002736/* ==================================================================== */
2737/* the expat interface */
2738
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002739#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002740#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002741
2742/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2743 * cached globally without being in per-module state.
2744 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002745static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002746#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002747
Eli Bendersky52467b12012-06-01 07:13:08 +03002748static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2749 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2750
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002751typedef struct {
2752 PyObject_HEAD
2753
2754 XML_Parser parser;
2755
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002756 PyObject *target;
2757 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002758
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002759 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002760
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002761 PyObject *handle_start;
2762 PyObject *handle_data;
2763 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002764
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002765 PyObject *handle_comment;
2766 PyObject *handle_pi;
2767 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002768
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002769 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002770
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002771} XMLParserObject;
2772
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002773/* helpers */
2774
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002775LOCAL(PyObject*)
2776makeuniversal(XMLParserObject* self, const char* string)
2777{
2778 /* convert a UTF-8 tag/attribute name from the expat parser
2779 to a universal name string */
2780
Antoine Pitrouc1948842012-10-01 23:40:37 +02002781 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002782 PyObject* key;
2783 PyObject* value;
2784
2785 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002786 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002787 if (!key)
2788 return NULL;
2789
2790 value = PyDict_GetItem(self->names, key);
2791
2792 if (value) {
2793 Py_INCREF(value);
2794 } else {
2795 /* new name. convert to universal name, and decode as
2796 necessary */
2797
2798 PyObject* tag;
2799 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002800 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002801
2802 /* look for namespace separator */
2803 for (i = 0; i < size; i++)
2804 if (string[i] == '}')
2805 break;
2806 if (i != size) {
2807 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002808 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002809 if (tag == NULL) {
2810 Py_DECREF(key);
2811 return NULL;
2812 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002813 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002814 p[0] = '{';
2815 memcpy(p+1, string, size);
2816 size++;
2817 } else {
2818 /* plain name; use key as tag */
2819 Py_INCREF(key);
2820 tag = key;
2821 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002822
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002823 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002824 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002825 value = PyUnicode_DecodeUTF8(p, size, "strict");
2826 Py_DECREF(tag);
2827 if (!value) {
2828 Py_DECREF(key);
2829 return NULL;
2830 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002831
2832 /* add to names dictionary */
2833 if (PyDict_SetItem(self->names, key, value) < 0) {
2834 Py_DECREF(key);
2835 Py_DECREF(value);
2836 return NULL;
2837 }
2838 }
2839
2840 Py_DECREF(key);
2841 return value;
2842}
2843
Eli Bendersky5b77d812012-03-16 08:20:05 +02002844/* Set the ParseError exception with the given parameters.
2845 * If message is not NULL, it's used as the error string. Otherwise, the
2846 * message string is the default for the given error_code.
2847*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002848static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002849expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2850 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002851{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002852 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002853 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002854
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002855 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002856 message ? message : EXPAT(ErrorString)(error_code),
2857 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002858 if (errmsg == NULL)
2859 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002860
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002861 error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002862 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002863 if (!error)
2864 return;
2865
Eli Bendersky5b77d812012-03-16 08:20:05 +02002866 /* Add code and position attributes */
2867 code = PyLong_FromLong((long)error_code);
2868 if (!code) {
2869 Py_DECREF(error);
2870 return;
2871 }
2872 if (PyObject_SetAttrString(error, "code", code) == -1) {
2873 Py_DECREF(error);
2874 Py_DECREF(code);
2875 return;
2876 }
2877 Py_DECREF(code);
2878
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002879 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002880 if (!position) {
2881 Py_DECREF(error);
2882 return;
2883 }
2884 if (PyObject_SetAttrString(error, "position", position) == -1) {
2885 Py_DECREF(error);
2886 Py_DECREF(position);
2887 return;
2888 }
2889 Py_DECREF(position);
2890
Eli Bendersky532d03e2013-08-10 08:00:39 -07002891 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002892 Py_DECREF(error);
2893}
2894
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002895/* -------------------------------------------------------------------- */
2896/* handlers */
2897
2898static void
2899expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2900 int data_len)
2901{
2902 PyObject* key;
2903 PyObject* value;
2904 PyObject* res;
2905
2906 if (data_len < 2 || data_in[0] != '&')
2907 return;
2908
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002909 if (PyErr_Occurred())
2910 return;
2911
Neal Norwitz0269b912007-08-08 06:56:02 +00002912 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002913 if (!key)
2914 return;
2915
2916 value = PyDict_GetItem(self->entity, key);
2917
2918 if (value) {
2919 if (TreeBuilder_CheckExact(self->target))
2920 res = treebuilder_handle_data(
2921 (TreeBuilderObject*) self->target, value
2922 );
2923 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002924 res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002925 else
2926 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002927 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002928 } else if (!PyErr_Occurred()) {
2929 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002930 char message[128] = "undefined entity ";
2931 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002932 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002933 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002934 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002935 EXPAT(GetErrorColumnNumber)(self->parser),
2936 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002937 );
2938 }
2939
2940 Py_DECREF(key);
2941}
2942
2943static void
2944expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2945 const XML_Char **attrib_in)
2946{
2947 PyObject* res;
2948 PyObject* tag;
2949 PyObject* attrib;
2950 int ok;
2951
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002952 if (PyErr_Occurred())
2953 return;
2954
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002955 /* tag name */
2956 tag = makeuniversal(self, tag_in);
2957 if (!tag)
2958 return; /* parser will look for errors */
2959
2960 /* attributes */
2961 if (attrib_in[0]) {
2962 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002963 if (!attrib) {
2964 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002965 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002966 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002967 while (attrib_in[0] && attrib_in[1]) {
2968 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002969 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002970 if (!key || !value) {
2971 Py_XDECREF(value);
2972 Py_XDECREF(key);
2973 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002974 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002975 return;
2976 }
2977 ok = PyDict_SetItem(attrib, key, value);
2978 Py_DECREF(value);
2979 Py_DECREF(key);
2980 if (ok < 0) {
2981 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002982 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002983 return;
2984 }
2985 attrib_in += 2;
2986 }
2987 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002988 Py_INCREF(Py_None);
2989 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002990 }
2991
2992 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002993 /* shortcut */
2994 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2995 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002996 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002997 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002998 if (attrib == Py_None) {
2999 Py_DECREF(attrib);
3000 attrib = PyDict_New();
3001 if (!attrib) {
3002 Py_DECREF(tag);
3003 return;
3004 }
3005 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003006 res = PyObject_CallFunctionObjArgs(self->handle_start,
3007 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003008 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003009 res = NULL;
3010
3011 Py_DECREF(tag);
3012 Py_DECREF(attrib);
3013
3014 Py_XDECREF(res);
3015}
3016
3017static void
3018expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3019 int data_len)
3020{
3021 PyObject* data;
3022 PyObject* res;
3023
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003024 if (PyErr_Occurred())
3025 return;
3026
Neal Norwitz0269b912007-08-08 06:56:02 +00003027 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003028 if (!data)
3029 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003030
3031 if (TreeBuilder_CheckExact(self->target))
3032 /* shortcut */
3033 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3034 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003035 res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003036 else
3037 res = NULL;
3038
3039 Py_DECREF(data);
3040
3041 Py_XDECREF(res);
3042}
3043
3044static void
3045expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3046{
3047 PyObject* tag;
3048 PyObject* res = NULL;
3049
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003050 if (PyErr_Occurred())
3051 return;
3052
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003053 if (TreeBuilder_CheckExact(self->target))
3054 /* shortcut */
3055 /* the standard tree builder doesn't look at the end tag */
3056 res = treebuilder_handle_end(
3057 (TreeBuilderObject*) self->target, Py_None
3058 );
3059 else if (self->handle_end) {
3060 tag = makeuniversal(self, tag_in);
3061 if (tag) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003062 res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003063 Py_DECREF(tag);
3064 }
3065 }
3066
3067 Py_XDECREF(res);
3068}
3069
3070static void
3071expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3072 const XML_Char *uri)
3073{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003074 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3075 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003076
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003077 if (PyErr_Occurred())
3078 return;
3079
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003080 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003081 return;
3082
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003083 if (!uri)
3084 uri = "";
3085 if (!prefix)
3086 prefix = "";
3087
3088 parcel = Py_BuildValue("ss", prefix, uri);
3089 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003090 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003091 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3092 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003093}
3094
3095static void
3096expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3097{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003098 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3099
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003100 if (PyErr_Occurred())
3101 return;
3102
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003103 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003104 return;
3105
3106 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003107}
3108
3109static void
3110expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3111{
3112 PyObject* comment;
3113 PyObject* res;
3114
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003115 if (PyErr_Occurred())
3116 return;
3117
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003118 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003119 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003120 if (comment) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003121 res = PyObject_CallFunctionObjArgs(self->handle_comment,
3122 comment, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003123 Py_XDECREF(res);
3124 Py_DECREF(comment);
3125 }
3126 }
3127}
3128
Eli Bendersky45839902013-01-13 05:14:47 -08003129static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003130expat_start_doctype_handler(XMLParserObject *self,
3131 const XML_Char *doctype_name,
3132 const XML_Char *sysid,
3133 const XML_Char *pubid,
3134 int has_internal_subset)
3135{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003136 _Py_IDENTIFIER(doctype);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003137 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003138 PyObject *res;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003139
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003140 if (PyErr_Occurred())
3141 return;
3142
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003143 doctype_name_obj = makeuniversal(self, doctype_name);
3144 if (!doctype_name_obj)
3145 return;
3146
3147 if (sysid) {
3148 sysid_obj = makeuniversal(self, sysid);
3149 if (!sysid_obj) {
3150 Py_DECREF(doctype_name_obj);
3151 return;
3152 }
3153 } else {
3154 Py_INCREF(Py_None);
3155 sysid_obj = Py_None;
3156 }
3157
3158 if (pubid) {
3159 pubid_obj = makeuniversal(self, pubid);
3160 if (!pubid_obj) {
3161 Py_DECREF(doctype_name_obj);
3162 Py_DECREF(sysid_obj);
3163 return;
3164 }
3165 } else {
3166 Py_INCREF(Py_None);
3167 pubid_obj = Py_None;
3168 }
3169
3170 /* If the target has a handler for doctype, call it. */
3171 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003172 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3173 doctype_name_obj, pubid_obj,
3174 sysid_obj, NULL);
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003175 Py_XDECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003176 }
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003177 else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3178 (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3179 "The doctype() method of XMLParser is ignored. "
3180 "Define doctype() method on the TreeBuilder target.",
3181 1);
Serhiy Storchakaee98e7b2018-07-25 14:52:45 +03003182 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003183 }
3184
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003185 Py_DECREF(doctype_name_obj);
3186 Py_DECREF(pubid_obj);
3187 Py_DECREF(sysid_obj);
3188}
3189
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003190static void
3191expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3192 const XML_Char* data_in)
3193{
3194 PyObject* target;
3195 PyObject* data;
3196 PyObject* res;
3197
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003198 if (PyErr_Occurred())
3199 return;
3200
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003201 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003202 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3203 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003204 if (target && data) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003205 res = PyObject_CallFunctionObjArgs(self->handle_pi,
3206 target, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003207 Py_XDECREF(res);
3208 Py_DECREF(data);
3209 Py_DECREF(target);
3210 } else {
3211 Py_XDECREF(data);
3212 Py_XDECREF(target);
3213 }
3214 }
3215}
3216
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003217/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003218
Eli Bendersky52467b12012-06-01 07:13:08 +03003219static PyObject *
3220xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003221{
Eli Bendersky52467b12012-06-01 07:13:08 +03003222 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3223 if (self) {
3224 self->parser = NULL;
3225 self->target = self->entity = self->names = NULL;
3226 self->handle_start = self->handle_data = self->handle_end = NULL;
3227 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003228 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003229 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003230 return (PyObject *)self;
3231}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003232
scoderc8d8e152017-09-14 22:00:03 +02003233static int
3234ignore_attribute_error(PyObject *value)
3235{
3236 if (value == NULL) {
3237 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3238 return -1;
3239 }
3240 PyErr_Clear();
3241 }
3242 return 0;
3243}
3244
Serhiy Storchakacb985562015-05-04 15:32:48 +03003245/*[clinic input]
3246_elementtree.XMLParser.__init__
3247
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003248 *
Serhiy Storchakacb985562015-05-04 15:32:48 +03003249 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003250 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003251
3252[clinic start generated code]*/
3253
Eli Bendersky52467b12012-06-01 07:13:08 +03003254static int
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003255_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3256 const char *encoding)
3257/*[clinic end generated code: output=3ae45ec6cdf344e4 input=96288fcba916cfce]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003258{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003259 self->entity = PyDict_New();
3260 if (!self->entity)
3261 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003262
Serhiy Storchakacb985562015-05-04 15:32:48 +03003263 self->names = PyDict_New();
3264 if (!self->names) {
3265 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003266 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003267 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003268
Serhiy Storchakacb985562015-05-04 15:32:48 +03003269 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3270 if (!self->parser) {
3271 Py_CLEAR(self->entity);
3272 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003273 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003274 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003275 }
Christian Heimescb5778f2018-09-18 14:38:58 +02003276 /* expat < 2.1.0 has no XML_SetHashSalt() */
3277 if (EXPAT(SetHashSalt) != NULL) {
3278 EXPAT(SetHashSalt)(self->parser,
3279 (unsigned long)_Py_HashSecret.expat.hashsalt);
3280 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003281
Eli Bendersky52467b12012-06-01 07:13:08 +03003282 if (target) {
3283 Py_INCREF(target);
3284 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003285 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003286 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003287 Py_CLEAR(self->entity);
3288 Py_CLEAR(self->names);
3289 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003290 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003291 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003292 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003293 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003294
Serhiy Storchakacb985562015-05-04 15:32:48 +03003295 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003296 if (ignore_attribute_error(self->handle_start)) {
3297 return -1;
3298 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003299 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003300 if (ignore_attribute_error(self->handle_data)) {
3301 return -1;
3302 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003303 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003304 if (ignore_attribute_error(self->handle_end)) {
3305 return -1;
3306 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003307 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003308 if (ignore_attribute_error(self->handle_comment)) {
3309 return -1;
3310 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003311 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003312 if (ignore_attribute_error(self->handle_pi)) {
3313 return -1;
3314 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003315 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003316 if (ignore_attribute_error(self->handle_close)) {
3317 return -1;
3318 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003319 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003320 if (ignore_attribute_error(self->handle_doctype)) {
3321 return -1;
3322 }
Eli Bendersky45839902013-01-13 05:14:47 -08003323
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003324 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003325 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003326 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003327 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003328 (XML_StartElementHandler) expat_start_handler,
3329 (XML_EndElementHandler) expat_end_handler
3330 );
3331 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003332 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003333 (XML_DefaultHandler) expat_default_handler
3334 );
3335 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003336 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003337 (XML_CharacterDataHandler) expat_data_handler
3338 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003339 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003340 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003341 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003342 (XML_CommentHandler) expat_comment_handler
3343 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003344 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003345 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003346 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003347 (XML_ProcessingInstructionHandler) expat_pi_handler
3348 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003349 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003350 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003351 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3352 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003353 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003354 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003355 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003356 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003357
Eli Bendersky52467b12012-06-01 07:13:08 +03003358 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003359}
3360
Eli Bendersky52467b12012-06-01 07:13:08 +03003361static int
3362xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3363{
3364 Py_VISIT(self->handle_close);
3365 Py_VISIT(self->handle_pi);
3366 Py_VISIT(self->handle_comment);
3367 Py_VISIT(self->handle_end);
3368 Py_VISIT(self->handle_data);
3369 Py_VISIT(self->handle_start);
3370
3371 Py_VISIT(self->target);
3372 Py_VISIT(self->entity);
3373 Py_VISIT(self->names);
3374
3375 return 0;
3376}
3377
3378static int
3379xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003380{
Victor Stinnere727d412017-09-18 05:29:37 -07003381 if (self->parser != NULL) {
3382 XML_Parser parser = self->parser;
3383 self->parser = NULL;
3384 EXPAT(ParserFree)(parser);
3385 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003386
Antoine Pitrouc1948842012-10-01 23:40:37 +02003387 Py_CLEAR(self->handle_close);
3388 Py_CLEAR(self->handle_pi);
3389 Py_CLEAR(self->handle_comment);
3390 Py_CLEAR(self->handle_end);
3391 Py_CLEAR(self->handle_data);
3392 Py_CLEAR(self->handle_start);
3393 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003394
Antoine Pitrouc1948842012-10-01 23:40:37 +02003395 Py_CLEAR(self->target);
3396 Py_CLEAR(self->entity);
3397 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003398
Eli Bendersky52467b12012-06-01 07:13:08 +03003399 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003400}
3401
Eli Bendersky52467b12012-06-01 07:13:08 +03003402static void
3403xmlparser_dealloc(XMLParserObject* self)
3404{
3405 PyObject_GC_UnTrack(self);
3406 xmlparser_gc_clear(self);
3407 Py_TYPE(self)->tp_free((PyObject *)self);
3408}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003409
3410LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003411expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003412{
3413 int ok;
3414
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003415 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003416 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3417
3418 if (PyErr_Occurred())
3419 return NULL;
3420
3421 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003422 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003423 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003424 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003425 EXPAT(GetErrorColumnNumber)(self->parser),
3426 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003427 );
3428 return NULL;
3429 }
3430
3431 Py_RETURN_NONE;
3432}
3433
Serhiy Storchakacb985562015-05-04 15:32:48 +03003434/*[clinic input]
3435_elementtree.XMLParser.close
3436
3437[clinic start generated code]*/
3438
3439static PyObject *
3440_elementtree_XMLParser_close_impl(XMLParserObject *self)
3441/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003442{
3443 /* end feeding data to parser */
3444
3445 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003446 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003447 if (!res)
3448 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003449
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003450 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003451 Py_DECREF(res);
3452 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003453 }
3454 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003455 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003456 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003457 }
3458 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003459 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003460 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003461}
3462
Serhiy Storchakacb985562015-05-04 15:32:48 +03003463/*[clinic input]
3464_elementtree.XMLParser.feed
3465
3466 data: object
3467 /
3468
3469[clinic start generated code]*/
3470
3471static PyObject *
3472_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3473/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003474{
3475 /* feed data to parser */
3476
Serhiy Storchakacb985562015-05-04 15:32:48 +03003477 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003478 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003479 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3480 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003481 return NULL;
3482 if (data_len > INT_MAX) {
3483 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3484 return NULL;
3485 }
3486 /* Explicitly set UTF-8 encoding. Return code ignored. */
3487 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003488 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003489 }
3490 else {
3491 Py_buffer view;
3492 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003493 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003494 return NULL;
3495 if (view.len > INT_MAX) {
3496 PyBuffer_Release(&view);
3497 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3498 return NULL;
3499 }
3500 res = expat_parse(self, view.buf, (int)view.len, 0);
3501 PyBuffer_Release(&view);
3502 return res;
3503 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003504}
3505
Serhiy Storchakacb985562015-05-04 15:32:48 +03003506/*[clinic input]
3507_elementtree.XMLParser._parse_whole
3508
3509 file: object
3510 /
3511
3512[clinic start generated code]*/
3513
3514static PyObject *
3515_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3516/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003517{
Eli Benderskya3699232013-05-19 18:47:23 -07003518 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003519 PyObject* reader;
3520 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003521 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003522 PyObject* res;
3523
Serhiy Storchakacb985562015-05-04 15:32:48 +03003524 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003525 if (!reader)
3526 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003527
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003528 /* read from open file object */
3529 for (;;) {
3530
3531 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3532
3533 if (!buffer) {
3534 /* read failed (e.g. due to KeyboardInterrupt) */
3535 Py_DECREF(reader);
3536 return NULL;
3537 }
3538
Eli Benderskyf996e772012-03-16 05:53:30 +02003539 if (PyUnicode_CheckExact(buffer)) {
3540 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003541 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003542 Py_DECREF(buffer);
3543 break;
3544 }
3545 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003546 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003547 if (!temp) {
3548 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003549 Py_DECREF(reader);
3550 return NULL;
3551 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003552 buffer = temp;
3553 }
3554 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003555 Py_DECREF(buffer);
3556 break;
3557 }
3558
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003559 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3560 Py_DECREF(buffer);
3561 Py_DECREF(reader);
3562 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3563 return NULL;
3564 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003565 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003566 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003567 );
3568
3569 Py_DECREF(buffer);
3570
3571 if (!res) {
3572 Py_DECREF(reader);
3573 return NULL;
3574 }
3575 Py_DECREF(res);
3576
3577 }
3578
3579 Py_DECREF(reader);
3580
3581 res = expat_parse(self, "", 0, 1);
3582
3583 if (res && TreeBuilder_CheckExact(self->target)) {
3584 Py_DECREF(res);
3585 return treebuilder_done((TreeBuilderObject*) self->target);
3586 }
3587
3588 return res;
3589}
3590
Serhiy Storchakacb985562015-05-04 15:32:48 +03003591/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03003592_elementtree.XMLParser._setevents
3593
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003594 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003595 events_to_report: object = None
3596 /
3597
3598[clinic start generated code]*/
3599
3600static PyObject *
3601_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3602 PyObject *events_queue,
3603 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003604/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003605{
3606 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003607 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003608 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003609 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003610
3611 if (!TreeBuilder_CheckExact(self->target)) {
3612 PyErr_SetString(
3613 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003614 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003615 "targets"
3616 );
3617 return NULL;
3618 }
3619
3620 target = (TreeBuilderObject*) self->target;
3621
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003622 events_append = PyObject_GetAttrString(events_queue, "append");
3623 if (events_append == NULL)
3624 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003625 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003626
3627 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003628 Py_CLEAR(target->start_event_obj);
3629 Py_CLEAR(target->end_event_obj);
3630 Py_CLEAR(target->start_ns_event_obj);
3631 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003632
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003633 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003634 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003635 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003636 Py_RETURN_NONE;
3637 }
3638
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003639 if (!(events_seq = PySequence_Fast(events_to_report,
3640 "events must be a sequence"))) {
3641 return NULL;
3642 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003643
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03003644 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003645 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02003646 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003647 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003648 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003649 } else if (PyBytes_Check(event_name_obj)) {
3650 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003651 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003652 if (event_name == NULL) {
3653 Py_DECREF(events_seq);
3654 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3655 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003656 }
3657
3658 Py_INCREF(event_name_obj);
3659 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003660 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003661 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003662 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003663 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003664 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003665 EXPAT(SetNamespaceDeclHandler)(
3666 self->parser,
3667 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3668 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3669 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003670 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003671 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003672 EXPAT(SetNamespaceDeclHandler)(
3673 self->parser,
3674 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3675 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3676 );
3677 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003678 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003679 Py_DECREF(events_seq);
3680 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003681 return NULL;
3682 }
3683 }
3684
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003685 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003686 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003687}
3688
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003689static PyMemberDef xmlparser_members[] = {
3690 {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
3691 {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
3692 {NULL}
3693};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003694
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003695static PyObject*
3696xmlparser_version_getter(XMLParserObject *self, void *closure)
3697{
3698 return PyUnicode_FromFormat(
3699 "Expat %d.%d.%d", XML_MAJOR_VERSION,
3700 XML_MINOR_VERSION, XML_MICRO_VERSION);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003701}
3702
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003703static PyGetSetDef xmlparser_getsetlist[] = {
3704 {"version", (getter)xmlparser_version_getter, NULL, NULL},
3705 {NULL},
3706};
3707
Serhiy Storchakacb985562015-05-04 15:32:48 +03003708#include "clinic/_elementtree.c.h"
3709
3710static PyMethodDef element_methods[] = {
3711
3712 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3713
3714 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3715 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3716
3717 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3718 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3719 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3720
3721 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3722 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3723 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3724 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3725
3726 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3727 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3728 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3729
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003730 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003731 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3732
3733 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3734 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3735
3736 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3737
3738 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3739 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3740 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3741 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3742 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3743
3744 {NULL, NULL}
3745};
3746
3747static PyMappingMethods element_as_mapping = {
3748 (lenfunc) element_length,
3749 (binaryfunc) element_subscr,
3750 (objobjargproc) element_ass_subscr,
3751};
3752
Serhiy Storchakadde08152015-11-25 15:28:13 +02003753static PyGetSetDef element_getsetlist[] = {
3754 {"tag",
3755 (getter)element_tag_getter,
3756 (setter)element_tag_setter,
3757 "A string identifying what kind of data this element represents"},
3758 {"text",
3759 (getter)element_text_getter,
3760 (setter)element_text_setter,
3761 "A string of text directly after the start tag, or None"},
3762 {"tail",
3763 (getter)element_tail_getter,
3764 (setter)element_tail_setter,
3765 "A string of text directly after the end tag, or None"},
3766 {"attrib",
3767 (getter)element_attrib_getter,
3768 (setter)element_attrib_setter,
3769 "A dictionary containing the element's attributes"},
3770 {NULL},
3771};
3772
Serhiy Storchakacb985562015-05-04 15:32:48 +03003773static PyTypeObject Element_Type = {
3774 PyVarObject_HEAD_INIT(NULL, 0)
3775 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3776 /* methods */
3777 (destructor)element_dealloc, /* tp_dealloc */
3778 0, /* tp_print */
3779 0, /* tp_getattr */
3780 0, /* tp_setattr */
3781 0, /* tp_reserved */
3782 (reprfunc)element_repr, /* tp_repr */
3783 0, /* tp_as_number */
3784 &element_as_sequence, /* tp_as_sequence */
3785 &element_as_mapping, /* tp_as_mapping */
3786 0, /* tp_hash */
3787 0, /* tp_call */
3788 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003789 PyObject_GenericGetAttr, /* tp_getattro */
3790 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003791 0, /* tp_as_buffer */
3792 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3793 /* tp_flags */
3794 0, /* tp_doc */
3795 (traverseproc)element_gc_traverse, /* tp_traverse */
3796 (inquiry)element_gc_clear, /* tp_clear */
3797 0, /* tp_richcompare */
3798 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3799 0, /* tp_iter */
3800 0, /* tp_iternext */
3801 element_methods, /* tp_methods */
3802 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003803 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003804 0, /* tp_base */
3805 0, /* tp_dict */
3806 0, /* tp_descr_get */
3807 0, /* tp_descr_set */
3808 0, /* tp_dictoffset */
3809 (initproc)element_init, /* tp_init */
3810 PyType_GenericAlloc, /* tp_alloc */
3811 element_new, /* tp_new */
3812 0, /* tp_free */
3813};
3814
3815static PyMethodDef treebuilder_methods[] = {
3816 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3817 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3818 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3819 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3820 {NULL, NULL}
3821};
3822
3823static PyTypeObject TreeBuilder_Type = {
3824 PyVarObject_HEAD_INIT(NULL, 0)
3825 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3826 /* methods */
3827 (destructor)treebuilder_dealloc, /* tp_dealloc */
3828 0, /* tp_print */
3829 0, /* tp_getattr */
3830 0, /* tp_setattr */
3831 0, /* tp_reserved */
3832 0, /* tp_repr */
3833 0, /* tp_as_number */
3834 0, /* tp_as_sequence */
3835 0, /* tp_as_mapping */
3836 0, /* tp_hash */
3837 0, /* tp_call */
3838 0, /* tp_str */
3839 0, /* tp_getattro */
3840 0, /* tp_setattro */
3841 0, /* tp_as_buffer */
3842 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3843 /* tp_flags */
3844 0, /* tp_doc */
3845 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3846 (inquiry)treebuilder_gc_clear, /* tp_clear */
3847 0, /* tp_richcompare */
3848 0, /* tp_weaklistoffset */
3849 0, /* tp_iter */
3850 0, /* tp_iternext */
3851 treebuilder_methods, /* tp_methods */
3852 0, /* tp_members */
3853 0, /* tp_getset */
3854 0, /* tp_base */
3855 0, /* tp_dict */
3856 0, /* tp_descr_get */
3857 0, /* tp_descr_set */
3858 0, /* tp_dictoffset */
3859 _elementtree_TreeBuilder___init__, /* tp_init */
3860 PyType_GenericAlloc, /* tp_alloc */
3861 treebuilder_new, /* tp_new */
3862 0, /* tp_free */
3863};
3864
3865static PyMethodDef xmlparser_methods[] = {
3866 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3867 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3868 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3869 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003870 {NULL, NULL}
3871};
3872
Neal Norwitz227b5332006-03-22 09:28:35 +00003873static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003874 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003875 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003876 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003877 (destructor)xmlparser_dealloc, /* tp_dealloc */
3878 0, /* tp_print */
3879 0, /* tp_getattr */
3880 0, /* tp_setattr */
3881 0, /* tp_reserved */
3882 0, /* tp_repr */
3883 0, /* tp_as_number */
3884 0, /* tp_as_sequence */
3885 0, /* tp_as_mapping */
3886 0, /* tp_hash */
3887 0, /* tp_call */
3888 0, /* tp_str */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003889 0, /* tp_getattro */
Eli Bendersky52467b12012-06-01 07:13:08 +03003890 0, /* tp_setattro */
3891 0, /* tp_as_buffer */
3892 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3893 /* tp_flags */
3894 0, /* tp_doc */
3895 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3896 (inquiry)xmlparser_gc_clear, /* tp_clear */
3897 0, /* tp_richcompare */
3898 0, /* tp_weaklistoffset */
3899 0, /* tp_iter */
3900 0, /* tp_iternext */
3901 xmlparser_methods, /* tp_methods */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003902 xmlparser_members, /* tp_members */
3903 xmlparser_getsetlist, /* tp_getset */
Eli Bendersky52467b12012-06-01 07:13:08 +03003904 0, /* tp_base */
3905 0, /* tp_dict */
3906 0, /* tp_descr_get */
3907 0, /* tp_descr_set */
3908 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003909 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003910 PyType_GenericAlloc, /* tp_alloc */
3911 xmlparser_new, /* tp_new */
3912 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003913};
3914
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003915/* ==================================================================== */
3916/* python module interface */
3917
3918static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003919 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003920 {NULL, NULL}
3921};
3922
Martin v. Löwis1a214512008-06-11 05:26:20 +00003923
Eli Bendersky532d03e2013-08-10 08:00:39 -07003924static struct PyModuleDef elementtreemodule = {
3925 PyModuleDef_HEAD_INIT,
3926 "_elementtree",
3927 NULL,
3928 sizeof(elementtreestate),
3929 _functions,
3930 NULL,
3931 elementtree_traverse,
3932 elementtree_clear,
3933 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003934};
3935
Neal Norwitzf6657e62006-12-28 04:47:50 +00003936PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003937PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003938{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003939 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003940 elementtreestate *st;
3941
3942 m = PyState_FindModule(&elementtreemodule);
3943 if (m) {
3944 Py_INCREF(m);
3945 return m;
3946 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003947
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003948 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003949 if (PyType_Ready(&ElementIter_Type) < 0)
3950 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003951 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003952 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003953 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003954 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003955 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003956 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003957
Eli Bendersky532d03e2013-08-10 08:00:39 -07003958 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003959 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003960 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003961 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003962
Eli Bendersky828efde2012-04-05 05:40:58 +03003963 if (!(temp = PyImport_ImportModule("copy")))
3964 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003965 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003966 Py_XDECREF(temp);
3967
Victor Stinnerb136f112017-07-10 22:28:02 +02003968 if (st->deepcopy_obj == NULL) {
3969 return NULL;
3970 }
3971
3972 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07003973 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003974 return NULL;
3975
Eli Bendersky20d41742012-06-01 09:48:37 +03003976 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003977 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3978 if (expat_capi) {
3979 /* check that it's usable */
3980 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02003981 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003982 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3983 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003984 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003985 PyErr_SetString(PyExc_ImportError,
3986 "pyexpat version is incompatible");
3987 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003988 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003989 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003990 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003991 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003992
Eli Bendersky532d03e2013-08-10 08:00:39 -07003993 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003994 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003995 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003996 Py_INCREF(st->parseerror_obj);
3997 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003998
Eli Bendersky092af1f2012-03-04 07:14:03 +02003999 Py_INCREF((PyObject *)&Element_Type);
4000 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4001
Eli Bendersky58d548d2012-05-29 15:45:16 +03004002 Py_INCREF((PyObject *)&TreeBuilder_Type);
4003 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4004
Eli Bendersky52467b12012-06-01 07:13:08 +03004005 Py_INCREF((PyObject *)&XMLParser_Type);
4006 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004007
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004008 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004009}